import type { TTSVoiceInfo } from '@/lib/audio/types';
// Identifier for the VoxCPM TTS provider within the app's audio subsystem.
export const VOXCPM_TTS_PROVIDER_ID = 'voxcpm-tts' as const;
// Model identifier; the lower-cased variant is presumably what the vLLM
// backend expects as a model name — TODO confirm against backend config.
export const VOXCPM_MODEL_ID = 'VoxCPM2';
export const VOXCPM_VLLM_MODEL_ID = 'voxcpm2';
// Sentinel voice id for "auto" mode (prompt derived from agent metadata).
export const VOXCPM_AUTO_VOICE_ID = 'voxcpm:auto';
// Prefix for voice ids that point at a stored voice profile.
export const VOXCPM_PROFILE_VOICE_PREFIX = 'voxcpm:profile:';
// Cap applied to each sanitized auto-voice prompt part (in characters).
const VOXCPM_AUTO_VOICE_PROMPT_MAX_CHARS = 200;
/**
 * Deployment backends that can serve VoxCPM speech synthesis.
 * `as const` keeps the `id` fields as literal types so
 * `VoxCPMBackendType` is the union 'vllm-omni' | 'python-api' | 'nano-vllm'.
 */
export const VOXCPM_BACKENDS = [
  {
    id: 'vllm-omni',
    name: 'vLLM-Omni',
    endpoint: '/v1/audio/speech',
    description: 'OpenAI-compatible speech endpoint',
  },
  {
    id: 'python-api',
    name: 'Python API',
    endpoint: '/tts/upload',
    description: 'FastAPI deployment backed by the VoxCPM Python runtime',
  },
  {
    id: 'nano-vllm',
    name: 'Nano-vLLM',
    endpoint: '/generate',
    description: 'Nano-vLLM VoxCPM FastAPI deployment',
  },
] as const;
// Union of valid backend ids, derived from the list above.
export type VoxCPMBackendType = (typeof VOXCPM_BACKENDS)[number]['id'];
// Backend used when none is configured or the configured value is invalid.
export const DEFAULT_VOXCPM_BACKEND: VoxCPMBackendType = 'vllm-omni';
/**
 * Agent metadata used to derive an automatic voice prompt.
 * All fields are optional; see buildAutoVoxCPMVoicePrompt for precedence
 * (persona wins, then role + agentName). `language`/`locale` are not read
 * by this module — presumably consumed by callers; verify before removing.
 */
export interface VoxCPMVoicePromptContext {
  agentName?: string;
  role?: string;
  persona?: string;
  language?: string;
  locale?: string;
}
/**
 * Per-request configuration for the VoxCPM TTS provider.
 */
export interface VoxCPMProviderOptions {
  // Which deployment backend to target; defaults elsewhere to DEFAULT_VOXCPM_BACKEND.
  backend?: VoxCPMBackendType;
  // 'auto' derives a prompt from agent metadata; 'prompt' uses voicePrompt;
  // 'clone' uses the reference audio fields below.
  voiceMode?: 'auto' | 'prompt' | 'clone';
  voicePrompt?: string;
  // Transcript of the reference audio (used in clone mode).
  promptText?: string;
  referenceAudioBase64?: string;
  referenceAudioMimeType?: string;
  referenceAudioName?: string;
  // Generation knobs passed through to the backend — semantics are
  // backend-defined; confirm ranges against the VoxCPM runtime docs.
  cfgValue?: number;
  inferenceTimesteps?: number;
  normalize?: boolean;
  denoise?: boolean;
}
/**
 * Synthetic voice entry representing "auto" mode, where the voice prompt
 * is generated from agent metadata (see buildAutoVoxCPMVoicePrompt).
 */
export const VOXCPM_AUTO_VOICE: TTSVoiceInfo = {
  id: VOXCPM_AUTO_VOICE_ID,
  name: 'Auto Voice',
  language: 'auto',
  gender: 'neutral',
  description: 'Generate a voice prompt from agent metadata',
};
/**
 * Coerce an arbitrary (e.g. persisted or user-supplied) value to a known
 * backend id, falling back to DEFAULT_VOXCPM_BACKEND for anything invalid.
 */
export function normalizeVoxCPMBackend(value: unknown): VoxCPMBackendType {
  const known = VOXCPM_BACKENDS.find((backend) => backend.id === value);
  return known ? known.id : DEFAULT_VOXCPM_BACKEND;
}
/**
 * Resolve the HTTP path served by a given backend.
 *
 * @param backend - A backend id from VOXCPM_BACKENDS.
 * @returns The backend's endpoint path; falls back to the vllm-omni path
 *          if the id is somehow not in the list (should not happen for a
 *          well-typed `VoxCPMBackendType`).
 */
export function getVoxCPMBackendEndpoint(backend: VoxCPMBackendType): string {
  // `??` instead of `||`: only a genuinely missing entry should trigger the
  // fallback, never a falsy-but-present endpoint value.
  return VOXCPM_BACKENDS.find((item) => item.id === backend)?.endpoint ?? '/v1/audio/speech';
}
/**
 * Whether the backend accepts reference audio for voice cloning.
 * Currently every known backend does; kept as an explicit list so a future
 * backend defaults to "unsupported" until added here.
 */
export function voxCPMBackendSupportsReferenceAudio(backend: VoxCPMBackendType): boolean {
  const cloneCapable = ['vllm-omni', 'python-api', 'nano-vllm'];
  return cloneCapable.includes(backend);
}
/**
 * Join a base URL with the backend-specific endpoint path.
 *
 * Special case: a vLLM-Omni base URL that already ends in `/v1` gets only
 * `/audio/speech` appended, avoiding a doubled `/v1` segment.
 *
 * @param baseUrl - Server base URL, with or without trailing slashes.
 * @param backend - Backend id selecting the endpoint path.
 */
export function buildVoxCPMBackendUrl(baseUrl: string, backend: VoxCPMBackendType): string {
  // Strip ALL trailing slashes (original `/\/$/` stripped only one), so
  // inputs like 'http://host//' cannot produce a '//v1/...' URL.
  const cleanBaseUrl = baseUrl.replace(/\/+$/, '');
  if (backend === 'vllm-omni' && cleanBaseUrl.endsWith('/v1')) {
    return `${cleanBaseUrl}/audio/speech`;
  }
  return `${cleanBaseUrl}${getVoxCPMBackendEndpoint(backend)}`;
}
/**
 * Build the namespaced voice id ('voxcpm:profile:<id>') for a voice profile.
 */
export function getVoxCPMProfileVoiceId(profileId: string): string {
  return VOXCPM_PROFILE_VOICE_PREFIX + profileId;
}
/**
 * Extract the profile id from a namespaced voice id, or null when the id
 * is not a profile voice (inverse of getVoxCPMProfileVoiceId).
 */
export function getVoxCPMProfileIdFromVoiceId(voiceId: string): string | null {
  return voiceId.startsWith(VOXCPM_PROFILE_VOICE_PREFIX)
    ? voiceId.slice(VOXCPM_PROFILE_VOICE_PREFIX.length)
    : null;
}
/**
 * Clean one prompt fragment: drop Unicode control/invisible characters,
 * collapse runs of whitespace, trim, and cap the length.
 */
function sanitizeAutoVoicePromptPart(value?: string): string {
  const collapsed = (value ?? '')
    .replace(/\p{C}+/gu, ' ') // Unicode "Other" category: control chars etc.
    .replace(/\s+/gu, ' ')
    .trim();
  // Truncation can leave a dangling space; trim again after slicing.
  return collapsed.slice(0, VOXCPM_AUTO_VOICE_PROMPT_MAX_CHARS).trim();
}
/**
 * Derive a voice prompt from agent metadata.
 * Precedence: persona, then role + agentName combined, then a generic
 * default so the result is never empty.
 */
export function buildAutoVoxCPMVoicePrompt(context: VoxCPMVoicePromptContext = {}): string {
  const persona = sanitizeAutoVoicePromptPart(context.persona);
  if (persona) return persona;
  // Fall back to "role name"; re-sanitize the joined string so the combined
  // length cap still applies.
  const combined = [context.role, context.agentName]
    .map(sanitizeAutoVoicePromptPart)
    .filter(Boolean)
    .join(' ');
  return sanitizeAutoVoicePromptPart(combined) || 'natural classroom voice';
}