import type { TTSVoiceInfo } from '@/lib/audio/types';

/** Provider identifier for the VoxCPM text-to-speech integration. */
export const VOXCPM_TTS_PROVIDER_ID = 'voxcpm-tts' as const;

/** Model identifier string `'VoxCPM2'`. */
export const VOXCPM_MODEL_ID = 'VoxCPM2';

/**
 * Lower-case model identifier string `'voxcpm2'`.
 * NOTE(review): presumably the model name sent to the vLLM-based backends —
 * confirm against callers.
 */
export const VOXCPM_VLLM_MODEL_ID = 'voxcpm2';

/** Voice ID of the built-in "auto" voice (see {@link VOXCPM_AUTO_VOICE}). */
export const VOXCPM_AUTO_VOICE_ID = 'voxcpm:auto';

/** Prefix shared by every voice ID that refers to a stored voice profile. */
export const VOXCPM_PROFILE_VOICE_PREFIX = 'voxcpm:profile:';

/** Upper bound, in UTF-16 code units, for each sanitized auto voice prompt. */
const VOXCPM_AUTO_VOICE_PROMPT_MAX_CHARS = 200;

/**
 * Known VoxCPM serving backends. `endpoint` is the path appended to the
 * configured base URL by {@link buildVoxCPMBackendUrl}.
 */
export const VOXCPM_BACKENDS = [
  {
    id: 'vllm-omni',
    name: 'vLLM-Omni',
    endpoint: '/v1/audio/speech',
    description: 'OpenAI-compatible speech endpoint',
  },
  {
    id: 'python-api',
    name: 'Python API',
    endpoint: '/tts/upload',
    description: 'FastAPI deployment backed by the VoxCPM Python runtime',
  },
  {
    id: 'nano-vllm',
    name: 'Nano-vLLM',
    endpoint: '/generate',
    description: 'Nano-vLLM VoxCPM FastAPI deployment',
  },
] as const;

/** Union of the `id` values declared in {@link VOXCPM_BACKENDS}. */
export type VoxCPMBackendType = (typeof VOXCPM_BACKENDS)[number]['id'];

/** Backend used whenever a configured value is missing or unrecognized. */
export const DEFAULT_VOXCPM_BACKEND: VoxCPMBackendType = 'vllm-omni';

/**
 * Agent metadata from which an automatic voice prompt can be derived.
 *
 * Only `persona`, `role` and `agentName` are read by
 * {@link buildAutoVoxCPMVoicePrompt}; `language` and `locale` are not used by
 * any helper in this module.
 */
export interface VoxCPMVoicePromptContext {
  agentName?: string;
  role?: string;
  persona?: string;
  language?: string;
  locale?: string;
}

/**
 * Per-request options for the VoxCPM provider.
 *
 * NOTE(review): the helpers in this module do not read these fields, so the
 * exact semantics of each one (e.g. `cfgValue`, `inferenceTimesteps`) should be
 * confirmed against the code that builds the backend request.
 */
export interface VoxCPMProviderOptions {
  backend?: VoxCPMBackendType;
  voiceMode?: 'auto' | 'prompt' | 'clone';
  voicePrompt?: string;
  promptText?: string;
  referenceAudioBase64?: string;
  referenceAudioMimeType?: string;
  referenceAudioName?: string;
  cfgValue?: number;
  inferenceTimesteps?: number;
  normalize?: boolean;
  denoise?: boolean;
}

/** Voice entry representing the automatically generated voice. */
export const VOXCPM_AUTO_VOICE: TTSVoiceInfo = {
  id: VOXCPM_AUTO_VOICE_ID,
  name: 'Auto Voice',
  language: 'auto',
  gender: 'neutral',
  description: 'Generate a voice prompt from agent metadata',
};

/** Type guard: `true` when `value` is one of the IDs in {@link VOXCPM_BACKENDS}. */
function isVoxCPMBackend(value: unknown): value is VoxCPMBackendType {
  return VOXCPM_BACKENDS.some((backend) => backend.id === value);
}

/**
 * Coerces an arbitrary value into a known backend ID.
 *
 * @param value - Any value, typically read from untyped settings.
 * @returns `value` when it is a known backend ID, otherwise
 *   {@link DEFAULT_VOXCPM_BACKEND}.
 */
export function normalizeVoxCPMBackend(value: unknown): VoxCPMBackendType {
  return isVoxCPMBackend(value) ? value : DEFAULT_VOXCPM_BACKEND;
}

/**
 * Looks up the endpoint path for a backend.
 *
 * @param backend - Backend ID.
 * @returns The backend's `endpoint`, or `'/v1/audio/speech'` when the ID is not
 *   present in {@link VOXCPM_BACKENDS} (possible only if an unchecked value
 *   reaches this function at runtime).
 */
export function getVoxCPMBackendEndpoint(backend: VoxCPMBackendType): string {
  const match = VOXCPM_BACKENDS.find((item) => item.id === backend);
  return match?.endpoint ?? '/v1/audio/speech';
}

/**
 * Reports whether a backend is treated as supporting reference audio.
 *
 * @param backend - Backend ID.
 * @returns `true` for `'vllm-omni'`, `'python-api'` and `'nano-vllm'`; `false`
 *   for any other value that reaches this function at runtime.
 */
export function voxCPMBackendSupportsReferenceAudio(backend: VoxCPMBackendType): boolean {
  return backend === 'vllm-omni' || backend === 'python-api' || backend === 'nano-vllm';
}

/** Removes every trailing `/` from `url` (a bare `'/'` becomes `''`). */
function stripTrailingSlashes(url: string): string {
  let end = url.length;
  while (end > 0 && url[end - 1] === '/') end -= 1;
  return url.slice(0, end);
}

/**
 * Joins a base URL with the endpoint path of the given backend.
 *
 * All trailing slashes are removed from `baseUrl` first, so `host/` and
 * `host//` both produce a single separator. For the `'vllm-omni'` backend a
 * base URL that already ends in `/v1` is not given a second `/v1` segment.
 *
 * @param baseUrl - Base URL of the deployment.
 * @param backend - Backend whose endpoint path should be appended.
 * @returns The full request URL.
 */
export function buildVoxCPMBackendUrl(baseUrl: string, backend: VoxCPMBackendType): string {
  const cleanBaseUrl = stripTrailingSlashes(baseUrl);
  if (backend === 'vllm-omni' && cleanBaseUrl.endsWith('/v1')) {
    // The endpoint path itself starts with '/v1'; avoid producing '/v1/v1/...'.
    return `${cleanBaseUrl}/audio/speech`;
  }
  return `${cleanBaseUrl}${getVoxCPMBackendEndpoint(backend)}`;
}

/**
 * Builds the voice ID that refers to a voice profile.
 *
 * @param profileId - Profile identifier.
 * @returns `profileId` prefixed with {@link VOXCPM_PROFILE_VOICE_PREFIX}.
 */
export function getVoxCPMProfileVoiceId(profileId: string): string {
  return `${VOXCPM_PROFILE_VOICE_PREFIX}${profileId}`;
}

/**
 * Extracts the profile ID from a profile voice ID.
 *
 * @param voiceId - Voice ID to inspect.
 * @returns The text after {@link VOXCPM_PROFILE_VOICE_PREFIX}, or `null` when
 *   `voiceId` does not start with that prefix. A voice ID equal to the bare
 *   prefix yields an empty string.
 */
export function getVoxCPMProfileIdFromVoiceId(voiceId: string): string | null {
  if (!voiceId.startsWith(VOXCPM_PROFILE_VOICE_PREFIX)) return null;
  return voiceId.slice(VOXCPM_PROFILE_VOICE_PREFIX.length);
}

/**
 * Truncates `text` to at most `maxUnits` UTF-16 code units without cutting a
 * surrogate pair in half.
 *
 * Expects `text` to contain no lone surrogates, so a high surrogate in the last
 * kept position can only be the first half of a pair that straddles the limit;
 * it is dropped rather than emitted as ill-formed text.
 */
function truncateWellFormed(text: string, maxUnits: number): string {
  if (text.length <= maxUnits) return text;
  const lastKept = text.charCodeAt(maxUnits - 1);
  const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff;
  return text.slice(0, splitsPair ? maxUnits - 1 : maxUnits);
}

/**
 * Normalizes one piece of agent metadata for use in a voice prompt.
 *
 * Runs of Unicode "Other" characters (`\p{C}`: control, format, unassigned,
 * private-use and lone surrogates) become a single space, whitespace runs are
 * collapsed, and the result is trimmed and limited to
 * {@link VOXCPM_AUTO_VOICE_PROMPT_MAX_CHARS} UTF-16 code units.
 *
 * @param value - Raw text; `undefined` and `''` both yield `''`.
 * @returns The sanitized, well-formed text (possibly empty).
 */
function sanitizeAutoVoicePromptPart(value?: string): string {
  const collapsed = (value || '')
    .replace(/[\p{C}]+/gu, ' ')
    .replace(/\s+/gu, ' ')
    .trim();
  // Trim again: the cut may land right after a space.
  return truncateWellFormed(collapsed, VOXCPM_AUTO_VOICE_PROMPT_MAX_CHARS).trim();
}

/**
 * Derives a voice prompt from agent metadata.
 *
 * The sanitized `persona` is used when it is non-empty. Otherwise the sanitized
 * `role` and `agentName` are joined with a space and sanitized again, so the
 * combined text also respects the length limit. If nothing usable remains, the
 * literal `'natural classroom voice'` is returned.
 *
 * @param context - Agent metadata; defaults to an empty object.
 * @returns A non-empty voice prompt.
 */
export function buildAutoVoxCPMVoicePrompt(context: VoxCPMVoicePromptContext = {}): string {
  const persona = sanitizeAutoVoicePromptPart(context.persona);
  if (persona) return persona;

  const fallbackParts = [context.role, context.agentName]
    .map((part) => sanitizeAutoVoicePromptPart(part))
    .filter(Boolean);
  const fallbackPrompt = sanitizeAutoVoicePromptPart(fallbackParts.join(' '));
  return fallbackPrompt || 'natural classroom voice';
}