File size: 3,601 Bytes
f56a29b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import type { TTSVoiceInfo } from '@/lib/audio/types';

/** Provider identifier string for the VoxCPM TTS integration. */
export const VOXCPM_TTS_PROVIDER_ID = 'voxcpm-tts' as const;
/**
 * Mixed-case model identifier.
 * NOTE(review): which backends expect this spelling rather than the lowercase
 * `VOXCPM_VLLM_MODEL_ID` is not visible in this file — confirm against callers.
 */
export const VOXCPM_MODEL_ID = 'VoxCPM2';
/**
 * Lowercase model identifier.
 * NOTE(review): presumably the model name sent to the vLLM-Omni backend — confirm against callers.
 */
export const VOXCPM_VLLM_MODEL_ID = 'voxcpm2';
/** Voice id of the built-in "auto" voice; used as the `id` of `VOXCPM_AUTO_VOICE`. */
export const VOXCPM_AUTO_VOICE_ID = 'voxcpm:auto';
/**
 * Prefix that `getVoxCPMProfileVoiceId` prepends to a profile id and that
 * `getVoxCPMProfileIdFromVoiceId` strips again.
 */
export const VOXCPM_PROFILE_VOICE_PREFIX = 'voxcpm:profile:';
/**
 * Upper bound applied by `sanitizeAutoVoicePromptPart` when truncating a prompt part.
 * The limit is measured the way `String.prototype.slice` measures it (UTF-16 code units).
 */
const VOXCPM_AUTO_VOICE_PROMPT_MAX_CHARS = 200;

/**
 * Table of the VoxCPM backend deployments known to this module.
 *
 * Each entry carries:
 * - `id`: the value used as `VoxCPMBackendType`,
 * - `name`: a human-readable label,
 * - `endpoint`: the request path that `buildVoxCPMBackendUrl` appends to the base URL,
 * - `description`: a short summary of the deployment.
 *
 * Declared `as const` so the `id` values stay literal types and the backend
 * union can be derived from this table instead of being maintained by hand.
 */
export const VOXCPM_BACKENDS = [
  {
    id: 'vllm-omni',
    name: 'vLLM-Omni',
    endpoint: '/v1/audio/speech',
    description: 'OpenAI-compatible speech endpoint',
  },
  {
    id: 'python-api',
    name: 'Python API',
    endpoint: '/tts/upload',
    description: 'FastAPI deployment backed by the VoxCPM Python runtime',
  },
  {
    id: 'nano-vllm',
    name: 'Nano-vLLM',
    endpoint: '/generate',
    description: 'Nano-vLLM VoxCPM FastAPI deployment',
  },
] as const;

/** Union of the `id` literals declared in `VOXCPM_BACKENDS`. */
export type VoxCPMBackendType = (typeof VOXCPM_BACKENDS)[number]['id'];

/** Backend that `normalizeVoxCPMBackend` falls back to when given an unrecognized value. */
export const DEFAULT_VOXCPM_BACKEND: VoxCPMBackendType = 'vllm-omni';

/**
 * Agent metadata from which `buildAutoVoxCPMVoicePrompt` derives a voice prompt.
 * All fields are optional.
 */
export interface VoxCPMVoicePromptContext {
  /** Agent name; used in the fallback prompt (after `role`) when `persona` is empty. */
  agentName?: string;
  /** Agent role; used in the fallback prompt (before `agentName`) when `persona` is empty. */
  role?: string;
  /** Persona text; when non-empty after sanitizing it becomes the whole prompt. */
  persona?: string;
  /** Not read by any function in this file. NOTE(review): confirm usage elsewhere. */
  language?: string;
  /** Not read by any function in this file. NOTE(review): confirm usage elsewhere. */
  locale?: string;
}

/**
 * Optional per-request settings for the VoxCPM provider.
 *
 * None of these fields are consumed inside this file, so the notes below are
 * reviewer assumptions based on the field names — confirm against the code
 * that builds the backend request.
 */
export interface VoxCPMProviderOptions {
  /** Which entry of `VOXCPM_BACKENDS` to target. */
  backend?: VoxCPMBackendType;
  /** Voice selection mode; one of `'auto'`, `'prompt'` or `'clone'`. */
  voiceMode?: 'auto' | 'prompt' | 'clone';
  /** Presumably a textual description of the desired voice — TODO confirm. */
  voicePrompt?: string;
  /** Presumably the transcript accompanying the reference audio — TODO confirm. */
  promptText?: string;
  /** Presumably base64-encoded reference audio for cloning — TODO confirm. */
  referenceAudioBase64?: string;
  /** Presumably the MIME type of the reference audio — TODO confirm. */
  referenceAudioMimeType?: string;
  /** Presumably the file name of the reference audio — TODO confirm. */
  referenceAudioName?: string;
  /** Numeric generation parameter; exact meaning and range are not shown here — TODO confirm. */
  cfgValue?: number;
  /** Numeric generation parameter; exact meaning and range are not shown here — TODO confirm. */
  inferenceTimesteps?: number;
  /** Boolean toggle; what is normalized is not shown here — TODO confirm. */
  normalize?: boolean;
  /** Boolean toggle; what is denoised is not shown here — TODO confirm. */
  denoise?: boolean;
}

/**
 * Voice entry for the "auto" voice, identified by `VOXCPM_AUTO_VOICE_ID`.
 * Its description states that the voice prompt is generated from agent metadata.
 */
export const VOXCPM_AUTO_VOICE: TTSVoiceInfo = {
  id: VOXCPM_AUTO_VOICE_ID,
  name: 'Auto Voice',
  language: 'auto',
  gender: 'neutral',
  description: 'Generate a voice prompt from agent metadata',
};

/**
 * Coerces an arbitrary value into a known backend id.
 *
 * @param value - Any value, typically untyped configuration input.
 * @returns The matching id from `VOXCPM_BACKENDS` when `value` is strictly
 *   equal to one of them, otherwise `DEFAULT_VOXCPM_BACKEND`.
 */
export function normalizeVoxCPMBackend(value: unknown): VoxCPMBackendType {
  for (const { id } of VOXCPM_BACKENDS) {
    // `id === value` means returning `id` is the same as returning `value`,
    // but already carries the narrowed literal type.
    if (id === value) {
      return id;
    }
  }
  return DEFAULT_VOXCPM_BACKEND;
}

/**
 * Looks up the request path registered for a backend in `VOXCPM_BACKENDS`.
 *
 * @param backend - Backend id to look up.
 * @returns The backend's `endpoint`, or `'/v1/audio/speech'` when the id is
 *   not found in the table (or its endpoint is empty).
 */
export function getVoxCPMBackendEndpoint(backend: VoxCPMBackendType): string {
  for (const candidate of VOXCPM_BACKENDS) {
    if (candidate.id === backend) {
      return candidate.endpoint || '/v1/audio/speech';
    }
  }
  return '/v1/audio/speech';
}

/**
 * Reports whether a backend is listed as accepting reference audio.
 *
 * @param backend - Backend id to check.
 * @returns `true` for `'vllm-omni'`, `'python-api'` and `'nano-vllm'`;
 *   `false` for any other value that reaches this function at runtime.
 */
export function voxCPMBackendSupportsReferenceAudio(backend: VoxCPMBackendType): boolean {
  switch (backend) {
    case 'vllm-omni':
    case 'python-api':
    case 'nano-vllm':
      return true;
    default:
      return false;
  }
}

/**
 * Builds the full request URL for a backend from a user-supplied base URL.
 *
 * Every trailing `/` is removed from `baseUrl` before the path is appended, so
 * `http://host/v1`, `http://host/v1/` and `http://host/v1//` all resolve to
 * the same URL. Stripping only a single slash would let the doubled-slash form
 * bypass the `/v1` check below and produce `.../v1//v1/audio/speech`.
 *
 * @param baseUrl - Base URL of the deployment, with or without trailing slashes.
 * @param backend - Backend id that selects the endpoint path.
 * @returns The base URL joined with the backend's endpoint path. For
 *   `'vllm-omni'`, a base URL that already ends in `/v1` gets only
 *   `/audio/speech` appended so the `/v1` segment is not duplicated.
 */
export function buildVoxCPMBackendUrl(baseUrl: string, backend: VoxCPMBackendType): string {
  // Walk back over trailing slashes by hand: linear time, no regex backtracking.
  let end = baseUrl.length;
  while (end > 0 && baseUrl.charCodeAt(end - 1) === 0x2f /* '/' */) {
    end -= 1;
  }
  const cleanBaseUrl = baseUrl.slice(0, end);

  if (backend === 'vllm-omni' && cleanBaseUrl.endsWith('/v1')) {
    return `${cleanBaseUrl}/audio/speech`;
  }
  return `${cleanBaseUrl}${getVoxCPMBackendEndpoint(backend)}`;
}

/**
 * Builds the voice id that refers to a stored voice profile.
 *
 * @param profileId - Identifier of the profile.
 * @returns `VOXCPM_PROFILE_VOICE_PREFIX` followed by `profileId`.
 */
export function getVoxCPMProfileVoiceId(profileId: string): string {
  return VOXCPM_PROFILE_VOICE_PREFIX.concat(profileId);
}

/**
 * Extracts the profile id from a profile voice id.
 *
 * @param voiceId - Voice id to inspect.
 * @returns The text after `VOXCPM_PROFILE_VOICE_PREFIX` (an empty string when
 *   nothing follows the prefix), or `null` when `voiceId` does not start with
 *   that prefix.
 */
export function getVoxCPMProfileIdFromVoiceId(voiceId: string): string | null {
  const hasProfilePrefix = voiceId.startsWith(VOXCPM_PROFILE_VOICE_PREFIX);
  return hasProfilePrefix ? voiceId.slice(VOXCPM_PROFILE_VOICE_PREFIX.length) : null;
}

/**
 * Cleans one piece of agent metadata for use inside a voice prompt.
 *
 * Steps, in order:
 * 1. Treat a missing or empty value as `''`.
 * 2. Replace each run of Unicode "Other" category characters (`\p{C}`:
 *    control, format, lone surrogate, private-use, unassigned) with a space.
 * 3. Collapse each run of whitespace into a single space and trim.
 * 4. Truncate to `VOXCPM_AUTO_VOICE_PROMPT_MAX_CHARS` UTF-16 code units.
 * 5. Drop a high surrogate left dangling by the truncation, then trim again.
 *
 * @param value - Raw metadata text; may be `undefined`.
 * @returns The cleaned text, possibly empty, never ending in half of a
 *   surrogate pair.
 */
function sanitizeAutoVoicePromptPart(value?: string): string {
  const collapsed = (value || '')
    .replace(/[\p{C}]+/gu, ' ')
    .replace(/\s+/gu, ' ')
    .trim();

  let truncated = collapsed.slice(0, VOXCPM_AUTO_VOICE_PROMPT_MAX_CHARS);

  // `slice` counts UTF-16 code units, so the cut can fall between the two
  // halves of an astral character such as an emoji. Lone surrogates were
  // already removed by the `\p{C}` pass, so a trailing high surrogate here can
  // only be the first half of a pair the cut just split — drop it rather than
  // emit an ill-formed string.
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    truncated = truncated.slice(0, -1);
  }

  // Trim after the surrogate fix: removing it can expose a trailing space.
  return truncated.trim();
}

/**
 * Derives a voice prompt from agent metadata.
 *
 * Order of preference:
 * 1. the sanitized `persona`, when it is non-empty;
 * 2. the sanitized `role` and `agentName` (empty ones skipped) joined by a
 *    space and sanitized once more as a whole;
 * 3. the literal `'natural classroom voice'`.
 *
 * @param context - Agent metadata; defaults to an empty object.
 * @returns A non-empty prompt string.
 */
export function buildAutoVoxCPMVoicePrompt(context: VoxCPMVoicePromptContext = {}): string {
  const cleanedPersona = sanitizeAutoVoicePromptPart(context.persona);
  if (cleanedPersona) {
    return cleanedPersona;
  }

  const pieces: string[] = [];
  for (const raw of [context.role, context.agentName]) {
    const cleaned = sanitizeAutoVoicePromptPart(raw);
    if (cleaned) {
      pieces.push(cleaned);
    }
  }

  // Sanitize the joined text again so the combined string is subject to the
  // same length cap as each individual part.
  const combined = sanitizeAutoVoicePromptPart(pieces.join(' '));
  return combined || 'natural classroom voice';
}