/**
 * Audio Provider Type Definitions
 *
 * Unified types for TTS (Text-to-Speech) and ASR (Automatic Speech Recognition)
 * with an extensible architecture to support multiple providers.
 *
 * Currently Supported TTS Providers:
 * - OpenAI TTS (https://platform.openai.com/docs/guides/text-to-speech)
 * - Azure TTS (https://learn.microsoft.com/en-us/azure/ai-services/speech-service/text-to-speech)
 * - GLM TTS (https://docs.bigmodel.cn/cn/guide/models/sound-and-video/glm-tts)
 * - Qwen TTS (https://bailian.console.aliyun.com/)
 * - VoxCPM TTS
 * - Doubao TTS (https://www.volcengine.com/docs/6561/1257543)
 * - ElevenLabs TTS (https://elevenlabs.io/docs)
 * - MiniMax TTS
 * - Browser Native TTS (Web Speech API, client-side only)
 *
 * Currently Supported ASR Providers:
 * - OpenAI Whisper (https://platform.openai.com/docs/guides/speech-to-text)
 * - Browser Native (Web Speech API, client-side only)
 * - Qwen ASR (DashScope API)
 *
 * In addition to the built-in providers above, user-defined providers are
 * identified by IDs prefixed with `custom-tts-` or `custom-asr-`.
 *
 * Future Provider Support (extensible):
 * - ElevenLabs ASR (https://elevenlabs.io/docs)
 * - Fish Audio TTS (https://fish.audio/docs)
 * - Cartesia TTS (https://cartesia.ai/docs)
 * - PlayHT TTS (https://docs.play.ht/)
 * - AssemblyAI ASR (https://www.assemblyai.com/docs)
 * - Deepgram ASR (https://developers.deepgram.com/docs)
 *
 * HOW TO ADD A NEW PROVIDER:
 *
 * Step 1: Add the provider ID to the union type
 *   - For TTS: Add to BuiltInTTSProviderId below
 *   - For ASR: Add to BuiltInASRProviderId below
 *
 * Step 2: Add the provider configuration to constants.ts
 *   - Define provider metadata (name, icon, models, voices, formats, etc.)
 *   - Add to the TTS_PROVIDERS or ASR_PROVIDERS registry
 *
 * Step 3: Implement provider logic in tts-providers.ts or asr-providers.ts
 *   - Add a case to the generateTTS() or transcribeAudio() switch statement
 *   - Implement the API call logic for the new provider
 *
 * Step 4: Add i18n translations
 *   - Add provider name translations in lib/i18n.ts
 *   - Format: `provider{ProviderName}TTS` or `provider{ProviderName}ASR`
 *
 * Step 5 (Optional): Create a client-side hook if needed
 *   - For browser-only providers, create hooks like use-browser-tts.ts
 *   - Export from lib/hooks/
 *
 * Example: Adding ElevenLabs TTS
 * ================================
 * (The 'elevenlabs-tts' ID is already declared below; the steps are kept as a
 * worked example of the process.)
 * 1. Add 'elevenlabs-tts' to the BuiltInTTSProviderId union type
 * 2. In constants.ts:
 *    TTS_PROVIDERS['elevenlabs-tts'] = {
 *      id: 'elevenlabs-tts',
 *      name: 'ElevenLabs',
 *      requiresApiKey: true,
 *      defaultBaseUrl: 'https://api.elevenlabs.io/v1',
 *      icon: '/elevenlabs.svg',
 *      models: [...],
 *      defaultModelId: '<default-model-id>',
 *      voices: [...],
 *      supportedFormats: ['mp3', 'pcm'],
 *      speedRange: { min: 0.5, max: 2.0, default: 1.0 }
 *    }
 * 3. In tts-providers.ts:
 *    case 'elevenlabs-tts':
 *      return await generateElevenLabsTTS(config, text);
 * 4. In i18n.ts:
 *    providerElevenLabsTTS: 'ElevenLabs TTS' / 'ElevenLabs Text-to-Speech'
 */
// ============================================================================
// TTS (Text-to-Speech) Types
// ============================================================================
/**
 * Built-in TTS provider IDs.
 *
 * Add new TTS providers here as union members.
 * Keep in sync with the TTS_PROVIDERS registry in constants.ts.
 */
export type BuiltInTTSProviderId =
  | 'openai-tts'
  | 'azure-tts'
  | 'glm-tts'
  | 'qwen-tts'
  | 'voxcpm-tts'
  | 'doubao-tts'
  | 'elevenlabs-tts'
  | 'minimax-tts'
  | 'browser-native-tts';

/**
 * Any TTS provider ID: either a built-in ID or a custom ID, i.e. any string
 * that begins with the `custom-tts-` prefix.
 */
export type TTSProviderId = BuiltInTTSProviderId | `custom-tts-${string}`;
/**
 * Voice information for TTS.
 */
export interface TTSVoiceInfo {
  /** Voice identifier. */
  id: string;
  /** Voice name. */
  name: string;
  /** Language of the voice. NOTE(review): the expected format (e.g. a locale tag) is not visible here — confirm. */
  language: string;
  /** Language name in its native script (e.g., "中文(简体,中国)", "日本語"). */
  localeName?: string;
  gender?: 'male' | 'female' | 'neutral';
  description?: string;
  /** Model IDs this voice is compatible with. Undefined = all models. */
  compatibleModels?: string[];
}
/**
 * TTS Provider Configuration.
 *
 * Static metadata describing one TTS provider.
 */
export interface TTSProviderConfig {
  /** Provider ID (built-in or `custom-tts-` prefixed). */
  id: TTSProviderId;
  /** Provider name. */
  name: string;
  /** Whether the provider requires an API key. */
  requiresApiKey: boolean;
  defaultBaseUrl?: string;
  icon?: string;
  /** Available models. Empty array means provider has no model concept (e.g. Azure, Browser Native). */
  models: Array<{ id: string; name: string }>;
  /** Default model ID used when user hasn't selected one. Empty string if no models. */
  defaultModelId: string;
  /** Voices offered by this provider. */
  voices: TTSVoiceInfo[];
  /** Supported audio formats, e.g. ['mp3', 'wav', 'opus', etc.]. */
  supportedFormats: string[];
  /** Allowed speed bounds and the default speed, when the provider defines them. */
  speedRange?: {
    min: number;
    max: number;
    default: number;
  };
}
/**
 * TTS Model Configuration for API calls.
 */
export interface TTSModelConfig {
  /** Provider to use. */
  providerId: TTSProviderId;
  modelId?: string;
  apiKey?: string;
  baseUrl?: string;
  /** Voice to use. NOTE(review): presumably a TTSVoiceInfo.id of the selected provider — confirm. */
  voice: string;
  /** Speed. NOTE(review): presumably expected to lie within the provider's speedRange — confirm. */
  speed?: number;
  /** Audio format. NOTE(review): presumably one of the provider's supportedFormats — confirm. */
  format?: string;
  /** Extra provider-specific options; values are untyped (`unknown`) and must be narrowed before use. */
  providerOptions?: Record<string, unknown>;
}
// ============================================================================
// ASR (Automatic Speech Recognition) Types
// ============================================================================
/**
 * Built-in ASR provider IDs.
 *
 * Add new ASR providers here as union members.
 * Keep in sync with the ASR_PROVIDERS registry in constants.ts.
 */
export type BuiltInASRProviderId = 'openai-whisper' | 'browser-native' | 'qwen-asr';

/**
 * Any ASR provider ID: either a built-in ID or a custom ID, i.e. any string
 * that begins with the `custom-asr-` prefix.
 */
export type ASRProviderId = BuiltInASRProviderId | `custom-asr-${string}`;
/**
 * ASR Provider Configuration.
 *
 * Static metadata describing one ASR provider.
 */
export interface ASRProviderConfig {
  /** Provider ID (built-in or `custom-asr-` prefixed). */
  id: ASRProviderId;
  /** Provider name. */
  name: string;
  /** Whether the provider requires an API key. */
  requiresApiKey: boolean;
  defaultBaseUrl?: string;
  icon?: string;
  /** Available models, each with an ID and a name. */
  models: Array<{ id: string; name: string }>;
  /** Default model ID. */
  defaultModelId: string;
  /** Languages the provider supports. NOTE(review): code format is not visible here — confirm. */
  supportedLanguages: string[];
  /** Audio formats the provider supports. */
  supportedFormats: string[];
}
/**
 * ASR Model Configuration for API calls.
 */
export interface ASRModelConfig {
  /** Provider to use. */
  providerId: ASRProviderId;
  modelId?: string;
  apiKey?: string;
  baseUrl?: string;
  /** Language of the audio. NOTE(review): presumably one of the provider's supportedLanguages — confirm. */
  language?: string;
}
/**
 * Returns true if the provider ID is a user-defined custom TTS provider.
 *
 * Acts as a type guard: inside a truthy branch the argument is narrowed to
 * the `custom-tts-` template-literal type, which is exactly the set of
 * strings the runtime prefix check accepts.
 *
 * @param id - Provider ID to test.
 * @returns `true` when `id` starts with `custom-tts-` (an empty suffix also matches).
 */
export function isCustomTTSProvider(id: string): id is `custom-tts-${string}` {
  return id.startsWith('custom-tts-');
}
/**
 * Returns true if the provider ID is a user-defined custom ASR provider.
 *
 * Acts as a type guard: inside a truthy branch the argument is narrowed to
 * the `custom-asr-` template-literal type, which is exactly the set of
 * strings the runtime prefix check accepts.
 *
 * @param id - Provider ID to test.
 * @returns `true` when `id` starts with `custom-asr-` (an empty suffix also matches).
 */
export function isCustomASRProvider(id: string): id is `custom-asr-${string}` {
  return id.startsWith('custom-asr-');
}