File size: 6,059 Bytes
f56a29b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
/**
 * Audio Provider Type Definitions
 *
 * Unified types for TTS (Text-to-Speech) and ASR (Automatic Speech Recognition)
 * with extensible architecture to support multiple providers.
 *
 * Currently Supported TTS Providers:
 * - OpenAI TTS (https://platform.openai.com/docs/guides/text-to-speech)
 * - Azure TTS (https://learn.microsoft.com/en-us/azure/ai-services/speech-service/text-to-speech)
 * - GLM TTS (https://docs.bigmodel.cn/cn/guide/models/sound-and-video/glm-tts)
 * - Qwen TTS (https://bailian.console.aliyun.com/)
 * - VoxCPM TTS
 * - Doubao TTS (https://www.volcengine.com/docs/6561/1257543)
 * - ElevenLabs TTS (https://elevenlabs.io/docs)
 * - MiniMax TTS
 * - Browser Native TTS (Web Speech API, client-side only)
 *
 * Currently Supported ASR Providers:
 * - OpenAI Whisper (https://platform.openai.com/docs/guides/speech-to-text)
 * - Browser Native (Web Speech API, client-side only)
 * - Qwen ASR (DashScope API)
 *
 * Future Provider Support (extensible):
 * - ElevenLabs ASR (https://elevenlabs.io/docs)
 * - Fish Audio TTS (https://fish.audio/docs)
 * - Cartesia TTS (https://cartesia.ai/docs)
 * - PlayHT TTS (https://docs.play.ht/)
 * - AssemblyAI ASR (https://www.assemblyai.com/docs)
 * - Deepgram ASR (https://developers.deepgram.com/docs)
 *
 * HOW TO ADD A NEW PROVIDER:
 *
 * Step 1: Add provider ID to the union type
 *   - For TTS: Add to TTSProviderId below
 *   - For ASR: Add to ASRProviderId below
 *
 * Step 2: Add provider configuration to constants.ts
 *   - Define provider metadata (name, icon, voices, formats, etc.)
 *   - Add to TTS_PROVIDERS or ASR_PROVIDERS registry
 *
 * Step 3: Implement provider logic in tts-providers.ts or asr-providers.ts
 *   - Add case to generateTTS() or transcribeAudio() switch statement
 *   - Implement API call logic for the new provider
 *
 * Step 4: Add i18n translations
 *   - Add provider name translations in lib/i18n.ts
 *   - Format: `provider{ProviderName}TTS` or `provider{ProviderName}ASR`
 *
 * Step 5 (Optional): Create client-side hook if needed
 *   - For browser-only providers, create hooks like use-browser-tts.ts
 *   - Export from lib/hooks/
 *
 * Example: Adding ElevenLabs TTS
 * ================================
 * 1. Add 'elevenlabs-tts' to TTSProviderId union type
 * 2. In constants.ts:
 *    TTS_PROVIDERS['elevenlabs-tts'] = {
 *      id: 'elevenlabs-tts',
 *      name: 'ElevenLabs',
 *      requiresApiKey: true,
 *      defaultBaseUrl: 'https://api.elevenlabs.io/v1',
 *      icon: '/elevenlabs.svg',
 *      voices: [...],
 *      supportedFormats: ['mp3', 'pcm'],
 *      speedRange: { min: 0.5, max: 2.0, default: 1.0 }
 *    }
 * 3. In tts-providers.ts:
 *    case 'elevenlabs-tts':
 *      return await generateElevenLabsTTS(config, text);
 * 4. In i18n.ts:
 *    providerElevenLabsTTS: 'ElevenLabs TTS' / 'ElevenLabs Text-to-Speech'
 */

// ============================================================================
// TTS (Text-to-Speech) Types
// ============================================================================

/**
 * TTS Provider IDs
 *
 * Built-in (first-party) TTS providers shipped with the app.
 * Add new TTS providers here as union members.
 * Keep in sync with TTS_PROVIDERS registry in constants.ts
 */
export type BuiltInTTSProviderId =
  | 'openai-tts'
  | 'azure-tts'
  | 'glm-tts'
  | 'qwen-tts'
  | 'voxcpm-tts'
  | 'doubao-tts'
  | 'elevenlabs-tts'
  | 'minimax-tts'
  | 'browser-native-tts';

/**
 * Full set of TTS provider IDs: every built-in provider plus user-defined
 * custom providers, whose IDs always carry the `custom-tts-` prefix
 * (detected at runtime via isCustomTTSProvider).
 */
export type TTSProviderId = BuiltInTTSProviderId | `custom-tts-${string}`;

/**
 * Voice information for TTS
 *
 * Describes one selectable voice offered by a TTS provider.
 */
export interface TTSVoiceInfo {
  /** Provider-specific voice identifier, sent as-is in synthesis requests. */
  id: string;
  /** Human-readable display name of the voice. */
  name: string;
  /** Language/locale tag of the voice (exact format is provider-defined). */
  language: string;
  localeName?: string; // Language name in its native script (e.g., "中文(简体,中国)", "日本語")
  /** Voice gender, when the provider reports one. */
  gender?: 'male' | 'female' | 'neutral';
  /** Optional free-form description of the voice. */
  description?: string;
  /** Model IDs this voice is compatible with. Undefined = all models. */
  compatibleModels?: string[];
}

/**
 * TTS Provider Configuration
 *
 * Static metadata describing a TTS provider: identity, credentials
 * requirements, and its catalog of models, voices, and output formats.
 * Instances live in the TTS_PROVIDERS registry in constants.ts.
 */
export interface TTSProviderConfig {
  /** Stable provider ID; also the registry key in constants.ts. */
  id: TTSProviderId;
  /** Display name (i18n overrides may apply; see lib/i18n.ts). */
  name: string;
  /** Whether requests to this provider require an API key (false for e.g. browser-native). */
  requiresApiKey: boolean;
  /** Base API URL used when the user has not configured one. */
  defaultBaseUrl?: string;
  /** Path to the provider's icon asset (e.g. '/elevenlabs.svg'). */
  icon?: string;
  /** Available models. Empty array means provider has no model concept (e.g. Azure, Browser Native). */
  models: Array<{ id: string; name: string }>;
  /** Default model ID used when user hasn't selected one. Empty string if no models. */
  defaultModelId: string;
  /** Voices this provider offers; see TTSVoiceInfo for per-voice fields. */
  voices: TTSVoiceInfo[];
  supportedFormats: string[]; // ['mp3', 'wav', 'opus', etc.]
  /** Supported playback-speed multiplier range; omitted if speed is not adjustable. */
  speedRange?: {
    min: number;
    max: number;
    default: number;
  };
}

/**
 * TTS Model Configuration for API calls
 *
 * Runtime settings for a single synthesis request: which provider/model to
 * call, credentials, and voice parameters. Consumed by generateTTS() in
 * tts-providers.ts.
 */
export interface TTSModelConfig {
  /** Provider that handles this request. */
  providerId: TTSProviderId;
  /** Model to use; when omitted the provider's defaultModelId is expected to apply (see TTSProviderConfig). */
  modelId?: string;
  /** API key; only needed when the provider's requiresApiKey is true. */
  apiKey?: string;
  /** Overrides the provider's defaultBaseUrl when set. */
  baseUrl?: string;
  /** Voice identifier — presumably matches a TTSVoiceInfo.id for the provider; verify against caller. */
  voice: string;
  /** Speed multiplier; expected to fall within the provider's speedRange. */
  speed?: number;
  /** Output audio format; expected to be one of the provider's supportedFormats. */
  format?: string;
  /** Escape hatch for provider-specific request options not modeled above. */
  providerOptions?: Record<string, unknown>;
}

// ============================================================================
// ASR (Automatic Speech Recognition) Types
// ============================================================================

/**
 * ASR Provider IDs
 *
 * Built-in (first-party) ASR providers shipped with the app.
 * Add new ASR providers here as union members.
 * Keep in sync with ASR_PROVIDERS registry in constants.ts
 */
export type BuiltInASRProviderId = 'openai-whisper' | 'browser-native' | 'qwen-asr';

/**
 * Full set of ASR provider IDs: every built-in provider plus user-defined
 * custom providers, whose IDs always carry the `custom-asr-` prefix
 * (detected at runtime via isCustomASRProvider).
 */
export type ASRProviderId = BuiltInASRProviderId | `custom-asr-${string}`;

/**
 * ASR Provider Configuration
 *
 * Static metadata describing an ASR provider: identity, credentials
 * requirements, and its supported models, languages, and audio formats.
 * Instances live in the ASR_PROVIDERS registry in constants.ts.
 */
export interface ASRProviderConfig {
  /** Stable provider ID; also the registry key in constants.ts. */
  id: ASRProviderId;
  /** Display name (i18n overrides may apply; see lib/i18n.ts). */
  name: string;
  /** Whether requests to this provider require an API key (false for e.g. browser-native). */
  requiresApiKey: boolean;
  /** Base API URL used when the user has not configured one. */
  defaultBaseUrl?: string;
  /** Path to the provider's icon asset. */
  icon?: string;
  /** Available models; empty array when the provider has no model concept (mirrors TTSProviderConfig.models). */
  models: Array<{ id: string; name: string }>;
  /** Default model ID used when user hasn't selected one. Empty string if no models. */
  defaultModelId: string;
  /** Language tags the provider can transcribe. */
  supportedLanguages: string[];
  /** Accepted input audio formats (e.g. 'mp3', 'wav'). */
  supportedFormats: string[];
}

/**
 * ASR Model Configuration for API calls
 *
 * Runtime settings for a single transcription request. Consumed by
 * transcribeAudio() in asr-providers.ts.
 */
export interface ASRModelConfig {
  /** Provider that handles this request. */
  providerId: ASRProviderId;
  /** Model to use; when omitted the provider's defaultModelId is expected to apply (see ASRProviderConfig). */
  modelId?: string;
  /** API key; only needed when the provider's requiresApiKey is true. */
  apiKey?: string;
  /** Overrides the provider's defaultBaseUrl when set. */
  baseUrl?: string;
  /** Hint for the spoken language; expected to be one of the provider's supportedLanguages. */
  language?: string;
}

/**
 * Returns true if the provider ID is a user-defined custom TTS provider.
 *
 * Acts as a type guard: on a `true` result the compiler narrows `id` to the
 * `custom-tts-${string}` branch of TTSProviderId, so callers can handle
 * custom providers without a cast.
 *
 * @param id - Any provider ID string.
 * @returns Whether `id` carries the reserved `custom-tts-` prefix.
 */
export function isCustomTTSProvider(id: string): id is `custom-tts-${string}` {
  return id.startsWith('custom-tts-');
}

/**
 * Returns true if the provider ID is a user-defined custom ASR provider.
 *
 * Acts as a type guard: on a `true` result the compiler narrows `id` to the
 * `custom-asr-${string}` branch of ASRProviderId, so callers can handle
 * custom providers without a cast.
 *
 * @param id - Any provider ID string.
 * @returns Whether `id` carries the reserved `custom-asr-` prefix.
 */
export function isCustomASRProvider(id: string): id is `custom-asr-${string}` {
  return id.startsWith('custom-asr-');
}