// OpenMAIC-React — src/lib/audio/types.ts
// (imported at commit f56a29b: "Convert OpenMAIC from Next.js to React (Vite)")
/**
* Audio Provider Type Definitions
*
* Unified types for TTS (Text-to-Speech) and ASR (Automatic Speech Recognition)
* with extensible architecture to support multiple providers.
*
* Currently Supported TTS Providers:
* - OpenAI TTS (https://platform.openai.com/docs/guides/text-to-speech)
* - Azure TTS (https://learn.microsoft.com/en-us/azure/ai-services/speech-service/text-to-speech)
* - GLM TTS (https://docs.bigmodel.cn/cn/guide/models/sound-and-video/glm-tts)
* - Qwen TTS (https://bailian.console.aliyun.com/)
* - Doubao TTS (https://www.volcengine.com/docs/6561/1257543)
* - Browser Native TTS (Web Speech API, client-side only)
*
* Currently Supported ASR Providers:
* - OpenAI Whisper (https://platform.openai.com/docs/guides/speech-to-text)
* - Browser Native (Web Speech API, client-side only)
* - Qwen ASR (DashScope API)
*
* Future Provider Support (extensible):
* - ElevenLabs TTS/ASR (https://elevenlabs.io/docs)
* - Fish Audio TTS (https://fish.audio/docs)
* - Cartesia TTS (https://cartesia.ai/docs)
* - PlayHT TTS (https://docs.play.ht/)
* - AssemblyAI ASR (https://www.assemblyai.com/docs)
* - Deepgram ASR (https://developers.deepgram.com/docs)
*
* HOW TO ADD A NEW PROVIDER:
*
* Step 1: Add provider ID to the union type
* - For TTS: Add to TTSProviderId below
* - For ASR: Add to ASRProviderId below
*
* Step 2: Add provider configuration to constants.ts
* - Define provider metadata (name, icon, voices, formats, etc.)
* - Add to TTS_PROVIDERS or ASR_PROVIDERS registry
*
* Step 3: Implement provider logic in tts-providers.ts or asr-providers.ts
* - Add case to generateTTS() or transcribeAudio() switch statement
* - Implement API call logic for the new provider
*
* Step 4: Add i18n translations
* - Add provider name translations in lib/i18n.ts
* - Format: `provider{ProviderName}TTS` or `provider{ProviderName}ASR`
*
* Step 5 (Optional): Create client-side hook if needed
* - For browser-only providers, create hooks like use-browser-tts.ts
* - Export from lib/hooks/
*
* Example: Adding ElevenLabs TTS
* ================================
* 1. Add 'elevenlabs-tts' to TTSProviderId union type
* 2. In constants.ts:
* TTS_PROVIDERS['elevenlabs-tts'] = {
* id: 'elevenlabs-tts',
* name: 'ElevenLabs',
* requiresApiKey: true,
* defaultBaseUrl: 'https://api.elevenlabs.io/v1',
* icon: '/elevenlabs.svg',
* voices: [...],
* supportedFormats: ['mp3', 'pcm'],
* speedRange: { min: 0.5, max: 2.0, default: 1.0 }
* }
* 3. In tts-providers.ts:
* case 'elevenlabs-tts':
* return await generateElevenLabsTTS(config, text);
* 4. In i18n.ts:
* providerElevenLabsTTS: 'ElevenLabs TTS' / 'ElevenLabs Text-to-Speech'
*/
// ============================================================================
// TTS (Text-to-Speech) Types
// ============================================================================
/**
 * TTS Provider IDs
 *
 * Add new TTS providers here as union members.
 * Keep in sync with TTS_PROVIDERS registry in constants.ts
 *
 * NOTE(review): 'voxcpm-tts', 'elevenlabs-tts' and 'minimax-tts' appear in
 * this union but not in the file header's "Currently Supported TTS Providers"
 * list — presumably the header is stale; confirm against TTS_PROVIDERS.
 */
export type BuiltInTTSProviderId =
  | 'openai-tts'
  | 'azure-tts'
  | 'glm-tts'
  | 'qwen-tts'
  | 'voxcpm-tts'
  | 'doubao-tts'
  | 'elevenlabs-tts'
  | 'minimax-tts'
  | 'browser-native-tts';
// User-defined providers are namespaced with the 'custom-tts-' prefix
// (see isCustomTTSProvider at the bottom of this file).
export type TTSProviderId = BuiltInTTSProviderId | `custom-tts-${string}`;
/**
 * Describes one selectable voice offered by a TTS provider.
 */
export interface TTSVoiceInfo {
  /** Provider-specific voice identifier sent with API requests. */
  id: string;
  /** Human-readable display name. */
  name: string;
  /** Language/locale code of the voice. */
  language: string;
  /** Language name rendered in its native script (e.g. "中文(简体,中国)", "日本語"). */
  localeName?: string;
  /** Voice gender, when the provider reports one. */
  gender?: 'male' | 'female' | 'neutral';
  /** Optional free-text description of the voice. */
  description?: string;
  /** Model IDs this voice is compatible with. Undefined = all models. */
  compatibleModels?: string[];
}
/**
 * Static metadata describing one TTS provider, as stored in the
 * TTS_PROVIDERS registry in constants.ts.
 */
export interface TTSProviderConfig {
  /** Registry key for this provider. */
  id: TTSProviderId;
  /** Display name shown in the UI. */
  name: string;
  /** Whether the user must supply an API key to use this provider. */
  requiresApiKey: boolean;
  /** Selectable models; empty when the provider has no model concept (e.g. Azure, Browser Native). */
  models: { id: string; name: string }[];
  /** Model used when the user hasn't selected one. Empty string if no models. */
  defaultModelId: string;
  /** Voices exposed by this provider. */
  voices: TTSVoiceInfo[];
  /** Supported output formats, e.g. ['mp3', 'wav', 'opus']. */
  supportedFormats: string[];
  /** Default API endpoint, when the provider has a hosted API. */
  defaultBaseUrl?: string;
  /** Path to the provider's icon asset. */
  icon?: string;
  /** Playback-speed bounds, when the provider supports speed control. */
  speedRange?: {
    min: number;
    max: number;
    default: number;
  };
}
/**
 * Per-request settings handed to the TTS layer when calling a provider API.
 */
export interface TTSModelConfig {
  /** Which registered provider to call. */
  providerId: TTSProviderId;
  /** Voice ID to synthesize with. */
  voice: string;
  /** Optional model override for providers that have multiple models. */
  modelId?: string;
  /** Credentials/endpoint overrides for hosted providers. */
  apiKey?: string;
  baseUrl?: string;
  /** Playback speed multiplier, when the provider supports it. */
  speed?: number;
  /** Desired audio output format, e.g. 'mp3'. */
  format?: string;
  /** Escape hatch for provider-specific request options. */
  providerOptions?: Record<string, unknown>;
}
// ============================================================================
// ASR (Automatic Speech Recognition) Types
// ============================================================================
/**
 * ASR Provider IDs
 *
 * Add new ASR providers here as union members.
 * Keep in sync with ASR_PROVIDERS registry in constants.ts
 */
export type BuiltInASRProviderId = 'openai-whisper' | 'browser-native' | 'qwen-asr';
// User-defined providers are namespaced with the 'custom-asr-' prefix
// (see isCustomASRProvider at the bottom of this file).
export type ASRProviderId = BuiltInASRProviderId | `custom-asr-${string}`;
/**
 * Static metadata describing one ASR provider, as stored in the
 * ASR_PROVIDERS registry in constants.ts.
 */
export interface ASRProviderConfig {
  /** Registry key for this provider. */
  id: ASRProviderId;
  /** Display name shown in the UI. */
  name: string;
  /** Whether the user must supply an API key to use this provider. */
  requiresApiKey: boolean;
  /** Selectable models; mirrors the TTSProviderConfig convention. */
  models: { id: string; name: string }[];
  /** Model used when the user hasn't selected one. */
  defaultModelId: string;
  /** Language codes the provider can transcribe. */
  supportedLanguages: string[];
  /** Accepted input audio formats. */
  supportedFormats: string[];
  /** Default API endpoint, when the provider has a hosted API. */
  defaultBaseUrl?: string;
  /** Path to the provider's icon asset. */
  icon?: string;
}
/**
 * Per-request settings handed to the ASR layer when calling a provider API.
 */
export interface ASRModelConfig {
  /** Which registered provider to call. */
  providerId: ASRProviderId;
  /** Optional model override for providers that have multiple models. */
  modelId?: string;
  /** Credentials/endpoint overrides for hosted providers. */
  apiKey?: string;
  baseUrl?: string;
  /** Transcription language hint, when supported. */
  language?: string;
}
/**
 * Returns true if the provider ID is a user-defined custom TTS provider.
 *
 * Acts as a type guard: on a `true` result, `id` is narrowed to the
 * `custom-tts-${string}` branch of TTSProviderId, so callers can pass it
 * to APIs that only accept custom provider IDs without a cast.
 */
export function isCustomTTSProvider(id: string): id is `custom-tts-${string}` {
  return id.startsWith('custom-tts-');
}
/**
 * Returns true if the provider ID is a user-defined custom ASR provider.
 *
 * Acts as a type guard: on a `true` result, `id` is narrowed to the
 * `custom-asr-${string}` branch of ASRProviderId, so callers can pass it
 * to APIs that only accept custom provider IDs without a cast.
 */
export function isCustomASRProvider(id: string): id is `custom-asr-${string}` {
  return id.startsWith('custom-asr-');
}