| |
| |
| |
|
|
| import type { TTSProviderId } from './types'; |
| import type { Action, SpeechAction } from '@/lib/types/action'; |
| import { createLogger } from '@/lib/logger'; |
|
|
// Module-level logger for this file, created with the 'TTS' label.
const log = createLogger('TTS');
|
|
| |
/**
 * Maximum speech text length per TTS provider, measured the same way as
 * `String.prototype.length` (UTF-16 code units).
 *
 * Providers without an entry have no configured limit; `splitLongSpeechActions`
 * leaves their actions untouched.
 *
 * NOTE(review): 1024 for 'glm-tts' is presumably the provider's documented
 * per-request limit — confirm against the provider documentation.
 */
export const TTS_MAX_TEXT_LENGTH: Partial<Record<TTSProviderId, number>> = {
  'glm-tts': 1024,
};
|
|
| |
| |
| |
| |
| |
/**
 * Splits speech text into chunks whose length (in UTF-16 code units) does not
 * exceed `maxLength`.
 *
 * Strategy, from the gentlest cut to the hardest:
 * 1. Split after sentence terminators (CJK and ASCII) and newlines.
 * 2. A sentence that is still too long is split after clause separators
 *    (commas / ideographic comma).
 * 3. A clause that is still too long is cut at fixed offsets, without ever
 *    separating the two halves of a surrogate pair (unless the limit is 1).
 *
 * Consecutive pieces are packed greedily into the same chunk while they fit.
 * Pieces are trimmed and concatenated without a separator, so whitespace that
 * followed a sentence boundary is not preserved.
 *
 * @param text Text to split; surrounding whitespace is trimmed first.
 * @param maxLength Maximum chunk length. Fractions are rounded down. A value
 *   that is not a finite number >= 1 is treated as "no limit".
 * @returns The chunks in order. When the trimmed text already fits (or there
 *   is no usable limit) the result is a single-element array holding the
 *   trimmed text, which is `''` for blank input.
 */
export function splitLongSpeechText(text: string, maxLength: number): string[] {
  const normalized = text.trim();

  // Guard against limits that previously hung the hard-split loop (<= 0) or
  // silently dropped the whole text (NaN).
  if (!Number.isFinite(maxLength) || maxLength < 1) return [normalized];
  const limit = Math.floor(maxLength);
  if (!normalized || normalized.length <= limit) return [normalized];

  // Split points sit right AFTER these characters (lookbehind), so the
  // punctuation stays attached to the text it closes.
  // Sentence level: U+3002 ideographic full stop, fullwidth and ASCII
  // '!' '?' ';' ':', and newline. Escapes keep the fullwidth forms safe from
  // Unicode normalization of the source file.
  const sentenceBoundary = /(?<=[\u3002\uFF01\uFF1F!?\uFF1B;\uFF1A:\n])/u;
  // Clause level: fullwidth comma, ASCII comma, U+3001 ideographic comma.
  const clauseBoundary = /(?<=[\uFF0C,\u3001])/u;

  const chunks: string[] = [];
  let current = '';

  /** Emits the pending chunk (if it has content) and starts a new one. */
  function flush(): void {
    const trimmed = current.trim();
    if (trimmed) chunks.push(trimmed);
    current = '';
  }

  /** Adds a piece that is known to fit within the limit on its own. */
  function appendUnit(unit: string): void {
    if (current && current.length + unit.length > limit) flush();
    current += unit;
  }

  /** Routes a piece to greedy packing or to further splitting. */
  function placeUnit(unit: string): void {
    if (unit.length <= limit) appendUnit(unit);
    else hardSplitUnit(unit);
  }

  /** Breaks an over-long piece at clause separators, else at fixed offsets. */
  function hardSplitUnit(unit: string): void {
    const clauses = unit.split(clauseBoundary).filter(Boolean);
    if (clauses.length > 1) {
      for (const clause of clauses) placeUnit(clause);
      return;
    }

    let start = 0;
    while (start < unit.length) {
      let end = Math.min(start + limit, unit.length);
      if (end < unit.length && end - start > 1) {
        const before = unit.charCodeAt(end - 1);
        const after = unit.charCodeAt(end);
        const cutsSurrogatePair =
          before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
        // Back off one code unit so the pair moves to the next slice intact.
        if (cutsSurrogatePair) end -= 1;
      }
      appendUnit(unit.slice(start, end));
      start = end;
    }
  }

  // `normalized` is non-empty and trimmed here, so at least one unit survives.
  const units = normalized
    .split(sentenceBoundary)
    .map((part) => part.trim())
    .filter(Boolean);

  for (const unit of units) placeUnit(unit);
  flush();
  return chunks;
}
|
|
| |
| |
| |
| |
| |
/**
 * Replaces every speech action whose text exceeds the provider's limit with a
 * sequence of shorter speech actions.
 *
 * - Providers without an entry in `TTS_MAX_TEXT_LENGTH` are left alone.
 * - Non-speech actions, speech actions with empty text, and texts within the
 *   limit are passed through unchanged.
 * - Each chunk copies the original action minus `audioId`, gets the id
 *   `<originalId>_tts_<n>` (1-based) and carries one chunk of the text.
 * - A text that is over the limit only because of surrounding whitespace is
 *   not split; it is replaced by a copy carrying the trimmed text so the
 *   limit is still honoured.
 *
 * @param actions Actions to inspect; the array itself is never mutated.
 * @param providerId Provider whose limit applies.
 * @returns The same `actions` array when nothing changed, otherwise a new array.
 */
export function splitLongSpeechActions(actions: Action[], providerId: TTSProviderId): Action[] {
  const maxLength = TTS_MAX_TEXT_LENGTH[providerId];
  // No configured limit (or a zero limit): nothing to enforce.
  if (!maxLength) return actions;

  let changed = false;
  const nextActions: Action[] = actions.flatMap((action): Action[] => {
    if (action.type !== 'speech' || !action.text || action.text.length <= maxLength) {
      return [action];
    }

    const chunks = splitLongSpeechText(action.text, maxLength);
    if (chunks.length <= 1) {
      // The text only exceeded the limit through leading/trailing whitespace.
      // Keep the action (id and audioId included) but send the trimmed text;
      // whitespace-only text has nothing useful to substitute, so leave it.
      const trimmedText = chunks[0];
      if (!trimmedText || trimmedText === action.text) return [action];
      changed = true;
      return [{ ...(action as SpeechAction), text: trimmedText }];
    }

    changed = true;
    // audioId is deliberately left out of the chunks.
    // NOTE(review): presumably because it refers to audio for the full text — confirm.
    const { audioId: _audioId, ...baseAction } = action as SpeechAction;

    log.info(
      `Split speech for ${providerId}: action=${action.id}, len=${action.text.length}, chunks=${chunks.length}`,
    );
    return chunks.map((chunk, i) => ({
      ...baseAction,
      id: `${action.id}_tts_${i + 1}`,
      text: chunk,
    }));
  });

  // Preserve reference identity when no action was touched.
  return changed ? nextActions : actions;
}
|
|