muthuk1
/

OpenMAIC-React

Model card Files Files and versions

OpenMAIC-React / src /lib /audio /tts-utils.ts

muthuk1's picture

Convert OpenMAIC from Next.js to React (Vite)

f56a29b verified 10 days ago

3.12 kB

	/**
	* Shared TTS utilities used by both client-side and server-side generation.
	*/

	import type { TTSProviderId } from './types';
	import type { Action, SpeechAction } from '@/lib/types/action';
	import { createLogger } from '@/lib/logger';

	// Module-level logger created with the 'TTS' label; splitLongSpeechActions
	// uses it to report each speech action that gets split.
	const log = createLogger('TTS');

	/**
	 * Provider-specific max text length limits, keyed by TTS provider id.
	 *
	 * splitLongSpeechActions compares these values against `String.length` of
	 * the speech text. Providers without an entry are treated as having no
	 * limit and their actions are returned unsplit.
	 *
	 * NOTE(review): whether the provider itself counts characters the same way
	 * as JavaScript's `String.length` is not visible here; confirm against the
	 * provider documentation.
	 */
	export const TTS_MAX_TEXT_LENGTH: Partial<Record<TTSProviderId, number>> = {
	'glm-tts': 1024,
	};

	/**
	 * Split long text into chunks of at most `maxLength` UTF-16 code units,
	 * preferring natural boundaries.
	 *
	 * Boundaries are tried in order: sentence-level punctuation
	 * (。！？!?；;：: and newlines), then clause-level commas (，,、), and finally
	 * a hard cut at the limit as a last resort. Neighbouring pieces are packed
	 * greedily into the same chunk for as long as they fit.
	 *
	 * @param text - Text to split; leading/trailing whitespace is ignored.
	 * @param maxLength - Maximum chunk length, measured like `String.length`.
	 *   Fractional values are floored. A value below 1 (or NaN) is treated as
	 *   "no limit" and the trimmed text is returned unsplit.
	 * @returns A single-element array holding the trimmed text when it is
	 *   empty, already fits, or the limit is invalid; otherwise the non-empty,
	 *   trimmed chunks in their original order.
	 */
	export function splitLongSpeechText(text: string, maxLength: number): string[] {
	  const normalized = text.trim();
	  const limit = Math.floor(maxLength);
	  // `!(limit >= 1)` also catches NaN. Without this guard a zero or negative
	  // limit would never advance the hard-split cursor further down.
	  if (!normalized || !(limit >= 1) || normalized.length <= limit) return [normalized];

	  const sentenceBoundary = /(?<=[。！？!?；;：:\n])/u;
	  const clauseBoundary = /(?<=[，,、])/u;

	  const chunks: string[] = [];
	  let current = '';

	  const pushChunk = (value: string): void => {
	    const trimmed = value.trim();
	    if (trimmed) chunks.push(trimmed);
	  };

	  // Adds `piece` to the chunk being built, or starts a new chunk when it
	  // would overflow. `spaced` means the piece was separated from the text
	  // before it by whitespace in the input; one space is re-inserted so that
	  // sentences such as "Hi! There" do not fuse into "Hi!There".
	  const appendPiece = (piece: string, spaced: boolean): void => {
	    if (!current) {
	      current = piece;
	      return;
	    }
	    const glue = spaced ? ' ' : '';
	    if (current.length + glue.length + piece.length <= limit) {
	      current += glue + piece;
	      return;
	    }
	    pushChunk(current);
	    current = piece;
	  };

	  const isHighSurrogate = (code: number): boolean => code >= 0xd800 && code <= 0xdbff;
	  const isLowSurrogate = (code: number): boolean => code >= 0xdc00 && code <= 0xdfff;

	  // Breaks a unit that is longer than the limit: first at clause-level
	  // commas, and only if there are none, at fixed-size cuts.
	  const hardSplit = (unit: string, spaced: boolean): void => {
	    const parts = unit.split(clauseBoundary).filter(Boolean);
	    if (parts.length > 1) {
	      parts.forEach((part, index) => {
	        // Only the first part inherits the gap that preceded the unit;
	        // later parts still carry their own whitespace.
	        const partSpaced = index === 0 && spaced;
	        if (part.length <= limit) appendPiece(part, partSpaced);
	        else hardSplit(part, partSpaced);
	      });
	      return;
	    }

	    let start = 0;
	    while (start < unit.length) {
	      let end = Math.min(start + limit, unit.length);
	      // Do not cut a surrogate pair in half; back off by one code unit
	      // unless that would leave an empty slice (limit of 1).
	      if (
	        end < unit.length &&
	        end - start > 1 &&
	        isHighSurrogate(unit.charCodeAt(end - 1)) &&
	        isLowSurrogate(unit.charCodeAt(end))
	      ) {
	        end -= 1;
	      }
	      appendPiece(unit.slice(start, end), start === 0 && spaced);
	      start = end;
	    }
	  };

	  // True while whitespace has been seen since the last non-empty unit.
	  let gapPending = false;
	  for (const raw of normalized.split(sentenceBoundary)) {
	    const unit = raw.trim();
	    if (!unit) {
	      if (raw) gapPending = true;
	      continue;
	    }
	    const spaced = gapPending || raw !== raw.trimStart();
	    gapPending = raw !== raw.trimEnd();
	    if (unit.length <= limit) appendPiece(unit, spaced);
	    else hardSplit(unit, spaced);
	  }

	  pushChunk(current);
	  return chunks;
	}

	/**
	 * Split over-long speech actions into several shorter speech actions so
	 * that each one stays within the TTS provider's text length limit.
	 *
	 * Only actions with `type === 'speech'` whose text exceeds the limit are
	 * touched. Each resulting sub-action copies the original action's fields,
	 * gets the id `<originalId>_tts_<n>` (n starting at 1) and one text chunk,
	 * and has `audioId` removed — presumably so every sub-action can be given
	 * its own audio file later, with no byte concatenation needed.
	 *
	 * @param actions - Actions to inspect; the array itself is not mutated.
	 * @param providerId - Provider whose entry in TTS_MAX_TEXT_LENGTH is used.
	 * @returns The same `actions` array instance when the provider has no limit
	 *   or nothing needed splitting; otherwise a new array with the split
	 *   actions expanded in place.
	 */
	export function splitLongSpeechActions(actions: Action[], providerId: TTSProviderId): Action[] {
	  const maxLength = TTS_MAX_TEXT_LENGTH[providerId];
	  // No configured limit for this provider: nothing to split.
	  if (!maxLength) return actions;

	  let didSplit = false;
	  const nextActions: Action[] = actions.flatMap((action) => {
	    if (action.type !== 'speech' || !action.text || action.text.length <= maxLength)
	      return [action];

	    const chunks = splitLongSpeechText(action.text, maxLength);
	    // The text may fit after trimming; keep the original action untouched.
	    if (chunks.length <= 1) return [action];
	    didSplit = true;
	    // Strip audioId so the sub-actions do not share the original's audio reference.
	    const { audioId: _audioId, ...baseAction } = action as SpeechAction;

	    log.info(
	      `Split speech for ${providerId}: action=${action.id}, len=${action.text.length}, chunks=${chunks.length}`,
	    );
	    return chunks.map((chunk, i) => ({
	      ...baseAction,
	      id: `${action.id}_tts_${i + 1}`,
	      text: chunk,
	    }));
	  });
	  // Hand back the caller's array when nothing changed so identity checks still hold.
	  return didSplit ? nextActions : actions;
	}