OpenMAIC-React / src /lib /hooks /use-scene-generator.ts
muthuk1's picture
Convert OpenMAIC from Next.js to React (Vite)
f56a29b verified
import { useCallback, useRef } from 'react';
import { useStageStore } from '@/lib/store/stage';
import { getCurrentModelConfig } from '@/lib/utils/model-config';
import { useSettingsStore } from '@/lib/store/settings';
import { db } from '@/lib/utils/database';
import type { SceneOutline, PdfImage, ImageMapping } from '@/lib/types/generation';
import type { AgentInfo } from '@/lib/generation/generation-pipeline';
import type { Scene } from '@/lib/types/stage';
import type { SpeechAction } from '@/lib/types/action';
import { splitLongSpeechActions } from '@/lib/audio/tts-utils';
import { getVoxCPMProviderOptions } from '@/lib/audio/voxcpm-voices';
import { generateMediaForOutlines } from '@/lib/media/media-orchestrator';
import { createLogger } from '@/lib/logger';
// Module-level logger for this file, created with the name 'SceneGenerator'.
const log = createLogger('SceneGenerator');
/** Result returned by `fetchSceneContent` (step 1 of scene generation). */
interface SceneContentResult {
  /** Whether the content request succeeded. */
  success: boolean;
  /** Generated content; passed through unchanged to the actions step. */
  content?: unknown;
  /** When present, used in place of the requested outline for the actions step. */
  effectiveOutline?: SceneOutline;
  /** Failure description when `success` is false. */
  error?: string;
}
/** Result returned by `fetchSceneActions` (step 2 of scene generation). */
interface SceneActionsResult {
  /** Whether the actions request succeeded. */
  success: boolean;
  /** The assembled scene, present on success. */
  scene?: Scene;
  /** Speech texts to feed into the next scene's actions request. */
  previousSpeeches?: string[];
  /** Failure description when `success` is false. */
  error?: string;
}
/**
 * Build the HTTP headers attached to generation requests.
 *
 * Combines the current model configuration with the selected image/video
 * provider settings and the media-generation toggles. Any missing value is
 * sent as an empty string; the toggles are sent as 'true' / 'false'.
 */
function getApiHeaders(): HeadersInit {
  const model = getCurrentModelConfig();
  const {
    imageProviderId,
    imageModelId,
    imageProvidersConfig,
    imageGenerationEnabled,
    videoProviderId,
    videoModelId,
    videoProvidersConfig,
    videoGenerationEnabled,
  } = useSettingsStore.getState();

  // Credentials of whichever provider is currently selected (may be undefined).
  const imageProvider = imageProvidersConfig?.[imageProviderId];
  const videoProvider = videoProvidersConfig?.[videoProviderId];

  const modelHeaders = {
    'x-model': model.modelString || '',
    'x-api-key': model.apiKey || '',
    'x-base-url': model.baseUrl || '',
    'x-provider-type': model.providerType || '',
  };

  // Image generation provider
  const imageHeaders = {
    'x-image-provider': imageProviderId || '',
    'x-image-model': imageModelId || '',
    'x-image-api-key': imageProvider?.apiKey || '',
    'x-image-base-url': imageProvider?.baseUrl || '',
  };

  // Video generation provider
  const videoHeaders = {
    'x-video-provider': videoProviderId || '',
    'x-video-model': videoModelId || '',
    'x-video-api-key': videoProvider?.apiKey || '',
    'x-video-base-url': videoProvider?.baseUrl || '',
  };

  return {
    'Content-Type': 'application/json',
    ...modelHeaders,
    ...imageHeaders,
    ...videoHeaders,
    // Media generation toggles
    'x-image-generation-enabled': String(imageGenerationEnabled ?? false),
    'x-video-generation-enabled': String(videoGenerationEnabled ?? false),
  };
}
/**
 * Attach the current model's `thinkingConfig` to a request body.
 *
 * @param body - Request payload.
 * @returns A shallow copy of `body` with `thinkingConfig` added, or `body`
 *   itself (same reference) when the model config has no thinking config.
 */
function withThinkingConfig<T extends Record<string, unknown>>(body: T): T {
  const thinking = getCurrentModelConfig().thinkingConfig;
  if (!thinking) {
    return body;
  }
  return { ...body, thinkingConfig: thinking } as T;
}
/**
 * Call POST /api/generate/scene-content (step 1).
 *
 * @param params - Request payload; the current thinking config is merged in before sending.
 * @param signal - Optional abort signal forwarded to `fetch`.
 * @returns The parsed JSON body on a 2xx response. On any other status, a
 *   `{ success: false, error }` object whose error is the server-provided
 *   `error` field, or `HTTP <status>` when that field is empty, or
 *   'Request failed' when the body is not valid JSON.
 */
async function fetchSceneContent(
  params: {
    outline: SceneOutline;
    allOutlines: SceneOutline[];
    stageId: string;
    pdfImages?: PdfImage[];
    imageMapping?: ImageMapping;
    stageInfo: {
      name: string;
      description?: string;
      language?: string;
      style?: string;
    };
    agents?: AgentInfo[];
    languageDirective?: string;
  },
  signal?: AbortSignal,
): Promise<SceneContentResult> {
  const res = await fetch('/api/generate/scene-content', {
    method: 'POST',
    headers: getApiHeaders(),
    body: JSON.stringify(withThinkingConfig(params)),
    signal,
  });

  if (res.ok) {
    return res.json();
  }

  // Error responses may carry a JSON body with an `error` field; fall back
  // to a generic message when the body cannot be parsed.
  const pendingBody = res.json();
  let failure: { error?: string };
  try {
    failure = await pendingBody;
  } catch {
    failure = { error: 'Request failed' };
  }
  return { success: false, error: failure.error || `HTTP ${res.status}` };
}
/**
 * Call POST /api/generate/scene-actions (step 2).
 *
 * @param params - Request payload; the current thinking config is merged in before sending.
 * @param signal - Optional abort signal forwarded to `fetch`.
 * @returns The parsed JSON body on a 2xx response. On any other status, a
 *   `{ success: false, error }` object whose error is the server-provided
 *   `error` field, or `HTTP <status>` when that field is empty, or
 *   'Request failed' when the body is not valid JSON.
 */
async function fetchSceneActions(
  params: {
    outline: SceneOutline;
    allOutlines: SceneOutline[];
    content: unknown;
    stageId: string;
    agents?: AgentInfo[];
    previousSpeeches?: string[];
    userProfile?: string;
    languageDirective?: string;
  },
  signal?: AbortSignal,
): Promise<SceneActionsResult> {
  const headers = getApiHeaders();
  const body = JSON.stringify(withThinkingConfig(params));
  const reply = await fetch('/api/generate/scene-actions', {
    method: 'POST',
    headers,
    body,
    signal,
  });

  if (reply.ok) {
    return reply.json();
  }

  // Prefer the server's own error text; otherwise report the HTTP status.
  const pendingBody = reply.json();
  let failure: { error?: string };
  try {
    failure = await pendingBody;
  } catch {
    failure = { error: 'Request failed' };
  }
  const message = failure.error || `HTTP ${reply.status}`;
  return { success: false, error: message };
}
/**
 * Generate TTS for one speech action and store in IndexedDB.
 *
 * Does nothing when the selected provider is 'browser-native-tts'. Otherwise
 * posts the text to /api/generate/tts, decodes the base64 audio from the
 * response, and writes it to `db.audioFiles` under `audioId`.
 *
 * @param audioId - Key under which the audio blob is stored.
 * @param text - Text to synthesize.
 * @param language - Passed to the VoxCPM option lookup; unused by other providers here.
 * @param signal - Optional abort signal forwarded to `fetch`.
 * @throws Error when the request fails or the response lacks audio data; the
 *   message is the response's `details`, then `error`, then the HTTP status.
 */
export async function generateAndStoreTTS(
  audioId: string,
  text: string,
  language?: string,
  signal?: AbortSignal,
): Promise<void> {
  const { ttsProviderId, ttsProvidersConfig, ttsVoice, ttsSpeed } = useSettingsStore.getState();
  if (ttsProviderId === 'browser-native-tts') return;

  const providerConfig = ttsProvidersConfig?.[ttsProviderId];

  // Only VoxCPM takes extra options: the configured ones, overridden by the
  // voice-specific ones.
  let providerOptions: Record<string, unknown> | undefined;
  if (ttsProviderId === 'voxcpm-tts') {
    providerOptions = {
      ...(providerConfig?.providerOptions || {}),
      ...(await getVoxCPMProviderOptions(ttsVoice, { role: 'teacher', language })),
    };
  }

  const baseUrl =
    providerConfig?.serverBaseUrl ||
    providerConfig?.baseUrl ||
    providerConfig?.customDefaultBaseUrl ||
    undefined;

  const response = await fetch('/api/generate/tts', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      text,
      audioId,
      ttsProviderId,
      ttsModelId: providerConfig?.modelId,
      ttsVoice,
      ttsSpeed,
      ttsApiKey: providerConfig?.apiKey || undefined,
      ttsBaseUrl: baseUrl,
      ttsProviderOptions: providerOptions,
    }),
    signal,
  });

  // A non-JSON body is treated as a failed response.
  const pendingBody = response.json();
  let data;
  try {
    data = await pendingBody;
  } catch {
    data = { success: false, error: response.statusText || 'Invalid TTS response' };
  }

  const hasAudio = response.ok && data.success && data.base64 && data.format;
  if (!hasAudio) {
    const err = new Error(
      data.details || data.error || `TTS request failed: HTTP ${response.status}`,
    );
    log.warn('TTS failed for', audioId, ':', err);
    throw err;
  }

  // atob yields one character per byte, so each char code is the byte value.
  const bytes = Uint8Array.from(atob(data.base64), (ch) => ch.charCodeAt(0));
  const blob = new Blob([bytes], { type: `audio/${data.format}` });

  await db.audioFiles.put({
    id: audioId,
    blob,
    format: data.format,
    createdAt: Date.now(),
  });
}
/**
 * Generate TTS for all speech actions in a scene. Returns result.
 *
 * Mutates `scene`: its actions are replaced by the output of
 * `splitLongSpeechActions`, and every speech action that is attempted gets an
 * `audioId` assigned. Actions are processed one at a time, in order.
 *
 * A provider failure on one action is recorded and the loop continues with the
 * next action. Once `signal` is aborted, no further TTS requests are started:
 * the remaining actions are counted as failed and the function returns
 * immediately, so an abort is still reported as `success: false`.
 *
 * @param scene - Scene whose speech actions should be synthesized.
 * @param language - Language passed through to `generateAndStoreTTS`.
 * @param signal - Optional abort signal passed through to `generateAndStoreTTS`.
 * @returns `success` is true only when no action failed; `failedCount` is the
 *   number of speech actions without generated audio; `error` is the message
 *   of the last recorded failure.
 */
async function generateTTSForScene(
  scene: Scene,
  language?: string,
  signal?: AbortSignal,
): Promise<{ success: boolean; failedCount: number; error?: string }> {
  const providerId = useSettingsStore.getState().ttsProviderId;
  scene.actions = splitLongSpeechActions(scene.actions || [], providerId);
  const speechActions = scene.actions.filter(
    (a): a is SpeechAction => a.type === 'speech' && !!a.text,
  );
  if (speechActions.length === 0) return { success: true, failedCount: 0 };

  let failedCount = 0;
  let lastError: string | undefined;
  // Use scene order to make audio IDs unique across scenes.
  // This prevents audio collision when action IDs are sequential (e.g., action_1, action_2).
  const sceneOrder = scene.order;

  for (const [index, action] of speechActions.entries()) {
    // Already cancelled: don't start requests that are guaranteed to fail.
    if (signal?.aborted) {
      failedCount += speechActions.length - index;
      lastError = lastError ?? 'TTS generation aborted';
      break;
    }

    // Include scene order in audioId to prevent collision across scenes.
    const audioId = `tts_s${sceneOrder}_${action.id}`;
    action.audioId = audioId;
    try {
      await generateAndStoreTTS(audioId, action.text, language, signal);
    } catch (error) {
      failedCount++;
      lastError = error instanceof Error ? error.message : `TTS failed for action ${action.id}`;
      if (signal?.aborted) {
        // Cancelled mid-request: this is not a provider failure, and every
        // later action would fail the same way. Count them and stop.
        failedCount += speechActions.length - index - 1;
        break;
      }
      log.warn('TTS generation failed:', {
        providerId,
        actionId: action.id,
        sceneOrder,
        audioId,
        textLength: action.text.length,
        error: lastError,
      });
    }
  }

  return {
    success: failedCount === 0,
    failedCount,
    error: lastError,
  };
}
/** Optional callbacks accepted by `useSceneGenerator`. */
export interface UseSceneGeneratorOptions {
  /** Called after a generated scene has been added to the stage store; `index` is the outline's order. */
  onSceneGenerated?: (scene: Scene, index: number) => void;
  /** Called when generating the scene for `outline` fails, with the error message. */
  onSceneFailed?: (outline: SceneOutline, error: string) => void;
  /** Called right before the content step and right before the actions step of each outline. */
  onPhaseChange?: (phase: 'content' | 'actions', outline: SceneOutline) => void;
  /** Called when no outlines remain to be generated. */
  onComplete?: () => void;
}
/** Inputs shared by every scene request of one generation run. */
export interface GenerationParams {
  /** Sent with the content request. */
  pdfImages?: PdfImage[];
  /** Sent with the content request. */
  imageMapping?: ImageMapping;
  /** Stage description sent with the content request. */
  stageInfo: {
    name: string;
    description?: string;
    /** Used as the TTS language when no `languageDirective` is given. */
    language?: string;
    style?: string;
  };
  /** Sent with both the content and the actions request. */
  agents?: AgentInfo[];
  /** Sent with the actions request only. */
  userProfile?: string;
  /** Sent with both requests; takes precedence over `stageInfo.language` for TTS. */
  languageDirective?: string;
}
/**
 * React hook that generates scenes for the outlines of the current stage.
 *
 * Each outline is processed in two requests (content, then actions), followed
 * by TTS when it is enabled and not browser-native. Progress, failures and
 * results are written to the stage store.
 *
 * @param options - Optional lifecycle callbacks.
 * @returns
 *  - `generateRemaining(params)`: generate, in order, every outline that has no
 *    scene yet. Stops at the first failure and sets the status to 'paused'.
 *    A call made while a run is in progress only records `params`.
 *  - `retrySingleOutline(outlineId)`: regenerate one outline from the failed
 *    list, then resume the remaining outlines if any.
 *  - `stop()`: cancel the current run (fetches and media generation).
 *  - `isGenerating()`: whether a `generateRemaining` run is in progress.
 */
export function useSceneGenerator(options: UseSceneGeneratorOptions = {}) {
  const abortRef = useRef(false);
  const generatingRef = useRef(false);
  const mediaAbortRef = useRef<AbortController | null>(null);
  const fetchAbortRef = useRef<AbortController | null>(null);
  const lastParamsRef = useRef<GenerationParams | null>(null);
  const generateRemainingRef = useRef<((params: GenerationParams) => Promise<void>) | null>(null);
  const store = useStageStore;

  const generateRemaining = useCallback(
    async (params: GenerationParams) => {
      lastParamsRef.current = params;
      if (generatingRef.current) return;
      generatingRef.current = true;
      abortRef.current = false;

      const removeGeneratingOutline = (outlineId: string) => {
        const current = store.getState().generatingOutlines;
        if (!current.some((o) => o.id === outlineId)) return;
        store.getState().setGeneratingOutlines(current.filter((o) => o.id !== outlineId));
      };

      const state = store.getState();
      const { outlines, scenes, stage } = state;
      const startEpoch = state.generationEpoch;
      if (!stage || outlines.length === 0) {
        generatingRef.current = false;
        return;
      }

      store.getState().setGenerationStatus('generating');

      // Determine pending outlines
      const completedOrders = new Set(scenes.map((s) => s.order));
      const pending = outlines
        .filter((o) => !completedOrders.has(o.order))
        .sort((a, b) => a.order - b.order);
      if (pending.length === 0) {
        store.getState().setGenerationStatus('completed');
        store.getState().setGeneratingOutlines([]);
        options.onComplete?.();
        generatingRef.current = false;
        return;
      }
      store.getState().setGeneratingOutlines(pending);

      // Create the AbortController only once we know requests will be made,
      // so the early returns above never leave a stale controller behind.
      const fetchController = new AbortController();
      fetchAbortRef.current = fetchController;
      const signal = fetchController.signal;

      // Launch media generation in parallel — does not block content/action generation
      mediaAbortRef.current = new AbortController();
      generateMediaForOutlines(outlines, stage.id, mediaAbortRef.current.signal).catch((err) => {
        log.warn('Media generation error:', err);
      });

      // Get previousSpeeches from last completed scene
      let previousSpeeches: string[] = [];
      const sortedScenes = [...scenes].sort((a, b) => a.order - b.order);
      if (sortedScenes.length > 0) {
        const lastScene = sortedScenes[sortedScenes.length - 1];
        previousSpeeches = (lastScene.actions || [])
          .filter((a): a is SpeechAction => a.type === 'speech')
          .map((a) => a.text);
      }

      // Serial generation loop — two-step per outline
      try {
        let pausedByFailureOrAbort = false;
        for (const outline of pending) {
          if (abortRef.current || store.getState().generationEpoch !== startEpoch) {
            store.getState().setGenerationStatus('paused');
            pausedByFailureOrAbort = true;
            break;
          }
          store.getState().setCurrentGeneratingOrder(outline.order);

          // Step 1: Generate content
          options.onPhaseChange?.('content', outline);
          const contentResult = await fetchSceneContent(
            {
              outline,
              allOutlines: outlines,
              stageId: stage.id,
              pdfImages: params.pdfImages,
              imageMapping: params.imageMapping,
              stageInfo: params.stageInfo,
              agents: params.agents,
              languageDirective: params.languageDirective,
            },
            signal,
          );
          if (!contentResult.success || !contentResult.content) {
            if (abortRef.current || store.getState().generationEpoch !== startEpoch) {
              pausedByFailureOrAbort = true;
              break;
            }
            store.getState().addFailedOutline(outline);
            options.onSceneFailed?.(outline, contentResult.error || 'Content generation failed');
            store.getState().setGenerationStatus('paused');
            pausedByFailureOrAbort = true;
            break;
          }
          if (abortRef.current || store.getState().generationEpoch !== startEpoch) {
            store.getState().setGenerationStatus('paused');
            pausedByFailureOrAbort = true;
            break;
          }

          // Step 2: Generate actions + assemble scene
          options.onPhaseChange?.('actions', outline);
          const actionsResult = await fetchSceneActions(
            {
              outline: contentResult.effectiveOutline || outline,
              allOutlines: outlines,
              content: contentResult.content,
              stageId: stage.id,
              agents: params.agents,
              previousSpeeches,
              userProfile: params.userProfile,
              languageDirective: params.languageDirective,
            },
            signal,
          );
          if (actionsResult.success && actionsResult.scene) {
            const scene = actionsResult.scene;
            const settings = useSettingsStore.getState();
            // TTS generation — failure means the whole scene fails
            if (settings.ttsEnabled && settings.ttsProviderId !== 'browser-native-tts') {
              const ttsResult = await generateTTSForScene(
                scene,
                params.languageDirective || params.stageInfo.language,
                signal,
              );
              if (!ttsResult.success) {
                if (abortRef.current || store.getState().generationEpoch !== startEpoch) {
                  pausedByFailureOrAbort = true;
                  break;
                }
                store.getState().addFailedOutline(outline);
                options.onSceneFailed?.(outline, ttsResult.error || 'TTS generation failed');
                store.getState().setGenerationStatus('paused');
                pausedByFailureOrAbort = true;
                break;
              }
            }
            // Epoch changed — stage switched, discard this scene
            if (store.getState().generationEpoch !== startEpoch) {
              pausedByFailureOrAbort = true;
              break;
            }
            removeGeneratingOutline(outline.id);
            store.getState().addScene(scene);
            options.onSceneGenerated?.(scene, outline.order);
            previousSpeeches = actionsResult.previousSpeeches || [];
          } else {
            if (abortRef.current || store.getState().generationEpoch !== startEpoch) {
              pausedByFailureOrAbort = true;
              break;
            }
            store.getState().addFailedOutline(outline);
            options.onSceneFailed?.(outline, actionsResult.error || 'Actions generation failed');
            store.getState().setGenerationStatus('paused');
            pausedByFailureOrAbort = true;
            break;
          }
        }
        if (!abortRef.current && !pausedByFailureOrAbort) {
          store.getState().setGenerationStatus('completed');
          store.getState().setGeneratingOutlines([]);
          options.onComplete?.();
        }
      } catch (err: unknown) {
        // AbortError is expected when stop() is called — don't treat as failure
        if (err instanceof DOMException && err.name === 'AbortError') {
          log.info('Generation aborted');
          store.getState().setGenerationStatus('paused');
        } else {
          throw err;
        }
      } finally {
        generatingRef.current = false;
        fetchAbortRef.current = null;
      }
    },
    [options, store],
  );

  // Keep ref in sync so retrySingleOutline can call it
  generateRemainingRef.current = generateRemaining;

  /** Cancel the current run: flag it, bump the epoch, abort fetches and media generation. */
  const stop = useCallback(() => {
    abortRef.current = true;
    store.getState().bumpGenerationEpoch();
    fetchAbortRef.current?.abort();
    mediaAbortRef.current?.abort();
  }, [store]);

  const isGenerating = useCallback(() => generatingRef.current, []);

  /**
   * Retry a single failed outline from scratch (content → actions → TTS).
   *
   * Does nothing unless the outline is in the failed list, a stage is loaded,
   * and `generateRemaining` has been called at least once (its params are reused).
   * On failure the outline goes back to the failed list and the status becomes
   * 'paused'. On success the remaining outlines are resumed, or the status
   * becomes 'completed' when none are left.
   *
   * NOTE(review): this retry's AbortController is local, so stop() does not
   * cancel it, and the generation epoch is not checked before the scene is
   * added — confirm whether a stage switch during a retry needs guarding.
   */
  const retrySingleOutline = useCallback(
    async (outlineId: string) => {
      const state = store.getState();
      const outline = state.failedOutlines.find((o) => o.id === outlineId);
      const params = lastParamsRef.current;
      if (!outline || !state.stage || !params) return;
      const stageId = state.stage.id;

      const removeGeneratingOutline = () => {
        const current = store.getState().generatingOutlines;
        if (!current.some((o) => o.id === outlineId)) return;
        store.getState().setGeneratingOutlines(current.filter((o) => o.id !== outlineId));
      };

      // Put the outline back on the failed list and leave the 'generating'
      // status set below, mirroring what generateRemaining does on failure.
      const markFailed = () => {
        store.getState().addFailedOutline(outline);
        store.getState().setGenerationStatus('paused');
      };

      // Remove from failed list and mark as generating
      store.getState().retryFailedOutline(outlineId);
      store.getState().setGenerationStatus('generating');
      const currentGenerating = store.getState().generatingOutlines;
      if (!currentGenerating.some((o) => o.id === outline.id)) {
        store.getState().setGeneratingOutlines([...currentGenerating, outline]);
      }

      const abortController = new AbortController();
      const signal = abortController.signal;

      try {
        // Step 1: Content
        const contentResult = await fetchSceneContent(
          {
            outline,
            allOutlines: state.outlines,
            stageId,
            pdfImages: params.pdfImages,
            imageMapping: params.imageMapping,
            stageInfo: params.stageInfo,
            agents: params.agents,
            languageDirective: params.languageDirective,
          },
          signal,
        );
        if (!contentResult.success || !contentResult.content) {
          markFailed();
          return;
        }

        // Step 2: Actions
        const sortedScenes = [...store.getState().scenes].sort((a, b) => a.order - b.order);
        const lastScene = sortedScenes[sortedScenes.length - 1];
        const previousSpeeches = lastScene
          ? (lastScene.actions || [])
              .filter((a): a is SpeechAction => a.type === 'speech')
              .map((a) => a.text)
          : [];
        const actionsResult = await fetchSceneActions(
          {
            outline: contentResult.effectiveOutline || outline,
            allOutlines: state.outlines,
            content: contentResult.content,
            stageId,
            agents: params.agents,
            previousSpeeches,
            userProfile: params.userProfile,
            languageDirective: params.languageDirective,
          },
          signal,
        );
        if (!actionsResult.success || !actionsResult.scene) {
          markFailed();
          return;
        }

        // Step 3: TTS
        const settings = useSettingsStore.getState();
        if (settings.ttsEnabled && settings.ttsProviderId !== 'browser-native-tts') {
          const ttsResult = await generateTTSForScene(
            actionsResult.scene,
            params.languageDirective || params.stageInfo.language,
            signal,
          );
          if (!ttsResult.success) {
            markFailed();
            return;
          }
        }

        removeGeneratingOutline();
        store.getState().addScene(actionsResult.scene);

        if (store.getState().generatingOutlines.length > 0) {
          // Resume remaining generation if there are pending outlines
          const resume = generateRemainingRef.current;
          const resumeParams = lastParamsRef.current;
          if (resume && resumeParams) {
            // Fire-and-forget: generateRemaining rethrows unexpected errors,
            // so handle them here instead of leaving an unhandled rejection.
            resume(resumeParams).catch((err: unknown) => {
              log.warn('Resumed generation failed:', err);
              store.getState().setGenerationStatus('paused');
            });
          }
        } else if (!generatingRef.current) {
          // Nothing left to generate and no run in progress: leave the
          // 'generating' status that was set at the start of this retry.
          store.getState().setGenerationStatus('completed');
        }
      } catch (err: unknown) {
        if (!(err instanceof DOMException && err.name === 'AbortError')) {
          log.warn('Retry failed for outline', outlineId, ':', err);
          markFailed();
        }
      }
    },
    [store],
  );

  return { generateRemaining, retrySingleOutline, stop, isGenerating };
}