|
|
|
|
| import { useCallback, useRef } from 'react'; |
| import { useStageStore } from '@/lib/store/stage'; |
| import { getCurrentModelConfig } from '@/lib/utils/model-config'; |
| import { useSettingsStore } from '@/lib/store/settings'; |
| import { db } from '@/lib/utils/database'; |
| import type { SceneOutline, PdfImage, ImageMapping } from '@/lib/types/generation'; |
| import type { AgentInfo } from '@/lib/generation/generation-pipeline'; |
| import type { Scene } from '@/lib/types/stage'; |
| import type { SpeechAction } from '@/lib/types/action'; |
| import { splitLongSpeechActions } from '@/lib/audio/tts-utils'; |
| import { getVoxCPMProviderOptions } from '@/lib/audio/voxcpm-voices'; |
| import { generateMediaForOutlines } from '@/lib/media/media-orchestrator'; |
| import { createLogger } from '@/lib/logger'; |
|
|
/** Logger for this module, created with the name 'SceneGenerator'. */
const log = createLogger('SceneGenerator');
|
|
/**
 * Result of the scene-content request issued by `fetchSceneContent`.
 */
interface SceneContentResult {
  /** `false` when the request failed; `error` then describes why. */
  success: boolean;
  /** Generated content; opaque in this file and forwarded as-is to the actions request. */
  content?: unknown;
  /** When present, callers send this outline to the actions request instead of the requested one. */
  effectiveOutline?: SceneOutline;
  /** Failure description: the server-provided message or an `HTTP <status>` fallback. */
  error?: string;
}
|
|
/**
 * Result of the scene-actions request issued by `fetchSceneActions`.
 */
interface SceneActionsResult {
  /** `false` when the request failed; `error` then describes why. */
  success: boolean;
  /** The generated scene; callers treat a missing scene as a failure even if `success` is true. */
  scene?: Scene;
  /** Speech texts that callers pass along with the next scene's actions request. */
  previousSpeeches?: string[];
  /** Failure description: the server-provided message or an `HTTP <status>` fallback. */
  error?: string;
}
|
|
/**
 * Builds the headers sent with the scene generation requests.
 *
 * Values are read at call time from the current model config and the settings
 * store. Every missing value is sent as an empty string, and the two
 * generation flags are sent as the strings 'true' / 'false'.
 *
 * @returns JSON content type plus the `x-*` model, image-provider,
 *   video-provider and media-flag headers.
 */
function getApiHeaders(): HeadersInit {
  const modelConfig = getCurrentModelConfig();
  const settings = useSettingsStore.getState();
  const imageProvider = settings.imageProvidersConfig?.[settings.imageProviderId];
  const videoProvider = settings.videoProvidersConfig?.[settings.videoProviderId];

  // Language-model selection and credentials.
  const modelHeaders = {
    'x-model': modelConfig.modelString || '',
    'x-api-key': modelConfig.apiKey || '',
    'x-base-url': modelConfig.baseUrl || '',
    'x-provider-type': modelConfig.providerType || '',
  };

  // Image generation provider.
  const imageHeaders = {
    'x-image-provider': settings.imageProviderId || '',
    'x-image-model': settings.imageModelId || '',
    'x-image-api-key': imageProvider?.apiKey || '',
    'x-image-base-url': imageProvider?.baseUrl || '',
  };

  // Video generation provider.
  const videoHeaders = {
    'x-video-provider': settings.videoProviderId || '',
    'x-video-model': settings.videoModelId || '',
    'x-video-api-key': videoProvider?.apiKey || '',
    'x-video-base-url': videoProvider?.baseUrl || '',
  };

  // Media generation toggles; an unset flag is reported as 'false'.
  const mediaFlags = {
    'x-image-generation-enabled': String(settings.imageGenerationEnabled ?? false),
    'x-video-generation-enabled': String(settings.videoGenerationEnabled ?? false),
  };

  return {
    'Content-Type': 'application/json',
    ...modelHeaders,
    ...imageHeaders,
    ...videoHeaders,
    ...mediaFlags,
  };
}
|
|
/**
 * Attaches the current model's `thinkingConfig` to a request body.
 *
 * @param body Request payload.
 * @returns `body` itself when the current model config has no truthy
 *   `thinkingConfig`; otherwise a shallow copy with `thinkingConfig` added
 *   (overriding any key of the same name in `body`).
 */
function withThinkingConfig<T extends Record<string, unknown>>(body: T): T {
  const thinkingConfig = getCurrentModelConfig().thinkingConfig;
  if (!thinkingConfig) {
    return body;
  }
  return { ...body, thinkingConfig } as T;
}
|
|
| |
/**
 * POSTs one outline to `/api/generate/scene-content`.
 *
 * @param params Request payload; serialized as JSON after `withThinkingConfig`.
 * @param signal Optional abort signal handed to `fetch`.
 * @returns The parsed response body on a 2xx status. On any other status a
 *   `{ success: false, error }` object whose message is the body's `error`
 *   field, or `HTTP <status>` when that field is empty. A non-JSON error body
 *   yields 'Request failed'.
 * @throws Whatever `fetch` rejects with (including abort), and JSON parse
 *   errors on a 2xx response.
 */
async function fetchSceneContent(
  params: {
    outline: SceneOutline;
    allOutlines: SceneOutline[];
    stageId: string;
    pdfImages?: PdfImage[];
    imageMapping?: ImageMapping;
    stageInfo: {
      name: string;
      description?: string;
      language?: string;
      style?: string;
    };
    agents?: AgentInfo[];
    languageDirective?: string;
  },
  signal?: AbortSignal,
): Promise<SceneContentResult> {
  const headers = getApiHeaders();
  const body = JSON.stringify(withThinkingConfig(params));
  const response = await fetch('/api/generate/scene-content', {
    method: 'POST',
    headers,
    body,
    signal,
  });

  if (response.ok) {
    return response.json();
  }

  // Error responses may not carry JSON; fall back to a generic message.
  const failure = await response.json().catch(() => ({ error: 'Request failed' }));
  return { success: false, error: failure.error || `HTTP ${response.status}` };
}
|
|
| |
/**
 * POSTs generated content to `/api/generate/scene-actions` to obtain the scene
 * and its actions.
 *
 * @param params Request payload; serialized as JSON after `withThinkingConfig`.
 * @param signal Optional abort signal handed to `fetch`.
 * @returns The parsed response body on a 2xx status. On any other status a
 *   `{ success: false, error }` object whose message is the body's `error`
 *   field, or `HTTP <status>` when that field is empty. A non-JSON error body
 *   yields 'Request failed'.
 * @throws Whatever `fetch` rejects with (including abort), and JSON parse
 *   errors on a 2xx response.
 */
async function fetchSceneActions(
  params: {
    outline: SceneOutline;
    allOutlines: SceneOutline[];
    content: unknown;
    stageId: string;
    agents?: AgentInfo[];
    previousSpeeches?: string[];
    userProfile?: string;
    languageDirective?: string;
  },
  signal?: AbortSignal,
): Promise<SceneActionsResult> {
  const headers = getApiHeaders();
  const body = JSON.stringify(withThinkingConfig(params));
  const response = await fetch('/api/generate/scene-actions', {
    method: 'POST',
    headers,
    body,
    signal,
  });

  if (response.ok) {
    return response.json();
  }

  // Error responses may not carry JSON; fall back to a generic message.
  const failure = await response.json().catch(() => ({ error: 'Request failed' }));
  return { success: false, error: failure.error || `HTTP ${response.status}` };
}
|
|
| |
/**
 * Requests speech audio for `text` from `/api/generate/tts` and stores it in
 * `db.audioFiles` under `audioId`.
 *
 * Returns immediately without doing anything when the configured provider is
 * 'browser-native-tts'.
 *
 * @param audioId Key under which the audio record is stored.
 * @param text Text to synthesize.
 * @param language Optional language, only used to resolve VoxCPM provider options.
 * @param signal Optional abort signal handed to `fetch`.
 * @throws Error when the response is not ok, or the body lacks `success`,
 *   `base64` or `format`; also propagates `fetch` rejections.
 */
export async function generateAndStoreTTS(
  audioId: string,
  text: string,
  language?: string,
  signal?: AbortSignal,
): Promise<void> {
  const settings = useSettingsStore.getState();
  const providerId = settings.ttsProviderId;
  if (providerId === 'browser-native-tts') return;

  const providerConfig = settings.ttsProvidersConfig?.[providerId];

  // Only VoxCPM sends provider options: configured options first, then the
  // voice-specific ones (requested with the 'teacher' role) layered on top.
  const providerOptions =
    providerId === 'voxcpm-tts'
      ? {
          ...(providerConfig?.providerOptions || {}),
          ...(await getVoxCPMProviderOptions(settings.ttsVoice, { role: 'teacher', language })),
        }
      : undefined;

  // First non-empty base URL wins: server, then user, then custom default.
  const baseUrl =
    providerConfig?.serverBaseUrl ||
    providerConfig?.baseUrl ||
    providerConfig?.customDefaultBaseUrl ||
    undefined;

  const requestBody = JSON.stringify({
    text,
    audioId,
    ttsProviderId: providerId,
    ttsModelId: providerConfig?.modelId,
    ttsVoice: settings.ttsVoice,
    ttsSpeed: settings.ttsSpeed,
    ttsApiKey: providerConfig?.apiKey || undefined,
    ttsBaseUrl: baseUrl,
    ttsProviderOptions: providerOptions,
  });

  const response = await fetch('/api/generate/tts', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: requestBody,
    signal,
  });

  // A body that is not JSON is treated as a failed response.
  const data = await response
    .json()
    .catch(() => ({ success: false, error: response.statusText || 'Invalid TTS response' }));

  const hasAudio = response.ok && data.success && data.base64 && data.format;
  if (!hasAudio) {
    const message = data.details || data.error || `TTS request failed: HTTP ${response.status}`;
    const err = new Error(message);
    log.warn('TTS failed for', audioId, ':', err);
    throw err;
  }

  // Decode the base64 payload into raw bytes (atob yields one char per byte).
  const bytes = Uint8Array.from(atob(data.base64), (ch) => ch.charCodeAt(0));
  const blob = new Blob([bytes], { type: `audio/${data.format}` });

  await db.audioFiles.put({
    id: audioId,
    blob,
    format: data.format,
    createdAt: Date.now(),
  });
}
|
|
| |
/**
 * Generates and stores TTS audio for every non-empty speech action of a scene.
 *
 * Mutates `scene`: `scene.actions` is replaced by the result of
 * `splitLongSpeechActions`, and each processed speech action receives an
 * `audioId` of the form `tts_s<scene.order>_<action.id>`.
 *
 * Failures of individual actions are logged and counted rather than thrown.
 * Once `signal` is aborted no further requests are issued: the remaining
 * actions are counted as failed without one doomed request and one warning
 * per action.
 *
 * @param scene Scene whose speech actions are synthesized (mutated, see above).
 * @param language Optional language forwarded to `generateAndStoreTTS`.
 * @param signal Optional abort signal; checked before every request.
 * @returns `success` is true only when no action failed; `error` carries the
 *   message of the last failure.
 */
async function generateTTSForScene(
  scene: Scene,
  language?: string,
  signal?: AbortSignal,
): Promise<{ success: boolean; failedCount: number; error?: string }> {
  const providerId = useSettingsStore.getState().ttsProviderId;
  scene.actions = splitLongSpeechActions(scene.actions || [], providerId);
  const speechActions = scene.actions.filter(
    (a): a is SpeechAction => a.type === 'speech' && !!a.text,
  );
  if (speechActions.length === 0) return { success: true, failedCount: 0 };

  let failedCount = 0;
  let lastError: string | undefined;

  // The scene order is part of the audio id.
  const sceneOrder = scene.order;

  for (const [index, action] of speechActions.entries()) {
    // After an abort every further request would reject immediately, so stop
    // here and report the unprocessed actions as failed.
    if (signal?.aborted) {
      failedCount += speechActions.length - index;
      lastError = lastError ?? 'TTS generation aborted';
      break;
    }

    const audioId = `tts_s${sceneOrder}_${action.id}`;
    action.audioId = audioId;
    try {
      await generateAndStoreTTS(audioId, action.text, language, signal);
    } catch (error) {
      failedCount++;
      lastError = error instanceof Error ? error.message : `TTS failed for action ${action.id}`;
      log.warn('TTS generation failed:', {
        providerId,
        actionId: action.id,
        sceneOrder,
        audioId,
        textLength: action.text.length,
        error: lastError,
      });
    }
  }

  return {
    success: failedCount === 0,
    failedCount,
    error: lastError,
  };
}
|
|
/**
 * Optional callbacks for `useSceneGenerator`. They are invoked by
 * `generateRemaining`; `retrySingleOutline` does not call them.
 */
export interface UseSceneGeneratorOptions {
  /**
   * Called after a generated scene has been added to the store.
   * Note: `index` receives the outline's `order` value.
   */
  onSceneGenerated?: (scene: Scene, index: number) => void;
  /** Called when content, actions or TTS generation fails for an outline (not on abort). */
  onSceneFailed?: (outline: SceneOutline, error: string) => void;
  /** Called right before the content request and right before the actions request of an outline. */
  onPhaseChange?: (phase: 'content' | 'actions', outline: SceneOutline) => void;
  /** Called when nothing is pending or every pending outline has been generated. */
  onComplete?: () => void;
}
|
|
/**
 * Inputs shared by all scene requests of one generation run. The hook keeps
 * the most recent value so that a single-outline retry can reuse it.
 */
export interface GenerationParams {
  /** Forwarded to the scene-content request. */
  pdfImages?: PdfImage[];
  /** Forwarded to the scene-content request. */
  imageMapping?: ImageMapping;
  /**
   * Forwarded to the scene-content request. `language` also serves as the
   * TTS language when `languageDirective` is empty.
   */
  stageInfo: {
    name: string;
    description?: string;
    language?: string;
    style?: string;
  };
  /** Forwarded to both the content and the actions request. */
  agents?: AgentInfo[];
  /** Forwarded to the scene-actions request. */
  userProfile?: string;
  /** Forwarded to both requests; takes precedence over `stageInfo.language` for TTS. */
  languageDirective?: string;
}
|
|
/**
 * Returns the speech texts of the scene with the highest `order`, or an empty
 * list when there are no scenes. The input array is not mutated.
 */
function lastSceneSpeechTexts(scenes: readonly Scene[]): string[] {
  const ordered = [...scenes].sort((a, b) => a.order - b.order);
  const lastScene = ordered[ordered.length - 1];
  if (!lastScene) return [];
  return (lastScene.actions || [])
    .filter((a): a is SpeechAction => a.type === 'speech')
    .map((a) => a.text);
}

/**
 * React hook that generates scenes for the outlines in the stage store, one
 * outline at a time: content request, then actions request, then (when TTS is
 * enabled with a non-browser provider) audio generation, then `addScene`.
 *
 * @param options Optional progress callbacks used by `generateRemaining`.
 * @returns
 *   - `generateRemaining(params)`: generates every outline that has no scene
 *     yet, in ascending `order`; stops at the first failure or abort.
 *   - `retrySingleOutline(outlineId)`: regenerates one failed outline using
 *     the params of the latest `generateRemaining` call.
 *   - `stop()`: flags an abort, bumps the generation epoch and aborts the
 *     in-flight scene and media requests.
 *   - `isGenerating()`: whether `generateRemaining` is currently running.
 */
export function useSceneGenerator(options: UseSceneGeneratorOptions = {}) {
  const abortRef = useRef(false);
  const generatingRef = useRef(false);
  const mediaAbortRef = useRef<AbortController | null>(null);
  const fetchAbortRef = useRef<AbortController | null>(null);
  const lastParamsRef = useRef<GenerationParams | null>(null);
  const generateRemainingRef = useRef<((params: GenerationParams) => Promise<void>) | null>(null);

  const store = useStageStore;

  const generateRemaining = useCallback(
    async (params: GenerationParams) => {
      // Always remember the latest params, even when a run is already active.
      lastParamsRef.current = params;
      if (generatingRef.current) return;
      generatingRef.current = true;
      abortRef.current = false;

      const removeGeneratingOutline = (outlineId: string) => {
        const generating = store.getState().generatingOutlines;
        const isListed = generating.some((o) => o.id === outlineId);
        if (!isListed) return;
        store.getState().setGeneratingOutlines(generating.filter((o) => o.id !== outlineId));
      };

      // One controller per run so stop() can cancel the in-flight request.
      const fetchController = new AbortController();
      fetchAbortRef.current = fetchController;
      const signal = fetchController.signal;

      const snapshot = store.getState();
      const { outlines, scenes, stage } = snapshot;
      const startEpoch = snapshot.generationEpoch;
      if (!stage || outlines.length === 0) {
        // NOTE(review): fetchAbortRef keeps the unused controller on this path.
        generatingRef.current = false;
        return;
      }

      store.getState().setGenerationStatus('generating');

      // Pending = outlines without a scene of the same order, ascending.
      const finishedOrders = new Set(scenes.map((s) => s.order));
      const pending = outlines
        .filter((o) => !finishedOrders.has(o.order))
        .sort((a, b) => a.order - b.order);

      if (pending.length === 0) {
        store.getState().setGenerationStatus('completed');
        store.getState().setGeneratingOutlines([]);
        options.onComplete?.();
        generatingRef.current = false;
        return;
      }

      store.getState().setGeneratingOutlines(pending);

      // Media generation runs in the background; its errors are only logged.
      const mediaController = new AbortController();
      mediaAbortRef.current = mediaController;
      generateMediaForOutlines(outlines, stage.id, mediaController.signal).catch((err) => {
        log.warn('Media generation error:', err);
      });

      // Seed the speech context from the last already-generated scene.
      let previousSpeeches: string[] = lastSceneSpeechTexts(scenes);

      // True once stop() was called or the store's epoch moved past this run's.
      const isStale = () => abortRef.current || store.getState().generationEpoch !== startEpoch;

      // Records a real (non-stale) failure and pauses generation.
      const failOutline = (outline: SceneOutline, message: string) => {
        store.getState().addFailedOutline(outline);
        options.onSceneFailed?.(outline, message);
        store.getState().setGenerationStatus('paused');
      };

      try {
        let stoppedEarly = false;

        for (const outline of pending) {
          if (isStale()) {
            store.getState().setGenerationStatus('paused');
            stoppedEarly = true;
            break;
          }

          store.getState().setCurrentGeneratingOrder(outline.order);

          // Phase 1: scene content.
          options.onPhaseChange?.('content', outline);
          const contentResult = await fetchSceneContent(
            {
              outline,
              allOutlines: outlines,
              stageId: stage.id,
              pdfImages: params.pdfImages,
              imageMapping: params.imageMapping,
              stageInfo: params.stageInfo,
              agents: params.agents,
              languageDirective: params.languageDirective,
            },
            signal,
          );

          if (!contentResult.success || !contentResult.content) {
            // A stale run leaves the store untouched; otherwise record the failure.
            if (!isStale()) {
              failOutline(outline, contentResult.error || 'Content generation failed');
            }
            stoppedEarly = true;
            break;
          }

          if (isStale()) {
            store.getState().setGenerationStatus('paused');
            stoppedEarly = true;
            break;
          }

          // Phase 2: scene actions.
          options.onPhaseChange?.('actions', outline);
          const actionsResult = await fetchSceneActions(
            {
              outline: contentResult.effectiveOutline || outline,
              allOutlines: outlines,
              content: contentResult.content,
              stageId: stage.id,
              agents: params.agents,
              previousSpeeches,
              userProfile: params.userProfile,
              languageDirective: params.languageDirective,
            },
            signal,
          );

          const scene = actionsResult.success ? actionsResult.scene : undefined;
          if (!scene) {
            if (!isStale()) {
              failOutline(outline, actionsResult.error || 'Actions generation failed');
            }
            stoppedEarly = true;
            break;
          }

          // Phase 3: audio, only for enabled non-browser TTS providers.
          const settings = useSettingsStore.getState();
          if (settings.ttsEnabled && settings.ttsProviderId !== 'browser-native-tts') {
            const ttsResult = await generateTTSForScene(
              scene,
              params.languageDirective || params.stageInfo.language,
              signal,
            );
            if (!ttsResult.success) {
              if (!isStale()) {
                failOutline(outline, ttsResult.error || 'TTS generation failed');
              }
              stoppedEarly = true;
              break;
            }
          }

          // Drop the scene if the epoch changed while it was being generated.
          if (store.getState().generationEpoch !== startEpoch) {
            stoppedEarly = true;
            break;
          }

          removeGeneratingOutline(outline.id);
          store.getState().addScene(scene);
          options.onSceneGenerated?.(scene, outline.order);
          previousSpeeches = actionsResult.previousSpeeches || [];
        }

        if (!abortRef.current && !stoppedEarly) {
          store.getState().setGenerationStatus('completed');
          store.getState().setGeneratingOutlines([]);
          options.onComplete?.();
        }
      } catch (err: unknown) {
        // An aborted fetch pauses the run; anything else propagates to the caller.
        const wasAborted = err instanceof DOMException && err.name === 'AbortError';
        if (!wasAborted) {
          throw err;
        }
        log.info('Generation aborted');
        store.getState().setGenerationStatus('paused');
      } finally {
        generatingRef.current = false;
        fetchAbortRef.current = null;
      }
    },
    [options, store],
  );

  // Keep the ref pointing at the latest callback so retrySingleOutline can
  // resume generation without depending on `generateRemaining` itself.
  generateRemainingRef.current = generateRemaining;

  const stop = useCallback(() => {
    abortRef.current = true;
    store.getState().bumpGenerationEpoch();
    fetchAbortRef.current?.abort();
    mediaAbortRef.current?.abort();
  }, [store]);

  const isGenerating = useCallback(() => generatingRef.current, []);

  const retrySingleOutline = useCallback(
    async (outlineId: string) => {
      const state = store.getState();
      const stage = state.stage;
      const outline = state.failedOutlines.find((o) => o.id === outlineId);
      const params = lastParamsRef.current;
      // Needs a failed outline, a stage, and params from an earlier generateRemaining call.
      if (!outline || !stage || !params) return;

      const removeGeneratingOutline = () => {
        const generating = store.getState().generatingOutlines;
        const isListed = generating.some((o) => o.id === outlineId);
        if (!isListed) return;
        store.getState().setGeneratingOutlines(generating.filter((o) => o.id !== outlineId));
      };

      // Move the outline from "failed" back to "generating".
      store.getState().retryFailedOutline(outlineId);
      store.getState().setGenerationStatus('generating');
      const generatingNow = store.getState().generatingOutlines;
      const alreadyListed = generatingNow.some((o) => o.id === outline.id);
      if (!alreadyListed) {
        store.getState().setGeneratingOutlines([...generatingNow, outline]);
      }

      // NOTE(review): this controller is local and never aborted, so stop()
      // does not cancel a retry — confirm whether that is intended.
      const retryController = new AbortController();
      const signal = retryController.signal;

      try {
        // Phase 1: scene content.
        const contentResult = await fetchSceneContent(
          {
            outline,
            allOutlines: state.outlines,
            stageId: stage.id,
            pdfImages: params.pdfImages,
            imageMapping: params.imageMapping,
            stageInfo: params.stageInfo,
            agents: params.agents,
            languageDirective: params.languageDirective,
          },
          signal,
        );

        if (!contentResult.success || !contentResult.content) {
          store.getState().addFailedOutline(outline);
          return;
        }

        // Speech context comes from the scenes present in the store right now.
        const previousSpeeches = lastSceneSpeechTexts(store.getState().scenes);

        // Phase 2: scene actions.
        const actionsResult = await fetchSceneActions(
          {
            outline: contentResult.effectiveOutline || outline,
            allOutlines: state.outlines,
            content: contentResult.content,
            stageId: stage.id,
            agents: params.agents,
            previousSpeeches,
            userProfile: params.userProfile,
            languageDirective: params.languageDirective,
          },
          signal,
        );

        const scene = actionsResult.success ? actionsResult.scene : undefined;
        if (!scene) {
          store.getState().addFailedOutline(outline);
          return;
        }

        // Phase 3: audio, only for enabled non-browser TTS providers.
        const settings = useSettingsStore.getState();
        if (settings.ttsEnabled && settings.ttsProviderId !== 'browser-native-tts') {
          const ttsResult = await generateTTSForScene(
            scene,
            params.languageDirective || params.stageInfo.language,
            signal,
          );
          if (!ttsResult.success) {
            store.getState().addFailedOutline(outline);
            return;
          }
        }

        removeGeneratingOutline();
        store.getState().addScene(scene);

        // Resume the main run when other outlines are still waiting (not awaited).
        if (store.getState().generatingOutlines.length > 0 && lastParamsRef.current) {
          generateRemainingRef.current?.(lastParamsRef.current);
        }
      } catch (err) {
        const wasAborted = err instanceof DOMException && err.name === 'AbortError';
        if (!wasAborted) {
          store.getState().addFailedOutline(outline);
        }
      }
    },
    [store],
  );

  return { generateRemaining, retrySingleOutline, stop, isGenerating };
}
|
|