| |
| |
| |
| |
| |
| |
|
|
| import { promises as fs } from 'fs'; |
| import path from 'path'; |
| import { createLogger } from '@/lib/logger'; |
| import { CLASSROOMS_DIR } from '@/lib/server/classroom-storage'; |
| import { generateImage } from '@/lib/media/image-providers'; |
| import { generateVideo, normalizeVideoOptions } from '@/lib/media/video-providers'; |
| import { generateTTS } from '@/lib/audio/tts-providers'; |
| import { DEFAULT_TTS_VOICES, DEFAULT_TTS_MODELS, TTS_PROVIDERS } from '@/lib/audio/constants'; |
| import { IMAGE_PROVIDERS } from '@/lib/media/image-providers'; |
| import { VIDEO_PROVIDERS } from '@/lib/media/video-providers'; |
| import { isMediaPlaceholder } from '@/lib/store/media-generation'; |
| import { |
| getServerImageProviders, |
| getServerVideoProviders, |
| getServerTTSProviders, |
| resolveImageApiKey, |
| resolveImageBaseUrl, |
| resolveVideoApiKey, |
| resolveVideoBaseUrl, |
| resolveTTSApiKey, |
| resolveTTSBaseUrl, |
| } from '@/lib/server/provider-config'; |
| import type { SceneOutline } from '@/lib/types/generation'; |
| import type { Scene } from '@/lib/types/stage'; |
| import type { SpeechAction } from '@/lib/types/action'; |
| import type { ImageProviderId } from '@/lib/media/types'; |
| import type { VideoProviderId } from '@/lib/media/types'; |
| import type { TTSProviderId } from '@/lib/audio/types'; |
| import { splitLongSpeechActions } from '@/lib/audio/tts-utils'; |
| import { VOXCPM_AUTO_VOICE_ID, VOXCPM_TTS_PROVIDER_ID } from '@/lib/audio/voxcpm'; |
|
|
| const log = createLogger('ClassroomMedia'); |
|
|
| |
| |
| |
|
|
/**
 * Makes sure the directory `dir` is present on disk.
 *
 * Delegates to `fs.mkdir` with `recursive: true`, so missing parent
 * directories are created as part of the same call.
 *
 * @param dir - Path of the directory to create.
 */
async function ensureDir(dir: string): Promise<void> {
  const mkdirOptions = { recursive: true };
  await fs.mkdir(dir, mkdirOptions);
}
|
|
/** Upper bound, in milliseconds, on how long a single download may take. */
const DOWNLOAD_TIMEOUT_MS = 120_000;
/** Default cap, in bytes, on the size of a downloaded file (100 MiB). */
const DOWNLOAD_MAX_SIZE = 100 * 1024 * 1024;

/**
 * Downloads `url` fully into memory and returns the bytes.
 *
 * The size cap is enforced twice: once up front against the advertised
 * `Content-Length`, and again while the body is being read. The second check
 * matters because the header may be missing (chunked responses) or may
 * understate the real size, in which case a header-only check would buffer an
 * arbitrarily large body.
 *
 * @param url - Absolute URL to fetch.
 * @param maxBytes - Largest body accepted, in bytes. Defaults to `DOWNLOAD_MAX_SIZE`.
 * @returns The response body as a `Buffer`.
 * @throws Error when the response status is not OK, when the body exceeds
 *   `maxBytes`, or when the request fails or is aborted by the
 *   `DOWNLOAD_TIMEOUT_MS` timeout signal.
 */
async function downloadToBuffer(url: string, maxBytes: number = DOWNLOAD_MAX_SIZE): Promise<Buffer> {
  const resp = await fetch(url, { signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS) });
  if (!resp.ok) throw new Error(`Download failed: ${resp.status} ${resp.statusText}`);

  // Fast path: reject before reading anything when the server admits the size.
  const contentLength = Number(resp.headers.get('content-length') || 0);
  if (contentLength > maxBytes) {
    // Release the connection instead of leaving the unread body dangling.
    void resp.body?.cancel().catch(() => undefined);
    throw new Error(`File too large: ${contentLength} bytes (max ${maxBytes})`);
  }

  // No readable stream available: buffer the body, then verify its real size.
  if (!resp.body) {
    const whole = Buffer.from(await resp.arrayBuffer());
    if (whole.length > maxBytes) {
      throw new Error(`File too large: ${whole.length} bytes (max ${maxBytes})`);
    }
    return whole;
  }

  // Stream the body so an oversized download is abandoned as soon as it
  // crosses the cap rather than after it has been fully buffered.
  const reader = resp.body.getReader();
  const chunks: Uint8Array[] = [];
  let received = 0;
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    if (!value) continue;
    received += value.byteLength;
    if (received > maxBytes) {
      await reader.cancel().catch(() => undefined);
      throw new Error(`File too large: more than ${maxBytes} bytes received (max ${maxBytes})`);
    }
    chunks.push(value);
  }
  return Buffer.concat(chunks, received);
}
|
|
/**
 * Builds the URL under which a stored classroom file is served.
 *
 * Trailing slashes on `baseUrl` are dropped before joining so that a base such
 * as `https://host/` does not yield `https://host//api/...`.
 *
 * @param baseUrl - Base URL of the server; may be empty or end with `/`.
 * @param classroomId - Identifier of the classroom that owns the file.
 * @param subPath - Path of the file below the classroom, e.g. `media/a.png`.
 * @returns `<baseUrl>/api/classroom-media/<classroomId>/<subPath>`.
 */
function mediaServingUrl(baseUrl: string, classroomId: string, subPath: string): string {
  let end = baseUrl.length;
  while (end > 0 && baseUrl[end - 1] === '/') end -= 1;
  const trimmedBase = baseUrl.slice(0, end);
  return `${trimmedBase}/api/classroom-media/${classroomId}/${subPath}`;
}
|
|
| |
| |
| |
|
|
/**
 * Produces the images and videos requested by the scene outlines and stores
 * them in the classroom's `media` directory.
 *
 * Only the first configured image provider and the first configured video
 * provider are used. Images and videos are handled as two concurrent
 * pipelines; inside each pipeline the requests are processed one at a time.
 * A request that is skipped (no API key, no data returned) or that throws is
 * logged and left out of the result; it never aborts the remaining requests.
 *
 * @param outlines - Scene outlines whose `mediaGenerations` entries are the requests.
 * @param classroomId - Classroom whose storage directory receives the files.
 * @param baseUrl - Base URL used to build the serving URL of each stored file.
 * @returns Map from a request's `elementId` to the serving URL of its file.
 */
export async function generateMediaForClassroom(
  outlines: SceneOutline[],
  classroomId: string,
  baseUrl: string,
): Promise<Record<string, string>> {
  const mediaDir = path.join(CLASSROOMS_DIR, classroomId, 'media');
  await ensureDir(mediaDir);

  // Flatten the per-outline request lists; nothing to do when there are none.
  const requests = outlines.flatMap((outline) => outline.mediaGenerations ?? []);
  if (requests.length === 0) return {};

  type MediaRequest = (typeof requests)[number];

  const imageProviderIds = Object.keys(getServerImageProviders());
  const videoProviderIds = Object.keys(getServerVideoProviders());
  const hasImageProvider = imageProviderIds.length > 0;
  const hasVideoProvider = videoProviderIds.length > 0;

  const mediaMap: Record<string, string> = {};

  // A request is only kept when a provider of its kind is configured.
  const imageRequests = requests.filter((r) => hasImageProvider && r.type === 'image');
  const videoRequests = requests.filter((r) => hasVideoProvider && r.type === 'video');

  const knownImageExtensions = ['png', 'jpg', 'jpeg', 'webp'];

  /** Writes the bytes to the media directory and records the serving URL. */
  const storeAsset = async (elementId: string, filename: string, data: Buffer): Promise<void> => {
    await fs.writeFile(path.join(mediaDir, filename), data);
    mediaMap[elementId] = mediaServingUrl(baseUrl, classroomId, `media/${filename}`);
  };

  /** Generates and stores one image; every failure is logged, never thrown. */
  const produceImage = async (req: MediaRequest): Promise<void> => {
    try {
      const providerId = imageProviderIds[0] as ImageProviderId;
      const apiKey = resolveImageApiKey(providerId);
      if (!apiKey) {
        log.warn(`No API key for image provider "${providerId}", skipping ${req.elementId}`);
        return;
      }
      const model = IMAGE_PROVIDERS[providerId]?.models?.[0]?.id;

      const result = await generateImage(
        { providerId, apiKey, baseUrl: resolveImageBaseUrl(providerId), model },
        { prompt: req.prompt, aspectRatio: req.aspectRatio || '16:9' },
      );

      // Inline base64 data is saved as PNG; a URL result is downloaded and
      // keeps its own extension when that is a recognised image type.
      let bytes: Buffer;
      let extension = 'png';
      if (result.base64) {
        bytes = Buffer.from(result.base64, 'base64');
      } else if (result.url) {
        bytes = await downloadToBuffer(result.url);
        const fromUrl = path.extname(new URL(result.url).pathname).replace('.', '');
        if (knownImageExtensions.includes(fromUrl)) extension = fromUrl;
      } else {
        log.warn(`Image generation returned no data for ${req.elementId}`);
        return;
      }

      const filename = `${req.elementId}.${extension}`;
      await storeAsset(req.elementId, filename, bytes);
      log.info(`Generated image: ${filename}`);
    } catch (err) {
      log.warn(`Image generation failed for ${req.elementId}:`, err);
    }
  };

  /** Generates and stores one video; every failure is logged, never thrown. */
  const produceVideo = async (req: MediaRequest): Promise<void> => {
    try {
      const providerId = videoProviderIds[0] as VideoProviderId;
      const apiKey = resolveVideoApiKey(providerId);
      if (!apiKey) {
        log.warn(`No API key for video provider "${providerId}", skipping ${req.elementId}`);
        return;
      }
      const model = VIDEO_PROVIDERS[providerId]?.models?.[0]?.id;

      const options = normalizeVideoOptions(providerId, {
        prompt: req.prompt,
        aspectRatio: (req.aspectRatio as '16:9' | '4:3' | '1:1' | '9:16') || '16:9',
      });

      const result = await generateVideo(
        { providerId, apiKey, baseUrl: resolveVideoBaseUrl(providerId), model },
        options,
      );

      const bytes = await downloadToBuffer(result.url);
      const filename = `${req.elementId}.mp4`;
      await storeAsset(req.elementId, filename, bytes);
      log.info(`Generated video: ${filename}`);
    } catch (err) {
      log.warn(`Video generation failed for ${req.elementId}:`, err);
    }
  };

  /** Runs `job` over `items` strictly one after another. */
  const runInOrder = async (
    items: MediaRequest[],
    job: (req: MediaRequest) => Promise<void>,
  ): Promise<void> => {
    for (const item of items) {
      await job(item);
    }
  };

  // The two media kinds proceed side by side.
  await Promise.all([
    runInOrder(imageRequests, produceImage),
    runInOrder(videoRequests, produceVideo),
  ]);

  return mediaMap;
}
|
|
| |
| |
| |
|
|
/**
 * Swaps placeholder `src` values on slide elements for their generated URLs.
 *
 * Scenes are modified in place. Only scenes of type `slide` are visited, and
 * within them only `image` / `video` elements whose `src` is a string that
 * `isMediaPlaceholder` accepts and that has a non-empty entry in `mediaMap`.
 *
 * @param scenes - Scenes to update in place.
 * @param mediaMap - Map from placeholder `src` value to replacement URL.
 */
export function replaceMediaPlaceholders(scenes: Scene[], mediaMap: Record<string, string>): void {
  const hasReplacements = Object.keys(mediaMap).length > 0;
  if (!hasReplacements) return;

  type CanvasElement = { id: string; src?: string; type?: string };
  type SlideContent = { canvas?: { elements?: CanvasElement[] } };

  for (const scene of scenes) {
    if (scene.type !== 'slide') continue;

    const elements = (scene.content as SlideContent)?.canvas?.elements;
    if (!elements) continue;

    for (const element of elements) {
      const isMediaElement = element.type === 'image' || element.type === 'video';
      if (!isMediaElement) continue;
      if (typeof element.src !== 'string') continue;
      if (!isMediaPlaceholder(element.src)) continue;

      const replacement = mediaMap[element.src];
      if (replacement) {
        element.src = replacement;
      }
    }
  }
}
|
|
| |
| |
| |
|
|
/**
 * Synthesizes audio for the speech actions of every scene and stores the files
 * in the classroom's `audio` directory.
 *
 * The first configured server TTS provider other than `browser-native-tts` is
 * used, with that provider's default voice and model. Generation is skipped
 * entirely (with a warning) when no such provider exists, when it has no API
 * key, or when the provider is VoxCPM with its auto voice.
 *
 * Scenes are modified in place: `actions` is replaced by the result of
 * `splitLongSpeechActions`, and each speech action that is synthesized gets
 * `audioId` and `audioUrl` set. A failure for one action is logged and does not
 * stop the others.
 *
 * @param scenes - Scenes whose speech actions should receive audio.
 * @param classroomId - Classroom whose storage directory receives the files.
 * @param baseUrl - Base URL used to build the serving URL of each audio file.
 */
export async function generateTTSForClassroom(
  scenes: Scene[],
  classroomId: string,
  baseUrl: string,
): Promise<void> {
  const audioDir = path.join(CLASSROOMS_DIR, classroomId, 'audio');
  await ensureDir(audioDir);

  // Browser-native TTS cannot run on the server, so it is never a candidate.
  const candidateIds = Object.keys(getServerTTSProviders()).filter(
    (id) => id !== 'browser-native-tts',
  );
  if (candidateIds.length === 0) {
    log.warn('No server TTS provider configured, skipping TTS generation');
    return;
  }

  const providerId = candidateIds[0] as TTSProviderId;
  const apiKey = resolveTTSApiKey(providerId);
  if (!apiKey) {
    log.warn(`No API key for TTS provider "${providerId}", skipping TTS generation`);
    return;
  }

  const providerMeta = TTS_PROVIDERS[providerId as keyof typeof TTS_PROVIDERS];
  const ttsBaseUrl = resolveTTSBaseUrl(providerId) || providerMeta?.defaultBaseUrl;
  const voice = DEFAULT_TTS_VOICES[providerId as keyof typeof DEFAULT_TTS_VOICES] || 'default';
  const fallbackFormat = providerMeta?.supportedFormats?.[0] || 'mp3';

  const needsAgentVoice = providerId === VOXCPM_TTS_PROVIDER_ID && voice === VOXCPM_AUTO_VOICE_ID;
  if (needsAgentVoice) {
    log.warn('VoxCPM Auto Voice requires agent context; skipping server-side TTS generation');
    return;
  }

  const modelId = DEFAULT_TTS_MODELS[providerId as keyof typeof DEFAULT_TTS_MODELS] || '';

  /** Synthesizes one speech action, stores the audio and links it to the action. */
  const synthesize = async (speech: SpeechAction, audioId: string): Promise<void> => {
    const result = await generateTTS(
      { providerId, modelId, apiKey, baseUrl: ttsBaseUrl, voice, speed: speech.speed },
      speech.text,
    );

    // Prefer the format reported by the provider over the configured default.
    const filename = `${audioId}.${result.format || fallbackFormat}`;
    await fs.writeFile(path.join(audioDir, filename), result.audio);

    speech.audioId = audioId;
    speech.audioUrl = mediaServingUrl(baseUrl, classroomId, `audio/${filename}`);
    log.info(`Generated TTS: ${filename} (${result.audio.length} bytes)`);
  };

  for (const scene of scenes) {
    if (!scene.actions) continue;

    // Split first, so the audio ids below refer to the post-split actions.
    scene.actions = splitLongSpeechActions(scene.actions, providerId);

    const sceneOrder = scene.order;

    for (const action of scene.actions) {
      if (action.type !== 'speech') continue;
      const speech = action as SpeechAction;
      if (!speech.text) continue;

      const audioId = `tts_s${sceneOrder}_${action.id}`;
      try {
        await synthesize(speech, audioId);
      } catch (err) {
        log.warn(`TTS generation failed for action ${action.id}:`, err);
      }
    }
  }
}
|
|