import { spawn } from 'child_process';
import { randomBytes } from 'crypto';
import { existsSync } from 'fs';
import { mkdir, readFile, rm, writeFile } from 'fs/promises';
import path from 'path';

/**
 * One uploaded clip. `filePath` (already on disk) wins over `file` (in-memory
 * upload) when both are present; see `prepareClipSource`.
 */
type ClipFile = {
  file?: File;
  filePath?: string;
  ext: string;
  step?: number;
};

export type ServerRenderInput = {
  videoClips: ClipFile[];
  audioClips: ClipFile[];
};

export type ServerRenderResult = {
  bytes: Buffer;
  durationSeconds: number;
  filename: string;
};

/** Root folder for per-render scratch directories. */
const WORK_DIR = path.join(process.cwd(), '.local-review-data', 'server-renders');
const VIDEO_WIDTH = 1080;
const VIDEO_HEIGHT = 1920;
const VIDEO_FPS = 24;
/** Segment length assumed when a clip's duration could not be probed. */
const MIN_VIDEO_CLIP_SECONDS = 5;
/** Per-clip cap for steps that have no entry in STEP_VIDEO_MAX_SECONDS. */
const MAX_VIDEO_CLIP_SECONDS = 7;
/** Cap applied to every separate audio clip input. */
const MAX_AUDIO_CLIP_SECONDS = 12;
/** Hard cap for the rendered output length. */
const FINAL_VIDEO_MAX_SECONDS = 17;
/** Per-step caps (in seconds) used by the linear, no-voiceover layout. */
const STEP_VIDEO_MAX_SECONDS: Record<number, number> = {
  1: 6,
  2: 7,
  3: 4,
};

type PreparedClip = {
  path: string;
  step: number;
  duration: number;
  hasAudio: boolean;
};

type VideoSegment = {
  source: PreparedClip;
  duration: number;
};

/**
 * Resolves the executable to spawn for `ffmpeg` / `ffprobe`.
 *
 * Order: explicit env override (if the file exists), then a Windows-only
 * temp-folder install of the static builds, then the bare command name
 * (left for the OS to resolve via PATH).
 */
function resolveMediaCommand(command: 'ffmpeg' | 'ffprobe'): string {
  const explicit =
    command === 'ffmpeg' ? process.env.GRABBY_FFMPEG_PATH : process.env.GRABBY_FFPROBE_PATH;
  if (explicit && existsSync(explicit)) return explicit;

  const tempRoot = process.env.TEMP || process.env.TMP;
  if (process.platform === 'win32' && tempRoot) {
    const tempTools = path.join(tempRoot, 'grabby-media-tools', 'node_modules');
    const candidate =
      command === 'ffmpeg'
        ? path.join(tempTools, 'ffmpeg-static', 'ffmpeg.exe')
        : path.join(tempTools, 'ffprobe-static', 'bin', 'win32', 'x64', 'ffprobe.exe');
    if (existsSync(candidate)) return candidate;
  }

  return command;
}

/**
 * Normalizes a caller-supplied extension to one of the known media
 * extensions; anything unrecognized becomes `webm`.
 */
function safeExt(ext: string): string {
  const normalized = ext.toLowerCase().replace(/[^a-z0-9]/g, '');
  if (normalized === 'mp4' || normalized === 'mov' || normalized === 'webm') return normalized;
  if (normalized === 'm4a' || normalized === 'mp3' || normalized === 'wav') return normalized;
  return 'webm';
}

/**
 * Runs ffmpeg (or ffprobe when `command === 'ffprobe'`) and resolves on exit
 * code 0.
 *
 * @throws Error when the process cannot start or exits non-zero; the message
 *   carries the last 10 collected log lines when there are any.
 */
async function runCommand(command: string, args: string[], label: string): Promise<void> {
  return await new Promise<void>((resolve, reject) => {
    const child = spawn(resolveMediaCommand(command === 'ffprobe' ? 'ffprobe' : 'ffmpeg'), args, {
      windowsHide: true,
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    const logs: string[] = [];
    const collect = (chunk: Buffer) => {
      const text = chunk.toString('utf8');
      for (const line of text.split(/\r?\n/)) {
        const trimmed = line.trim();
        if (trimmed) logs.push(trimmed);
      }
      // Keep memory bounded: only the most recent 80 lines are retained.
      if (logs.length > 80) logs.splice(0, logs.length - 80);
    };

    child.stdout.on('data', collect);
    child.stderr.on('data', collect);
    child.on('error', (err) => {
      reject(new Error(`${label} failed to start: ${err.message}`));
    });
    child.on('close', (code) => {
      if (code === 0) {
        resolve();
        return;
      }
      reject(
        new Error(
          logs.length
            ? `${label} failed with exit code ${code}: ${logs.slice(-10).join(' | ')}`
            : `${label} failed with exit code ${code}`,
        ),
      );
    });
  });
}

/**
 * Writes an uploaded file to disk.
 *
 * @throws Error when the upload has no bytes.
 */
async function writeClip(file: File, filePath: string): Promise<void> {
  const bytes = Buffer.from(await file.arrayBuffer());
  if (bytes.length <= 0) throw new Error('One of the clips was empty.');
  await writeFile(filePath, bytes);
}

/**
 * Returns a readable path for the clip: its own `filePath` when set,
 * otherwise the upload is written to `fallbackPath` first.
 *
 * @throws Error when the clip has neither `filePath` nor `file`.
 */
async function prepareClipSource(clip: ClipFile, fallbackPath: string): Promise<string> {
  if (clip.filePath) return clip.filePath;
  if (!clip.file) throw new Error('Clip source is missing.');
  await writeClip(clip.file, fallbackPath);
  return fallbackPath;
}

/**
 * Spawns ffprobe with `args` and resolves with its trimmed stdout.
 * Rejects when the process cannot start or exits non-zero.
 */
function runProbe(args: string[]): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const child = spawn(resolveMediaCommand('ffprobe'), args, {
      windowsHide: true,
      // stderr is discarded rather than piped: nothing reads it here, and an
      // unread pipe can fill up and stall the child process.
      stdio: ['ignore', 'pipe', 'ignore'],
    });

    let stdout = '';
    child.stdout.on('data', (chunk: Buffer) => {
      stdout += chunk.toString('utf8');
    });
    child.on('error', reject);
    child.on('close', (code) => {
      if (code === 0) resolve(stdout.trim());
      else reject(new Error(`ffprobe exited with ${code}`));
    });
  });
}

/**
 * Best-effort container duration in seconds. Returns 0 when ffprobe fails or
 * prints something that is not a finite number.
 */
async function probeDuration(filePath: string): Promise<number> {
  try {
    const output = await runProbe([
      '-v',
      'error',
      '-show_entries',
      'format=duration',
      '-of',
      'default=noprint_wrappers=1:nokey=1',
      filePath,
    ]);
    const duration = Number(output);
    return Number.isFinite(duration) ? Math.max(0, duration) : 0;
  } catch {
    return 0;
  }
}

/**
 * Best-effort check for an audio stream: true when ffprobe lists at least one
 * audio stream index, false on any failure.
 */
async function probeHasAudio(filePath: string): Promise<boolean> {
  try {
    const output = await runProbe([
      '-v',
      'error',
      '-select_streams',
      'a',
      '-show_entries',
      'stream=index',
      '-of',
      'csv=p=0',
      filePath,
    ]);
    return output.length > 0;
  } catch {
    return false;
  }
}

/** Formats seconds with at most 3 decimals and no trailing zeros (`5`, `2.5`, `1.235`). */
function formatDuration(seconds: number): string {
  return seconds.toFixed(3).replace(/\.?0+$/, '');
}

/** Constrains `value` to `[min, max]`; `min` wins if the bounds are inverted. */
function clamp(value: number, min: number, max: number): number {
  return Math.max(min, Math.min(max, value));
}

/**
 * Uses the clip's own `step` when it is a positive integer; otherwise derives
 * one from the clip's position (`fallbackStartStep + index`).
 */
function inferClipStep(clip: ClipFile, index: number, fallbackStartStep: number): number {
  const { step } = clip;
  if (typeof step === 'number' && Number.isInteger(step) && step > 0) return step;
  return fallbackStartStep + index;
}

/** First clip recorded for `step`, or null when there is none. */
function clipByStep(clips: PreparedClip[], step: number): PreparedClip | null {
  return clips.find((clip) => clip.step === step) ?? null;
}

/** Per-step duration cap, defaulting to MAX_VIDEO_CLIP_SECONDS. */
function maxSecondsForStep(step: number): number {
  return STEP_VIDEO_MAX_SECONDS[step] ?? MAX_VIDEO_CLIP_SECONDS;
}

/** Appends a segment unless the source is missing or the slice is shorter than 0.25s. */
function addSegment(
  segments: VideoSegment[],
  source: PreparedClip | null,
  duration: number,
): void {
  if (!source || duration < 0.25) return;
  segments.push({ source, duration });
}

/**
 * Lays clips out back to back in upload order. Each clip is capped by its
 * step limit and by whatever is left of FINAL_VIDEO_MAX_SECONDS; clips whose
 * duration is unknown (0) get MIN_VIDEO_CLIP_SECONDS, still subject to the
 * step cap.
 */
function buildLinearSegments(videoClips: PreparedClip[]): VideoSegment[] {
  const segments: VideoSegment[] = [];
  let remaining = FINAL_VIDEO_MAX_SECONDS;

  for (const source of videoClips) {
    if (remaining <= 0.25) break;
    const stepMax = maxSecondsForStep(source.step);
    const fallbackDuration = Math.min(MIN_VIDEO_CLIP_SECONDS, stepMax);
    const sourceDuration =
      source.duration > 0 ? Math.min(source.duration, stepMax) : fallbackDuration;
    const duration = Math.min(sourceDuration, remaining);
    addSegment(segments, source, duration);
    remaining -= duration;
  }

  return segments;
}

/**
 * Cuts the picture to follow the voiceover: the first audio clip (step 4) is
 * covered by the step-1 shot, optionally followed by the step-2 shot; the
 * rest of the narrative time goes to the step-3 shot; an optional closing
 * shot of up to 3s is appended when the target is at least 10s.
 *
 * Falls back to the linear layout when no segment survives the 0.25s minimum.
 */
function buildVoiceAwareSegments({
  videoClips,
  audioClips,
  renderTargetSeconds,
}: {
  videoClips: PreparedClip[];
  audioClips: PreparedClip[];
  renderTargetSeconds: number;
}): VideoSegment[] {
  const firstClip = videoClips[0];
  if (!firstClip) return [];

  // Each shot falls back to positional order, then to the previous shot.
  const closeShot = clipByStep(videoClips, 1) ?? firstClip;
  const wideShot = clipByStep(videoClips, 2) ?? videoClips[1] ?? closeShot;
  const actionShot = clipByStep(videoClips, 3) ?? videoClips[2] ?? wideShot;
  const reactionShot =
    videoClips.find((clip) => clip.step >= 6) ??
    (videoClips.length > 3 ? (videoClips[videoClips.length - 1] ?? null) : null);

  const orderAudio = clipByStep(audioClips, 4) ?? audioClips[0] ?? null;
  const likedAudio =
    clipByStep(audioClips, 5) ?? audioClips.find((clip) => clip !== orderAudio) ?? null;

  const targetSeconds = Math.max(1, renderTargetSeconds);
  const reactionSeconds =
    reactionShot && targetSeconds >= 10
      ? Math.min(3, reactionShot.duration > 0 ? reactionShot.duration : 3)
      : 0;
  const narrativeSeconds = Math.max(1, targetSeconds - reactionSeconds);

  const fallbackOrderSeconds = likedAudio ? 7 : narrativeSeconds * 0.42;
  const rawOrderSeconds =
    orderAudio?.duration && orderAudio.duration > 0 ? orderAudio.duration : fallbackOrderSeconds;
  const minOrderSeconds = Math.min(2, narrativeSeconds);
  // When a second audio clip exists, keep at least 2s of narrative for it.
  const maxOrderSeconds = likedAudio
    ? Math.max(minOrderSeconds, narrativeSeconds - 2)
    : narrativeSeconds;
  const orderSeconds = clamp(rawOrderSeconds, minOrderSeconds, maxOrderSeconds);
  const likedSeconds = Math.max(0, narrativeSeconds - orderSeconds);

  const segments: VideoSegment[] = [];
  if (closeShot === wideShot || orderSeconds < 2.5) {
    addSegment(segments, closeShot, orderSeconds);
  } else {
    // Split the first audio clip's time roughly 58/42 between close and wide shots.
    const closeSeconds = clamp(orderSeconds * 0.58, 1.25, orderSeconds - 0.75);
    addSegment(segments, closeShot, closeSeconds);
    addSegment(segments, wideShot, orderSeconds - closeSeconds);
  }
  addSegment(segments, actionShot, likedSeconds);
  addSegment(segments, reactionShot, reactionSeconds);

  return segments.length > 0 ? segments : buildLinearSegments(videoClips);
}

/**
 * Fixed-length random id (12 hex chars) for the scratch directory and output
 * filename. It is never empty, so the per-run directory can never collapse
 * onto WORK_DIR itself and take other renders with it during cleanup.
 */
function createRunId(): string {
  return randomBytes(6).toString('hex');
}

/**
 * Materializes every clip inside `runDir` (sequentially, in input order) and
 * pairs each resulting path with its step number.
 */
async function stageClips(
  clips: ClipFile[],
  runDir: string,
  prefix: 'video' | 'audio',
  fallbackStartStep: number,
): Promise<Array<{ path: string; step: number }>> {
  const staged: Array<{ path: string; step: number }> = [];
  for (const [i, clip] of clips.entries()) {
    const filePath = path.join(runDir, `${prefix}-${i}.${safeExt(clip.ext)}`);
    staged.push({
      path: await prepareClipSource(clip, filePath),
      step: inferClipStep(clip, i, fallbackStartStep),
    });
  }
  return staged;
}

/**
 * Builds the ffmpeg `-filter_complex` graph producing the `[v]` and `[a]`
 * output pads.
 *
 * Input layout expected on the ffmpeg command line: one input per video
 * segment (indices `0..n-1`) followed by one input per separate audio clip.
 * With `audioClipCount > 0` the soundtrack comes from those audio inputs;
 * otherwise it comes from each segment's own audio, with generated silence
 * for segments whose source has none.
 */
function buildFilterComplex({
  videoSegments,
  audioClipCount,
  renderTargetSeconds,
}: {
  videoSegments: VideoSegment[];
  audioClipCount: number;
  renderTargetSeconds: number;
}): string {
  const target = formatDuration(renderTargetSeconds);
  const audioNormalize = 'aformat=sample_rates=48000:channel_layouts=mono,asetpts=PTS-STARTPTS';

  // Every segment is trimmed, letterboxed into the portrait frame and brought
  // to a common fps/pixel format so concat accepts the streams.
  const videoFilters = videoSegments
    .map(
      (segment, i) =>
        `[${i}:v]trim=duration=${formatDuration(segment.duration)},setpts=PTS-STARTPTS,` +
        `scale=${VIDEO_WIDTH}:${VIDEO_HEIGHT}:force_original_aspect_ratio=decrease,` +
        `pad=${VIDEO_WIDTH}:${VIDEO_HEIGHT}:(ow-iw)/2:(oh-ih)/2,` +
        `setsar=1,fps=${VIDEO_FPS},format=yuv420p[v${i}]`,
    )
    .join(';');

  // Both layouts end by forcing picture and sound to the same length.
  const videoFinalize = `[vcat]trim=duration=${target},setpts=PTS-STARTPTS[v]`;
  const audioFinalize =
    `[acat]atrim=duration=${target},` + `apad=whole_dur=${target},asetpts=PTS-STARTPTS[a]`;

  let parts: string[];
  if (audioClipCount > 0) {
    const audioOffset = videoSegments.length;
    const audioIndexes = Array.from({ length: audioClipCount }, (_, i) => i);
    const videoInputs = videoSegments.map((_, i) => `[v${i}]`).join('');
    const videoConcat = `${videoInputs}concat=n=${videoSegments.length}:v=1:a=0[vcat]`;
    const audioFilters = audioIndexes
      .map(
        (i) =>
          `[${audioOffset + i}:a]atrim=duration=${MAX_AUDIO_CLIP_SECONDS},` +
          `aresample=48000,${audioNormalize}[a${i}]`,
      )
      .join(';');
    const audioInputs = audioIndexes.map((i) => `[a${i}]`).join('');
    const audioConcat = `${audioInputs}concat=n=${audioClipCount}:v=0:a=1[acat]`;
    parts = [videoFilters, videoConcat, videoFinalize, audioFilters, audioConcat, audioFinalize];
  } else {
    const embeddedAudioFilters = videoSegments
      .map((segment, i) => {
        const length = formatDuration(segment.duration);
        return segment.source.hasAudio
          ? `[${i}:a]atrim=duration=${length},aresample=48000,${audioNormalize}[a${i}]`
          : `anullsrc=r=48000:cl=mono,atrim=duration=${length},${audioNormalize}[a${i}]`;
      })
      .join(';');
    const avInputs = videoSegments.map((_, i) => `[v${i}][a${i}]`).join('');
    const avConcat = `${avInputs}concat=n=${videoSegments.length}:v=1:a=1[vcat][acat]`;
    parts = [videoFilters, embeddedAudioFilters, avConcat, videoFinalize, audioFinalize];
  }

  return parts.filter(Boolean).join(';');
}

/**
 * Renders the uploaded clips into a single 1080x1920 H.264/AAC MP4 with
 * ffmpeg and returns its bytes.
 *
 * When separate audio clips are supplied they become the soundtrack and drive
 * the cut; otherwise clips are played in order with their own audio. The
 * output never exceeds FINAL_VIDEO_MAX_SECONDS. All scratch files live in a
 * per-run directory that is removed before returning, success or failure.
 *
 * @param input Video clips (at least one) and optional voiceover audio clips.
 * @returns The MP4 bytes, its probed duration rounded to whole seconds (0 if
 *   probing fails), and a suggested filename.
 * @throws Error when no video clips are given, a clip is empty or has no
 *   source, no usable segment can be built, or ffmpeg fails / yields an
 *   empty file.
 */
export async function renderClipsOnServer(input: ServerRenderInput): Promise<ServerRenderResult> {
  if (input.videoClips.length === 0) {
    throw new Error('No video clips were uploaded.');
  }

  const runId = createRunId();
  const runDir = path.join(WORK_DIR, runId);
  await mkdir(runDir, { recursive: true });

  const filename = `matcha-server-${runId}.mp4`;
  const outputPath = path.join(runDir, filename);

  try {
    // Video steps default to 1, 2, 3...; voiceover steps default to 4, 5...
    const videoPaths = await stageClips(input.videoClips, runDir, 'video', 1);
    const audioPaths = await stageClips(input.audioClips, runDir, 'audio', 4);

    const [videoDurations, audioDurations, videoAudioFlags] = await Promise.all([
      Promise.all(videoPaths.map((clip) => probeDuration(clip.path))),
      Promise.all(audioPaths.map((clip) => probeDuration(clip.path))),
      Promise.all(videoPaths.map((clip) => probeHasAudio(clip.path))),
    ]);

    const preparedVideoClips: PreparedClip[] = videoPaths.map((clip, index) => ({
      path: clip.path,
      step: clip.step,
      duration: videoDurations[index] ?? 0,
      hasAudio: videoAudioFlags[index] ?? false,
    }));
    const preparedAudioClips: PreparedClip[] = audioPaths.map((clip, index) => ({
      path: clip.path,
      step: clip.step,
      duration: audioDurations[index] ?? 0,
      hasAudio: true,
    }));

    const hasVoiceover = audioPaths.length > 0;
    const audioDurationSeconds = audioDurations.reduce((total, duration) => total + duration, 0);
    // If no audio duration could be probed, guess from the number of clips.
    const voiceoverTargetSeconds = Math.max(
      1,
      Math.min(
        FINAL_VIDEO_MAX_SECONDS,
        audioDurationSeconds > 0
          ? audioDurationSeconds
          : preparedAudioClips.length >= 2
            ? FINAL_VIDEO_MAX_SECONDS
            : MAX_AUDIO_CLIP_SECONDS,
      ),
    );

    const videoSegments = hasVoiceover
      ? buildVoiceAwareSegments({
          videoClips: preparedVideoClips,
          audioClips: preparedAudioClips,
          renderTargetSeconds: voiceoverTargetSeconds,
        })
      : buildLinearSegments(preparedVideoClips);

    const renderTargetSeconds = hasVoiceover
      ? voiceoverTargetSeconds
      : Math.max(
          1,
          Math.min(
            FINAL_VIDEO_MAX_SECONDS,
            videoSegments.reduce((total, segment) => total + segment.duration, 0),
          ),
        );

    if (videoSegments.length === 0) {
      throw new Error('No usable video segments were uploaded.');
    }

    // One looping, length-limited input per segment (a source may appear more
    // than once), followed by the separate audio clips.
    const inputArgs = [
      ...videoSegments.flatMap((segment) => [
        '-stream_loop',
        '-1',
        '-t',
        formatDuration(segment.duration),
        '-i',
        segment.source.path,
      ]),
      ...audioPaths.flatMap((clip) => ['-t', String(MAX_AUDIO_CLIP_SECONDS), '-i', clip.path]),
    ];

    const filterComplex = buildFilterComplex({
      videoSegments,
      audioClipCount: audioPaths.length,
      renderTargetSeconds,
    });

    // V3 records picture and speech together. Older voiceover renders still use
    // separate audio clips, but both paths now emit one shared audio/video length.
    const outputArgs = ['-map', '[v]', '-map', '[a]'];

    await runCommand(
      'ffmpeg',
      [
        '-y',
        ...inputArgs,
        '-filter_complex',
        filterComplex,
        ...outputArgs,
        '-c:v',
        'libx264',
        '-preset',
        'ultrafast',
        '-tune',
        'zerolatency',
        '-crf',
        '30',
        '-r',
        String(VIDEO_FPS),
        '-c:a',
        'aac',
        '-b:a',
        '96k',
        '-t',
        formatDuration(renderTargetSeconds),
        '-movflags',
        '+faststart',
        '-avoid_negative_ts',
        'make_zero',
        outputPath,
      ],
      'Server clip render',
    );

    const bytes = await readFile(outputPath);
    if (bytes.length <= 0) throw new Error('Server render produced an empty video.');

    return {
      bytes,
      durationSeconds: Math.round(await probeDuration(outputPath)),
      filename,
    };
  } finally {
    // Best-effort cleanup; a failed removal must not mask the render result.
    await rm(runDir, { recursive: true, force: true }).catch(() => undefined);
  }
}