import { existsSync } from 'fs';
import { mkdir, readFile, rm, writeFile } from 'fs/promises';
import path from 'path';
import { spawn } from 'child_process';

/** One clip handed to the renderer: either an uploaded `file` or an existing `filePath`. */
type ClipFile = {
  file?: File;
  filePath?: string;
  ext: string;
  step?: number;
  durationSeconds?: number;
};

export type ServerRenderInput = {
  videoClips: ClipFile[];
  audioClips: ClipFile[];
};

export type ServerRenderResult = {
  bytes: Buffer;
  durationSeconds: number;
  filename: string;
};

const WORK_DIR = path.join(process.cwd(), '.local-review-data', 'server-renders');
const VIDEO_WIDTH = 1080;
const VIDEO_HEIGHT = 1920;
const VIDEO_FPS = 24;
const MAX_VIDEO_CLIP_SECONDS = 10;
const MAX_AUDIO_CLIP_SECONDS = 10;
/** Segments shorter than this are never emitted by the planners. */
const MIN_SEGMENT_SECONDS = 0.25;
/** Per-step cap on how many seconds of a video clip may be used. */
const STEP_VIDEO_MAX_SECONDS: Record<number, number> = {
  1: 10,
  2: 10,
  3: 10,
  4: 10,
  5: 10,
};

/** A clip that is on disk and whose duration / audio presence has been resolved. */
type PreparedClip = {
  path: string;
  step: number;
  duration: number;
  hasAudio: boolean;
};

type VideoSegment = {
  source: PreparedClip;
  duration: number;
};

type AudioSegment = {
  source: PreparedClip;
  duration: number;
};

type VoiceAwarePlan = {
  videoSegments: VideoSegment[];
  audioSegments: AudioSegment[];
};

/**
 * Resolves the executable used for `ffmpeg` / `ffprobe`.
 *
 * Lookup order: the `GRABBY_FFMPEG_PATH` / `GRABBY_FFPROBE_PATH` environment
 * variable (only if that file exists), then on Windows a static binary under
 * `%TEMP%/grabby-media-tools/node_modules`, and finally the bare command name.
 */
function resolveMediaCommand(command: 'ffmpeg' | 'ffprobe'): string {
  const explicit =
    command === 'ffmpeg' ? process.env.GRABBY_FFMPEG_PATH : process.env.GRABBY_FFPROBE_PATH;
  if (explicit && existsSync(explicit)) return explicit;

  const tempRoot = process.env.TEMP || process.env.TMP;
  if (process.platform === 'win32' && tempRoot) {
    const tempTools = path.join(tempRoot, 'grabby-media-tools', 'node_modules');
    const candidate =
      command === 'ffmpeg'
        ? path.join(tempTools, 'ffmpeg-static', 'ffmpeg.exe')
        : path.join(tempTools, 'ffprobe-static', 'bin', 'win32', 'x64', 'ffprobe.exe');
    if (existsSync(candidate)) return candidate;
  }

  return command;
}

/**
 * Normalizes a client-supplied extension to one of the known video or audio
 * extensions. Anything unrecognized becomes `webm`.
 */
function safeExt(ext: string): string {
  const normalized = ext.toLowerCase().replace(/[^a-z0-9]/g, '');
  if (normalized === 'mp4' || normalized === 'mov' || normalized === 'webm') return normalized;
  if (normalized === 'm4a' || normalized === 'mp3' || normalized === 'wav') return normalized;
  return 'webm';
}

/** Returns the client-reported duration if it is finite and within (0, 600], otherwise 0. */
function sanitizeClientDuration(durationSeconds: number | undefined): number {
  if (durationSeconds === undefined || !Number.isFinite(durationSeconds)) return 0;
  if (durationSeconds <= 0 || durationSeconds > 600) return 0;
  return durationSeconds;
}

/**
 * Picks the duration to trust for a recorded clip: the sanitized client value
 * when present, otherwise the probed value (or 0 if neither is usable).
 */
function resolveRecordedDuration(probedSeconds: number, clientSeconds: number | undefined): number {
  const probed = Number.isFinite(probedSeconds) && probedSeconds > 0 ? probedSeconds : 0;
  const client = sanitizeClientDuration(clientSeconds);

  // The browser timer reflects when the user actually tapped stop. Some mobile
  // recordings report the prompt cap as media duration, so client duration wins.
  if (client > 0) return client;
  return probed;
}

/**
 * Runs ffmpeg (or ffprobe when `command === 'ffprobe'`) and resolves on exit code 0.
 *
 * @param command Anything other than `'ffprobe'` is treated as ffmpeg.
 * @param args    Command-line arguments passed verbatim.
 * @param label   Human-readable prefix used in error messages.
 * @throws Error when the process cannot start or exits non-zero; the message
 *         carries up to the last 10 non-empty output lines.
 */
async function runCommand(command: string, args: string[], label: string): Promise<void> {
  return await new Promise<void>((resolve, reject) => {
    const child = spawn(resolveMediaCommand(command === 'ffprobe' ? 'ffprobe' : 'ffmpeg'), args, {
      windowsHide: true,
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    // Rolling window of the most recent non-empty output lines (stdout + stderr).
    const logs: string[] = [];
    const collect = (chunk: Buffer) => {
      const text = chunk.toString('utf8');
      for (const line of text.split(/\r?\n/)) {
        const trimmed = line.trim();
        if (trimmed) logs.push(trimmed);
      }
      if (logs.length > 80) logs.splice(0, logs.length - 80);
    };

    child.stdout.on('data', collect);
    child.stderr.on('data', collect);
    child.on('error', (err) => {
      reject(new Error(`${label} failed to start: ${err.message}`));
    });
    child.on('close', (code) => {
      if (code === 0) {
        resolve();
        return;
      }
      reject(
        new Error(
          logs.length
            ? `${label} failed with exit code ${code}: ${logs.slice(-10).join(' | ')}`
            : `${label} failed with exit code ${code}`,
        ),
      );
    });
  });
}

/** Writes an uploaded file to `filePath`; rejects zero-byte uploads. */
async function writeClip(file: File, filePath: string): Promise<void> {
  const bytes = Buffer.from(await file.arrayBuffer());
  if (bytes.length <= 0) throw new Error('One of the clips was empty.');
  await writeFile(filePath, bytes);
}

/**
 * Returns a path on disk for the clip: its own `filePath` when set, otherwise
 * the upload is written to `fallbackPath` and that path is returned.
 *
 * @throws Error when the clip has neither `filePath` nor `file`.
 */
async function prepareClipSource(clip: ClipFile, fallbackPath: string): Promise<string> {
  if (clip.filePath) return clip.filePath;
  if (!clip.file) throw new Error('Clip source is missing.');
  await writeClip(clip.file, fallbackPath);
  return fallbackPath;
}

/**
 * Runs ffprobe with `args` and resolves with its trimmed stdout.
 * Rejects when the process cannot start or exits non-zero.
 */
function runProbe(args: string[]): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const child = spawn(resolveMediaCommand('ffprobe'), args, {
      windowsHide: true,
      // stderr is never read here, so it is discarded rather than piped; an
      // unread pipe could otherwise fill up and stall the child process.
      stdio: ['ignore', 'pipe', 'ignore'],
    });
    let stdout = '';
    child.stdout.on('data', (chunk: Buffer) => {
      stdout += chunk.toString('utf8');
    });
    child.on('error', reject);
    child.on('close', (code) => {
      if (code === 0) resolve(stdout.trim());
      else reject(new Error(`ffprobe exited with ${code}`));
    });
  });
}

/** Container-level duration in seconds, or 0 when it cannot be determined. */
async function probeFormatDuration(filePath: string): Promise<number> {
  try {
    const output = await runProbe([
      '-v',
      'error',
      '-show_entries',
      'format=duration',
      '-of',
      'default=noprint_wrappers=1:nokey=1',
      filePath,
    ]);
    const duration = Number(output);
    return Number.isFinite(duration) ? Math.max(0, duration) : 0;
  } catch {
    return 0;
  }
}

/**
 * Duration in seconds of the selected stream. Falls back to the container
 * duration when the stream reports nothing usable or ffprobe fails.
 */
async function probeStreamDuration(filePath: string, streamSelector: 'v:0' | 'a:0'): Promise<number> {
  try {
    const output = await runProbe([
      '-v',
      'error',
      '-select_streams',
      streamSelector,
      '-show_entries',
      'stream=duration',
      '-of',
      'default=noprint_wrappers=1:nokey=1',
      filePath,
    ]);
    // Only the first non-empty line is considered.
    const duration = Number(output.split(/\r?\n/).find(Boolean));
    if (Number.isFinite(duration) && duration > 0) return duration;
    return await probeFormatDuration(filePath);
  } catch {
    return await probeFormatDuration(filePath);
  }
}

/** True when ffprobe lists at least one audio stream; false on any probe failure. */
async function probeHasAudio(filePath: string): Promise<boolean> {
  try {
    const output = await runProbe([
      '-v',
      'error',
      '-select_streams',
      'a',
      '-show_entries',
      'stream=index',
      '-of',
      'csv=p=0',
      filePath,
    ]);
    return output.length > 0;
  } catch {
    return false;
  }
}

/**
 * Makes the audio track the master length of the rendered file.
 *
 * If either stream duration is unknown the input is returned untouched. If the
 * video already covers the audio (within 0.08 s) and the container is not more
 * than 0.2 s longer than the audio, the input is also returned as-is. Otherwise
 * the video is re-encoded to `outputPath`, padded by cloning the last frame
 * and/or trimmed so that it matches the audio duration; audio is stream-copied.
 *
 * @returns The path to use as the final file and its duration in seconds.
 */
async function enforceAudioMasterOutput(
  inputPath: string,
  outputPath: string,
): Promise<{ path: string; durationSeconds: number }> {
  const [videoDuration, audioDuration, formatDurationValue] = await Promise.all([
    probeStreamDuration(inputPath, 'v:0'),
    probeStreamDuration(inputPath, 'a:0'),
    probeFormatDuration(inputPath),
  ]);

  if (audioDuration <= 0 || videoDuration <= 0) {
    return {
      path: inputPath,
      durationSeconds: Math.max(videoDuration, formatDurationValue),
    };
  }

  if (videoDuration + 0.08 >= audioDuration && formatDurationValue <= audioDuration + 0.2) {
    return {
      path: inputPath,
      durationSeconds: audioDuration,
    };
  }

  const videoPadSeconds = Math.max(0, audioDuration - videoDuration);
  const videoFilter =
    videoPadSeconds > 0.08
      ? `[0:v]tpad=stop_mode=clone:stop_duration=${formatDuration(
          videoPadSeconds,
        )},trim=duration=${formatDuration(audioDuration)},setpts=PTS-STARTPTS[v]`
      : `[0:v]trim=duration=${formatDuration(audioDuration)},setpts=PTS-STARTPTS[v]`;

  await runCommand(
    'ffmpeg',
    [
      '-y',
      '-i',
      inputPath,
      '-filter_complex',
      videoFilter,
      '-map',
      '[v]',
      '-map',
      '0:a:0',
      '-c:v',
      'libx264',
      '-preset',
      'ultrafast',
      '-crf',
      '30',
      '-r',
      String(VIDEO_FPS),
      '-c:a',
      'copy',
      '-t',
      formatDuration(audioDuration),
      '-movflags',
      '+faststart',
      outputPath,
    ],
    'Final audio-master duration fix',
  );

  return {
    path: outputPath,
    durationSeconds: audioDuration,
  };
}

/** Formats seconds with at most three decimals and no trailing zeros (e.g. 10 -> "10", 1.5 -> "1.5"). */
function formatDuration(seconds: number): string {
  return seconds.toFixed(3).replace(/\.?0+$/, '');
}

/** Uses the clip's own positive integer `step` when present, else `fallbackStartStep + index`. */
function inferClipStep(clip: ClipFile, index: number, fallbackStartStep: number): number {
  const { step } = clip;
  if (step !== undefined && Number.isInteger(step) && step > 0) return step;
  return fallbackStartStep + index;
}

/** First prepared clip recorded for `step`, or null when there is none. */
function clipByStep(clips: PreparedClip[], step: number): PreparedClip | null {
  return clips.find((clip) => clip.step === step) ?? null;
}

/** Per-step video cap, defaulting to `MAX_VIDEO_CLIP_SECONDS` for unlisted steps. */
function maxSecondsForStep(step: number): number {
  return STEP_VIDEO_MAX_SECONDS[step] ?? MAX_VIDEO_CLIP_SECONDS;
}

/** Seconds of the clip that may be used: its duration capped by the step maximum, or 0 if unknown. */
function usableClipDuration(source: PreparedClip): number {
  const stepMax = maxSecondsForStep(source.step);
  if (source.duration > 0) return Math.min(source.duration, stepMax);
  return 0;
}

/** Sum of the usable seconds across all given clips. */
function totalUsableVideoDuration(videoClips: PreparedClip[]): number {
  return videoClips.reduce((total, clip) => total + usableClipDuration(clip), 0);
}

/** Appends a segment unless the source is missing or the duration is below the minimum. */
function addSegment(segments: VideoSegment[], source: PreparedClip | null, duration: number): void {
  if (!source || duration < MIN_SEGMENT_SECONDS) return;
  segments.push({
    source,
    duration,
  });
}

/** Builds a per-path budget of usable seconds (the largest value wins for duplicate paths). */
function createClipBudgets(videoClips: PreparedClip[]): Map<string, number> {
  const budgets = new Map<string, number>();
  for (const clip of videoClips) {
    budgets.set(clip.path, Math.max(budgets.get(clip.path) ?? 0, usableClipDuration(clip)));
  }
  return budgets;
}

/**
 * Appends a segment limited by the source's remaining budget and deducts what
 * was used.
 *
 * @returns The number of seconds actually added (0 when nothing was added).
 */
function addSegmentWithBudget(
  segments: VideoSegment[],
  budgets: Map<string, number>,
  source: PreparedClip | null,
  duration: number,
): number {
  if (!source || duration < MIN_SEGMENT_SECONDS) return 0;
  const remaining = budgets.get(source.path) ?? usableClipDuration(source);
  const actualDuration = Math.min(duration, remaining);
  if (actualDuration < MIN_SEGMENT_SECONDS) return 0;
  budgets.set(source.path, Math.max(0, remaining - actualDuration));
  addSegment(segments, source, actualDuration);
  return actualDuration;
}

/** Drops nulls, duplicate paths, and clips with less than the minimum usable duration. */
function uniqueSources(sources: Array<PreparedClip | null>): PreparedClip[] {
  const seen = new Set<string>();
  return sources.filter((source): source is PreparedClip => {
    if (!source || seen.has(source.path) || usableClipDuration(source) < MIN_SEGMENT_SECONDS) {
      return false;
    }
    seen.add(source.path);
    return true;
  });
}

/**
 * Fills `duration` seconds by cycling through the usable sources in short
 * chunks (at most 1.35 s with several sources, 1 s with a single one),
 * avoiding the same source twice in a row when an alternative exists.
 * This does not consult or update any clip budget.
 *
 * @returns The number of seconds appended to `segments`.
 */
function addRotatingSegments(
  segments: VideoSegment[],
  sources: Array<PreparedClip | null>,
  duration: number,
): number {
  const candidates = uniqueSources(sources);
  if (duration < MIN_SEGMENT_SECONDS || candidates.length === 0) return 0;

  const maxChunkSeconds = candidates.length > 1 ? 1.35 : 1;
  let added = 0;
  let remaining = duration;
  let cursor = 0;
  // Hard iteration cap so a planning bug can never loop forever.
  let guard = 0;

  while (remaining >= MIN_SEGMENT_SECONDS && guard < 200) {
    guard += 1;
    // `candidates` is non-empty and the index is taken modulo its length,
    // so the lookup is always in range.
    let source = candidates[cursor % candidates.length]!;
    if (candidates.length > 1 && segments[segments.length - 1]?.source.path === source.path) {
      cursor += 1;
      source = candidates[cursor % candidates.length]!;
    }

    const sourceDuration = usableClipDuration(source);
    const chunkDuration = Math.min(remaining, sourceDuration, maxChunkSeconds);
    if (chunkDuration < MIN_SEGMENT_SECONDS) break;

    addSegment(segments, source, chunkDuration);
    added += chunkDuration;
    remaining -= chunkDuration;
    cursor += 1;
  }

  return added;
}

/**
 * Lays the clips out back-to-back in the given order, each contributing its
 * usable duration, until `targetSeconds` is reached.
 */
function buildLinearSegments(
  videoClips: PreparedClip[],
  targetSeconds = totalUsableVideoDuration(videoClips),
): VideoSegment[] {
  const segments: VideoSegment[] = [];
  let remaining = Math.max(0, targetSeconds);

  for (const source of videoClips) {
    if (remaining <= MIN_SEGMENT_SECONDS) break;
    const sourceDuration = usableClipDuration(source);
    const duration = Math.min(sourceDuration, remaining);
    addSegment(segments, source, duration);
    remaining -= duration;
  }

  return segments;
}

/** Returns a copy of the clips ordered by ascending step. */
function sortByStep(clips: PreparedClip[]): PreparedClip[] {
  return [...clips].sort((a, b) => a.step - b.step);
}

/**
 * Splits `targetSeconds` across the clips in step order. Each round gives every
 * clip with budget left an equal share of the remaining time; once all budgets
 * are exhausted, any remaining time is filled with short rotating chunks.
 */
function buildBalancedVideoSegments(videoClips: PreparedClip[], targetSeconds: number): VideoSegment[] {
  const orderedClips = sortByStep(videoClips);
  const segments: VideoSegment[] = [];
  const budgets = createClipBudgets(orderedClips);

  let remaining = targetSeconds;
  let activeSources = orderedClips.filter((clip) => usableClipDuration(clip) >= MIN_SEGMENT_SECONDS);
  let guard = 0;

  while (remaining >= MIN_SEGMENT_SECONDS && activeSources.length > 0 && guard < 20) {
    guard += 1;
    const share = remaining / activeSources.length;
    let addedThisRound = 0;

    for (const source of activeSources) {
      if (remaining < MIN_SEGMENT_SECONDS) break;
      const remainingBudget = budgets.get(source.path) ?? usableClipDuration(source);
      if (remainingBudget < MIN_SEGMENT_SECONDS) continue;

      const requestedSeconds = Math.min(share, remainingBudget, remaining);
      const added = addSegmentWithBudget(segments, budgets, source, requestedSeconds);
      addedThisRound += added;
      remaining -= added;
    }

    // No meaningful progress this round: stop instead of spinning.
    if (addedThisRound < MIN_SEGMENT_SECONDS) break;

    activeSources = activeSources.filter(
      (source) => (budgets.get(source.path) ?? 0) >= MIN_SEGMENT_SECONDS,
    );
  }

  if (remaining >= MIN_SEGMENT_SECONDS) {
    addRotatingSegments(segments, orderedClips, remaining);
  }

  return segments;
}

/** Appends an audio segment, never longer than the source's known duration. */
function addAudioSegment(segments: AudioSegment[], source: PreparedClip | null, duration: number): void {
  if (!source || duration < MIN_SEGMENT_SECONDS) return;
  segments.push({
    source,
    duration: Math.min(duration, source.duration > 0 ? source.duration : duration),
  });
}

/**
 * Plans a render where separate voice clips set the timeline: audio clips are
 * queued in step order up to the target length, and the video clips are
 * balanced across that same target. The target is at least 1 second.
 */
function buildVoiceAwarePlan({
  videoClips,
  audioClips,
  renderTargetSeconds,
}: {
  videoClips: PreparedClip[];
  audioClips: PreparedClip[];
  renderTargetSeconds: number;
}): VoiceAwarePlan {
  if (videoClips.length === 0) return { videoSegments: [], audioSegments: [] };

  const targetSeconds = Math.max(1, renderTargetSeconds);
  const audioSegments: AudioSegment[] = [];
  let plannedAudioSeconds = 0;

  for (const audioClip of sortByStep(audioClips)) {
    if (audioClip.duration <= 0) continue;
    const remainingTarget = Math.max(0, targetSeconds - plannedAudioSeconds);
    if (remainingTarget < MIN_SEGMENT_SECONDS) break;
    const duration = Math.min(audioClip.duration, remainingTarget);
    addAudioSegment(audioSegments, audioClip, duration);
    plannedAudioSeconds += duration;
  }

  const videoSegments = buildBalancedVideoSegments(videoClips, targetSeconds);

  return {
    videoSegments:
      videoSegments.length > 0 ? videoSegments : buildLinearSegments(videoClips, targetSeconds),
    audioSegments,
  };
}

/**
 * Renders the uploaded clips into a single 1080x1920 H.264/AAC MP4 with ffmpeg.
 *
 * When audio clips are supplied they define the output length and the video
 * clips are cut to fit; otherwise the video clips are concatenated with their
 * own audio (silence is substituted for clips without an audio stream).
 * All intermediate files live in a per-run directory that is removed afterwards.
 *
 * @param input Video clips (at least one) and optional separate audio clips.
 * @returns The encoded file bytes, its duration rounded to whole seconds, and a filename.
 * @throws Error when no video clips are given, no usable segment can be planned,
 *         a clip is empty or has no source, ffmpeg fails, or the output is empty.
 */
export async function renderClipsOnServer(input: ServerRenderInput): Promise<ServerRenderResult> {
  if (input.videoClips.length === 0) {
    throw new Error('No video clips were uploaded.');
  }

  type StagedClip = { path: string; step: number; durationSeconds?: number };

  const runId = Math.random().toString(36).slice(2, 10);
  const runDir = path.join(WORK_DIR, runId);
  await mkdir(runDir, { recursive: true });

  const videoPaths: StagedClip[] = [];
  const audioPaths: StagedClip[] = [];
  const outputPath = path.join(runDir, `matcha-server-${runId}.mp4`);
  const fixedOutputPath = path.join(runDir, `matcha-server-${runId}-fixed.mp4`);

  try {
    // Stage every clip on disk. Video steps default to 1, 2, ...; audio steps to 5, 6, ...
    for (const [i, clip] of input.videoClips.entries()) {
      const filePath = path.join(runDir, `video-${i}.${safeExt(clip.ext)}`);
      videoPaths.push({
        path: await prepareClipSource(clip, filePath),
        step: inferClipStep(clip, i, 1),
        durationSeconds: clip.durationSeconds,
      });
    }

    for (const [i, clip] of input.audioClips.entries()) {
      const filePath = path.join(runDir, `audio-${i}.${safeExt(clip.ext)}`);
      audioPaths.push({
        path: await prepareClipSource(clip, filePath),
        step: inferClipStep(clip, i, 5),
        durationSeconds: clip.durationSeconds,
      });
    }

    const [videoDurations, audioDurations, videoAudioFlags] = await Promise.all([
      Promise.all(videoPaths.map((clip) => probeStreamDuration(clip.path, 'v:0'))),
      Promise.all(audioPaths.map((clip) => probeStreamDuration(clip.path, 'a:0'))),
      Promise.all(videoPaths.map((clip) => probeHasAudio(clip.path))),
    ]);

    const preparedVideoClips: PreparedClip[] = videoPaths.map((clip, index) => ({
      path: clip.path,
      step: clip.step,
      duration: resolveRecordedDuration(videoDurations[index] ?? 0, clip.durationSeconds),
      hasAudio: videoAudioFlags[index] ?? false,
    }));
    const preparedAudioClips: PreparedClip[] = audioPaths.map((clip, index) => ({
      path: clip.path,
      step: clip.step,
      duration: resolveRecordedDuration(audioDurations[index] ?? 0, clip.durationSeconds),
      hasAudio: true,
    }));

    const hasVoiceover = audioPaths.length > 0;
    const audioDurationSeconds = preparedAudioClips.reduce(
      (total, clip) => total + clip.duration,
      0,
    );
    // Used when voice clips exist but none has a known duration.
    const fallbackTargetSeconds = Math.min(
      MAX_AUDIO_CLIP_SECONDS,
      Math.max(1, totalUsableVideoDuration(preparedVideoClips)),
    );
    const voiceoverTargetSeconds = Math.max(
      1,
      audioDurationSeconds > 0 ? audioDurationSeconds : fallbackTargetSeconds,
    );

    const voiceAwarePlan = hasVoiceover
      ? buildVoiceAwarePlan({
          videoClips: preparedVideoClips,
          audioClips: preparedAudioClips,
          renderTargetSeconds: voiceoverTargetSeconds,
        })
      : null;
    const videoSegments = voiceAwarePlan?.videoSegments ?? buildLinearSegments(preparedVideoClips);
    const audioSegments = voiceAwarePlan?.audioSegments ?? [];
    const visualDurationSeconds = videoSegments.reduce(
      (total, segment) => total + segment.duration,
      0,
    );
    const renderTargetSeconds = hasVoiceover
      ? Math.max(1, voiceoverTargetSeconds)
      : Math.max(1, visualDurationSeconds);

    if (videoSegments.length === 0) {
      throw new Error('No usable video segments were uploaded.');
    }

    // One ffmpeg input per video segment (read-limited with -t), followed by
    // one input per staged audio clip.
    const inputArgs = [
      ...videoSegments.flatMap((segment) => [
        '-t',
        formatDuration(segment.duration),
        '-i',
        segment.source.path,
      ]),
      ...audioPaths.flatMap((clip) => ['-i', clip.path]),
    ];

    const videoFilters = videoSegments
      .map(
        (segment, i) =>
          `[${i}:v]trim=duration=${formatDuration(segment.duration)},setpts=PTS-STARTPTS,` +
          `scale=${VIDEO_WIDTH}:${VIDEO_HEIGHT}:force_original_aspect_ratio=decrease,` +
          `pad=${VIDEO_WIDTH}:${VIDEO_HEIGHT}:(ow-iw)/2:(oh-ih)/2,` +
          `setsar=1,fps=${VIDEO_FPS},format=yuv420p[v${i}]`,
      )
      .join(';');
    const videoInputs = videoSegments.map((_, i) => `[v${i}]`).join('');

    // Audio inputs come after all video-segment inputs on the command line.
    const audioOffset = videoSegments.length;
    const audioInputIndexByPath = new Map<string, number>(
      audioPaths.map((clip, index): [string, number] => [clip.path, audioOffset + index]),
    );

    // Both render paths finish by forcing video and audio to the same length.
    const videoFinalize = `[vcat]trim=duration=${formatDuration(
      renderTargetSeconds,
    )},setpts=PTS-STARTPTS[v]`;
    const audioFinalize =
      `[acat]atrim=duration=${formatDuration(renderTargetSeconds)},` +
      `apad=whole_dur=${formatDuration(renderTargetSeconds)},asetpts=PTS-STARTPTS[a]`;

    let filterComplex: string;
    if (hasVoiceover) {
      const videoConcat = `${videoInputs}concat=n=${videoSegments.length}:v=1:a=0[vcat]`;
      const audioFilters = audioSegments
        .map((segment, i) => {
          const inputIndex = audioInputIndexByPath.get(segment.source.path);
          if (inputIndex === undefined) {
            throw new Error('Voice clip input was not prepared correctly.');
          }
          return (
            `[${inputIndex}:a]atrim=duration=${formatDuration(segment.duration)},` +
            `aresample=48000,aformat=sample_rates=48000:channel_layouts=mono,` +
            `asetpts=PTS-STARTPTS[a${i}]`
          );
        })
        .join(';');
      const audioInputs = audioSegments.map((_, i) => `[a${i}]`).join('');
      // With no plannable voice segment, generate silence for the whole target.
      const audioConcat =
        audioSegments.length > 0
          ? `${audioInputs}concat=n=${audioSegments.length}:v=0:a=1[acat]`
          : `anullsrc=r=48000:cl=mono,atrim=duration=${formatDuration(
              renderTargetSeconds,
            )},aformat=sample_rates=48000:channel_layouts=mono,asetpts=PTS-STARTPTS[acat]`;

      filterComplex = [
        videoFilters,
        videoConcat,
        videoFinalize,
        audioFilters,
        audioConcat,
        audioFinalize,
      ]
        .filter(Boolean)
        .join(';');
    } else {
      const embeddedAudioFilters = videoSegments
        .map((segment, i) => {
          if (segment.source.hasAudio) {
            return (
              `[${i}:a]atrim=duration=${formatDuration(segment.duration)},` +
              `aresample=48000,aformat=sample_rates=48000:channel_layouts=mono,` +
              `asetpts=PTS-STARTPTS[a${i}]`
            );
          }
          // Silent stand-in so every segment contributes an audio pad to concat.
          return (
            `anullsrc=r=48000:cl=mono,atrim=duration=${formatDuration(segment.duration)},` +
            `aformat=sample_rates=48000:channel_layouts=mono,asetpts=PTS-STARTPTS[a${i}]`
          );
        })
        .join(';');
      const avInputs = videoSegments.map((_, i) => `[v${i}][a${i}]`).join('');
      const avConcat = `${avInputs}concat=n=${videoSegments.length}:v=1:a=1[vcat][acat]`;

      filterComplex = [
        videoFilters,
        embeddedAudioFilters,
        avConcat,
        videoFinalize,
        audioFinalize,
      ]
        .filter(Boolean)
        .join(';');
    }

    // Classic uses separate voice notes; the fallback path also supports clips
    // that already contain audio. Both paths emit one shared audio/video length.
    const outputArgs = ['-map', '[v]', '-map', '[a]'];

    await runCommand(
      'ffmpeg',
      [
        '-y',
        ...inputArgs,
        '-filter_complex',
        filterComplex,
        ...outputArgs,
        '-c:v',
        'libx264',
        '-preset',
        'ultrafast',
        '-tune',
        'zerolatency',
        '-crf',
        '30',
        '-r',
        String(VIDEO_FPS),
        '-c:a',
        'aac',
        '-b:a',
        '96k',
        '-t',
        formatDuration(renderTargetSeconds),
        '-movflags',
        '+faststart',
        '-avoid_negative_ts',
        'make_zero',
        outputPath,
      ],
      'Server clip render',
    );

    const finalOutput = await enforceAudioMasterOutput(outputPath, fixedOutputPath);
    const bytes = await readFile(finalOutput.path);
    if (bytes.length <= 0) throw new Error('Server render produced an empty video.');

    return {
      bytes,
      durationSeconds: Math.round(finalOutput.durationSeconds),
      filename: `matcha-server-${runId}.mp4`,
    };
  } finally {
    // Best-effort cleanup; a failed removal must not mask the render result or error.
    await rm(runDir, { recursive: true, force: true }).catch(() => undefined);
  }
}