Commit ·
089c9df
1
Parent(s): bf00623
Make classic flow audio first
Browse files
README.md
CHANGED
|
@@ -10,8 +10,8 @@ pinned: false
|
|
| 10 |
|
| 11 |
# Matcha Moments — frontend
|
| 12 |
|
| 13 |
-
Cafe-aesthetic, mobile-first PWA that walks a customer through
|
| 14 |
-
|
| 15 |
|
| 16 |
This repo is a **standalone Next.js app**. It calls Humeo's deployed public
|
| 17 |
review APIs (`https://humeo.app/api/public/reviews/*`) — no backend changes
|
|
@@ -25,7 +25,7 @@ It's gitignored, so it never ships with this repo.
|
|
| 25 |
## Tech stack
|
| 26 |
|
| 27 |
- **Next.js 14** (App Router) + TypeScript + Tailwind CSS
|
| 28 |
-
- **
|
| 29 |
- **`getUserMedia` + `MediaRecorder`** — standard browser camera APIs (no native install required)
|
| 30 |
- **`zod`** — shared validation schemas, mirrors the ones in Humeo's `src/lib/reviews/types.ts`
|
| 31 |
|
|
@@ -39,8 +39,8 @@ flow opens in 2 seconds, no install, works on iOS Safari and Android Chrome.
|
|
| 39 |
|
| 40 |
1. `/` — QR landing context screen (dev-only; in prod, the cafe's QR deep-links straight to `/c/[slug]`)
|
| 41 |
2. `/c/[slug]` — Cafe landing: brand, big "Free matcha, on the house" headline, consent copy, primary CTA
|
| 42 |
-
3. `/c/[slug]/record` —
|
| 43 |
-
4. `/preview` —
|
| 44 |
5. `/reward` — Confetti, reward code in a dark card, "show this screen to your server"
|
| 45 |
|
| 46 |
The `[slug]` route is a real Next.js dynamic segment that fetches its campaign
|
|
@@ -80,7 +80,7 @@ matcha-moments PWA Humeo backend (deployed)
|
|
| 80 |
GET /c/[slug] ──────► GET /api/public/reviews/campaign/[slug]
|
| 81 |
◄────── { id, slug, restaurantName, rulesConfig, ... }
|
| 82 |
|
| 83 |
-
|
| 84 |
|
| 85 |
POST /preview submit ──────► POST /api/public/reviews/submit
|
| 86 |
FormData: video, slug, consentAccepted,
|
|
@@ -107,21 +107,18 @@ Until then the matcha-moments app silently injects the cafe defaults.
|
|
| 107 |
|
| 108 |
---
|
| 109 |
|
| 110 |
-
## Why
|
| 111 |
|
| 112 |
-
Humeo's `/api/public/reviews/submit` accepts a single video file. We want a
|
| 113 |
-
multi-clip
|
| 114 |
-
|
|
|
|
| 115 |
|
| 116 |
-
Trade-offs:
|
| 117 |
-
-
|
| 118 |
-
-
|
| 119 |
-
-
|
| 120 |
-
|
| 121 |
-
If cafe staff start hearing complaints about phone heat, swap to a
|
| 122 |
-
multi-clip upload + server-side ffmpeg endpoint. Humeo's worker
|
| 123 |
-
(`reference/src/lib/server/processInterview.ts`) already uses ffmpeg, so the
|
| 124 |
-
migration is mostly a new submit endpoint.
|
| 125 |
|
| 126 |
---
|
| 127 |
|
|
@@ -135,9 +132,9 @@ src/
|
|
| 135 |
c/[slug]/
|
| 136 |
page.tsx Server component — fetches campaign
|
| 137 |
LandingClient.tsx Cafe landing screen
|
| 138 |
-
record/
|
| 139 |
-
page.tsx Server component — fetches campaign
|
| 140 |
-
GuidedRecordingClient.tsx
|
| 141 |
preview/page.tsx Stitch + upload + preview
|
| 142 |
reward/page.tsx Reward code reveal
|
| 143 |
globals.css
|
|
|
|
| 10 |
|
| 11 |
# Matcha Moments — frontend
|
| 12 |
|
| 13 |
+
Cafe-aesthetic, mobile-first PWA that walks a customer through one voice note
|
| 14 |
+
and three guided food shots, then hands them a matcha redemption code.
|
| 15 |
|
| 16 |
This repo is a **standalone Next.js app**. It calls Humeo's deployed public
|
| 17 |
review APIs (`https://humeo.app/api/public/reviews/*`) — no backend changes
|
|
|
|
| 25 |
## Tech stack
|
| 26 |
|
| 27 |
- **Next.js 14** (App Router) + TypeScript + Tailwind CSS
|
| 28 |
+
- **Server-side ffmpeg** — renders the recorded voice note with the food shots before upload
|
| 29 |
- **`getUserMedia` + `MediaRecorder`** — standard browser camera APIs (no native install required)
|
| 30 |
- **`zod`** — shared validation schemas, mirrors the ones in Humeo's `src/lib/reviews/types.ts`
|
| 31 |
|
|
|
|
| 39 |
|
| 40 |
1. `/` — QR landing context screen (dev-only; in prod, the cafe's QR deep-links straight to `/c/[slug]`)
|
| 41 |
2. `/c/[slug]` — Cafe landing: brand, big "Free matcha, on the house" headline, consent copy, primary CTA
|
| 42 |
+
3. `/c/[slug]/record` — Audio-first guided recorder (voice note → three food shots → auto-advance)
|
| 43 |
+
4. `/preview` — server render fits the food shots to the full voice note, uploads to Humeo, polls submission status, shows the rendered preview
|
| 44 |
5. `/reward` — Confetti, reward code in a dark card, "show this screen to your server"
|
| 45 |
|
| 46 |
The `[slug]` route is a real Next.js dynamic segment that fetches its campaign
|
|
|
|
| 80 |
GET /c/[slug] ──────► GET /api/public/reviews/campaign/[slug]
|
| 81 |
◄────── { id, slug, restaurantName, rulesConfig, ... }
|
| 82 |
|
| 83 |
+
render clips on the server
|
| 84 |
|
| 85 |
POST /preview submit ──────► POST /api/public/reviews/submit
|
| 86 |
FormData: video, slug, consentAccepted,
|
|
|
|
| 107 |
|
| 108 |
---
|
| 109 |
|
| 110 |
+
## Why server-side rendering?
|
| 111 |
|
| 112 |
+
Humeo's `/api/public/reviews/submit` accepts a single video file. We want a
|
| 113 |
+
guided multi-clip UX where the customer's voice note controls the final edit
|
| 114 |
+
length. Server-side rendering lets us keep the full voice note and fit the food
|
| 115 |
+
shots around it before submitting one finished video.
|
| 116 |
|
| 117 |
+
Trade-offs:
|
| 118 |
+
- Clips upload before preview, so the preview depends on server availability.
|
| 119 |
+
- Server render cost replaces phone heat and browser-specific ffmpeg issues.
|
| 120 |
+
- The voice note is the master duration; video is trimmed, reused, or lightly
|
| 121 |
+
held to keep every recorded word.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
---
|
| 124 |
|
|
|
|
| 132 |
c/[slug]/
|
| 133 |
page.tsx Server component — fetches campaign
|
| 134 |
LandingClient.tsx Cafe landing screen
|
| 135 |
+
record/
|
| 136 |
+
page.tsx Server component — fetches campaign
|
| 137 |
+
GuidedRecordingClient.tsx Audio-first guided recorder
|
| 138 |
preview/page.tsx Stitch + upload + preview
|
| 139 |
reward/page.tsx Reward code reveal
|
| 140 |
globals.css
|
src/app/api/public/reviews/submit-clips/route.ts
CHANGED
|
@@ -43,8 +43,8 @@ export async function POST(req: NextRequest) {
|
|
| 43 |
const deviceKey = sanitizeText(form.get('deviceKey'), 200) || null;
|
| 44 |
const tableId = sanitizeText(form.get('tableId'), 80) || null;
|
| 45 |
|
| 46 |
-
const videoClips = collectFiles(form, 'videoClip',
|
| 47 |
-
const audioClips = collectFiles(form, 'audioClip',
|
| 48 |
const totalBytes = [...videoClips, ...audioClips].reduce(
|
| 49 |
(total, clip) => total + clip.file.size,
|
| 50 |
0,
|
|
|
|
| 43 |
const deviceKey = sanitizeText(form.get('deviceKey'), 200) || null;
|
| 44 |
const tableId = sanitizeText(form.get('tableId'), 80) || null;
|
| 45 |
|
| 46 |
+
const videoClips = collectFiles(form, 'videoClip', 2);
|
| 47 |
+
const audioClips = collectFiles(form, 'audioClip', 1);
|
| 48 |
const totalBytes = [...videoClips, ...audioClips].reduce(
|
| 49 |
(total, clip) => total + clip.file.size,
|
| 50 |
0,
|
src/app/c/[slug]/LandingClient.tsx
CHANGED
|
@@ -55,8 +55,8 @@ export function LandingClient({ slug, tableId, campaign }: Props) {
|
|
| 55 |
<em className="text-[#4A5C32]">on the house</em>
|
| 56 |
</h1>
|
| 57 |
|
| 58 |
-
<p className="mx-auto mt-5 max-w-[
|
| 59 |
-
|
| 60 |
</p>
|
| 61 |
|
| 62 |
<label className="mx-auto mt-6 flex w-full max-w-[324px] cursor-pointer items-start gap-3 rounded-[14px] border border-[#78694B]/20 bg-[#F5EDD9] px-4 py-3.5 text-left shadow-[inset_0_1px_0_rgba(255,255,255,0.6)]">
|
|
@@ -82,7 +82,7 @@ export function LandingClient({ slug, tableId, campaign }: Props) {
|
|
| 82 |
: 'bg-[#A4AC8C] font-serif text-[17px] text-[#F5EDD9] disabled:cursor-not-allowed disabled:opacity-95'
|
| 83 |
}
|
| 84 |
>
|
| 85 |
-
Get my matcha
|
| 86 |
</Button>
|
| 87 |
{tableId ? (
|
| 88 |
<p className="mt-3 text-center font-mono text-[10px] uppercase tracking-[0.15em] text-[#9A8E73]">
|
|
|
|
| 55 |
<em className="text-[#4A5C32]">on the house</em>
|
| 56 |
</h1>
|
| 57 |
|
| 58 |
+
<p className="mx-auto mt-5 max-w-[294px] font-serif text-[14.5px] leading-[1.55] text-[#5A6E3F]">
|
| 59 |
+
Say one quick voice note first. Then record 3 food shots, up to 10 seconds each. We'll fit the reel to your voice.
|
| 60 |
</p>
|
| 61 |
|
| 62 |
<label className="mx-auto mt-6 flex w-full max-w-[324px] cursor-pointer items-start gap-3 rounded-[14px] border border-[#78694B]/20 bg-[#F5EDD9] px-4 py-3.5 text-left shadow-[inset_0_1px_0_rgba(255,255,255,0.6)]">
|
|
|
|
| 82 |
: 'bg-[#A4AC8C] font-serif text-[17px] text-[#F5EDD9] disabled:cursor-not-allowed disabled:opacity-95'
|
| 83 |
}
|
| 84 |
>
|
| 85 |
+
Get my matcha ->
|
| 86 |
</Button>
|
| 87 |
{tableId ? (
|
| 88 |
<p className="mt-3 text-center font-mono text-[10px] uppercase tracking-[0.15em] text-[#9A8E73]">
|
src/app/c/[slug]/record/GuidedRecordingClient.tsx
CHANGED
|
@@ -389,7 +389,7 @@ function NativeVideoCapture({
|
|
| 389 |
<Camera className="h-14 w-14 text-sage" />
|
| 390 |
</button>
|
| 391 |
<p className="mt-5 max-w-[280px] text-sm leading-6 text-white/70">
|
| 392 |
-
Use your phone camera for the cleanest clip.
|
| 393 |
</p>
|
| 394 |
{error ? (
|
| 395 |
<div className="mt-5 rounded-2xl bg-red-500/90 px-4 py-2 text-sm">
|
|
|
|
| 389 |
<Camera className="h-14 w-14 text-sage" />
|
| 390 |
</button>
|
| 391 |
<p className="mt-5 max-w-[280px] text-sm leading-6 text-white/70">
|
| 392 |
+
Use your phone camera for the cleanest clip. Record up to {prompt.maxSeconds} seconds.
|
| 393 |
</p>
|
| 394 |
{error ? (
|
| 395 |
<div className="mt-5 rounded-2xl bg-red-500/90 px-4 py-2 text-sm">
|
src/lib/ffmpeg.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
* Client-side video concatenation using ffmpeg.wasm.
|
| 3 |
*
|
| 4 |
* Why client-side: Humeo's existing /api/public/reviews/submit endpoint accepts
|
| 5 |
-
* a single video file. To avoid backend changes for v1,
|
| 6 |
* in the browser before upload.
|
| 7 |
*
|
| 8 |
* Cost: ~8MB of WASM lazy-loaded after the customer finishes recording.
|
|
|
|
| 2 |
* Client-side video concatenation using ffmpeg.wasm.
|
| 3 |
*
|
| 4 |
* Why client-side: Humeo's existing /api/public/reviews/submit endpoint accepts
|
| 5 |
+
* a single video file. To avoid backend changes for v1, this fallback stitches clips
|
| 6 |
* in the browser before upload.
|
| 7 |
*
|
| 8 |
* Cost: ~8MB of WASM lazy-loaded after the customer finishes recording.
|
src/lib/server/reviewStore.ts
CHANGED
|
@@ -94,67 +94,40 @@ const SAGE_AND_STONE: PublicReviewCampaign = {
|
|
| 94 |
prompts: [
|
| 95 |
{
|
| 96 |
step: 1,
|
| 97 |
-
title: '
|
| 98 |
-
tip: '
|
| 99 |
-
mediaType: '
|
| 100 |
-
camera: '
|
| 101 |
-
maxSeconds:
|
| 102 |
optional: false,
|
| 103 |
},
|
| 104 |
{
|
| 105 |
step: 2,
|
| 106 |
-
title: '
|
| 107 |
-
tip: '
|
| 108 |
mediaType: 'video',
|
| 109 |
camera: 'rear',
|
| 110 |
-
maxSeconds:
|
| 111 |
optional: false,
|
| 112 |
},
|
| 113 |
{
|
| 114 |
step: 3,
|
| 115 |
-
title: '
|
| 116 |
-
tip: '
|
| 117 |
mediaType: 'video',
|
| 118 |
camera: 'rear',
|
| 119 |
-
maxSeconds:
|
| 120 |
optional: false,
|
| 121 |
},
|
| 122 |
{
|
| 123 |
step: 4,
|
| 124 |
-
title: '
|
| 125 |
-
tip: '
|
| 126 |
mediaType: 'video',
|
| 127 |
-
camera: '
|
| 128 |
-
maxSeconds:
|
| 129 |
-
optional: true,
|
| 130 |
-
},
|
| 131 |
-
{
|
| 132 |
-
step: 5,
|
| 133 |
-
title: "Voice: What's the dish?",
|
| 134 |
-
tip: 'Say the dish name. Describe what is on the plate.',
|
| 135 |
-
mediaType: 'audio',
|
| 136 |
-
camera: 'front',
|
| 137 |
-
maxSeconds: 8,
|
| 138 |
-
optional: false,
|
| 139 |
-
},
|
| 140 |
-
{
|
| 141 |
-
step: 6,
|
| 142 |
-
title: 'What did you like about it?',
|
| 143 |
-
tip: 'Say: Flavor, texture, portion, or what you liked.',
|
| 144 |
-
mediaType: 'audio',
|
| 145 |
-
camera: 'front',
|
| 146 |
-
maxSeconds: 8,
|
| 147 |
optional: false,
|
| 148 |
},
|
| 149 |
-
{
|
| 150 |
-
step: 7,
|
| 151 |
-
title: 'Voice: Recommendation',
|
| 152 |
-
tip: 'Optional: Say if you will recommend the dish and to whom.',
|
| 153 |
-
mediaType: 'audio',
|
| 154 |
-
camera: 'front',
|
| 155 |
-
maxSeconds: 8,
|
| 156 |
-
optional: true,
|
| 157 |
-
},
|
| 158 |
],
|
| 159 |
rewardType: 'static_code',
|
| 160 |
rewardValue: null,
|
|
|
|
| 94 |
prompts: [
|
| 95 |
{
|
| 96 |
step: 1,
|
| 97 |
+
title: 'Tell us what you ordered.',
|
| 98 |
+
tip: 'Voice only. Say the dish name and what you liked about it.',
|
| 99 |
+
mediaType: 'audio',
|
| 100 |
+
camera: 'front',
|
| 101 |
+
maxSeconds: 10,
|
| 102 |
optional: false,
|
| 103 |
},
|
| 104 |
{
|
| 105 |
step: 2,
|
| 106 |
+
title: 'Close-up pan of the food.',
|
| 107 |
+
tip: 'Move slowly across the texture, sauce, steam, and toppings.',
|
| 108 |
mediaType: 'video',
|
| 109 |
camera: 'rear',
|
| 110 |
+
maxSeconds: 10,
|
| 111 |
optional: false,
|
| 112 |
},
|
| 113 |
{
|
| 114 |
step: 3,
|
| 115 |
+
title: 'Wide shot of the table.',
|
| 116 |
+
tip: 'Show the full plate, drink, table setup, and a little cafe vibe.',
|
| 117 |
mediaType: 'video',
|
| 118 |
camera: 'rear',
|
| 119 |
+
maxSeconds: 10,
|
| 120 |
optional: false,
|
| 121 |
},
|
| 122 |
{
|
| 123 |
step: 4,
|
| 124 |
+
title: 'Action detail of the food.',
|
| 125 |
+
tip: 'Cut, scoop, pour, lift, stir, or show the best bite.',
|
| 126 |
mediaType: 'video',
|
| 127 |
+
camera: 'rear',
|
| 128 |
+
maxSeconds: 10,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
optional: false,
|
| 130 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
],
|
| 132 |
rewardType: 'static_code',
|
| 133 |
rewardValue: null,
|
src/lib/server/serverClipRenderer.ts
CHANGED
|
@@ -25,15 +25,15 @@ const WORK_DIR = path.join(process.cwd(), '.local-review-data', 'server-renders'
|
|
| 25 |
const VIDEO_WIDTH = 1080;
|
| 26 |
const VIDEO_HEIGHT = 1920;
|
| 27 |
const VIDEO_FPS = 24;
|
| 28 |
-
const MAX_VIDEO_CLIP_SECONDS =
|
| 29 |
-
const MAX_AUDIO_CLIP_SECONDS =
|
| 30 |
const FINAL_VIDEO_MAX_SECONDS = 17;
|
| 31 |
-
const VOICEOVER_RENDER_SAFETY_MAX_SECONDS =
|
| 32 |
const STEP_VIDEO_MAX_SECONDS: Record<number, number> = {
|
| 33 |
-
1:
|
| 34 |
-
2:
|
| 35 |
-
3:
|
| 36 |
-
4:
|
| 37 |
};
|
| 38 |
|
| 39 |
type PreparedClip = {
|
|
@@ -238,52 +238,69 @@ async function probeHasAudio(filePath: string) {
|
|
| 238 |
}
|
| 239 |
}
|
| 240 |
|
| 241 |
-
async function
|
| 242 |
const [videoDuration, audioDuration, formatDurationValue] = await Promise.all([
|
| 243 |
probeStreamDuration(inputPath, 'v:0'),
|
| 244 |
probeStreamDuration(inputPath, 'a:0'),
|
| 245 |
probeFormatDuration(inputPath),
|
| 246 |
]);
|
| 247 |
|
| 248 |
-
if (
|
| 249 |
-
videoDuration <= 0 ||
|
| 250 |
-
((audioDuration <= videoDuration + 0.08) && formatDurationValue <= videoDuration + 0.08)
|
| 251 |
-
) {
|
| 252 |
return {
|
| 253 |
path: inputPath,
|
| 254 |
durationSeconds: Math.max(videoDuration, formatDurationValue),
|
| 255 |
};
|
| 256 |
}
|
| 257 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
await runCommand(
|
| 259 |
'ffmpeg',
|
| 260 |
[
|
| 261 |
'-y',
|
| 262 |
'-i',
|
| 263 |
inputPath,
|
|
|
|
|
|
|
| 264 |
'-map',
|
| 265 |
-
'
|
| 266 |
'-map',
|
| 267 |
-
'0:a:0
|
| 268 |
'-c:v',
|
| 269 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
'-c:a',
|
| 271 |
-
'
|
| 272 |
-
'-b:a',
|
| 273 |
-
'96k',
|
| 274 |
'-t',
|
| 275 |
-
formatDuration(
|
| 276 |
-
'-shortest',
|
| 277 |
'-movflags',
|
| 278 |
'+faststart',
|
| 279 |
outputPath,
|
| 280 |
],
|
| 281 |
-
'Final duration
|
| 282 |
);
|
| 283 |
|
| 284 |
return {
|
| 285 |
path: outputPath,
|
| 286 |
-
durationSeconds:
|
| 287 |
};
|
| 288 |
}
|
| 289 |
|
|
@@ -407,6 +424,49 @@ function buildLinearSegments(videoClips: PreparedClip[]) {
|
|
| 407 |
return segments;
|
| 408 |
}
|
| 409 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
function addAudioSegment(
|
| 411 |
segments: AudioSegment[],
|
| 412 |
source: PreparedClip | null,
|
|
@@ -430,112 +490,24 @@ function buildVoiceAwarePlan({
|
|
| 430 |
}): VoiceAwarePlan {
|
| 431 |
if (videoClips.length === 0) return { videoSegments: [], audioSegments: [] };
|
| 432 |
|
| 433 |
-
const closeShot = clipByStep(videoClips, 1) ?? videoClips[0]!;
|
| 434 |
-
const wideShot = clipByStep(videoClips, 2) ?? videoClips[1] ?? closeShot;
|
| 435 |
-
const actionShot = clipByStep(videoClips, 3) ?? videoClips[2] ?? wideShot;
|
| 436 |
-
const reactionShot =
|
| 437 |
-
clipByStep(videoClips, 4) ??
|
| 438 |
-
videoClips.find((clip) => clip.step >= 6) ??
|
| 439 |
-
(videoClips.length > 3 ? videoClips[videoClips.length - 1]! : null);
|
| 440 |
-
|
| 441 |
-
const orderAudio = clipByStep(audioClips, 5) ?? clipByStep(audioClips, 4) ?? audioClips[0] ?? null;
|
| 442 |
-
const likedAudio =
|
| 443 |
-
clipByStep(audioClips, 6) ??
|
| 444 |
-
audioClips.find((clip) => clip !== orderAudio) ??
|
| 445 |
-
null;
|
| 446 |
-
const recommendationAudio =
|
| 447 |
-
clipByStep(audioClips, 7) ??
|
| 448 |
-
audioClips.find((clip) => clip !== orderAudio && clip !== likedAudio) ??
|
| 449 |
-
null;
|
| 450 |
-
|
| 451 |
const targetSeconds = clamp(renderTargetSeconds, 1, VOICEOVER_RENDER_SAFETY_MAX_SECONDS);
|
| 452 |
-
const narrativeSeconds = Math.max(1, targetSeconds);
|
| 453 |
-
|
| 454 |
-
const desiredOrderSeconds = orderAudio
|
| 455 |
-
? clamp(orderAudio.duration || 3, 0.5, narrativeSeconds)
|
| 456 |
-
: 0;
|
| 457 |
-
const desiredLikedSeconds = likedAudio
|
| 458 |
-
? clamp(likedAudio.duration || 3, 0.5, narrativeSeconds)
|
| 459 |
-
: 0;
|
| 460 |
-
const desiredRecommendationSeconds = recommendationAudio
|
| 461 |
-
? clamp(recommendationAudio.duration || 3, 0.5, narrativeSeconds)
|
| 462 |
-
: 0;
|
| 463 |
-
const desiredVoiceSeconds =
|
| 464 |
-
desiredOrderSeconds + desiredLikedSeconds + desiredRecommendationSeconds;
|
| 465 |
-
const voiceScale =
|
| 466 |
-
desiredVoiceSeconds > narrativeSeconds ? narrativeSeconds / desiredVoiceSeconds : 1;
|
| 467 |
-
const orderSeconds = desiredOrderSeconds * voiceScale;
|
| 468 |
-
const likedSeconds = desiredLikedSeconds * voiceScale;
|
| 469 |
-
const recommendationSeconds = desiredRecommendationSeconds * voiceScale;
|
| 470 |
-
|
| 471 |
-
const segments: VideoSegment[] = [];
|
| 472 |
const audioSegments: AudioSegment[] = [];
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
}
|
| 484 |
-
actualOrderSeconds += addRotatingSegments(
|
| 485 |
-
segments,
|
| 486 |
-
[closeShot, wideShot, actionShot, reactionShot],
|
| 487 |
-
orderSeconds - actualOrderSeconds,
|
| 488 |
-
);
|
| 489 |
-
addAudioSegment(audioSegments, orderAudio, orderSeconds);
|
| 490 |
|
| 491 |
-
|
| 492 |
-
actualLikedSeconds += addRotatingSegments(
|
| 493 |
-
segments,
|
| 494 |
-
[actionShot, closeShot, wideShot, reactionShot],
|
| 495 |
-
likedSeconds - actualLikedSeconds,
|
| 496 |
-
);
|
| 497 |
-
addAudioSegment(audioSegments, likedAudio, likedSeconds);
|
| 498 |
-
|
| 499 |
-
let remainingRecommendationSeconds = recommendationSeconds;
|
| 500 |
-
let actualRecommendationSeconds = 0;
|
| 501 |
-
actualRecommendationSeconds += addSegmentWithBudget(
|
| 502 |
-
segments,
|
| 503 |
-
budgets,
|
| 504 |
-
reactionShot,
|
| 505 |
-
remainingRecommendationSeconds,
|
| 506 |
-
);
|
| 507 |
-
remainingRecommendationSeconds -= actualRecommendationSeconds;
|
| 508 |
-
const wideRecommendationSeconds = addSegmentWithBudget(
|
| 509 |
-
segments,
|
| 510 |
-
budgets,
|
| 511 |
-
wideShot,
|
| 512 |
-
remainingRecommendationSeconds,
|
| 513 |
-
);
|
| 514 |
-
actualRecommendationSeconds += wideRecommendationSeconds;
|
| 515 |
-
remainingRecommendationSeconds -= wideRecommendationSeconds;
|
| 516 |
-
const actionRecommendationSeconds = addSegmentWithBudget(
|
| 517 |
-
segments,
|
| 518 |
-
budgets,
|
| 519 |
-
actionShot,
|
| 520 |
-
remainingRecommendationSeconds,
|
| 521 |
-
);
|
| 522 |
-
actualRecommendationSeconds += actionRecommendationSeconds;
|
| 523 |
-
remainingRecommendationSeconds -= actionRecommendationSeconds;
|
| 524 |
-
actualRecommendationSeconds += addSegmentWithBudget(
|
| 525 |
-
segments,
|
| 526 |
-
budgets,
|
| 527 |
-
closeShot,
|
| 528 |
-
remainingRecommendationSeconds,
|
| 529 |
-
);
|
| 530 |
-
actualRecommendationSeconds += addRotatingSegments(
|
| 531 |
-
segments,
|
| 532 |
-
[reactionShot, wideShot, actionShot, closeShot, ...foodSources],
|
| 533 |
-
recommendationSeconds - actualRecommendationSeconds,
|
| 534 |
-
);
|
| 535 |
-
addAudioSegment(audioSegments, recommendationAudio, recommendationSeconds);
|
| 536 |
|
| 537 |
return {
|
| 538 |
-
videoSegments:
|
| 539 |
audioSegments,
|
| 540 |
};
|
| 541 |
}
|
|
@@ -763,7 +735,6 @@ export async function renderClipsOnServer(input: ServerRenderInput): Promise<Ser
|
|
| 763 |
'96k',
|
| 764 |
'-t',
|
| 765 |
formatDuration(renderTargetSeconds),
|
| 766 |
-
'-shortest',
|
| 767 |
'-movflags',
|
| 768 |
'+faststart',
|
| 769 |
'-avoid_negative_ts',
|
|
@@ -773,7 +744,7 @@ export async function renderClipsOnServer(input: ServerRenderInput): Promise<Ser
|
|
| 773 |
'Server clip render',
|
| 774 |
);
|
| 775 |
|
| 776 |
-
const finalOutput = await
|
| 777 |
const bytes = await readFile(finalOutput.path);
|
| 778 |
if (bytes.length <= 0) throw new Error('Server render produced an empty video.');
|
| 779 |
|
|
|
|
| 25 |
const VIDEO_WIDTH = 1080;
|
| 26 |
const VIDEO_HEIGHT = 1920;
|
| 27 |
const VIDEO_FPS = 24;
|
| 28 |
+
const MAX_VIDEO_CLIP_SECONDS = 10;
|
| 29 |
+
const MAX_AUDIO_CLIP_SECONDS = 10;
|
| 30 |
const FINAL_VIDEO_MAX_SECONDS = 17;
|
| 31 |
+
const VOICEOVER_RENDER_SAFETY_MAX_SECONDS = 60;
|
| 32 |
const STEP_VIDEO_MAX_SECONDS: Record<number, number> = {
|
| 33 |
+
1: 10,
|
| 34 |
+
2: 10,
|
| 35 |
+
3: 10,
|
| 36 |
+
4: 10,
|
| 37 |
};
|
| 38 |
|
| 39 |
type PreparedClip = {
|
|
|
|
| 238 |
}
|
| 239 |
}
|
| 240 |
|
| 241 |
+
async function enforceAudioMasterOutput(inputPath: string, outputPath: string) {
|
| 242 |
const [videoDuration, audioDuration, formatDurationValue] = await Promise.all([
|
| 243 |
probeStreamDuration(inputPath, 'v:0'),
|
| 244 |
probeStreamDuration(inputPath, 'a:0'),
|
| 245 |
probeFormatDuration(inputPath),
|
| 246 |
]);
|
| 247 |
|
| 248 |
+
if (audioDuration <= 0 || videoDuration <= 0) {
|
|
|
|
|
|
|
|
|
|
| 249 |
return {
|
| 250 |
path: inputPath,
|
| 251 |
durationSeconds: Math.max(videoDuration, formatDurationValue),
|
| 252 |
};
|
| 253 |
}
|
| 254 |
|
| 255 |
+
if (videoDuration + 0.08 >= audioDuration && formatDurationValue <= audioDuration + 0.2) {
|
| 256 |
+
return {
|
| 257 |
+
path: inputPath,
|
| 258 |
+
durationSeconds: audioDuration,
|
| 259 |
+
};
|
| 260 |
+
}
|
| 261 |
+
|
| 262 |
+
const videoPadSeconds = Math.max(0, audioDuration - videoDuration);
|
| 263 |
+
const videoFilter =
|
| 264 |
+
videoPadSeconds > 0.08
|
| 265 |
+
? `[0:v]tpad=stop_mode=clone:stop_duration=${formatDuration(
|
| 266 |
+
videoPadSeconds,
|
| 267 |
+
)},trim=duration=${formatDuration(audioDuration)},setpts=PTS-STARTPTS[v]`
|
| 268 |
+
: `[0:v]trim=duration=${formatDuration(audioDuration)},setpts=PTS-STARTPTS[v]`;
|
| 269 |
+
|
| 270 |
await runCommand(
|
| 271 |
'ffmpeg',
|
| 272 |
[
|
| 273 |
'-y',
|
| 274 |
'-i',
|
| 275 |
inputPath,
|
| 276 |
+
'-filter_complex',
|
| 277 |
+
videoFilter,
|
| 278 |
'-map',
|
| 279 |
+
'[v]',
|
| 280 |
'-map',
|
| 281 |
+
'0:a:0',
|
| 282 |
'-c:v',
|
| 283 |
+
'libx264',
|
| 284 |
+
'-preset',
|
| 285 |
+
'ultrafast',
|
| 286 |
+
'-crf',
|
| 287 |
+
'30',
|
| 288 |
+
'-r',
|
| 289 |
+
String(VIDEO_FPS),
|
| 290 |
'-c:a',
|
| 291 |
+
'copy',
|
|
|
|
|
|
|
| 292 |
'-t',
|
| 293 |
+
formatDuration(audioDuration),
|
|
|
|
| 294 |
'-movflags',
|
| 295 |
'+faststart',
|
| 296 |
outputPath,
|
| 297 |
],
|
| 298 |
+
'Final audio-master duration fix',
|
| 299 |
);
|
| 300 |
|
| 301 |
return {
|
| 302 |
path: outputPath,
|
| 303 |
+
durationSeconds: audioDuration,
|
| 304 |
};
|
| 305 |
}
|
| 306 |
|
|
|
|
| 424 |
return segments;
|
| 425 |
}
|
| 426 |
|
| 427 |
+
function sortByStep(clips: PreparedClip[]) {
|
| 428 |
+
return [...clips].sort((a, b) => a.step - b.step);
|
| 429 |
+
}
|
| 430 |
+
|
| 431 |
+
function buildBalancedVideoSegments(videoClips: PreparedClip[], targetSeconds: number) {
|
| 432 |
+
const orderedClips = sortByStep(videoClips);
|
| 433 |
+
const segments: VideoSegment[] = [];
|
| 434 |
+
const budgets = createClipBudgets(orderedClips);
|
| 435 |
+
let remaining = targetSeconds;
|
| 436 |
+
let activeSources = orderedClips.filter((clip) => usableClipDuration(clip) >= 0.25);
|
| 437 |
+
let guard = 0;
|
| 438 |
+
|
| 439 |
+
while (remaining >= 0.25 && activeSources.length > 0 && guard < 20) {
|
| 440 |
+
guard += 1;
|
| 441 |
+
const share = remaining / activeSources.length;
|
| 442 |
+
let addedThisRound = 0;
|
| 443 |
+
|
| 444 |
+
for (const source of activeSources) {
|
| 445 |
+
if (remaining < 0.25) break;
|
| 446 |
+
|
| 447 |
+
const remainingBudget = budgets.get(source.path) ?? usableClipDuration(source);
|
| 448 |
+
if (remainingBudget < 0.25) continue;
|
| 449 |
+
|
| 450 |
+
const requestedSeconds = Math.min(share, remainingBudget, remaining);
|
| 451 |
+
const added = addSegmentWithBudget(segments, budgets, source, requestedSeconds);
|
| 452 |
+
addedThisRound += added;
|
| 453 |
+
remaining -= added;
|
| 454 |
+
}
|
| 455 |
+
|
| 456 |
+
if (addedThisRound < 0.25) break;
|
| 457 |
+
activeSources = activeSources.filter((source) => {
|
| 458 |
+
const remainingBudget = budgets.get(source.path) ?? 0;
|
| 459 |
+
return remainingBudget >= 0.25;
|
| 460 |
+
});
|
| 461 |
+
}
|
| 462 |
+
|
| 463 |
+
if (remaining >= 0.25) {
|
| 464 |
+
addRotatingSegments(segments, orderedClips, remaining);
|
| 465 |
+
}
|
| 466 |
+
|
| 467 |
+
return segments;
|
| 468 |
+
}
|
| 469 |
+
|
| 470 |
function addAudioSegment(
|
| 471 |
segments: AudioSegment[],
|
| 472 |
source: PreparedClip | null,
|
|
|
|
| 490 |
}): VoiceAwarePlan {
|
| 491 |
if (videoClips.length === 0) return { videoSegments: [], audioSegments: [] };
|
| 492 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
const targetSeconds = clamp(renderTargetSeconds, 1, VOICEOVER_RENDER_SAFETY_MAX_SECONDS);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 494 |
const audioSegments: AudioSegment[] = [];
|
| 495 |
+
let plannedAudioSeconds = 0;
|
| 496 |
+
|
| 497 |
+
for (const audioClip of sortByStep(audioClips)) {
|
| 498 |
+
const remainingTarget = Math.max(0, targetSeconds - plannedAudioSeconds);
|
| 499 |
+
const duration =
|
| 500 |
+
audioClip.duration > 0
|
| 501 |
+
? audioClip.duration
|
| 502 |
+
: Math.min(MAX_AUDIO_CLIP_SECONDS, remainingTarget || MAX_AUDIO_CLIP_SECONDS);
|
| 503 |
+
addAudioSegment(audioSegments, audioClip, duration);
|
| 504 |
+
plannedAudioSeconds += duration;
|
| 505 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 506 |
|
| 507 |
+
const videoSegments = buildBalancedVideoSegments(videoClips, targetSeconds);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 508 |
|
| 509 |
return {
|
| 510 |
+
videoSegments: videoSegments.length > 0 ? videoSegments : buildLinearSegments(videoClips),
|
| 511 |
audioSegments,
|
| 512 |
};
|
| 513 |
}
|
|
|
|
| 735 |
'96k',
|
| 736 |
'-t',
|
| 737 |
formatDuration(renderTargetSeconds),
|
|
|
|
| 738 |
'-movflags',
|
| 739 |
'+faststart',
|
| 740 |
'-avoid_negative_ts',
|
|
|
|
| 744 |
'Server clip render',
|
| 745 |
);
|
| 746 |
|
| 747 |
+
const finalOutput = await enforceAudioMasterOutput(outputPath, fixedOutputPath);
|
| 748 |
const bytes = await readFile(finalOutput.path);
|
| 749 |
if (bytes.length <= 0) throw new Error('Server render produced an empty video.');
|
| 750 |
|