Spaces:

moonlantern1
/

clipforge

Sleeping

File size: 32,091 Bytes

"""End-to-end product pipeline."""

import dataclasses
import json
import logging
import re
from pathlib import Path

from humeo_core.primitives.ingest import extract_keyframes
from humeo_core.schemas import Clip, LayoutInstruction, LayoutKind, RatingFeedback, RenderTheme, Scene

from humeo import interactive, session_state
from humeo.clip_assembly import apply_render_spans, assemble_clip, write_clip_plan
from humeo.clip_selection_cache import cache_valid, load_meta, transcript_fingerprint, write_artifacts
from humeo.clip_selector import (
    clip_quality_priority_score,
    load_clips,
    renumber_clips_dense,
    save_clips,
    select_clips,
)
from humeo.config import MAX_CLIP_DURATION_SEC, MIN_CLIP_DURATION_SEC, PipelineConfig
from humeo.content_pruning import run_content_pruning_stage, snap_render_windows_to_sentence_boundaries
from humeo.cutter import generate_ass
from humeo.hook_detector import run_hook_detection_stage
from humeo.hook_library import resolve_hook_library_path
from humeo.ingest import (
    download_video,
    extract_audio,
    stage_local_video,
    transcript_cache_valid,
    transcribe_whisperx,
)
from humeo.layout_vision import run_layout_vision_stage
from humeo.render_qa import qa_record_flags, run_render_qa
from humeo.render_window import clip_for_render
from humeo.reframe_ffmpeg import reframe_clip_ffmpeg
from humeo.transcript_align import clip_subtitle_words, group_words_to_cue_chunks
from humeo.video_cache import (
    extract_youtube_video_id,
    ingest_complete,
    normalize_local_source_path,
    read_youtube_info_json,
    resolve_work_directory,
    upsert_manifest_from_info,
)

logger = logging.getLogger(__name__)

# Single-word openers that ``_filter_weak_hook_clips`` treats as a weak hook
# start. Compared against the lower-cased, punctuation-stripped first word of
# the hook-window transcript text.
_WEAK_HOOK_START_WORDS = {
    "actually",
    "basically",
    "honestly",
    "look",
    "listen",
    "okay",
    "ok",
    "right",
    "so",
    "well",
    "yeah",
}
# Two-word openers treated the same way (matched against the first two hook
# words joined by a single space).
_WEAK_HOOK_START_PHRASES = {"i mean", "kind of", "sort of", "you know"}
# A clip whose clip-relative ``hook_start_sec`` is later than this is a
# candidate for dropping in ``_filter_weak_hook_clips``.
_STRONG_HOOK_LATEST_START_SEC = 6.0
# Minimum (priority score - caption penalty) a clip needs to be kept by
# ``_filter_low_quality_clips``.
_FINAL_QUALITY_THRESHOLD = 0.68
# NOTE(review): the next three constants are not referenced in the visible
# part of this module; presumably normalized frame-geometry thresholds for the
# native-highlight theme -- confirm usage before changing or removing.
_NATIVE_HIGHLIGHT_CHART_DOMINANCE_Y2 = 0.68
_NATIVE_HIGHLIGHT_MIN_PERSON_WIDTH = 0.42
_NATIVE_HIGHLIGHT_MAX_TOP_ANCHORED_PERSON_Y1 = 0.12
# Zoom floor applied by ``_split_chart_person_to_center`` when a split
# chart/person layout is collapsed to a centered layout.
_NATIVE_HIGHLIGHT_SPLIT_TO_CENTER_MIN_ZOOM = 1.20
# Case-insensitive, word-bounded match for phrases that point at on-screen
# material (charts, graphs, slides, axes, colored lines). Used by
# ``_clip_references_presentation``.
_PRESENTATION_REFERENCE_RE = re.compile(
    r"\b("
    r"as you can(?: also)? see|you can(?: also)? see|what you can(?: also)? see|look at|take a look|shown here|"
    r"shown on|on the screen|on this slide|this chart|the chart|this graph|"
    r"the graph|this slide|this matrix|the matrix|red line|yellow line|"
    r"blue line|green line|top there|bottom there|x-axis|y-axis"
    r")\b",
    flags=re.IGNORECASE,
)


def _split_chart_person_to_center(instruction: LayoutInstruction) -> LayoutInstruction:
    """Return a copy of ``instruction`` collapsed to the ``SIT_CENTER`` layout.

    The copy clears person tracking and all four split regions, resets
    ``chart_x_norm`` / ``top_band_ratio``, and raises the zoom to at least
    ``_NATIVE_HIGHLIGHT_SPLIT_TO_CENTER_MIN_ZOOM``. When the original carried a
    split person region, its ``center_x`` becomes the new ``person_x_norm``.
    """
    recentered = dict(
        layout=LayoutKind.SIT_CENTER,
        zoom=max(float(instruction.zoom), _NATIVE_HIGHLIGHT_SPLIT_TO_CENTER_MIN_ZOOM),
        person_tracking=[],
        split_chart_region=None,
        split_person_region=None,
        split_second_chart_region=None,
        split_second_person_region=None,
        chart_x_norm=0.0,
        top_band_ratio=0.5,
    )
    person_region = instruction.split_person_region
    if person_region is not None:
        # Keep the speaker horizontally anchored where the split layout had them.
        recentered["person_x_norm"] = float(person_region.center_x)
    return instruction.model_copy(update=recentered)


def _rerun_config(config: PipelineConfig, steering_notes: list[str]) -> PipelineConfig:
    """Build the config for a steered re-run of the pipeline.

    Returns a copy of ``config`` carrying a fresh list of ``steering_notes``
    with clip selection forced and output overwriting enabled. The input
    config is not modified.
    """
    overrides = {
        "steering_notes": list(steering_notes),
        "force_clip_selection": True,
        "overwrite_outputs": True,
    }
    return dataclasses.replace(config, **overrides)


def _build_steering_from_feedback(feedback: RatingFeedback) -> str:
    """Translate rating feedback into a single steering note.

    Each recognized issue contributes one canned sentence; an ``"other"``
    issue contributes the user's free text when present. ``"wrong_layout"``
    only logs a warning and contributes nothing. Returns the sentences joined
    by spaces, or an empty string when nothing actionable was reported.
    """
    issues = feedback.issues
    canned_notes = (
        ("wrong_moments", "Previous selection picked the wrong moments. Reselect with different candidates."),
        (
            "bad_cuts",
            "Clip boundaries were bad. Prefer clips starting on clean sentence beginnings and ending on completed thoughts.",
        ),
        ("boring", "Previous selection lacked energy. Bias strongly toward high-emotion, high-hook moments."),
        ("confusing", "Previous clips needed too much context. Pick moments that make sense standalone."),
        ("length_off", "Clip durations felt off. Respect the duration bounds strictly."),
    )
    notes: list[str] = [note for issue, note in canned_notes if issue in issues]

    if "wrong_layout" in issues:
        logger.warning("Received wrong_layout feedback, but layout overrides are not available until Gate 2 ships.")

    free_text = feedback.free_text
    if "other" in issues and free_text:
        notes.append(free_text)
    return " ".join(notes).strip()


def _ensure_work_dir(config: PipelineConfig) -> None:
    """Resolve ``config.work_dir`` when unset (per-video cache) and ensure it exists.

    Mutates ``config`` in place: when ``work_dir`` is ``None`` it is filled in
    via ``resolve_work_directory``. In both cases the directory (and any
    missing parents) is created, so later stages can write into it directly.

    Args:
        config: Pipeline configuration; ``work_dir`` may be updated.
    """
    if config.work_dir is None:
        config.work_dir = resolve_work_directory(
            youtube_url=config.youtube_url,
            explicit_work_dir=None,
            use_video_cache=config.use_video_cache,
            cache_root=config.cache_root,
        )
    # An explicitly supplied work_dir previously skipped creation entirely;
    # exist_ok makes this a no-op when the directory is already there.
    Path(config.work_dir).mkdir(parents=True, exist_ok=True)


def _filter_render_valid_clips(clips: list, *, stage_label: str) -> list:
    """Keep only clips whose render window satisfies the duration contract.

    A clip survives when the duration of ``clip_for_render(clip)`` lies within
    ``[MIN_CLIP_DURATION_SEC, MAX_CLIP_DURATION_SEC]``. Every rejected clip is
    logged individually, followed by a summary line; ``stage_label`` prefixes
    each message.
    """
    survivors: list = []
    rejected = 0
    for candidate in clips:
        window_sec = clip_for_render(candidate).duration_sec
        within_bounds = MIN_CLIP_DURATION_SEC <= window_sec <= MAX_CLIP_DURATION_SEC
        if not within_bounds:
            rejected += 1
            logger.warning(
                "%s: dropping clip %s because render-window duration %.1fs is outside [%ds, %ds] "
                "(trim_start=%.1fs trim_end=%.1fs).",
                stage_label,
                candidate.clip_id,
                window_sec,
                MIN_CLIP_DURATION_SEC,
                MAX_CLIP_DURATION_SEC,
                candidate.trim_start_sec,
                candidate.trim_end_sec,
            )
            continue
        survivors.append(candidate)

    if rejected:
        logger.warning("%s: dropped %d invalid render-window clip(s).", stage_label, rejected)
    return survivors


def _hook_window_text(clip, transcript: dict) -> str:
    """Return the transcript text that overlaps the clip's hook window.

    The hook window is given relative to the clip start, so it is shifted by
    ``clip.start_time_sec`` before comparing against segment times. Segments
    that merely touch the window edge are excluded. Returns ``""`` when the
    clip has no hook window or no overlapping segment has text.
    """
    hook_start = clip.hook_start_sec
    if hook_start is None:
        return ""
    hook_end = clip.hook_end_sec
    if hook_end is None:
        return ""

    window_start = clip.start_time_sec + hook_start
    window_end = clip.start_time_sec + hook_end

    pieces: list[str] = []
    for segment in transcript.get("segments", []) or []:
        seg_start = float(segment.get("start", 0.0))
        # A segment without an end is treated as zero-length at its start.
        seg_end = float(segment.get("end", seg_start))
        outside_window = seg_end <= window_start or seg_start >= window_end
        if outside_window:
            continue
        cleaned = str(segment.get("text", "")).strip()
        if cleaned:
            pieces.append(cleaned)
    return " ".join(pieces).strip()


def _filter_weak_hook_clips(clips: list, transcript: dict, *, min_kept: int) -> list:
    """Drop clips with late or weakly worded hooks, never going below ``min_kept``.

    Clips are visited in order. A clip is dropped when its hook starts later
    than ``_STRONG_HOOK_LATEST_START_SEC``, or when its hook text opens with a
    word/phrase from the weak-opener sets -- but only while more than
    ``min_kept`` clips would remain. The input list is returned unchanged when
    it already has ``min_kept`` or fewer clips.
    """
    total = len(clips)
    if total <= min_kept:
        return clips

    survivors: list = []
    drop_notes: list[str] = []

    def can_still_drop() -> bool:
        # Dropping is only allowed while the remaining count stays above the floor.
        return total - len(drop_notes) > min_kept

    for candidate in clips:
        hook_offset = candidate.hook_start_sec
        starts_late = hook_offset is not None and hook_offset > _STRONG_HOOK_LATEST_START_SEC
        if starts_late and can_still_drop():
            drop_notes.append(
                f"{candidate.clip_id} (hook starts at {hook_offset:.1f}s; target <= {_STRONG_HOOK_LATEST_START_SEC:.1f}s)"
            )
            continue

        lowered = _hook_window_text(candidate, transcript).lower()
        tokens = [token.strip(".,!?;:'\"()[]{}") for token in lowered.split()]
        tokens = [token for token in tokens if token]
        opener = tokens[0] if tokens else ""
        opener_pair = " ".join(tokens[:2])
        phrase_hit = opener_pair in _WEAK_HOOK_START_PHRASES
        if (opener in _WEAK_HOOK_START_WORDS or phrase_hit) and can_still_drop():
            reported = opener_pair if phrase_hit else opener
            drop_notes.append(f"{candidate.clip_id} (weak opener: {reported})")
            continue

        survivors.append(candidate)

    if drop_notes:
        logger.info("Dropped %d weak-hook clip(s): %s", len(drop_notes), ", ".join(drop_notes))
    return survivors


def _caption_cue_settings(render_theme) -> tuple[int, float, bool, int]:
    """Return ``(cue_words, cue_sec, prefer_break_on_punctuation, min_words_before_break)``.

    ``render_theme`` may be an enum member or a plain string. Enum members are
    compared by ``.value``: ``str()`` of a str-mixin ``Enum`` member is
    ``"ClassName.MEMBER"``, which would never equal the theme names below.
    """
    theme_name = str(getattr(render_theme, "value", render_theme))
    if theme_name == "native_highlight":
        return 6, 2.4, True, 4
    if theme_name == "reference_lower_third":
        return 10, 2.8, True, 5
    return 10, 2.8, False, 1


def _caption_chunk_penalty(clip, transcript: dict, *, render_theme) -> float:
    """Score how awkwardly a clip's subtitles would chunk into cues.

    The clip's subtitle words are grouped into cue chunks using theme-specific
    limits, then each chunk adds to the penalty:

    * 0.04 for a one-word chunk when there is more than one chunk,
    * 0.04 for a full-size chunk spoken in under 0.65s,
    * 0.03 for a chunk lasting more than 0.35s past the cue time limit.

    Args:
        clip: Clip whose subtitle words are looked up in ``transcript``.
        transcript: Transcript passed through to ``clip_subtitle_words``.
        render_theme: Theme (enum member or string) selecting the cue limits.

    Returns:
        A penalty capped at 0.18, or a flat 0.08 when the clip has no
        subtitle words at all.
    """
    words = clip_subtitle_words(transcript, clip).words
    if not words:
        return 0.08

    cue_words, cue_sec, prefer_break_on_punctuation, min_words_before_break = _caption_cue_settings(
        render_theme
    )

    cue_chunks = group_words_to_cue_chunks(
        words,
        max_words_per_cue=cue_words,
        max_cue_sec=cue_sec,
        prefer_break_on_punctuation=prefer_break_on_punctuation,
        min_words_before_break=min_words_before_break,
    )
    penalty = 0.0
    for chunk in cue_chunks:
        duration = chunk[-1].end_time - chunk[0].start_time
        if len(chunk) == 1 and len(cue_chunks) > 1:
            penalty += 0.04
        if len(chunk) >= cue_words and duration < 0.65:
            penalty += 0.04
        if duration > cue_sec + 0.35:
            penalty += 0.03
    return min(0.18, penalty)


def _filter_low_quality_clips(clips: list, transcript: dict, *, min_kept: int, render_theme) -> list:
    """Keep the clips that clear the final quality bar, then renumber them.

    Each clip is scored as its priority score minus the caption-chunk penalty
    of its render window. Clips scoring at least ``_FINAL_QUALITY_THRESHOLD``
    are kept, best first; if fewer than ``min_kept`` pass, the top
    ``min_kept`` by score are kept instead. With ``min_kept`` or fewer input
    clips nothing is filtered. The result always goes through
    ``renumber_clips_dense``.
    """
    if len(clips) <= min_kept:
        return renumber_clips_dense(clips)

    scored: list[tuple[float, object, float]] = []
    for candidate in clips:
        penalty = _caption_chunk_penalty(
            clip_for_render(candidate),
            transcript,
            render_theme=render_theme,
        )
        scored.append((clip_quality_priority_score(candidate) - penalty, candidate, penalty))

    # Highest score first; ties keep their original relative order.
    scored = sorted(scored, key=lambda entry: entry[0], reverse=True)

    survivors = [candidate for total, candidate, _ in scored if total >= _FINAL_QUALITY_THRESHOLD]
    if len(survivors) < min_kept:
        survivors = [entry[1] for entry in scored[:min_kept]]

    drop_notes = [
        f"{candidate.clip_id} (score={total:.2f}, caption_penalty={penalty:.2f})"
        for total, candidate, penalty in scored
        if candidate not in survivors
    ]
    if drop_notes:
        logger.info(
            "Dropped %d low-quality clip(s) after pruning: %s",
            len(drop_notes),
            ", ".join(drop_notes),
        )
    return renumber_clips_dense(survivors)


def _clip_references_presentation(clip) -> bool:
    """Report whether the clip's text mentions on-screen presentation material.

    Searches the clip's ``viral_hook``, ``transcript``,
    ``suggested_overlay_title`` and ``topic`` (missing or empty values count
    as empty text) for a ``_PRESENTATION_REFERENCE_RE`` match.
    """
    searched_fields = ("viral_hook", "transcript", "suggested_overlay_title", "topic")
    haystack = " ".join(str(getattr(clip, name, "") or "") for name in searched_fields)
    return _PRESENTATION_REFERENCE_RE.search(haystack) is not None


def _normalize_layout_for_render(
    instruction: LayoutInstruction,
    *,
    render_theme: RenderTheme,
    clip=None,
) -> LayoutInstruction:
    """Decide whether a split chart/person layout survives into the render.

    Only ``SPLIT_CHART_PERSON`` instructions under the ``NATIVE_HIGHLIGHT``
    theme are touched. Such an instruction is kept only when a clip is given,
    the clip's text references presentation material, and both the chart and
    person split regions are present; otherwise it is collapsed to a centered
    layout. Everything else is returned unchanged.
    """
    is_candidate = (
        render_theme == RenderTheme.NATIVE_HIGHLIGHT
        and instruction.layout == LayoutKind.SPLIT_CHART_PERSON
    )
    if not is_candidate:
        return instruction

    talks_about_screen = clip is not None and _clip_references_presentation(clip)
    if (
        talks_about_screen
        and instruction.split_chart_region is not None
        and instruction.split_person_region is not None
    ):
        return instruction
    return _split_chart_person_to_center(instruction)


def _load_layout_raw_by_clip(work_dir: Path) -> dict[str, dict]:
    """Load the per-clip ``raw`` layout metadata from ``layout_vision.json``.

    This is optional QA metadata, so every failure mode degrades to an empty
    mapping: missing file, unreadable or invalid JSON (logged as a warning),
    a top-level value that is not an object, or a ``clips`` entry that is not
    an object.

    Args:
        work_dir: Directory expected to contain ``layout_vision.json``.

    Returns:
        Mapping of clip id (as ``str``) to that clip's ``raw`` dict. Entries
        without a dict-valued ``raw`` are skipped.
    """
    path = work_dir / "layout_vision.json"
    if not path.is_file():
        return {}
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except Exception as exc:  # noqa: BLE001 - optional QA metadata
        logger.warning("Could not read layout raw metadata for QA: %s", exc)
        return {}
    # Valid JSON is not necessarily an object (e.g. a list or null); calling
    # ``.get`` on those would raise instead of degrading gracefully.
    if not isinstance(payload, dict):
        return {}
    clips = payload.get("clips", {})
    if not isinstance(clips, dict):
        return {}
    return {
        str(clip_id): item["raw"]
        for clip_id, item in clips.items()
        if isinstance(item, dict) and isinstance(item.get("raw"), dict)
    }


def _normalize_rerender_clip_id(raw: str) -> str:
    """Canonicalize a user-supplied clip id.

    When the stripped text ends in digits, only those digits are kept and
    zero-padded to at least three places (``"clip_7"`` -> ``"007"``).
    Otherwise the stripped text is returned as is.
    """
    cleaned = str(raw).strip()
    trailing_digits = re.search(r"(\d+)$", cleaned)
    if trailing_digits is None:
        return cleaned
    return format(int(trailing_digits.group(1)), "03d")


def _warned_clip_ids_from_qa(output_dir: Path) -> set[str]:
    """Collect normalized ids of clips that the last QA run flagged.

    Reads ``render_qa/qa_manifest.json`` under ``output_dir``. A stale or
    malformed manifest must not block renders, so a missing file, unreadable
    or invalid JSON (logged as a warning), a non-object top level, or a
    non-list ``shorts`` value all yield an empty set.

    Args:
        output_dir: Output directory of a previous pipeline run.

    Returns:
        Ids (via ``_normalize_rerender_clip_id``) of every record that has a
        ``clip_id`` and for which ``qa_record_flags`` returns something truthy.
    """
    manifest_path = output_dir / "render_qa" / "qa_manifest.json"
    if not manifest_path.is_file():
        return set()
    try:
        payload = json.loads(manifest_path.read_text(encoding="utf-8"))
    except Exception as exc:  # noqa: BLE001 - stale QA should not block renders
        logger.warning("Could not read QA manifest for warned-only rerender: %s", exc)
        return set()

    # The caller does not guard this function, so shape problems in an
    # otherwise valid JSON document have to be absorbed here.
    if not isinstance(payload, dict):
        return set()
    records = payload.get("shorts", [])
    if not isinstance(records, list):
        return set()

    warned: set[str] = set()
    for record in records:
        if not isinstance(record, dict):
            continue
        clip_id = record.get("clip_id")
        if clip_id and qa_record_flags(record):
            warned.add(_normalize_rerender_clip_id(str(clip_id)))
    return warned


def _load_layout_instruction_cache(work_dir: Path) -> dict[str, LayoutInstruction]:
    """Load previously computed layout instructions from ``layout_vision.json``.

    The cache is a best-effort fallback: a missing file, unreadable or invalid
    JSON (logged as a warning), a non-object top level, or a non-object
    ``clips`` value all yield an empty mapping. Individual entries that fail
    ``LayoutInstruction`` validation are logged and skipped.

    Args:
        work_dir: Directory expected to contain ``layout_vision.json``.

    Returns:
        Mapping of clip id (as ``str``) to its validated ``LayoutInstruction``.
    """
    path = work_dir / "layout_vision.json"
    if not path.is_file():
        return {}
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except Exception as exc:  # noqa: BLE001 - cache fallback
        logger.warning("Could not read cached layout instructions: %s", exc)
        return {}
    # Valid JSON that is not an object (list, null, ...) has no ``.get``;
    # treat it as an empty cache rather than crashing the render pass.
    if not isinstance(payload, dict):
        return {}
    clips = payload.get("clips", {})
    if not isinstance(clips, dict):
        return {}
    out: dict[str, LayoutInstruction] = {}
    for clip_id, item in clips.items():
        if not isinstance(item, dict) or "instruction" not in item:
            continue
        try:
            out[str(clip_id)] = LayoutInstruction.model_validate(item["instruction"])
        except Exception as exc:  # noqa: BLE001
            logger.warning("Ignoring invalid cached layout for clip %s: %s", clip_id, exc)
    return out


def run_pipeline(config: PipelineConfig) -> list[Path]:
    """Execute the full podcast-to-shorts pipeline.

    Runs, in order: ingest, clip selection, hook detection, content pruning,
    clip assembly, layout, render and (when enabled) render QA. In interactive
    mode the function may call itself again with accumulated steering notes
    after the clip-approval gate or after the output rating.

    ``config.work_dir`` may be filled in as a side effect when it is unset.

    Args:
        config: Pipeline configuration.

    Returns:
        List of paths to the final short-form MP4 files. Empty when the user
        quits at the approval gate or when a rerender filter matches no clip.
    """
    logger.info("=" * 60)
    logger.info("HUMEO PIPELINE START")
    logger.info("Source: %s", config.youtube_url)
    logger.info("Output: %s", config.output_dir)
    logger.info("=" * 60)

    _ensure_work_dir(config)
    assert config.work_dir is not None

    # Interactive sessions persist steering notes per source. Notes given on
    # the config win and are written back to the session state; otherwise any
    # saved notes are reloaded, which also forces a fresh clip selection and
    # overwriting of existing outputs.
    state = None
    if config.interactive:
        state = session_state.load_state(config.work_dir, config.youtube_url)
        if config.steering_notes:
            if list(config.steering_notes) != state.steering_notes:
                state.steering_notes = list(config.steering_notes)
                session_state.save_state(config.work_dir, state)
        elif state.steering_notes:
            config = dataclasses.replace(
                config,
                steering_notes=list(state.steering_notes),
                force_clip_selection=True,
                overwrite_outputs=True,
            )
            logger.info(
                "Loaded %d steering note(s) from session state for this source.",
                len(state.steering_notes),
            )

    # ------------------------------------------------------------------
    # Stage 1: Ingest
    # ------------------------------------------------------------------
    logger.info("--- STAGE 1: INGESTION ---")

    source_video = config.work_dir / "source.mp4"
    transcript_path = config.work_dir / "transcript.json"
    local_source_path = normalize_local_source_path(config.youtube_url)
    reuse_ingest = ingest_complete(config.work_dir, config.youtube_url)

    if reuse_ingest:
        logger.info("Cached ingest found for this source (reusing source + transcript).")
    elif local_source_path is not None:
        source_video = stage_local_video(local_source_path, config.work_dir)
    elif source_video.exists():
        logger.info("Source video already downloaded, skipping download.")
    else:
        source_video = download_video(config.youtube_url, config.work_dir)

    # The cached transcript is reused when it is valid and either the whole
    # ingest was reused or the source is not a local file; a freshly staged
    # local file is always re-transcribed.
    transcript_reusable = transcript_cache_valid(config.work_dir)
    if reuse_ingest and transcript_reusable:
        logger.info("Transcript already exists, loading.")
        with open(transcript_path, "r", encoding="utf-8") as f:
            transcript = json.load(f)
    elif transcript_reusable and local_source_path is None:
        logger.info("Transcript already exists, loading.")
        with open(transcript_path, "r", encoding="utf-8") as f:
            transcript = json.load(f)
    else:
        if transcript_path.exists():
            logger.info("Transcript cache mismatch for current transcription settings; regenerating.")
        audio_path = extract_audio(source_video, config.work_dir)
        transcript = transcribe_whisperx(audio_path, config.work_dir)

    # Cache manifest bookkeeping only applies to non-local sources. When no
    # info JSON is available, a minimal record is built from the video id.
    if local_source_path is None:
        vid = extract_youtube_video_id(config.youtube_url)
        info = read_youtube_info_json(config.work_dir)
        if not info and vid:
            info = {"id": vid, "webpage_url": config.youtube_url}
        if info:
            upsert_manifest_from_info(
                work_dir=config.work_dir,
                youtube_url=config.youtube_url,
                info=info,
                cache_root=config.cache_root,
            )

    # ------------------------------------------------------------------
    # Stage 2: Clip Selection
    # ------------------------------------------------------------------
    logger.info("--- STAGE 2: CLIP SELECTION ---")

    clips_path = config.work_dir / "clips.json"
    fp = transcript_fingerprint(transcript)
    meta = load_meta(config.work_dir)
    cache_hit = (
        clips_path.is_file()
        and not config.force_clip_selection
        and meta is not None
        and cache_valid(meta, fp, config)
    )

    if cache_hit:
        clips = load_clips(clips_path)
        logger.info("Clip selection cache hit (transcript + provider/model unchanged); skipping LLM.")
    else:
        clips, raw = select_clips(
            transcript,
            gemini_model=config.gemini_model,
            hook_library_path=resolve_hook_library_path(config),
            candidate_count=config.clip_selection_candidate_count,
            quality_threshold=config.clip_selection_quality_threshold,
            min_kept=config.clip_selection_min_kept,
            max_kept=config.clip_selection_max_kept,
            steering_notes=config.steering_notes,
        )
        save_clips(clips, clips_path)
        write_artifacts(
            config.work_dir,
            transcript=transcript,
            config=config,
            raw_response=raw,
        )

    logger.info("Selected %d clips:", len(clips))
    for clip in clips:
        logger.info(
            "  [%s] %.1fs-%.1fs (%.1fs) score=%.2f - %s",
            clip.clip_id,
            clip.start_time_sec,
            clip.end_time_sec,
            clip.duration_sec,
            clip.virality_score,
            clip.topic,
        )

    # ------------------------------------------------------------------
    # Stage 2.25: Hook Detection
    # ------------------------------------------------------------------
    # The clip selector is unreliable at localising the hook sentence and
    # tends to return the 0.0-3.0s placeholder verbatim, which would disable
    # start-trim in Stage 2.5. This stage asks Gemini to localise the real
    # hook per clip so Stage 2.5 can clamp against a real window.
    logger.info("--- STAGE 2.25: HOOK DETECTION (enabled=%s) ---", config.detect_hooks)
    clips = run_hook_detection_stage(
        config.work_dir,
        clips,
        transcript,
        transcript_fp=fp,
        config=config,
    )
    clips = _filter_weak_hook_clips(
        clips,
        transcript,
        min_kept=config.clip_selection_min_kept,
    )

    # ------------------------------------------------------------------
    # Stage 2.5: Content Pruning (HIVE-style inner-clip tightening)
    # ------------------------------------------------------------------
    # Tightens each candidate window by writing trim_start_sec / trim_end_sec
    # on the Clip models. Keyframe extraction and layout vision below both
    # consume ``clip_for_render(clip)`` so they automatically operate on the
    # pruned window without further changes.
    logger.info("--- STAGE 2.5: CONTENT PRUNING (level=%s) ---", config.prune_level)
    clips = run_content_pruning_stage(
        config.work_dir,
        clips,
        transcript,
        transcript_fp=fp,
        config=config,
    )
    clips = snap_render_windows_to_sentence_boundaries(clips, transcript)
    clips = _filter_render_valid_clips(clips, stage_label="Stage 2.5 guardrail")
    clips = _filter_low_quality_clips(
        clips,
        transcript,
        min_kept=config.clip_selection_min_kept,
        render_theme=config.render_theme,
    )

    # Optional rerender filter: explicit ids from the config plus, when
    # requested, every clip the previous QA manifest flagged. Applied after
    # the quality filter, i.e. against the renumbered clip ids.
    rerender_target_ids = {
        _normalize_rerender_clip_id(clip_id)
        for clip_id in config.rerender_clip_ids
    }
    if config.rerender_warned_only:
        rerender_target_ids.update(_warned_clip_ids_from_qa(config.output_dir))
    if rerender_target_ids:
        before_count = len(clips)
        clips = [clip for clip in clips if clip.clip_id in rerender_target_ids]
        missing = sorted(rerender_target_ids - {clip.clip_id for clip in clips})
        logger.info(
            "Rerender target filter: keeping %d / %d clip(s): %s",
            len(clips),
            before_count,
            ", ".join(clip.clip_id for clip in clips) or "(none)",
        )
        if missing:
            logger.warning("Requested rerender clip id(s) not found: %s", ", ".join(missing))
        if not clips:
            logger.warning("No clips matched rerender target filter; nothing to render.")
            return []

    # ------------------------------------------------------------------
    # Stage 2.75: Hard-cut assembly
    # ------------------------------------------------------------------
    logger.info("--- STAGE 2.75: CLIP ASSEMBLY ---")
    clips = apply_render_spans(clips, transcript)
    assembled_dir = config.work_dir / "assembled"
    assembled_by_id = {
        clip.clip_id: assemble_clip(source_video, clip, transcript, assembled_dir)
        for clip in clips
    }
    # From here on, work with the clip objects returned by assembly.
    clips = [assembled_by_id[clip.clip_id].clip for clip in clips]
    assembled_clips_path = write_clip_plan(config.work_dir / "assembled_clips.json", clips)

    # Gate 1 (interactive only): the user can quit, ask for a refined
    # selection (re-runs the whole pipeline with the accumulated steering
    # notes unless the iteration cap is reached), proceed with a subset, or
    # accept everything.
    if config.interactive and state is not None:
        result = interactive.approve_clips(clips)
        if result.action == "quit":
            logger.info("Aborted by user at Gate 1.")
            return []
        if result.action == "refine":
            state.iteration += 1
            if result.steering_note:
                state.steering_notes.append(result.steering_note)
            state.last_selected_ids = None
            session_state.save_state(config.work_dir, state)
            if state.iteration >= config.max_iterations:
                logger.warning("Iteration cap hit. Proceeding with current clips.")
            else:
                return run_pipeline(_rerun_config(config, state.steering_notes))
        elif result.action == "proceed":
            selected_ids = list(result.selected_ids or [])
            state.last_selected_ids = selected_ids
            session_state.save_state(config.work_dir, state)
            clip_by_id = {clip.clip_id: clip for clip in clips}
            # NOTE(review): an id in selected_ids that is not among the
            # current clips raises KeyError here -- confirm approve_clips
            # only ever returns ids from the list it was given.
            clips = [clip_by_id[clip_id] for clip_id in selected_ids]
        elif result.action == "accept_all":
            state.last_selected_ids = [clip.clip_id for clip in clips]
            session_state.save_state(config.work_dir, state)

    # ------------------------------------------------------------------
    # Stage 3: Clip layouts
    # ------------------------------------------------------------------
    logger.info("--- STAGE 3: CLIP LAYOUTS ---")

    keyframes_dir = config.work_dir / "keyframes"
    clip_scenes: list[Scene] = []
    source_videos_by_scene: dict[str, Path] = {}
    # One scene per clip, spanning the clip's render window and pointing at
    # the clip's assembled source file.
    for clip in clips:
        assembled = assembled_by_id[clip.clip_id]
        rw = clip_for_render(clip)
        clip_scenes.append(
            Scene(scene_id=clip.clip_id, start_time=rw.start_time_sec, end_time=rw.end_time_sec)
        )
        source_videos_by_scene[clip.clip_id] = assembled.source_path

    # On a targeted rerender, reuse cached layout instructions, but only when
    # every remaining clip has one; otherwise fall through to the full
    # keyframe + layout-vision pass below.
    layout_instructions: dict[str, LayoutInstruction] = {}
    if rerender_target_ids:
        cached_layouts = _load_layout_instruction_cache(config.work_dir)
        if all(clip.clip_id in cached_layouts for clip in clips):
            layout_instructions = {
                clip.clip_id: cached_layouts[clip.clip_id]
                for clip in clips
            }
            logger.info(
                "Using cached layout instructions for rerender target(s): %s",
                ", ".join(layout_instructions),
            )

    if not layout_instructions:
        extracted_scenes: list[Scene] = []
        # Keyframes are extracted one scene at a time because each scene has
        # its own source video and its own output sub-directory.
        for scene in clip_scenes:
            extracted_scenes.extend(
                extract_keyframes(
                    str(source_videos_by_scene[scene.scene_id]),
                    [scene],
                    str(keyframes_dir / scene.scene_id),
                )
            )
        clip_scenes = extracted_scenes
        layout_instructions = run_layout_vision_stage(
            config.work_dir,
            clip_scenes,
            source_video=source_video,
            source_videos_by_scene=source_videos_by_scene,
            transcript_fp=fp,
            clips_path=assembled_clips_path,
            config=config,
        )

    # ------------------------------------------------------------------
    # Stage 4: Render
    # ------------------------------------------------------------------
    logger.info("--- STAGE 4: RENDER ---")

    final_outputs: list[Path] = []
    # Per-clip inputs collected for the render QA stage below.
    render_clips_by_id: dict[str, Clip] = {}
    render_transcripts_by_id: dict[str, dict] = {}
    render_layouts_by_id: dict[str, LayoutInstruction] = {}
    render_sources_by_id: dict[str, Path] = {}
    subtitles_dir = config.work_dir / "subtitles"
    subtitles_dir.mkdir(parents=True, exist_ok=True)

    for clip in clips:
        assembled = assembled_by_id[clip.clip_id]
        instr = layout_instructions.get(clip.clip_id)
        if instr is None:
            # No layout from vision/cache: fall back to the clip's own hint,
            # or a centered layout when it has none.
            hint = clip.layout_hint or LayoutKind.SIT_CENTER
            instr = LayoutInstruction(clip_id=clip.clip_id, layout=hint)
        instr = _normalize_layout_for_render(instr, render_theme=config.render_theme, clip=clip)
        clip.layout = instr.layout
        rclip = clip_for_render(clip)
        render_clips_by_id[clip.clip_id] = rclip
        render_transcripts_by_id[clip.clip_id] = assembled.transcript
        render_layouts_by_id[clip.clip_id] = instr
        render_sources_by_id[clip.clip_id] = assembled.source_path
        subtitle_path = None
        if config.burn_subtitles:
            # ASS (not SRT) so the caption file's PlayResY matches the output
            # resolution and libass' font/margin scaling is 1:1.
            subtitle_path = generate_ass(
                rclip,
                assembled.transcript,
                subtitles_dir,
                max_words_per_cue=config.subtitle_max_words_per_cue,
                max_cue_sec=config.subtitle_max_cue_sec,
                play_res_x=1080,
                play_res_y=1920,
                font_size=config.subtitle_font_size,
                margin_v=config.subtitle_margin_v,
                render_theme=config.render_theme,
                native_highlight_lead_sec=config.subtitle_highlight_lead_sec,
                native_highlight_min_dwell_sec=config.subtitle_highlight_min_dwell_sec,
                repair_word_timings=config.repair_subtitle_word_timings,
            )
        else:
            logger.info("Clip %s: subtitle burn disabled for this run.", clip.clip_id)
        final_path = config.output_dir / f"short_{clip.clip_id}.mp4"
        # Existing outputs are reused unless overwriting is enabled globally
        # or this clip was explicitly targeted for rerender.
        should_overwrite_clip = config.overwrite_outputs or clip.clip_id in rerender_target_ids
        if final_path.exists() and not should_overwrite_clip:
            logger.info("Clip %s already rendered, skipping.", clip.clip_id)
            final_outputs.append(final_path)
            continue
        if final_path.exists() and should_overwrite_clip:
            logger.info("Clip %s exists; overwriting for this render pass.", clip.clip_id)

        # Font size and margin are already baked into the ASS file at
        # PlayResY=1920, so the compile primitive does not need to override
        # them -- but it still does, harmlessly, for single-source overrides.
        reframe_clip_ffmpeg(
            input_path=assembled.source_path,
            output_path=final_path,
            clip=rclip,
            layout_instruction=instr,
            subtitle_path=subtitle_path,
            subtitle_font_size=config.subtitle_font_size,
            subtitle_margin_v=config.subtitle_margin_v,
            title_text=clip.suggested_overlay_title,
            render_theme=config.render_theme,
        )
        final_outputs.append(final_path)

    if config.render_qa and final_outputs:
        logger.info("--- STAGE 4.5: RENDER QA ---")
        try:
            run_render_qa(
                output_dir=config.output_dir,
                final_outputs=final_outputs,
                render_clips_by_id=render_clips_by_id,
                transcripts_by_id=render_transcripts_by_id,
                layouts_by_id=render_layouts_by_id,
                assembled_sources_by_id=render_sources_by_id,
                raw_layouts_by_id=_load_layout_raw_by_clip(config.work_dir),
                reference_video=config.qa_reference_video,
                debug_overlay=config.qa_debug_overlay,
            )
        except Exception as exc:  # noqa: BLE001 - QA must not fail delivery
            logger.warning("Render QA failed, leaving rendered shorts intact: %s", exc)

    # ------------------------------------------------------------------
    # Done
    # ------------------------------------------------------------------
    logger.info("=" * 60)
    logger.info("PIPELINE COMPLETE - %d shorts generated:", len(final_outputs))
    for p in final_outputs:
        logger.info("  -> %s", p)
    logger.info("=" * 60)

    # Interactive rating: a rating of 3 ships as is. Any other rating is
    # turned into a steering note; when that note is non-empty and the
    # iteration cap has not been reached, the whole pipeline is re-run.
    if config.interactive and final_outputs and state is not None:
        feedback = interactive.rate_output(final_outputs)
        state.last_rating = feedback
        session_state.save_state(config.work_dir, state)
        if feedback.rating == 3:
            logger.info("Rated Great. Shipped.")
            return final_outputs

        steering = _build_steering_from_feedback(feedback)
        if not steering:
            logger.warning("Interactive feedback recorded, but it is not actionable until a later gate ships.")
            return final_outputs

        state.iteration += 1
        state.steering_notes.append(steering)
        session_state.save_state(config.work_dir, state)
        if state.iteration >= config.max_iterations:
            logger.warning("Iteration cap hit. Source may not have a strong short.")
            return final_outputs
        return run_pipeline(_rerun_config(config, state.steering_notes))

    return final_outputs