"""
pipeline/highlights.py

Sends the structured transcript to an LLM via OpenRouter and asks it to pick
the top N highlight segments with precise timestamps.

Uses the OpenAI-compatible OpenRouter API so you can swap models (Claude,
Gemini, Qwen, Llama) just by changing the OPENROUTER_MODEL env var without
touching code.
"""

import json
import os
import re
import textwrap
from typing import Callable, List, Optional

from openai import OpenAI

from utils import Segment, format_duration, log


class HighlightModelError(RuntimeError):
    """Raised when all OpenRouter model attempts fail."""


# ── OpenRouter client (lazy-initialised) ────────────────────────────────────

_client: Optional[OpenAI] = None


def _get_client() -> OpenAI:
    """Return the shared OpenRouter client, creating it on first use.

    Raises:
        RuntimeError: if the OPENROUTER_API_KEY env var is missing/empty.
    """
    global _client
    if _client is None:
        api_key = os.environ.get("OPENROUTER_API_KEY", "")
        if not api_key:
            raise RuntimeError(
                "OPENROUTER_API_KEY env var is not set. "
                "Add it to your HF Space Secrets."
            )
        _client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=api_key,
        )
    return _client


# ── Transcript formatter ─────────────────────────────────────────────────────

def _build_transcript_block(transcript_data: dict, max_sentences: int = 300) -> str:
    """
    Format the transcript as a compact text block for the LLM prompt.

    Each line: [MM:SS – MM:SS] (SENTIMENT) sentence text

    Args:
        transcript_data: dict with "sentences" (each having start_ms/end_ms/
            text and optionally sentiment) and optionally "chapters"
            (each having start_ms/end_ms/gist/summary).
        max_sentences: long transcripts are evenly down-sampled to at most
            this many sentences to keep the prompt within context limits.

    Returns:
        A newline-joined text block, with an optional chapter section.
    """
    lines = []
    sentences = transcript_data.get("sentences", [])

    # Evenly sample if transcript is very long
    if len(sentences) > max_sentences:
        step = len(sentences) / max_sentences
        sentences = [sentences[int(i * step)] for i in range(max_sentences)]

    for s in sentences:
        start = format_duration(s["start_ms"] / 1000)
        end = format_duration(s["end_ms"] / 1000)
        sent = s.get("sentiment", "NEUTRAL")
        lines.append(f"[{start} – {end}] ({sent}) {s['text']}")

    chapters = transcript_data.get("chapters", [])
    if chapters:
        lines.append("\n--- AUTO-DETECTED CHAPTERS ---")
        for c in chapters:
            start = format_duration(c["start_ms"] / 1000)
            end = format_duration(c["end_ms"] / 1000)
            lines.append(f"[{start} – {end}] {c['gist']}: {c['summary']}")

    return "\n".join(lines)


# ── Overlap removal ──────────────────────────────────────────────────────────

def _remove_overlaps(segments: List[Segment]) -> List[Segment]:
    """Keep highest-scored segment when two overlap.

    Greedy: iterate segments best-score-first and accept each one only if it
    does not intersect an already-accepted segment.
    """
    ranked = sorted(segments, key=lambda s: s.score, reverse=True)
    accepted: List[Segment] = []
    for seg in ranked:
        # Two half-open intervals overlap iff each starts before the other ends.
        if not any(seg.start < a.end and seg.end > a.start for a in accepted):
            accepted.append(seg)
    return accepted


def _candidate_models() -> List[str]:
    """
    Build model fallback chain.

    - OPENROUTER_MODEL: primary model
    - OPENROUTER_FALLBACK_MODELS: comma-separated fallback list

    Empty/whitespace env values fall back to the built-in defaults, and
    fallback entries are stripped so "model-a, model-b" parses correctly.

    Returns:
        De-duplicated list, primary first, original order preserved.
    """
    # `or` treats an empty string the same as an unset variable — an empty
    # OPENROUTER_MODEL must not become the model name sent to the API.
    primary = (
        os.environ.get("OPENROUTER_MODEL")
        or "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free"
    )

    raw_fallbacks = (
        os.environ.get("OPENROUTER_FALLBACK_MODELS")
        or "inclusionai/ling-2.6-1t:free"
    )
    # Strip whitespace around each entry and drop empties — users commonly
    # write "model-a, model-b" with spaces after the commas.
    fallbacks = [m.strip() for m in raw_fallbacks.split(",") if m.strip()]

    # dict.fromkeys de-duplicates while keeping insertion order.
    return list(dict.fromkeys([primary, *fallbacks]))


# ── Main function ────────────────────────────────────────────────────────────

def detect_highlights(
    transcript_data : dict,
    video_duration : float,
    num_reels : int = 5,
    min_duration : float = 10,
    max_duration : float = 30,
    progress_cb : Optional[Callable[[str, int], None]] = None,
) -> List[Segment]:
    """
    Ask the LLM to identify the most engaging segments.

    Tries each model in the fallback chain until one returns text content,
    then parses, validates, clamps and de-overlaps the proposed segments.

    Args:
        transcript_data: structured transcript (see _build_transcript_block).
        video_duration: total video length in seconds.
        num_reels: number of highlight segments to request.
        min_duration: minimum segment length in seconds.
        max_duration: maximum segment length in seconds.
        progress_cb: optional callback(stage_name, percent).

    Returns:
        A start-time-sorted, non-overlapping list of Segment objects.

    Raises:
        HighlightModelError: if every model attempt fails, or the winning
            model returns something that is not a valid JSON list.
    """
    models = _candidate_models()
    model = models[0]

    transcript_text = _build_transcript_block(transcript_data)
    total_dur_str = format_duration(video_duration)

    system_prompt = textwrap.dedent("""
        You are an expert video editor and social media strategist specialising
        in short-form content for Instagram Reels and YouTube Shorts.

        Your task: analyse a video transcript and identify the most compelling
        highlight segments that will perform well as vertical reels.

        SELECTION CRITERIA (priority order):
        1. Emotional peaks — excitement, humour, surprise, anger, inspiration
        2. Strong hooks — opening lines that immediately grab attention
        3. Punchlines & payoffs — satisfying end of a story or argument
        4. Key insights — the single most important takeaway from a section
        5. High-energy moments — fast speech, emphasis, laughter

        STRICT RULES:
        - Times must be in SECONDS (decimal, e.g. 45.5) — NOT MM:SS format
        - Each segment must start at a sentence boundary
        - Segments must NOT overlap
        - Segments must NOT start before 0 or end after the video duration
        - Prefer segments that open with a strong hook
        - Avoid mostly-silent or filler-word segments
        - Spread picks across the whole video

        OUTPUT FORMAT — respond with ONLY valid JSON, nothing else:
        [
          { "start" : , "end" : , "reason" : "", "score" : }
        ]
    """)

    user_prompt = textwrap.dedent(f"""
        VIDEO DURATION : {total_dur_str} ({video_duration:.1f} seconds)
        REELS NEEDED   : {num_reels}
        MIN LENGTH     : {min_duration} seconds
        MAX LENGTH     : {max_duration} seconds

        TRANSCRIPT:
        {transcript_text}

        Identify exactly {num_reels} highlight segments.
        Respond with pure JSON only — no markdown fences, no explanation.
    """)

    log("🤖", f"Detecting highlights with primary model {model}…")
    if progress_cb:
        progress_cb("detecting_highlights", 10)

    errors: List[str] = []
    raw = None

    for attempt, candidate in enumerate(models, start=1):
        try:
            log("🤖", f"LLM attempt {attempt}/{len(models)} using {candidate}")
            response = _get_client().chat.completions.create(
                model       = candidate,
                temperature = 0.3,
                max_tokens  = 1024,
                messages    = [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
            )
            raw = response.choices[0].message.content
            # Some models/providers return None content (e.g. refusals).
            if not isinstance(raw, str):
                msg = f"Invalid response content: {raw}"
                errors.append(f"{candidate}: {msg}")
                log("⚠️", f"LLM returned None content from {candidate}, trying fallback")
                continue
            model = candidate  # remember which model actually succeeded
            break
        except Exception as exc:
            msg = str(exc)
            errors.append(f"{candidate}: {msg}")
            if "No endpoints found" in msg:
                log("⚠️", f"Model unavailable on OpenRouter: {candidate}")
                continue
            # For non-endpoint errors, still continue to fallback once.
            log("⚠️", f"LLM attempt failed on {candidate}: {msg}")
            continue

    if not isinstance(raw, str):
        raise HighlightModelError(
            "OpenRouter model selection failed. "
            "Set OPENROUTER_MODEL to a currently available model, or configure "
            "OPENROUTER_FALLBACK_MODELS. "
            f"Tried: {', '.join(models)}. "
            f"Reasons: {' | '.join(errors[-3:])}"
        )

    # Strip markdown fences if the model added them
    raw = re.sub(r"^```(?:json)?\s*", "", raw)
    raw = re.sub(r"\s*```$", "", raw)

    try:
        raw_segs = json.loads(raw)
    except json.JSONDecodeError as e:
        raise HighlightModelError(
            f"LLM ({model}) returned invalid JSON while selecting highlights"
        ) from e

    # The prompt demands a JSON array; anything else (e.g. a wrapping object)
    # would otherwise crash below with an opaque AttributeError.
    if not isinstance(raw_segs, list):
        raise HighlightModelError(
            f"LLM ({model}) returned invalid JSON while selecting highlights"
        )

    if progress_cb:
        progress_cb("detecting_highlights", 80)

    # ── Validate, clamp, deduplicate ─────────────────────────────────────────
    segments: List[Segment] = []
    for i, r in enumerate(raw_segs):
        if not isinstance(r, dict):
            log("⚠️", f"Segment {i+1} is not a JSON object — skipped")
            continue
        start = float(r.get("start", 0))
        end = float(r.get("end", 0))

        # Clamp into [0, video_duration], then enforce min/max lengths.
        start = max(0.0, min(start, video_duration - min_duration))
        end = min(end, video_duration)
        if end - start < min_duration:
            end = min(start + min_duration, video_duration)
        if end - start > max_duration:
            end = start + max_duration
        if end - start < min_duration / 2:
            log("⚠️", f"Segment {i+1} too short after clamping — skipped")
            continue

        segments.append(Segment(
            index  = i + 1,
            start  = round(start, 2),
            end    = round(end, 2),
            reason = str(r.get("reason", "")),
            score  = float(r.get("score", 0)),
        ))

    segments = _remove_overlaps(segments)
    segments.sort(key=lambda s: s.start)

    log("✅", f"Detected {len(segments)} highlight segments")
    for seg in segments:
        log("  📍", f"#{seg.index} [{seg.start:.1f}s–{seg.end:.1f}s] "
            f"({seg.duration:.0f}s) — {seg.reason}")

    if progress_cb:
        progress_cb("detecting_highlights", 100)

    return segments