Spaces:
Running
Running
| """ | |
| pipeline/highlights.py | |
| Sends the structured transcript to an LLM via OpenRouter and asks it | |
| to pick the top N highlight segments with precise timestamps. | |
| Uses the OpenAI-compatible OpenRouter API so you can swap models | |
| (Claude, Gemini, Qwen, Llama) just by changing the OPENROUTER_MODEL | |
| env var without touching code. | |
| """ | |
| import json | |
| import os | |
| import re | |
| import textwrap | |
| from typing import Callable, List, Optional | |
| from openai import OpenAI | |
| from utils import Segment, format_duration, log | |
class HighlightModelError(RuntimeError):
    """Raised when every configured OpenRouter model attempt has failed."""
# ── OpenRouter client (lazy-initialised) ─────────────────────────────────────
_client: Optional[OpenAI] = None


def _get_client() -> OpenAI:
    """Return the module-wide OpenRouter client, creating it on first use.

    Raises:
        RuntimeError: If OPENROUTER_API_KEY is missing from the environment.
    """
    global _client
    # Fast path: client already built on a previous call.
    if _client is not None:
        return _client

    api_key = os.environ.get("OPENROUTER_API_KEY", "")
    if not api_key:
        raise RuntimeError(
            "OPENROUTER_API_KEY env var is not set. "
            "Add it to your HF Space Secrets."
        )
    _client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=api_key)
    return _client
# ── Transcript formatter ─────────────────────────────────────────────────────
def _build_transcript_block(transcript_data: dict, max_sentences: int = 300) -> str:
    """Render the transcript as a compact text block for the LLM prompt.

    One line per sentence: [MM:SS β MM:SS] (SENTIMENT) sentence text,
    followed by an optional auto-detected chapter section.
    """
    sentences = transcript_data.get("sentences", [])

    # Down-sample evenly so very long transcripts stay within prompt budget.
    if len(sentences) > max_sentences:
        stride = len(sentences) / max_sentences
        sentences = [sentences[int(k * stride)] for k in range(max_sentences)]

    out = [
        f"[{format_duration(s['start_ms'] / 1000)} β {format_duration(s['end_ms'] / 1000)}]"
        f" ({s.get('sentiment', 'NEUTRAL')}) {s['text']}"
        for s in sentences
    ]

    chapters = transcript_data.get("chapters", [])
    if chapters:
        out.append("\n--- AUTO-DETECTED CHAPTERS ---")
        for ch in chapters:
            begin = format_duration(ch["start_ms"] / 1000)
            finish = format_duration(ch["end_ms"] / 1000)
            out.append(f"[{begin} β {finish}] {ch['gist']}: {ch['summary']}")

    return "\n".join(out)
# ── Overlap removal ──────────────────────────────────────────────────────────
def _remove_overlaps(segments: List[Segment]) -> List[Segment]:
    """Greedily keep the higher-scored segment whenever two segments overlap."""
    kept: List[Segment] = []
    # Visit best-scored first; anything clashing with an accepted pick loses.
    for candidate in sorted(segments, key=lambda s: s.score, reverse=True):
        clashes = any(
            candidate.start < winner.end and candidate.end > winner.start
            for winner in kept
        )
        if not clashes:
            kept.append(candidate)
    return kept
def _candidate_models() -> List[str]:
    """Build the ordered, de-duplicated model fallback chain.

    Environment variables:
        OPENROUTER_MODEL           — primary model id (default Nemotron free tier).
        OPENROUTER_FALLBACK_MODELS — comma-separated fallback model ids.

    Returns:
        Primary model first, then fallbacks, with duplicates removed while
        preserving order. Whitespace around commas and blank entries are
        ignored, so ``"a, b"`` yields valid model ids.
    """
    # `or` also covers an explicitly-empty env var, which is not a usable id.
    primary = (
        os.environ.get("OPENROUTER_MODEL")
        or "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free"
    ).strip()

    raw_fallbacks = os.environ.get("OPENROUTER_FALLBACK_MODELS")
    if raw_fallbacks is None:
        raw_fallbacks = "inclusionai/ling-2.6-1t:free"

    # BUG FIX: the list is user-supplied and documented as comma-separated,
    # so entries must be stripped — "a, b" previously produced the id " b".
    fallbacks = [m.strip() for m in raw_fallbacks.split(",") if m.strip()]

    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys([primary, *fallbacks]))
# ── Main function ─────────────────────────────────────────────────────────────
def detect_highlights(
    transcript_data : dict,
    video_duration : float,
    num_reels : int = 5,
    min_duration : float = 10,
    max_duration : float = 30,
    progress_cb : Optional[Callable[[str, int], None]] = None,
) -> List[Segment]:
    """Ask the LLM to identify the most engaging segments.

    Args:
        transcript_data: Structured transcript (``sentences`` / ``chapters``)
            as consumed by :func:`_build_transcript_block`.
        video_duration:  Total video length in seconds; used to clamp segments.
        num_reels:       How many highlight segments to request.
        min_duration:    Minimum accepted segment length in seconds.
        max_duration:    Maximum accepted segment length in seconds.
        progress_cb:     Optional ``(stage, percent)`` progress callback.

    Returns:
        A start-sorted, non-overlapping list of Segment objects. Segments the
        model returned malformed or too short after clamping are skipped.

    Raises:
        HighlightModelError: If every candidate model fails, or the winning
            model returns invalid / wrongly-shaped JSON.
        RuntimeError: From :func:`_get_client` if the API key is missing.
    """
    models = _candidate_models()
    model = models[0]
    transcript_text = _build_transcript_block(transcript_data)
    total_dur_str = format_duration(video_duration)

    system_prompt = textwrap.dedent("""
        You are an expert video editor and social media strategist specialising
        in short-form content for Instagram Reels and YouTube Shorts.

        Your task: analyse a video transcript and identify the most compelling
        highlight segments that will perform well as vertical reels.

        SELECTION CRITERIA (priority order):
        1. Emotional peaks β excitement, humour, surprise, anger, inspiration
        2. Strong hooks β opening lines that immediately grab attention
        3. Punchlines & payoffs β satisfying end of a story or argument
        4. Key insights β the single most important takeaway from a section
        5. High-energy moments β fast speech, emphasis, laughter

        STRICT RULES:
        - Times must be in SECONDS (decimal, e.g. 45.5) β NOT MM:SS format
        - Each segment must start at a sentence boundary
        - Segments must NOT overlap
        - Segments must NOT start before 0 or end after the video duration
        - Prefer segments that open with a strong hook
        - Avoid mostly-silent or filler-word segments
        - Spread picks across the whole video

        OUTPUT FORMAT β respond with ONLY valid JSON, nothing else:
        [
          {
            "start" : <float seconds>,
            "end" : <float seconds>,
            "reason" : "<one sentence why this moment is engaging>",
            "score" : <float 0.0β10.0>
          }
        ]
    """)

    user_prompt = textwrap.dedent(f"""
        VIDEO DURATION : {total_dur_str} ({video_duration:.1f} seconds)
        REELS NEEDED : {num_reels}
        MIN LENGTH : {min_duration} seconds
        MAX LENGTH : {max_duration} seconds

        TRANSCRIPT:
        {transcript_text}

        Identify exactly {num_reels} highlight segments.
        Respond with pure JSON only β no markdown fences, no explanation.
    """)

    log("π€", f"Detecting highlights with primary model {model}β¦")
    if progress_cb:
        progress_cb("detecting_highlights", 10)

    # ── Call the model, walking the fallback chain on failure ────────────────
    raw: Optional[str] = None
    errors: List[str] = []
    for attempt, candidate in enumerate(models, start=1):
        try:
            log("π€", f"LLM attempt {attempt}/{len(models)} using {candidate}")
            response = _get_client().chat.completions.create(
                model = candidate,
                temperature = 0.3,
                max_tokens = 1024,
                messages = [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
            )
            content = response.choices[0].message.content
            if not isinstance(content, str):
                # Some providers return a null content field on refusal.
                errors.append(f"{candidate}: Invalid response content: {content}")
                log("β οΈ", f"LLM returned None content from {candidate}, trying fallback")
                continue
            raw = content
            model = candidate
            break
        except Exception as exc:
            msg = str(exc)
            errors.append(f"{candidate}: {msg}")
            if "No endpoints found" in msg:
                log("β οΈ", f"Model unavailable on OpenRouter: {candidate}")
            else:
                # Non-endpoint errors still fall through to the next model.
                log("β οΈ", f"LLM attempt failed on {candidate}: {msg}")
            continue

    if raw is None:
        raise HighlightModelError(
            "OpenRouter model selection failed. "
            "Set OPENROUTER_MODEL to a currently available model, or configure "
            "OPENROUTER_FALLBACK_MODELS. "
            f"Tried: {', '.join(models)}. "
            f"Reasons: {' | '.join(errors[-3:])}"
        )

    # Strip markdown fences if the model added them despite instructions.
    raw = re.sub(r"^```(?:json)?\s*", "", raw)
    raw = re.sub(r"\s*```$", "", raw)
    try:
        raw_segs = json.loads(raw)
    except json.JSONDecodeError as e:
        raise HighlightModelError(
            f"LLM ({model}) returned invalid JSON while selecting highlights"
        ) from e
    # BUG FIX: valid JSON is not necessarily a list — a dict or scalar payload
    # previously crashed below with AttributeError instead of a clear error.
    if not isinstance(raw_segs, list):
        raise HighlightModelError(
            f"LLM ({model}) returned JSON that is not a list of segments"
        )

    if progress_cb:
        progress_cb("detecting_highlights", 80)

    # ── Validate, clamp, deduplicate ──────────────────────────────────────────
    segments: List[Segment] = []
    for i, r in enumerate(raw_segs):
        if not isinstance(r, dict):
            log("β οΈ", f"Segment {i+1} is not a JSON object β skipped")
            continue
        try:
            start = float(r.get("start", 0))
            end = float(r.get("end", 0))
            score = float(r.get("score", 0))
        except (TypeError, ValueError):
            # e.g. the model emitted "45.5s" or null — skip rather than crash.
            log("β οΈ", f"Segment {i+1} has non-numeric fields β skipped")
            continue
        # Clamp into [0, video_duration] and enforce min/max length.
        start = max(0.0, min(start, video_duration - min_duration))
        end = min(end, video_duration)
        if end - start < min_duration:
            end = min(start + min_duration, video_duration)
        if end - start > max_duration:
            end = start + max_duration
        if end - start < min_duration / 2:
            log("β οΈ", f"Segment {i+1} too short after clamping β skipped")
            continue
        segments.append(Segment(
            index = i + 1,
            start = round(start, 2),
            end = round(end, 2),
            reason = str(r.get("reason", "")),
            score = score,
        ))

    segments = _remove_overlaps(segments)
    segments.sort(key=lambda s: s.start)

    log("β ", f"Detected {len(segments)} highlight segments")
    for seg in segments:
        log(" π", f"#{seg.index} [{seg.start:.1f}sβ{seg.end:.1f}s] "
                   f"({seg.duration:.0f}s) β {seg.reason}")
    if progress_cb:
        progress_cb("detecting_highlights", 100)
    return segments