""" pipeline/utils.py Shared utilities: data classes, logger, shell runner, ffprobe wrapper. Everything else imports from here — no circular deps. """ import json import subprocess from dataclasses import dataclass from datetime import datetime from pathlib import Path from typing import Any, Dict, List # ───────────────────────────────────────────── # DATA CLASSES # ───────────────────────────────────────────── @dataclass class Segment: """A highlight segment chosen by the LLM.""" index : int start : float # seconds end : float # seconds reason : str score : float = 0.0 @property def duration(self) -> float: return self.end - self.start def to_dict(self) -> dict: return { "index" : self.index, "start" : self.start, "end" : self.end, "reason": self.reason, "score" : self.score, } @dataclass class ReelOutput: """A finished reel file with metadata.""" index : int path : Path segment : Segment file_size : int = 0 @property def filename(self) -> str: return self.path.name def to_dict(self) -> dict: return { "index" : self.index, "filename" : self.filename, "start_s" : self.segment.start, "end_s" : self.segment.end, "duration_s" : round(self.segment.duration, 2), "reason" : self.segment.reason, "score" : self.segment.score, "file_size_bytes" : self.file_size, } # ───────────────────────────────────────────── # LOGGER # ───────────────────────────────────────────── def log(emoji: str, msg: str): ts = datetime.now().strftime("%H:%M:%S") print(f"[{ts}] {emoji} {msg}", flush=True) # ───────────────────────────────────────────── # SHELL RUNNER # ───────────────────────────────────────────── def run_cmd(cmd: List[str], description: str = "") -> subprocess.CompletedProcess: """Run a shell command; raise a clear RuntimeError on failure.""" if description: log("⚙️", description) result = subprocess.run(cmd, capture_output=True, text=True) if result.returncode != 0: raise RuntimeError( f"Command failed: {' '.join(cmd)}\n" f"stderr: {result.stderr[-1000:]}" ) return result # 
# ─────────────────────────────────────────────
# FFPROBE
# ─────────────────────────────────────────────

def _probe_number(value: Any, cast: Any, default: Any) -> Any:
    """Convert a raw probe field with *cast*, returning *default* if it is not numeric."""
    try:
        return cast(value)
    except (TypeError, ValueError):
        return default


def probe_video(video_path: Path) -> Dict[str, Any]:
    """Return duration, width, height and fps for a video file.

    Runs ``ffprobe`` with JSON output and reads the container format plus
    the first stream whose ``codec_type`` is ``"video"``.

    Args:
        video_path: Path of the file to inspect.

    Returns:
        A dict with keys ``"duration"`` (float), ``"width"`` (int),
        ``"height"`` (int) and ``"fps"`` (the raw ``r_frame_rate`` value as
        reported, ``"30/1"`` when absent). Missing or non-numeric
        duration/width/height values are reported as 0.

    Raises:
        RuntimeError: Propagated from ``run_cmd`` when ffprobe exits non-zero.
    """
    cmd = [
        "ffprobe",
        # "error" rather than "quiet": stdout JSON is unaffected, but a failed
        # probe now leaves a message on stderr for run_cmd to report.
        "-v", "error",
        "-print_format", "json",
        "-show_format",
        "-show_streams",
        str(video_path),
    ]
    result = run_cmd(cmd)
    data = json.loads(result.stdout)

    fmt = data.get("format", {})
    # First video stream, or an empty dict when the file has none.
    vstream = next(
        (s for s in data.get("streams", []) if s.get("codec_type") == "video"),
        {},
    )

    return {
        "duration": _probe_number(fmt.get("duration", 0), float, 0.0),
        "width": _probe_number(vstream.get("width", 0), int, 0),
        "height": _probe_number(vstream.get("height", 0), int, 0),
        "fps": vstream.get("r_frame_rate", "30/1"),
    }


def format_duration(seconds: float) -> str:
    """Format a duration in seconds as ``MM:SS``.

    Fractional seconds are truncated. Minutes are not wrapped into hours, so
    3725 seconds renders as ``"62:05"``. Negative durations are rendered with
    a leading minus sign (``-5`` -> ``"-00:05"``).
    """
    total = int(seconds)
    sign = "-" if total < 0 else ""
    # divmod on the magnitude: flooring a negative value would borrow a minute.
    minutes, secs = divmod(abs(total), 60)
    return f"{sign}{minutes:02d}:{secs:02d}"