Spaces:
Sleeping
Sleeping
| """Subtitle helpers for the product pipeline.""" | |
| import logging | |
| import math | |
| import os | |
| import re | |
| from pathlib import Path | |
| from humeo_core.schemas import Clip, RenderTheme, TranscriptWord | |
| from humeo.transcript_align import ( | |
| clip_subtitle_words, | |
| clip_words_to_srt_lines, | |
| format_ass, | |
| format_srt, | |
| group_words_to_cue_chunks, | |
| ) | |
# Module logger; handlers/levels are configured by the host application.
logger = logging.getLogger(__name__)

# --- NATIVE_HIGHLIGHT caption theme tuning ---------------------------------
# Display font for the per-word highlight theme.
_NATIVE_HIGHLIGHT_FONT_NAME = "League Spartan"
# Highlight box colour as an ASS &HAABBGGRR string (opaque purple).
_NATIVE_HIGHLIGHT_PURPLE = "&H00F65C8B"
# Light a word up this many seconds before its ASR start time.
_NATIVE_HIGHLIGHT_LEAD_SEC = 0.06
# Minimum time a word stays highlighted, in seconds.
_NATIVE_HIGHLIGHT_MIN_DWELL_SEC = 0.16
# Word durations outside [min, max] seconds are treated as ASR glitches
# (see _suspicious_native_highlight_timing).
_NATIVE_HIGHLIGHT_MIN_VALID_WORD_SEC = 0.035
_NATIVE_HIGHLIGHT_MAX_VALID_WORD_SEC = 1.65
# A caption line may occupy at most this fraction of the frame width.
_NATIVE_HIGHLIGHT_MAX_LINE_WIDTH_RATIO = 0.62
# Horizontal safe-area margin in pixels; the renderer uses the lesser of
# this (floored at 24) and 12% of the frame width.
_NATIVE_HIGHLIGHT_SAFE_MARGIN_X = 150
# ASS override tag injected on the highlighted word's style switch.
_NATIVE_HIGHLIGHT_ROUNDING_OVERRIDE = r"\blur3.0"
# Function words that should never be highlighted on their own.
_NATIVE_HIGHLIGHT_STOPWORDS = {
    "a",
    "all",
    "an",
    "and",
    "are",
    "as",
    "at",
    "be",
    "but",
    "by",
    "for",
    "from",
    "i",
    "if",
    "in",
    "is",
    "it",
    "of",
    "on",
    "or",
    "so",
    "that",
    "the",
    "their",
    "there",
    "they",
    "this",
    "to",
    "was",
    "we",
    "with",
    "you",
    "your",
    "has",
    "have",
    "had",
    "been",
    "being",
}
| def _balance_reference_caption(text: str) -> str: | |
| words = text.split() | |
| if len(words) <= 5 and len(text) <= 28: | |
| return text | |
| best_idx = 1 | |
| best_delta = 10**9 | |
| for idx in range(1, len(words)): | |
| left = " ".join(words[:idx]) | |
| right = " ".join(words[idx:]) | |
| line_penalty = 0 | |
| if len(words[:idx]) < 2 or len(words[idx:]) < 2: | |
| line_penalty += 1000 | |
| delta = abs(len(left) - len(right)) + abs(len(words[:idx]) - len(words[idx:])) * 6 + line_penalty | |
| if delta < best_delta: | |
| best_delta = delta | |
| best_idx = idx | |
| return " ".join(words[:best_idx]) + "\n" + " ".join(words[best_idx:]) | |
def _native_line_width(font, words) -> float:
    """Pixel width of *words* rendered as a single space-joined line."""
    joined = " ".join(entry.word for entry in words)
    return _text_width(font, joined)
def _native_highlight_partition_penalty(lines, font, max_line_width: float) -> float:
    """Score a candidate line partition; lower is better.

    Penalizes horizontal overflow, extra lines, uneven line widths and word
    counts, and single-word lines in captions of more than three words.
    """
    widths = []
    counts = []
    for line in lines:
        widths.append(_native_line_width(font, line))
        counts.append(len(line))
    overflow_px = 0.0
    for width in widths:
        if width > max_line_width:
            overflow_px += width - max_line_width
    total_words = sum(counts)
    spread_px = max(widths) - min(widths) if len(widths) > 1 else 0.0
    spread_words = max(counts) - min(counts) if len(counts) > 1 else 0
    orphan_penalty = 0.0
    if total_words > 3:
        orphan_penalty = 260.0 * sum(1 for count in counts if count == 1)
    penalty = overflow_px * 80.0
    penalty += len(lines) * 120.0
    penalty += spread_px * 0.16
    penalty += spread_words * 120.0
    return penalty + orphan_penalty
| def _candidate_native_highlight_partitions(words, max_lines: int): | |
| n = len(words) | |
| if n == 0: | |
| return [] | |
| if max_lines <= 1 or n == 1: | |
| return [[list(words)]] | |
| out = [[list(words)]] | |
| for first_break in range(1, n): | |
| out.append([list(words[:first_break]), list(words[first_break:])]) | |
| if max_lines >= 3 and n >= 3: | |
| for first_break in range(1, n - 1): | |
| for second_break in range(first_break + 1, n): | |
| out.append( | |
| [ | |
| list(words[:first_break]), | |
| list(words[first_break:second_break]), | |
| list(words[second_break:]), | |
| ] | |
| ) | |
| return out | |
| def _split_native_highlight_lines(words, *, font=None, max_line_width: float | None = None): | |
| if len(words) <= 3 and len(" ".join(word.word for word in words)) <= 22: | |
| return [list(words)] | |
| if len(words) < 2: | |
| return [list(words)] | |
| if font is not None and max_line_width is not None: | |
| candidates = _candidate_native_highlight_partitions(words, max_lines=3) | |
| return min( | |
| candidates, | |
| key=lambda lines: _native_highlight_partition_penalty( | |
| lines, | |
| font, | |
| max_line_width, | |
| ), | |
| ) | |
| best_idx = 1 | |
| best_delta = 10**9 | |
| for idx in range(1, len(words)): | |
| left_words = words[:idx] | |
| right_words = words[idx:] | |
| left = " ".join(word.word for word in left_words) | |
| right = " ".join(word.word for word in right_words) | |
| line_penalty = 0 | |
| if len(left_words) < 2 or len(right_words) < 2: | |
| line_penalty += 800 | |
| delta = abs(len(left) - len(right)) + abs(len(left_words) - len(right_words)) * 7 + line_penalty | |
| if delta < best_delta: | |
| best_delta = delta | |
| best_idx = idx | |
| return [list(words[:best_idx]), list(words[best_idx:])] | |
| def _clean_native_highlight_token(text: str) -> str: | |
| return re.sub(r"(^[^A-Za-z0-9$%#]+|[^A-Za-z0-9$%#]+$)", "", text or "") | |
def _native_highlight_span_score(words) -> float:
    """Score how highlight-worthy a span of words is (higher is better).

    Spans that clean down to nothing, or to stopwords only, are vetoed with
    -1e9. Content words, digit/currency/percent tokens, long tokens, and
    all-caps tokens add points; awkward two-word spans and very wide spans
    lose points.
    """
    cleaned = [_clean_native_highlight_token(word.word) for word in words]
    cleaned = [token for token in cleaned if token]
    if not cleaned:
        return -1e9
    if all(token.lower() in _NATIVE_HIGHLIGHT_STOPWORDS for token in cleaned):
        return -1e9
    score = 0.0
    for token in cleaned:
        lower = token.lower()
        if lower not in _NATIVE_HIGHLIGHT_STOPWORDS:
            score += 2.0  # content word
        if any(ch.isdigit() for ch in token) or "$" in token or "%" in token:
            score += 3.0  # numbers/money/percentages highlight well
        if len(token) >= 6:
            score += 0.8  # longer tokens tend to carry meaning
        if token.isupper() and len(token) > 1:
            score += 0.6  # acronyms / emphasized words
    if len(cleaned) == 2:
        # Two-word spans are mildly discouraged unless they contain a
        # number-ish token; a stopword in either slot makes them worse.
        score -= 0.55
        if any(any(ch.isdigit() for ch in token) or "$" in token or "%" in token for token in cleaned):
            score += 1.1
        elif cleaned[0].lower() in _NATIVE_HIGHLIGHT_STOPWORDS or cleaned[1].lower() in _NATIVE_HIGHLIGHT_STOPWORDS:
            score -= 0.6
        else:
            score += 0.3
    if len(" ".join(cleaned)) > 18:
        score -= 0.6  # wide spans are hard to box attractively
    return score
def _should_render_native_highlight_group(words) -> bool:
    """True when the word group contains at least one non-stopword token."""
    tokens = (_clean_native_highlight_token(entry.word) for entry in words)
    meaningful = [token for token in tokens if token]
    if not meaningful:
        return False
    return not all(token.lower() in _NATIVE_HIGHLIGHT_STOPWORDS for token in meaningful)
def _native_highlight_font_path() -> Path | None:
    """Locate a bold font file for PIL text measurement.

    Prefers the League Spartan Bold bundled with ``humeo_core``, then falls
    back to common Windows Arial files; returns ``None`` when nothing usable
    is found.
    """
    try:
        import humeo_core

        bundled_font = (
            Path(humeo_core.__file__).resolve().parent
            / "assets"
            / "fonts"
            / "LeagueSpartan-Bold.ttf"
        )
        if bundled_font.is_file():
            return bundled_font
    except Exception:
        # Best-effort: fall through to the system font search.
        pass
    fonts_dir = Path(os.environ.get("WINDIR", r"C:\Windows")) / "Fonts"
    for name in ("arialbd.ttf", "Arialbd.ttf", "ARIALBD.TTF", "arial.ttf"):
        candidate = fonts_dir / name
        if candidate.is_file():
            return candidate
    return None
| def _text_width(font, text: str) -> float: | |
| if not text: | |
| return 0.0 | |
| if hasattr(font, "getlength"): | |
| return float(font.getlength(text)) | |
| bbox = font.getbbox(text) | |
| return float(bbox[2] - bbox[0]) | |
| def _text_height(font) -> int: | |
| bbox = font.getbbox("Ag") | |
| return max(1, int(round(bbox[3] - bbox[1]))) | |
| def _escape_ass_text(text: str) -> str: | |
| return ( | |
| text.replace("\\", r"\\") | |
| .replace("{", r"\{") | |
| .replace("}", r"\}") | |
| .replace("\n", r"\N") | |
| ) | |
def _native_highlight_overlay_text(line_words, highlight_idx: int) -> str:
    """Render one caption line with exactly one word switched to Highlight.

    The surrounding words fall back to the Invisible style, so this overlay
    paints only the active word when layered over the Base line.
    """
    rendered: list[str] = []
    for position, entry in enumerate(line_words):
        escaped = _escape_ass_text(entry.word)
        if position == highlight_idx:
            escaped = (
                f"{{\\rHighlight{_NATIVE_HIGHLIGHT_ROUNDING_OVERRIDE}}}"
                + escaped
                + "{\\rInvisible}"
            )
        rendered.append(escaped)
    return " ".join(rendered)
def _word_timing_weight(word: TranscriptWord) -> float:
    """Relative duration weight for a word, clamped to [0.65, 2.2]."""
    cleaned = _clean_native_highlight_token(word.word)
    basis = cleaned if cleaned else word.word
    return max(0.65, min(2.2, len(basis) / 5.5))
def _suspicious_native_highlight_timing(
    words: list[TranscriptWord],
    idx: int,
    *,
    clip_duration: float,
) -> bool:
    """Heuristically flag a word whose ASR timestamps look untrustworthy."""
    start = float(words[idx].start_time)
    end = float(words[idx].end_time)
    if not math.isfinite(start) or not math.isfinite(end):
        return True
    # Outside the clip, with a small tolerance at each edge.
    if start < -0.01 or end > clip_duration + 0.25:
        return True
    # Implausibly short or long for a single spoken word.
    span = end - start
    if not (_NATIVE_HIGHLIGHT_MIN_VALID_WORD_SEC <= span <= _NATIVE_HIGHLIGHT_MAX_VALID_WORD_SEC):
        return True
    if idx > 0:
        prev = words[idx - 1]
        # Starts before the previous word started, or overlaps it too deeply.
        if start < float(prev.start_time) - 0.03 or start < float(prev.end_time) - 0.35:
            return True
    if idx + 1 < len(words):
        # The following word starts before this one does.
        if float(words[idx + 1].start_time) < start - 0.03:
            return True
    return False
def _repair_native_highlight_timings(
    words: list[TranscriptWord],
    *,
    clip_duration: float,
) -> list[TranscriptWord]:
    """Repair obvious ASR word timestamp glitches before per-word highlighting.

    This is intentionally conservative: clean Whisper/ElevenLabs timings pass
    through almost unchanged, while zero-length, reversed, huge, or badly
    overlapping word timings get interpolated between neighboring reliable words.
    """
    if not words:
        return []
    clip_duration = max(0.0, float(clip_duration))
    # Phase 1: snapshot each word with clamped times, a "bad" flag from the
    # suspicion heuristic, and a length-based interpolation weight.
    records: list[dict[str, object]] = []
    for idx, word in enumerate(words):
        start = max(0.0, min(clip_duration, float(word.start_time)))
        end = max(0.0, min(clip_duration, float(word.end_time)))
        records.append(
            {
                "word": word.word,
                "start": start,
                "end": end,
                "bad": _suspicious_native_highlight_timing(
                    words,
                    idx,
                    clip_duration=clip_duration,
                ),
                "weight": _word_timing_weight(word),
            }
        )
    # Phase 2: re-time each contiguous run of bad words by distributing the
    # span between its reliable neighbors proportionally to word weight.
    idx = 0
    while idx < len(records):
        if not records[idx]["bad"]:
            idx += 1
            continue
        run_start = idx
        while idx < len(records) and records[idx]["bad"]:
            idx += 1
        run_end = idx - 1
        count = run_end - run_start + 1
        # Left anchor: end of the previous good word (or the run's own start).
        left_time = (
            float(records[run_start - 1]["end"])
            if run_start > 0
            else max(0.0, float(records[run_start]["start"]))
        )
        # Right anchor: start of the next good word (or the run's own end).
        right_time = (
            float(records[run_end + 1]["start"])
            if run_end + 1 < len(records)
            else min(clip_duration, max(left_time, float(records[run_end]["end"])))
        )
        # Guarantee a minimum span so every word in the run gets visible time.
        weight_span = sum(float(r["weight"]) for r in records[run_start : run_end + 1]) * 0.13
        min_span = max(0.11 * count, weight_span)
        if right_time <= left_time + min_span:
            right_time = min(clip_duration, left_time + min_span)
        if right_time <= left_time:
            right_time = min(clip_duration, left_time + max(0.08, 0.12 * count))
        span = max(0.001, right_time - left_time)
        weights = [float(r["weight"]) for r in records[run_start : run_end + 1]]
        total_weight = max(0.001, sum(weights))
        # Walk a cursor across the span; each word gets a weighted share,
        # with a 0.04 s floor so no word is rendered with zero duration.
        cursor = left_time
        for offset, weight in enumerate(weights):
            rec = records[run_start + offset]
            next_cursor = (
                right_time
                if offset == count - 1
                else cursor + span * (weight / total_weight)
            )
            rec["start"] = cursor
            rec["end"] = max(cursor + 0.04, next_cursor)
            cursor = float(rec["end"])
    # Phase 3: enforce monotonic, non-degenerate windows clipped to the clip.
    repaired: list[TranscriptWord] = []
    prev_end = 0.0
    for rec in records:
        start = max(0.0, float(rec["start"]))
        end = max(start + 0.02, float(rec["end"]))
        if start < prev_end - 0.02:
            start = prev_end
            end = max(end, start + 0.04)
        if clip_duration > 0.0:
            end = min(clip_duration, end)
        if end <= start:
            start = max(0.0, min(start, clip_duration - 0.02))
            end = min(clip_duration, start + 0.04)
        repaired.append(TranscriptWord(word=str(rec["word"]), start_time=start, end_time=end))
        prev_end = max(prev_end, end)
    return repaired
| def _native_highlight_word_windows( | |
| words: list[TranscriptWord], | |
| *, | |
| lead_sec: float, | |
| min_dwell_sec: float, | |
| ) -> list[tuple[float, float]]: | |
| if not words: | |
| return [] | |
| lead_sec = max(0.0, float(lead_sec)) | |
| min_dwell_sec = max(0.02, float(min_dwell_sec)) | |
| cue_start = max(0.0, words[0].start_time - lead_sec) | |
| cue_end = max(words[-1].end_time, words[-1].start_time + min_dwell_sec) | |
| starts: list[float] = [] | |
| for idx, word in enumerate(words): | |
| start = max(cue_start, float(word.start_time) - lead_sec) | |
| if idx > 0: | |
| start = max(start, starts[-1] + 0.01) | |
| starts.append(start) | |
| windows: list[tuple[float, float]] = [] | |
| for idx, word in enumerate(words): | |
| start = starts[idx] | |
| natural_end = max(float(word.end_time), start + min_dwell_sec) | |
| limit = starts[idx + 1] if idx + 1 < len(starts) else cue_end | |
| end = min(natural_end, limit) | |
| if end <= start: | |
| end = min(limit, start + 0.01) | |
| windows.append((start, max(start + 0.01, end))) | |
| return windows | |
| def _fmt_ass_time(seconds: float) -> str: | |
| seconds = max(0.0, seconds) | |
| hours = int(seconds // 3600) | |
| minutes = int((seconds % 3600) // 60) | |
| secs = seconds % 60 | |
| whole = int(secs) | |
| cs = int(round((secs - whole) * 100)) | |
| if cs >= 100: | |
| cs = 99 | |
| return f"{hours:d}:{minutes:02d}:{whole:02d}.{cs:02d}" | |
def _format_native_highlight_ass(
    cue_chunks,
    *,
    play_res_x: int,
    play_res_y: int,
    font_size: int,
    margin_v: int,
    font_name: str,
    highlight_lead_sec: float = _NATIVE_HIGHLIGHT_LEAD_SEC,
    highlight_min_dwell_sec: float = _NATIVE_HIGHLIGHT_MIN_DWELL_SEC,
) -> str:
    """Render the NATIVE_HIGHLIGHT theme as a complete ASS document.

    Each cue is laid out into 1-3 centered lines. Layer 1 draws the full
    line in the always-visible ``Base`` style for the whole cue; layer 0
    draws one short-lived event per word in the ``Invisible`` style with
    only the active word switched to ``Highlight``, giving a karaoke-like
    per-word highlight effect.
    """
    # Imported lazily so PIL is only required when this theme is rendered.
    from PIL import ImageFont
    font_path = _native_highlight_font_path()
    if font_path is not None:
        font = ImageFont.truetype(str(font_path), size=font_size)
    else:
        # No usable font file found; PIL's built-in bitmap font still lets
        # us produce approximate width measurements.
        font = ImageFont.load_default()
    line_height = max(font_size, _text_height(font) + 6)
    line_gap = max(8, int(round(font_size * 0.08)))
    # Vertical anchor: the bottom of the caption block sits margin_v above
    # the frame bottom; lines are stacked upward from there.
    bottom_anchor = play_res_y - margin_v
    # Horizontal safe margin: the lesser of 12% of the frame width and the
    # configured pixel constant (floored at 24 px).
    safe_margin_x = min(
        int(round(play_res_x * 0.12)),
        max(24, _NATIVE_HIGHLIGHT_SAFE_MARGIN_X),
    )
    max_line_width = min(
        play_res_x * _NATIVE_HIGHLIGHT_MAX_LINE_WIDTH_RATIO,
        play_res_x - (safe_margin_x * 2),
    )
    header = (
        "[Script Info]\n"
        "ScriptType: v4.00+\n"
        f"PlayResX: {play_res_x}\n"
        f"PlayResY: {play_res_y}\n"
        "WrapStyle: 0\n"
        "ScaledBorderAndShadow: yes\n"
        "YCbCr Matrix: None\n"
        "\n"
        "[V4+ Styles]\n"
        "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, "
        "OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, "
        "ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, "
        "Alignment, MarginL, MarginR, MarginV, Encoding\n"
        f"Style: Base,{font_name},{font_size},&H00FFFFFF,&H000000FF,&H00101010,&H00000000,-1,0,0,0,100,100,-1,0,1,4,0,8,0,0,0,0\n"
        f"Style: Highlight,{font_name},{font_size},&H00FFFFFF,&H000000FF,{_NATIVE_HIGHLIGHT_PURPLE},&H00000000,-1,0,0,0,100,100,-1,0,3,4,0,8,0,0,0,0\n"
        f"Style: Invisible,{font_name},{font_size},&HFF000000,&H000000FF,&HFF000000,&HFF000000,-1,0,0,0,100,100,-1,0,1,0,0,8,0,0,0,0\n"
        "\n"
        "[Events]\n"
        "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"
    )
    events: list[str] = []
    for cue_words in cue_chunks:
        if not cue_words:
            continue
        lines = _split_native_highlight_lines(
            cue_words,
            font=font,
            max_line_width=max_line_width,
        )
        # One (start, end) highlight window per word, flattened across lines.
        cue_windows = _native_highlight_word_windows(
            cue_words,
            lead_sec=highlight_lead_sec,
            min_dwell_sec=highlight_min_dwell_sec,
        )
        block_height = len(lines) * line_height + max(0, len(lines) - 1) * line_gap
        block_top = bottom_anchor - block_height
        cue_start = cue_windows[0][0] if cue_windows else cue_words[0].start_time
        cue_end = cue_windows[-1][1] if cue_windows else cue_words[-1].end_time
        # Index of the current line's first word within cue_windows.
        word_offset = 0
        line_center_x = play_res_x / 2.0
        for line_idx, line_words in enumerate(lines):
            if not line_words:
                continue
            line_text = " ".join(word.word for word in line_words)
            line_top = block_top + line_idx * (line_height + line_gap)
            # Layer 1: the whole line, visible for the full cue duration.
            events.append(
                "Dialogue: 1,"
                f"{_fmt_ass_time(cue_start)},{_fmt_ass_time(cue_end)},Base,,0,0,0,,"
                f"{{\\an8\\pos({line_center_x:.1f},{line_top:.1f})}}{_escape_ass_text(line_text)}"
            )
            # Layer 0: one overlay per word; only tokens that survive
            # cleaning (i.e. not pure punctuation) get a highlight event.
            for word_idx, word in enumerate(line_words):
                cleaned = _clean_native_highlight_token(word.word)
                if not cleaned:
                    continue
                word_start, word_end = cue_windows[word_offset + word_idx]
                events.append(
                    "Dialogue: 0,"
                    f"{_fmt_ass_time(word_start)},{_fmt_ass_time(word_end)},Invisible,,0,0,0,,"
                    f"{{\\an8\\pos({line_center_x:.1f},{line_top:.1f})}}"
                    f"{_native_highlight_overlay_text(line_words, word_idx)}"
                )
            word_offset += len(line_words)
    return header + "\n".join(events) + ("\n" if events else "")
def generate_srt(
    clip: Clip,
    transcript: dict,
    output_dir: Path,
    *,
    max_words_per_cue: int = 8,
    max_cue_sec: float = 4.0,
) -> Path:
    """
    Build an SRT file from word-level ASR aligned to this clip's timeline.

    ``transcript`` is the persisted ``transcript.json`` (segments with optional
    per-word timestamps). Times are shifted so 0 = clip in-point.
    """
    alignment = clip_subtitle_words(transcript, clip)
    cues = clip_words_to_srt_lines(
        alignment.words,
        max_words_per_cue=max_words_per_cue,
        max_cue_sec=max_cue_sec,
    )
    destination = output_dir / f"clip_{clip.clip_id}.srt"
    destination.write_text(format_srt(cues), encoding="utf-8")
    logger.info("Generated SRT: %s (%d cues)", destination, len(cues))
    return destination
def generate_ass(
    clip: Clip,
    transcript: dict,
    output_dir: Path,
    *,
    max_words_per_cue: int = 4,
    max_cue_sec: float = 2.2,
    play_res_x: int = 1080,
    play_res_y: int = 1920,
    font_size: int = 48,
    margin_v: int = 160,
    margin_h: int = 60,
    font_name: str = "Arial",
    render_theme: RenderTheme = RenderTheme.LEGACY,
    native_highlight_lead_sec: float = _NATIVE_HIGHLIGHT_LEAD_SEC,
    native_highlight_min_dwell_sec: float = _NATIVE_HIGHLIGHT_MIN_DWELL_SEC,
    repair_word_timings: bool = True,
) -> Path:
    """Generate an ASS caption file tuned for direct libass rendering.

    Unlike SRT → libass (default PlayResY=288), an ASS file with
    ``PlayResY = output_height`` means libass' scale factor is 1.0, so the
    ``font_size`` / ``margin_v`` arguments below are honest output pixels.
    This is the root-cause fix for the "captions rendering in the middle of
    the frame, four times too large" bug the user reported.

    Returns the path of the written ``clip_<id>.ass`` file.
    """
    ass_path = output_dir / f"clip_{clip.clip_id}.ass"
    aligned = clip_subtitle_words(transcript, clip)
    # Start from the caller's cueing/layout knobs; themes override below.
    cue_words = max_words_per_cue
    cue_sec = max_cue_sec
    cue_font_size = font_size
    cue_margin_v = margin_v
    prefer_break_on_punctuation = False
    min_words_before_break = 1
    if render_theme == RenderTheme.REFERENCE_LOWER_THIRD:
        # Lower-third look: longer cues, slightly larger type, sits lower.
        cue_words = max(max_words_per_cue, 7)
        cue_sec = max(max_cue_sec, 2.6)
        cue_font_size = max(font_size, 52)
        cue_margin_v = min(margin_v, 136)
        prefer_break_on_punctuation = True
        min_words_before_break = 5
    elif render_theme == RenderTheme.NATIVE_HIGHLIGHT:
        # Per-word highlight look: short cues, large type, high bottom margin.
        cue_words = 4
        cue_sec = 1.45
        cue_font_size = max(font_size, 80)
        cue_margin_v = max(margin_v, 300)
        prefer_break_on_punctuation = True
        min_words_before_break = 3
    aligned_words = aligned.words
    # Per-word highlighting is very sensitive to ASR timestamp glitches, so
    # optionally repair them first (see _repair_native_highlight_timings).
    if render_theme == RenderTheme.NATIVE_HIGHLIGHT and repair_word_timings:
        aligned_words = _repair_native_highlight_timings(
            aligned_words,
            clip_duration=clip.duration_sec,
        )
    cue_chunks = group_words_to_cue_chunks(
        aligned_words,
        max_words_per_cue=cue_words,
        max_cue_sec=cue_sec,
        prefer_break_on_punctuation=prefer_break_on_punctuation,
        min_words_before_break=min_words_before_break,
    )
    # (start, end, text) triples for the themes that render whole cues.
    lines = [
        (chunk[0].start_time, chunk[-1].end_time, " ".join(word.word for word in chunk))
        for chunk in cue_chunks
    ]
    if render_theme == RenderTheme.REFERENCE_LOWER_THIRD:
        lines = [(start, end, _balance_reference_caption(text)) for start, end, text in lines]
        ass_text = format_ass(
            lines,
            play_res_x=play_res_x,
            play_res_y=play_res_y,
            font_size=cue_font_size,
            margin_v=cue_margin_v,
            margin_h=margin_h,
            # NOTE(review): this theme pins its own font and ignores the
            # ``font_name`` argument — presumably intentional; confirm.
            font_name="Source Sans 3",
            render_theme=render_theme,
        )
    elif render_theme == RenderTheme.NATIVE_HIGHLIGHT:
        ass_text = _format_native_highlight_ass(
            cue_chunks,
            play_res_x=play_res_x,
            play_res_y=play_res_y,
            font_size=cue_font_size,
            margin_v=cue_margin_v,
            font_name=_NATIVE_HIGHLIGHT_FONT_NAME,
            highlight_lead_sec=native_highlight_lead_sec,
            highlight_min_dwell_sec=native_highlight_min_dwell_sec,
        )
    else:
        ass_text = format_ass(
            lines,
            play_res_x=play_res_x,
            play_res_y=play_res_y,
            font_size=cue_font_size,
            margin_v=cue_margin_v,
            margin_h=margin_h,
            font_name=font_name,
            render_theme=render_theme,
        )
    ass_path.write_text(ass_text, encoding="utf-8")
    logger.info("Generated ASS: %s (%d cues)", ass_path, len(lines))
    return ass_path