moonlantern1 committed on
Commit
f14fa4b
·
verified ·
1 Parent(s): c86f8a6

Fix caption edge clipping and real clip previews

Browse files
Files changed (4) hide show
  1. app.py +15 -4
  2. src/humeo/cutter.py +39 -14
  3. src/humeo/layout_vision.py +31 -9
  4. src/humeo/pipeline.py +14 -10
app.py CHANGED
@@ -210,12 +210,18 @@ def _duration_label(path: Path) -> str:
210
 
211
  def _publish_files(job: Job) -> None:
212
  for path in sorted(job.output_dir.glob("short_*.mp4")):
213
- if path.name not in job.clips and path.is_file():
 
 
 
 
214
  job.clips[path.name] = ClipFile(
215
  name=path.name,
216
  url=f"/api/jobs/{job.id}/files/{path.name}",
217
- duration=_duration_label(path),
218
  )
 
 
219
 
220
 
221
  def _validate_credentials() -> None:
@@ -552,7 +558,9 @@ INDEX_HTML = r"""<!DOCTYPE html>
552
  .clip-card { border-radius: var(--radius); overflow: hidden; cursor: pointer; background: var(--white); border: 1px solid var(--border); box-shadow: 0 2px 10px rgba(42,31,14,0.06); transition: all 0.2s; animation: clipAppear 0.5s ease both; }
553
  .clip-card:hover { transform: translateY(-3px); box-shadow: 0 8px 24px rgba(42,31,14,0.13); }
554
  @keyframes clipAppear { from { opacity: 0; transform: scale(0.9) translateY(10px); } to { opacity: 1; transform: scale(1) translateY(0); } }
555
- .clip-thumb { aspect-ratio: 9/16; display: flex; align-items: center; justify-content: center; position: relative; overflow: hidden; }
 
 
556
  .clip-play { width: 44px; height: 44px; background: rgba(255,255,255,0.88); border-radius: 50%; display: flex; align-items: center; justify-content: center; font-size: 1.1rem; z-index: 2; box-shadow: 0 2px 12px rgba(0,0,0,0.2); transition: transform 0.2s; }
557
  .clip-card:hover .clip-play { transform: scale(1.1); }
558
  .clip-meta { padding: 10px 12px; } .clip-num { font-size: 0.72rem; color: var(--ink-muted); text-transform: uppercase; letter-spacing: 0.08em; font-weight: 500; }
@@ -889,7 +897,10 @@ INDEX_HTML = r"""<!DOCTYPE html>
889
  const grid = document.getElementById('clips-grid');
890
  const card = document.createElement('div');
891
  card.className = 'clip-card';
892
- card.innerHTML = `<div class="clip-thumb thumb-${idx % 10}"><div class="clip-play">▶</div></div><div class="clip-meta"><div class="clip-num">Clip ${idx + 1}</div><div class="clip-dur">${clip.duration || '0:00'}</div><a class="clip-download" href="${clip.url}" download onclick="event.stopPropagation()">Download</a></div>`;
 
 
 
893
  card.onclick = () => openModal(idx);
894
  grid.appendChild(card);
895
  }
 
210
 
211
  def _publish_files(job: Job) -> None:
212
  for path in sorted(job.output_dir.glob("short_*.mp4")):
213
+ if not path.is_file():
214
+ continue
215
+ duration = _duration_label(path)
216
+ existing = job.clips.get(path.name)
217
+ if existing is None:
218
  job.clips[path.name] = ClipFile(
219
  name=path.name,
220
  url=f"/api/jobs/{job.id}/files/{path.name}",
221
+ duration=duration,
222
  )
223
+ elif existing.duration == "0:00" and duration != "0:00":
224
+ existing.duration = duration
225
 
226
 
227
  def _validate_credentials() -> None:
 
558
  .clip-card { border-radius: var(--radius); overflow: hidden; cursor: pointer; background: var(--white); border: 1px solid var(--border); box-shadow: 0 2px 10px rgba(42,31,14,0.06); transition: all 0.2s; animation: clipAppear 0.5s ease both; }
559
  .clip-card:hover { transform: translateY(-3px); box-shadow: 0 8px 24px rgba(42,31,14,0.13); }
560
  @keyframes clipAppear { from { opacity: 0; transform: scale(0.9) translateY(10px); } to { opacity: 1; transform: scale(1) translateY(0); } }
561
+ .clip-thumb { aspect-ratio: 9/16; display: flex; align-items: center; justify-content: center; position: relative; overflow: hidden; background:#000; }
562
+ .clip-thumb video { width:100%; height:100%; object-fit:cover; display:block; background:#000; }
563
+ .clip-thumb::after { content:""; position:absolute; inset:0; background:linear-gradient(180deg, rgba(0,0,0,0.02), rgba(0,0,0,0.18)); pointer-events:none; }
564
  .clip-play { width: 44px; height: 44px; background: rgba(255,255,255,0.88); border-radius: 50%; display: flex; align-items: center; justify-content: center; font-size: 1.1rem; z-index: 2; box-shadow: 0 2px 12px rgba(0,0,0,0.2); transition: transform 0.2s; }
565
  .clip-card:hover .clip-play { transform: scale(1.1); }
566
  .clip-meta { padding: 10px 12px; } .clip-num { font-size: 0.72rem; color: var(--ink-muted); text-transform: uppercase; letter-spacing: 0.08em; font-weight: 500; }
 
897
  const grid = document.getElementById('clips-grid');
898
  const card = document.createElement('div');
899
  card.className = 'clip-card';
900
+ card.innerHTML = `<div class="clip-thumb"><video src="${clip.url}#t=0.2" muted playsinline preload="metadata"></video><div class="clip-play">▶</div></div><div class="clip-meta"><div class="clip-num">Clip ${idx + 1}</div><div class="clip-dur">${clip.duration || '0:00'}</div><a class="clip-download" href="${clip.url}" download onclick="event.stopPropagation()">Download</a></div>`;
901
+ const preview = card.querySelector('video');
902
+ card.addEventListener('mouseenter', () => { preview.play().catch(() => {}); });
903
+ card.addEventListener('mouseleave', () => { preview.pause(); preview.currentTime = 0.2; });
904
  card.onclick = () => openModal(idx);
905
  grid.appendChild(card);
906
  }
src/humeo/cutter.py CHANGED
@@ -18,13 +18,14 @@ from humeo.transcript_align import (
18
 
19
  logger = logging.getLogger(__name__)
20
 
21
- _NATIVE_HIGHLIGHT_FONT_NAME = "Arial"
22
  _NATIVE_HIGHLIGHT_PURPLE = "&H00F65C8B"
23
  _NATIVE_HIGHLIGHT_LEAD_SEC = 0.06
24
  _NATIVE_HIGHLIGHT_MIN_DWELL_SEC = 0.16
25
  _NATIVE_HIGHLIGHT_MIN_VALID_WORD_SEC = 0.035
26
  _NATIVE_HIGHLIGHT_MAX_VALID_WORD_SEC = 1.65
27
- _NATIVE_HIGHLIGHT_MAX_LINE_WIDTH_RATIO = 0.92
 
28
  _NATIVE_HIGHLIGHT_ROUNDING_OVERRIDE = r"\blur3.0"
29
  _NATIVE_HIGHLIGHT_STOPWORDS = {
30
  "a",
@@ -207,10 +208,24 @@ def _should_render_native_highlight_group(words) -> bool:
207
  return any(token.lower() not in _NATIVE_HIGHLIGHT_STOPWORDS for token in cleaned)
208
 
209
 
210
- def _native_highlight_font_path() -> Path | None:
211
- windows_fonts = Path(os.environ.get("WINDIR", r"C:\Windows")) / "Fonts"
212
- for filename in ("arialbd.ttf", "Arialbd.ttf", "ARIALBD.TTF", "arial.ttf"):
213
- path = windows_fonts / filename
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  if path.is_file():
215
  return path
216
  return None
@@ -448,7 +463,14 @@ def _format_native_highlight_ass(
448
  line_height = max(font_size, _text_height(font) + 6)
449
  line_gap = max(8, int(round(font_size * 0.08)))
450
  bottom_anchor = play_res_y - margin_v
451
- max_line_width = play_res_x * _NATIVE_HIGHLIGHT_MAX_LINE_WIDTH_RATIO
 
 
 
 
 
 
 
452
 
453
  header = (
454
  "[Script Info]\n"
@@ -496,7 +518,10 @@ def _format_native_highlight_ass(
496
  continue
497
  line_text = " ".join(word.word for word in line_words)
498
  line_top = block_top + line_idx * (line_height + line_gap)
499
- line_left = (play_res_x - _text_width(font, line_text)) / 2.0
 
 
 
500
  events.append(
501
  "Dialogue: 1,"
502
  f"{_fmt_ass_time(cue_start)},{_fmt_ass_time(cue_end)},Base,,0,0,0,,"
@@ -587,12 +612,12 @@ def generate_ass(
587
  prefer_break_on_punctuation = True
588
  min_words_before_break = 5
589
  elif render_theme == RenderTheme.NATIVE_HIGHLIGHT:
590
- cue_words = 8
591
- cue_sec = 2.4
592
- cue_font_size = max(font_size, 86)
593
- cue_margin_v = max(margin_v, 300)
594
- prefer_break_on_punctuation = True
595
- min_words_before_break = 4
596
 
597
  aligned_words = aligned.words
598
  if render_theme == RenderTheme.NATIVE_HIGHLIGHT and repair_word_timings:
 
18
 
19
  logger = logging.getLogger(__name__)
20
 
21
+ _NATIVE_HIGHLIGHT_FONT_NAME = "League Spartan"
22
  _NATIVE_HIGHLIGHT_PURPLE = "&H00F65C8B"
23
  _NATIVE_HIGHLIGHT_LEAD_SEC = 0.06
24
  _NATIVE_HIGHLIGHT_MIN_DWELL_SEC = 0.16
25
  _NATIVE_HIGHLIGHT_MIN_VALID_WORD_SEC = 0.035
26
  _NATIVE_HIGHLIGHT_MAX_VALID_WORD_SEC = 1.65
27
+ _NATIVE_HIGHLIGHT_MAX_LINE_WIDTH_RATIO = 0.74
28
+ _NATIVE_HIGHLIGHT_SAFE_MARGIN_X = 96
29
  _NATIVE_HIGHLIGHT_ROUNDING_OVERRIDE = r"\blur3.0"
30
  _NATIVE_HIGHLIGHT_STOPWORDS = {
31
  "a",
 
208
  return any(token.lower() not in _NATIVE_HIGHLIGHT_STOPWORDS for token in cleaned)
209
 
210
 
211
+ def _native_highlight_font_path() -> Path | None:
212
+ try:
213
+ import humeo_core
214
+
215
+ bundled = (
216
+ Path(humeo_core.__file__).resolve().parent
217
+ / "assets"
218
+ / "fonts"
219
+ / "LeagueSpartan-Bold.ttf"
220
+ )
221
+ if bundled.is_file():
222
+ return bundled
223
+ except Exception:
224
+ pass
225
+
226
+ windows_fonts = Path(os.environ.get("WINDIR", r"C:\Windows")) / "Fonts"
227
+ for filename in ("arialbd.ttf", "Arialbd.ttf", "ARIALBD.TTF", "arial.ttf"):
228
+ path = windows_fonts / filename
229
  if path.is_file():
230
  return path
231
  return None
 
463
  line_height = max(font_size, _text_height(font) + 6)
464
  line_gap = max(8, int(round(font_size * 0.08)))
465
  bottom_anchor = play_res_y - margin_v
466
+ safe_margin_x = min(
467
+ int(round(play_res_x * 0.12)),
468
+ max(24, _NATIVE_HIGHLIGHT_SAFE_MARGIN_X),
469
+ )
470
+ max_line_width = min(
471
+ play_res_x * _NATIVE_HIGHLIGHT_MAX_LINE_WIDTH_RATIO,
472
+ play_res_x - (safe_margin_x * 2),
473
+ )
474
 
475
  header = (
476
  "[Script Info]\n"
 
518
  continue
519
  line_text = " ".join(word.word for word in line_words)
520
  line_top = block_top + line_idx * (line_height + line_gap)
521
+ line_width = _text_width(font, line_text)
522
+ centered_left = (play_res_x - line_width) / 2.0
523
+ max_left = play_res_x - safe_margin_x - line_width
524
+ line_left = max(float(safe_margin_x), min(centered_left, max_left))
525
  events.append(
526
  "Dialogue: 1,"
527
  f"{_fmt_ass_time(cue_start)},{_fmt_ass_time(cue_end)},Base,,0,0,0,,"
 
612
  prefer_break_on_punctuation = True
613
  min_words_before_break = 5
614
  elif render_theme == RenderTheme.NATIVE_HIGHLIGHT:
615
+ cue_words = 6
616
+ cue_sec = 2.0
617
+ cue_font_size = max(font_size, 86)
618
+ cue_margin_v = max(margin_v, 300)
619
+ prefer_break_on_punctuation = True
620
+ min_words_before_break = 3
621
 
622
  aligned_words = aligned.words
623
  if render_theme == RenderTheme.NATIVE_HIGHLIGHT and repair_word_timings:
src/humeo/layout_vision.py CHANGED
@@ -4,10 +4,11 @@ from __future__ import annotations
4
 
5
  import base64
6
  import hashlib
7
- import json
8
- import logging
9
- import os
10
- import struct
 
11
  import subprocess
12
  from collections.abc import Iterable
13
  from io import BytesIO
@@ -145,6 +146,21 @@ def _json_object_from_vision_response(raw: object) -> dict[str, Any]:
145
  for item in raw:
146
  if isinstance(item, dict):
147
  return item
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  raise TypeError(f"Expected vision JSON object, got {type(raw).__name__}")
149
 
150
 
@@ -1319,7 +1335,7 @@ def _call_vision_json(keyframe_path: str, model_name: str, prompt: str) -> dict[
1319
  )
1320
  if not response.text:
1321
  raise RuntimeError("Gemini vision returned empty response")
1322
- return _json_object_from_vision_response(json.loads(response.text))
1323
 
1324
  client = OpenAI(
1325
  api_key=resolve_openrouter_api_key(),
@@ -1345,7 +1361,7 @@ def _call_vision_json(keyframe_path: str, model_name: str, prompt: str) -> dict[
1345
  text = _openai_message_text(response.choices[0].message.content)
1346
  if not text:
1347
  raise RuntimeError("OpenRouter vision returned empty response")
1348
- return _json_object_from_vision_response(json.loads(text))
1349
 
1350
 
1351
  def _call_gemini_vision(keyframe_path: str, model_name: str) -> dict[str, Any]:
@@ -1496,9 +1512,15 @@ def _apply_layout_hint_fallbacks(
1496
  raw = raw_by_clip.get(clip_id)
1497
  if instr is None or raw is None or "error" not in raw:
1498
  continue
1499
- if instr.layout != LayoutKind.SIT_CENTER:
1500
- continue
1501
- instructions[clip_id] = instr.model_copy(update={"layout": hint})
 
 
 
 
 
 
1502
  updated_raw = dict(raw)
1503
  updated_raw["layout"] = hint.value
1504
  updated_raw["layout_hint_fallback"] = hint.value
 
4
 
5
  import base64
6
  import hashlib
7
+ import json
8
+ import logging
9
+ import os
10
+ import re
11
+ import struct
12
  import subprocess
13
  from collections.abc import Iterable
14
  from io import BytesIO
 
146
  for item in raw:
147
  if isinstance(item, dict):
148
  return item
149
+ if isinstance(raw, str):
150
+ text = raw.strip()
151
+ if text.startswith("```"):
152
+ text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.IGNORECASE)
153
+ text = re.sub(r"\s*```$", "", text)
154
+ text = "".join(ch if ch >= " " or ch in "\r\n\t" else " " for ch in text)
155
+ starts = [idx for idx in (text.find("{"), text.find("[")) if idx >= 0]
156
+ if starts:
157
+ decoder = json.JSONDecoder()
158
+ for start in sorted(starts):
159
+ try:
160
+ parsed, _ = decoder.raw_decode(text[start:])
161
+ except json.JSONDecodeError:
162
+ continue
163
+ return _json_object_from_vision_response(parsed)
164
  raise TypeError(f"Expected vision JSON object, got {type(raw).__name__}")
165
 
166
 
 
1335
  )
1336
  if not response.text:
1337
  raise RuntimeError("Gemini vision returned empty response")
1338
+ return _json_object_from_vision_response(response.text)
1339
 
1340
  client = OpenAI(
1341
  api_key=resolve_openrouter_api_key(),
 
1361
  text = _openai_message_text(response.choices[0].message.content)
1362
  if not text:
1363
  raise RuntimeError("OpenRouter vision returned empty response")
1364
+ return _json_object_from_vision_response(text)
1365
 
1366
 
1367
  def _call_gemini_vision(keyframe_path: str, model_name: str) -> dict[str, Any]:
 
1512
  raw = raw_by_clip.get(clip_id)
1513
  if instr is None or raw is None or "error" not in raw:
1514
  continue
1515
+ if instr.layout != LayoutKind.SIT_CENTER:
1516
+ continue
1517
+ if hint == LayoutKind.SPLIT_CHART_PERSON:
1518
+ updated_raw = dict(raw)
1519
+ updated_raw["layout_hint_fallback_rejected"] = hint.value
1520
+ updated_raw["layout_hint_rejected_reason"] = "vision_failed_without_regions"
1521
+ raw_by_clip[clip_id] = updated_raw
1522
+ continue
1523
+ instructions[clip_id] = instr.model_copy(update={"layout": hint})
1524
  updated_raw = dict(raw)
1525
  updated_raw["layout"] = hint.value
1526
  updated_raw["layout_hint_fallback"] = hint.value
src/humeo/pipeline.py CHANGED
@@ -79,17 +79,21 @@ _PRESENTATION_REFERENCE_RE = re.compile(
79
 
80
 
81
  def _split_chart_person_to_center(instruction: LayoutInstruction) -> LayoutInstruction:
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  return instruction.model_copy(
83
- update={
84
- "layout": LayoutKind.SIT_CENTER,
85
- "zoom": max(float(instruction.zoom), _NATIVE_HIGHLIGHT_SPLIT_TO_CENTER_MIN_ZOOM),
86
- "split_chart_region": None,
87
- "split_person_region": None,
88
- "split_second_chart_region": None,
89
- "split_second_person_region": None,
90
- "chart_x_norm": 0.0,
91
- "top_band_ratio": 0.5,
92
- }
93
  )
94
 
95
 
 
79
 
80
 
81
  def _split_chart_person_to_center(instruction: LayoutInstruction) -> LayoutInstruction:
82
+ updates = {
83
+ "layout": LayoutKind.SIT_CENTER,
84
+ "zoom": max(float(instruction.zoom), _NATIVE_HIGHLIGHT_SPLIT_TO_CENTER_MIN_ZOOM),
85
+ "person_tracking": [],
86
+ "split_chart_region": None,
87
+ "split_person_region": None,
88
+ "split_second_chart_region": None,
89
+ "split_second_person_region": None,
90
+ "chart_x_norm": 0.0,
91
+ "top_band_ratio": 0.5,
92
+ }
93
+ if instruction.split_person_region is not None:
94
+ updates["person_x_norm"] = float(instruction.split_person_region.center_x)
95
  return instruction.model_copy(
96
+ update=updates
 
 
 
 
 
 
 
 
 
97
  )
98
 
99