Spaces:
Sleeping
Sleeping
Fix caption edge clipping and real clip previews
Browse files- app.py +15 -4
- src/humeo/cutter.py +39 -14
- src/humeo/layout_vision.py +31 -9
- src/humeo/pipeline.py +14 -10
app.py
CHANGED
|
@@ -210,12 +210,18 @@ def _duration_label(path: Path) -> str:
|
|
| 210 |
|
| 211 |
def _publish_files(job: Job) -> None:
|
| 212 |
for path in sorted(job.output_dir.glob("short_*.mp4")):
|
| 213 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
job.clips[path.name] = ClipFile(
|
| 215 |
name=path.name,
|
| 216 |
url=f"/api/jobs/{job.id}/files/{path.name}",
|
| 217 |
-
duration=
|
| 218 |
)
|
|
|
|
|
|
|
| 219 |
|
| 220 |
|
| 221 |
def _validate_credentials() -> None:
|
|
@@ -552,7 +558,9 @@ INDEX_HTML = r"""<!DOCTYPE html>
|
|
| 552 |
.clip-card { border-radius: var(--radius); overflow: hidden; cursor: pointer; background: var(--white); border: 1px solid var(--border); box-shadow: 0 2px 10px rgba(42,31,14,0.06); transition: all 0.2s; animation: clipAppear 0.5s ease both; }
|
| 553 |
.clip-card:hover { transform: translateY(-3px); box-shadow: 0 8px 24px rgba(42,31,14,0.13); }
|
| 554 |
@keyframes clipAppear { from { opacity: 0; transform: scale(0.9) translateY(10px); } to { opacity: 1; transform: scale(1) translateY(0); } }
|
| 555 |
-
.clip-thumb { aspect-ratio: 9/16; display: flex; align-items: center; justify-content: center; position: relative; overflow: hidden; }
|
|
|
|
|
|
|
| 556 |
.clip-play { width: 44px; height: 44px; background: rgba(255,255,255,0.88); border-radius: 50%; display: flex; align-items: center; justify-content: center; font-size: 1.1rem; z-index: 2; box-shadow: 0 2px 12px rgba(0,0,0,0.2); transition: transform 0.2s; }
|
| 557 |
.clip-card:hover .clip-play { transform: scale(1.1); }
|
| 558 |
.clip-meta { padding: 10px 12px; } .clip-num { font-size: 0.72rem; color: var(--ink-muted); text-transform: uppercase; letter-spacing: 0.08em; font-weight: 500; }
|
|
@@ -889,7 +897,10 @@ INDEX_HTML = r"""<!DOCTYPE html>
|
|
| 889 |
const grid = document.getElementById('clips-grid');
|
| 890 |
const card = document.createElement('div');
|
| 891 |
card.className = 'clip-card';
|
| 892 |
-
card.innerHTML = `<div class="clip-thumb
|
|
|
|
|
|
|
|
|
|
| 893 |
card.onclick = () => openModal(idx);
|
| 894 |
grid.appendChild(card);
|
| 895 |
}
|
|
|
|
| 210 |
|
| 211 |
def _publish_files(job: Job) -> None:
|
| 212 |
for path in sorted(job.output_dir.glob("short_*.mp4")):
|
| 213 |
+
if not path.is_file():
|
| 214 |
+
continue
|
| 215 |
+
duration = _duration_label(path)
|
| 216 |
+
existing = job.clips.get(path.name)
|
| 217 |
+
if existing is None:
|
| 218 |
job.clips[path.name] = ClipFile(
|
| 219 |
name=path.name,
|
| 220 |
url=f"/api/jobs/{job.id}/files/{path.name}",
|
| 221 |
+
duration=duration,
|
| 222 |
)
|
| 223 |
+
elif existing.duration == "0:00" and duration != "0:00":
|
| 224 |
+
existing.duration = duration
|
| 225 |
|
| 226 |
|
| 227 |
def _validate_credentials() -> None:
|
|
|
|
| 558 |
.clip-card { border-radius: var(--radius); overflow: hidden; cursor: pointer; background: var(--white); border: 1px solid var(--border); box-shadow: 0 2px 10px rgba(42,31,14,0.06); transition: all 0.2s; animation: clipAppear 0.5s ease both; }
|
| 559 |
.clip-card:hover { transform: translateY(-3px); box-shadow: 0 8px 24px rgba(42,31,14,0.13); }
|
| 560 |
@keyframes clipAppear { from { opacity: 0; transform: scale(0.9) translateY(10px); } to { opacity: 1; transform: scale(1) translateY(0); } }
|
| 561 |
+
.clip-thumb { aspect-ratio: 9/16; display: flex; align-items: center; justify-content: center; position: relative; overflow: hidden; background:#000; }
|
| 562 |
+
.clip-thumb video { width:100%; height:100%; object-fit:cover; display:block; background:#000; }
|
| 563 |
+
.clip-thumb::after { content:""; position:absolute; inset:0; background:linear-gradient(180deg, rgba(0,0,0,0.02), rgba(0,0,0,0.18)); pointer-events:none; }
|
| 564 |
.clip-play { width: 44px; height: 44px; background: rgba(255,255,255,0.88); border-radius: 50%; display: flex; align-items: center; justify-content: center; font-size: 1.1rem; z-index: 2; box-shadow: 0 2px 12px rgba(0,0,0,0.2); transition: transform 0.2s; }
|
| 565 |
.clip-card:hover .clip-play { transform: scale(1.1); }
|
| 566 |
.clip-meta { padding: 10px 12px; } .clip-num { font-size: 0.72rem; color: var(--ink-muted); text-transform: uppercase; letter-spacing: 0.08em; font-weight: 500; }
|
|
|
|
| 897 |
const grid = document.getElementById('clips-grid');
|
| 898 |
const card = document.createElement('div');
|
| 899 |
card.className = 'clip-card';
|
| 900 |
+
card.innerHTML = `<div class="clip-thumb"><video src="${clip.url}#t=0.2" muted playsinline preload="metadata"></video><div class="clip-play">▶</div></div><div class="clip-meta"><div class="clip-num">Clip ${idx + 1}</div><div class="clip-dur">${clip.duration || '0:00'}</div><a class="clip-download" href="${clip.url}" download onclick="event.stopPropagation()">Download</a></div>`;
|
| 901 |
+
const preview = card.querySelector('video');
|
| 902 |
+
card.addEventListener('mouseenter', () => { preview.play().catch(() => {}); });
|
| 903 |
+
card.addEventListener('mouseleave', () => { preview.pause(); preview.currentTime = 0.2; });
|
| 904 |
card.onclick = () => openModal(idx);
|
| 905 |
grid.appendChild(card);
|
| 906 |
}
|
src/humeo/cutter.py
CHANGED
|
@@ -18,13 +18,14 @@ from humeo.transcript_align import (
|
|
| 18 |
|
| 19 |
logger = logging.getLogger(__name__)
|
| 20 |
|
| 21 |
-
_NATIVE_HIGHLIGHT_FONT_NAME = "
|
| 22 |
_NATIVE_HIGHLIGHT_PURPLE = "&H00F65C8B"
|
| 23 |
_NATIVE_HIGHLIGHT_LEAD_SEC = 0.06
|
| 24 |
_NATIVE_HIGHLIGHT_MIN_DWELL_SEC = 0.16
|
| 25 |
_NATIVE_HIGHLIGHT_MIN_VALID_WORD_SEC = 0.035
|
| 26 |
_NATIVE_HIGHLIGHT_MAX_VALID_WORD_SEC = 1.65
|
| 27 |
-
_NATIVE_HIGHLIGHT_MAX_LINE_WIDTH_RATIO = 0.
|
|
|
|
| 28 |
_NATIVE_HIGHLIGHT_ROUNDING_OVERRIDE = r"\blur3.0"
|
| 29 |
_NATIVE_HIGHLIGHT_STOPWORDS = {
|
| 30 |
"a",
|
|
@@ -207,10 +208,24 @@ def _should_render_native_highlight_group(words) -> bool:
|
|
| 207 |
return any(token.lower() not in _NATIVE_HIGHLIGHT_STOPWORDS for token in cleaned)
|
| 208 |
|
| 209 |
|
| 210 |
-
def _native_highlight_font_path() -> Path | None:
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
if path.is_file():
|
| 215 |
return path
|
| 216 |
return None
|
|
@@ -448,7 +463,14 @@ def _format_native_highlight_ass(
|
|
| 448 |
line_height = max(font_size, _text_height(font) + 6)
|
| 449 |
line_gap = max(8, int(round(font_size * 0.08)))
|
| 450 |
bottom_anchor = play_res_y - margin_v
|
| 451 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 452 |
|
| 453 |
header = (
|
| 454 |
"[Script Info]\n"
|
|
@@ -496,7 +518,10 @@ def _format_native_highlight_ass(
|
|
| 496 |
continue
|
| 497 |
line_text = " ".join(word.word for word in line_words)
|
| 498 |
line_top = block_top + line_idx * (line_height + line_gap)
|
| 499 |
-
|
|
|
|
|
|
|
|
|
|
| 500 |
events.append(
|
| 501 |
"Dialogue: 1,"
|
| 502 |
f"{_fmt_ass_time(cue_start)},{_fmt_ass_time(cue_end)},Base,,0,0,0,,"
|
|
@@ -587,12 +612,12 @@ def generate_ass(
|
|
| 587 |
prefer_break_on_punctuation = True
|
| 588 |
min_words_before_break = 5
|
| 589 |
elif render_theme == RenderTheme.NATIVE_HIGHLIGHT:
|
| 590 |
-
cue_words =
|
| 591 |
-
cue_sec = 2.
|
| 592 |
-
cue_font_size = max(font_size, 86)
|
| 593 |
-
cue_margin_v = max(margin_v, 300)
|
| 594 |
-
prefer_break_on_punctuation = True
|
| 595 |
-
min_words_before_break =
|
| 596 |
|
| 597 |
aligned_words = aligned.words
|
| 598 |
if render_theme == RenderTheme.NATIVE_HIGHLIGHT and repair_word_timings:
|
|
|
|
| 18 |
|
| 19 |
logger = logging.getLogger(__name__)
|
| 20 |
|
| 21 |
+
_NATIVE_HIGHLIGHT_FONT_NAME = "League Spartan"
|
| 22 |
_NATIVE_HIGHLIGHT_PURPLE = "&H00F65C8B"
|
| 23 |
_NATIVE_HIGHLIGHT_LEAD_SEC = 0.06
|
| 24 |
_NATIVE_HIGHLIGHT_MIN_DWELL_SEC = 0.16
|
| 25 |
_NATIVE_HIGHLIGHT_MIN_VALID_WORD_SEC = 0.035
|
| 26 |
_NATIVE_HIGHLIGHT_MAX_VALID_WORD_SEC = 1.65
|
| 27 |
+
_NATIVE_HIGHLIGHT_MAX_LINE_WIDTH_RATIO = 0.74
|
| 28 |
+
_NATIVE_HIGHLIGHT_SAFE_MARGIN_X = 96
|
| 29 |
_NATIVE_HIGHLIGHT_ROUNDING_OVERRIDE = r"\blur3.0"
|
| 30 |
_NATIVE_HIGHLIGHT_STOPWORDS = {
|
| 31 |
"a",
|
|
|
|
| 208 |
return any(token.lower() not in _NATIVE_HIGHLIGHT_STOPWORDS for token in cleaned)
|
| 209 |
|
| 210 |
|
| 211 |
+
def _native_highlight_font_path() -> Path | None:
|
| 212 |
+
try:
|
| 213 |
+
import humeo_core
|
| 214 |
+
|
| 215 |
+
bundled = (
|
| 216 |
+
Path(humeo_core.__file__).resolve().parent
|
| 217 |
+
/ "assets"
|
| 218 |
+
/ "fonts"
|
| 219 |
+
/ "LeagueSpartan-Bold.ttf"
|
| 220 |
+
)
|
| 221 |
+
if bundled.is_file():
|
| 222 |
+
return bundled
|
| 223 |
+
except Exception:
|
| 224 |
+
pass
|
| 225 |
+
|
| 226 |
+
windows_fonts = Path(os.environ.get("WINDIR", r"C:\Windows")) / "Fonts"
|
| 227 |
+
for filename in ("arialbd.ttf", "Arialbd.ttf", "ARIALBD.TTF", "arial.ttf"):
|
| 228 |
+
path = windows_fonts / filename
|
| 229 |
if path.is_file():
|
| 230 |
return path
|
| 231 |
return None
|
|
|
|
| 463 |
line_height = max(font_size, _text_height(font) + 6)
|
| 464 |
line_gap = max(8, int(round(font_size * 0.08)))
|
| 465 |
bottom_anchor = play_res_y - margin_v
|
| 466 |
+
safe_margin_x = min(
|
| 467 |
+
int(round(play_res_x * 0.12)),
|
| 468 |
+
max(24, _NATIVE_HIGHLIGHT_SAFE_MARGIN_X),
|
| 469 |
+
)
|
| 470 |
+
max_line_width = min(
|
| 471 |
+
play_res_x * _NATIVE_HIGHLIGHT_MAX_LINE_WIDTH_RATIO,
|
| 472 |
+
play_res_x - (safe_margin_x * 2),
|
| 473 |
+
)
|
| 474 |
|
| 475 |
header = (
|
| 476 |
"[Script Info]\n"
|
|
|
|
| 518 |
continue
|
| 519 |
line_text = " ".join(word.word for word in line_words)
|
| 520 |
line_top = block_top + line_idx * (line_height + line_gap)
|
| 521 |
+
line_width = _text_width(font, line_text)
|
| 522 |
+
centered_left = (play_res_x - line_width) / 2.0
|
| 523 |
+
max_left = play_res_x - safe_margin_x - line_width
|
| 524 |
+
line_left = max(float(safe_margin_x), min(centered_left, max_left))
|
| 525 |
events.append(
|
| 526 |
"Dialogue: 1,"
|
| 527 |
f"{_fmt_ass_time(cue_start)},{_fmt_ass_time(cue_end)},Base,,0,0,0,,"
|
|
|
|
| 612 |
prefer_break_on_punctuation = True
|
| 613 |
min_words_before_break = 5
|
| 614 |
elif render_theme == RenderTheme.NATIVE_HIGHLIGHT:
|
| 615 |
+
cue_words = 6
|
| 616 |
+
cue_sec = 2.0
|
| 617 |
+
cue_font_size = max(font_size, 86)
|
| 618 |
+
cue_margin_v = max(margin_v, 300)
|
| 619 |
+
prefer_break_on_punctuation = True
|
| 620 |
+
min_words_before_break = 3
|
| 621 |
|
| 622 |
aligned_words = aligned.words
|
| 623 |
if render_theme == RenderTheme.NATIVE_HIGHLIGHT and repair_word_timings:
|
src/humeo/layout_vision.py
CHANGED
|
@@ -4,10 +4,11 @@ from __future__ import annotations
|
|
| 4 |
|
| 5 |
import base64
|
| 6 |
import hashlib
|
| 7 |
-
import json
|
| 8 |
-
import logging
|
| 9 |
-
import os
|
| 10 |
-
import
|
|
|
|
| 11 |
import subprocess
|
| 12 |
from collections.abc import Iterable
|
| 13 |
from io import BytesIO
|
|
@@ -145,6 +146,21 @@ def _json_object_from_vision_response(raw: object) -> dict[str, Any]:
|
|
| 145 |
for item in raw:
|
| 146 |
if isinstance(item, dict):
|
| 147 |
return item
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
raise TypeError(f"Expected vision JSON object, got {type(raw).__name__}")
|
| 149 |
|
| 150 |
|
|
@@ -1319,7 +1335,7 @@ def _call_vision_json(keyframe_path: str, model_name: str, prompt: str) -> dict[
|
|
| 1319 |
)
|
| 1320 |
if not response.text:
|
| 1321 |
raise RuntimeError("Gemini vision returned empty response")
|
| 1322 |
-
return _json_object_from_vision_response(
|
| 1323 |
|
| 1324 |
client = OpenAI(
|
| 1325 |
api_key=resolve_openrouter_api_key(),
|
|
@@ -1345,7 +1361,7 @@ def _call_vision_json(keyframe_path: str, model_name: str, prompt: str) -> dict[
|
|
| 1345 |
text = _openai_message_text(response.choices[0].message.content)
|
| 1346 |
if not text:
|
| 1347 |
raise RuntimeError("OpenRouter vision returned empty response")
|
| 1348 |
-
return _json_object_from_vision_response(
|
| 1349 |
|
| 1350 |
|
| 1351 |
def _call_gemini_vision(keyframe_path: str, model_name: str) -> dict[str, Any]:
|
|
@@ -1496,9 +1512,15 @@ def _apply_layout_hint_fallbacks(
|
|
| 1496 |
raw = raw_by_clip.get(clip_id)
|
| 1497 |
if instr is None or raw is None or "error" not in raw:
|
| 1498 |
continue
|
| 1499 |
-
if instr.layout != LayoutKind.SIT_CENTER:
|
| 1500 |
-
continue
|
| 1501 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1502 |
updated_raw = dict(raw)
|
| 1503 |
updated_raw["layout"] = hint.value
|
| 1504 |
updated_raw["layout_hint_fallback"] = hint.value
|
|
|
|
| 4 |
|
| 5 |
import base64
|
| 6 |
import hashlib
|
| 7 |
+
import json
|
| 8 |
+
import logging
|
| 9 |
+
import os
|
| 10 |
+
import re
|
| 11 |
+
import struct
|
| 12 |
import subprocess
|
| 13 |
from collections.abc import Iterable
|
| 14 |
from io import BytesIO
|
|
|
|
| 146 |
for item in raw:
|
| 147 |
if isinstance(item, dict):
|
| 148 |
return item
|
| 149 |
+
if isinstance(raw, str):
|
| 150 |
+
text = raw.strip()
|
| 151 |
+
if text.startswith("```"):
|
| 152 |
+
text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.IGNORECASE)
|
| 153 |
+
text = re.sub(r"\s*```$", "", text)
|
| 154 |
+
text = "".join(ch if ch >= " " or ch in "\r\n\t" else " " for ch in text)
|
| 155 |
+
starts = [idx for idx in (text.find("{"), text.find("[")) if idx >= 0]
|
| 156 |
+
if starts:
|
| 157 |
+
decoder = json.JSONDecoder()
|
| 158 |
+
for start in sorted(starts):
|
| 159 |
+
try:
|
| 160 |
+
parsed, _ = decoder.raw_decode(text[start:])
|
| 161 |
+
except json.JSONDecodeError:
|
| 162 |
+
continue
|
| 163 |
+
return _json_object_from_vision_response(parsed)
|
| 164 |
raise TypeError(f"Expected vision JSON object, got {type(raw).__name__}")
|
| 165 |
|
| 166 |
|
|
|
|
| 1335 |
)
|
| 1336 |
if not response.text:
|
| 1337 |
raise RuntimeError("Gemini vision returned empty response")
|
| 1338 |
+
return _json_object_from_vision_response(response.text)
|
| 1339 |
|
| 1340 |
client = OpenAI(
|
| 1341 |
api_key=resolve_openrouter_api_key(),
|
|
|
|
| 1361 |
text = _openai_message_text(response.choices[0].message.content)
|
| 1362 |
if not text:
|
| 1363 |
raise RuntimeError("OpenRouter vision returned empty response")
|
| 1364 |
+
return _json_object_from_vision_response(text)
|
| 1365 |
|
| 1366 |
|
| 1367 |
def _call_gemini_vision(keyframe_path: str, model_name: str) -> dict[str, Any]:
|
|
|
|
| 1512 |
raw = raw_by_clip.get(clip_id)
|
| 1513 |
if instr is None or raw is None or "error" not in raw:
|
| 1514 |
continue
|
| 1515 |
+
if instr.layout != LayoutKind.SIT_CENTER:
|
| 1516 |
+
continue
|
| 1517 |
+
if hint == LayoutKind.SPLIT_CHART_PERSON:
|
| 1518 |
+
updated_raw = dict(raw)
|
| 1519 |
+
updated_raw["layout_hint_fallback_rejected"] = hint.value
|
| 1520 |
+
updated_raw["layout_hint_rejected_reason"] = "vision_failed_without_regions"
|
| 1521 |
+
raw_by_clip[clip_id] = updated_raw
|
| 1522 |
+
continue
|
| 1523 |
+
instructions[clip_id] = instr.model_copy(update={"layout": hint})
|
| 1524 |
updated_raw = dict(raw)
|
| 1525 |
updated_raw["layout"] = hint.value
|
| 1526 |
updated_raw["layout_hint_fallback"] = hint.value
|
src/humeo/pipeline.py
CHANGED
|
@@ -79,17 +79,21 @@ _PRESENTATION_REFERENCE_RE = re.compile(
|
|
| 79 |
|
| 80 |
|
| 81 |
def _split_chart_person_to_center(instruction: LayoutInstruction) -> LayoutInstruction:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
return instruction.model_copy(
|
| 83 |
-
update=
|
| 84 |
-
"layout": LayoutKind.SIT_CENTER,
|
| 85 |
-
"zoom": max(float(instruction.zoom), _NATIVE_HIGHLIGHT_SPLIT_TO_CENTER_MIN_ZOOM),
|
| 86 |
-
"split_chart_region": None,
|
| 87 |
-
"split_person_region": None,
|
| 88 |
-
"split_second_chart_region": None,
|
| 89 |
-
"split_second_person_region": None,
|
| 90 |
-
"chart_x_norm": 0.0,
|
| 91 |
-
"top_band_ratio": 0.5,
|
| 92 |
-
}
|
| 93 |
)
|
| 94 |
|
| 95 |
|
|
|
|
| 79 |
|
| 80 |
|
| 81 |
def _split_chart_person_to_center(instruction: LayoutInstruction) -> LayoutInstruction:
|
| 82 |
+
updates = {
|
| 83 |
+
"layout": LayoutKind.SIT_CENTER,
|
| 84 |
+
"zoom": max(float(instruction.zoom), _NATIVE_HIGHLIGHT_SPLIT_TO_CENTER_MIN_ZOOM),
|
| 85 |
+
"person_tracking": [],
|
| 86 |
+
"split_chart_region": None,
|
| 87 |
+
"split_person_region": None,
|
| 88 |
+
"split_second_chart_region": None,
|
| 89 |
+
"split_second_person_region": None,
|
| 90 |
+
"chart_x_norm": 0.0,
|
| 91 |
+
"top_band_ratio": 0.5,
|
| 92 |
+
}
|
| 93 |
+
if instruction.split_person_region is not None:
|
| 94 |
+
updates["person_x_norm"] = float(instruction.split_person_region.center_x)
|
| 95 |
return instruction.model_copy(
|
| 96 |
+
update=updates
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
)
|
| 98 |
|
| 99 |
|