Spaces:

moonlantern1
/

clipforge

Sleeping

App Files Files Community

moonlantern1 committed 11 days ago

Commit

c86f8a6

verified ·

1 Parent(s): ed35a5c

Fix preview modal and false split layouts

Browse files

Files changed (4) hide show

.env.example +54 -0
app.py +3 -3
src/humeo/layout_vision.py +16 -15
src/humeo/pipeline.py +24 -28

.env.example ADDED Viewed

	@@ -0,0 +1,54 @@

+# Copy to ".env" in the project root (or export in your shell). See docs/ENVIRONMENT.md.
+# --- Gemini / OpenRouter (clip selection + hooks + layout vision) ---
+# Choose the backend explicitly. Recommended for this pipeline: openrouter.
+HUMEO_LLM_PROVIDER=openrouter
+# Google AI Studio key (only needed when HUMEO_LLM_PROVIDER=google or auto).
+GOOGLE_API_KEY=
+# Legacy alias — only used if GOOGLE_API_KEY is unset:
+# GEMINI_API_KEY=
+# OpenRouter API key (recommended backend for all Gemini stages):
+OPENROUTER_API_KEY=
+# Model ids (override per run with: humeo ... --gemini-model <id>)
+GEMINI_MODEL=google/gemini-2.5-pro
+# Optional override for layout vision; leave blank to reuse GEMINI_MODEL.
+# GEMINI_VISION_MODEL=google/gemini-2.5-pro
+# Optional: path to a zip file or directory containing the viral hook library corpus.
+# HUMEO_HOOK_LIBRARY_PATH=C:\Users\you\Downloads\5000_viral_hooks.zip
+# Optional: directory with clip_selection_system.jinja2 and clip_selection_user.jinja2
+# HUMEO_PROMPTS_DIR=
+# --- Transcription ---
+# Recommended default: ElevenLabs Scribe v2 with No Verbatim cleanup.
+ELEVENLABS_API_KEY=
+ELEVENLABS_NO_VERBATIM=true
+HUMEO_TRANSCRIBE_PROVIDER=elevenlabs
+# Optional fallback if you deliberately use the OpenAI Whisper API instead.
+# OPENAI_API_KEY=sk-...
+# Optional local fallback:
+# HUMEO_TRANSCRIBE_PROVIDER=whisperx
+# --- Tracking fallback ---
+# REPLICATE_API_TOKEN=
+HUMEO_SEGMENTATION_PROVIDER=replicate
+# HUMEO_SEGMENTATION_MODEL=meta/sam-2-video
+# --- YouTube downloads on cloud hosts ---
+# Hugging Face/cloud IPs are sometimes blocked by YouTube. If link downloads fail
+# with a bot/sign-in message, export browser cookies in Netscape cookies.txt format,
+# base64 encode the file, and set this as a Hugging Face Space secret.
+# YTDLP_COOKIES_B64=
+# Advanced yt-dlp override:
+# YTDLP_EXTRACTOR_ARGS=youtube:player_client=default,web_creator
+# --- Video cache (optional) ---
+# Override the default cache location: ~/.cache/humeo (Unix) or %LOCALAPPDATA%/humeo (Windows)
+# HUMEO_CACHE_ROOT=

app.py CHANGED Viewed

@@ -570,10 +570,10 @@ INDEX_HTML = r"""<!DOCTYPE html>
   .regen-btn:hover { background: var(--ink-soft); }
   .modal-overlay { display: none; position: fixed; inset: 0; background: rgba(42,31,14,0.65); backdrop-filter: blur(6px); z-index: 500; align-items: center; justify-content: center; padding: 20px; animation: fadeIn 0.25s ease; }
   .modal-overlay.open { display: flex; }
-  .modal-box { background: var(--white); border-radius: var(--radius-lg); width: 100%; max-width: 390px; overflow: hidden; box-shadow: 0 24px 64px rgba(42,31,14,0.25); animation: slideUp 0.3s ease; }
   @keyframes slideUp { from { opacity: 0; transform: translateY(20px) scale(0.97); } to { opacity: 1; transform: translateY(0) scale(1); } }
-  .modal-video { aspect-ratio: 9/16; max-height: 70vh; display: flex; align-items: center; justify-content: center; position: relative; background:var(--ink); }
-  .modal-video video { width:100%; height:100%; object-fit:contain; background:#000; }
   .modal-footer { padding: 16px 20px; border-top: 1px solid var(--border); display: flex; align-items: center; justify-content: space-between; gap:12px; }
   .modal-clip-label { font-family: 'Cormorant Garamond', serif; font-size: 1.1rem; font-weight: 500; }
   .modal-actions { display:flex; align-items:center; gap:8px; }

   .regen-btn:hover { background: var(--ink-soft); }
   .modal-overlay { display: none; position: fixed; inset: 0; background: rgba(42,31,14,0.65); backdrop-filter: blur(6px); z-index: 500; align-items: center; justify-content: center; padding: 20px; animation: fadeIn 0.25s ease; }
   .modal-overlay.open { display: flex; }
+  .modal-box { background: var(--white); border-radius: var(--radius-lg); width: min(390px, calc((100vh - 130px) * 9 / 16), calc(100vw - 40px)); max-width: none; overflow: hidden; box-shadow: 0 24px 64px rgba(42,31,14,0.25); animation: slideUp 0.3s ease; }
   @keyframes slideUp { from { opacity: 0; transform: translateY(20px) scale(0.97); } to { opacity: 1; transform: translateY(0) scale(1); } }
+  .modal-video { width: 100%; aspect-ratio: 9/16; display: flex; align-items: center; justify-content: center; position: relative; background:#000; }
+  .modal-video video { width:100%; height:100%; object-fit:cover; background:#000; display:block; }
   .modal-footer { padding: 16px 20px; border-top: 1px solid var(--border); display: flex; align-items: center; justify-content: space-between; gap:12px; }
   .modal-clip-label { font-family: 'Cormorant Garamond', serif; font-size: 1.1rem; font-weight: 500; }
   .modal-actions { display:flex; align-items:center; gap:8px; }

src/humeo/layout_vision.py CHANGED Viewed

@@ -46,8 +46,8 @@ logger = logging.getLogger(__name__)
 LAYOUT_VISION_CACHE_VERSION = 8
 LAYOUT_VISION_META = "layout_vision.meta.json"
 LAYOUT_VISION_JSON = "layout_vision.json"
-TRACKING_SAMPLE_FRACTIONS = tuple(i / 10.0 for i in range(1, 10))
-TRACKING_MIN_SPREAD_NORM = 0.08
 TRACKING_OUTLIER_DELTA_NORM = 0.16
 TRACKING_OUTLIER_NEIGHBOR_MAX_NORM = 0.10
 TRACKING_DEADBAND_NORM = 0.025
@@ -1304,9 +1304,9 @@ def _call_vision_json(keyframe_path: str, model_name: str, prompt: str) -> dict[
     provider = resolve_llm_provider()
     resolved_model = model_name_for_provider(model_name, provider)
-    if provider == "google":
-        client = genai.Client(api_key=resolve_gemini_api_key())
-        response = client.models.generate_content(
             model=resolved_model,
             contents=[
                 types.Part.from_text(text=prompt),
@@ -1338,10 +1338,10 @@ def _call_vision_json(keyframe_path: str, model_name: str, prompt: str) -> dict[
                     {"type": "image_url", "image_url": {"url": data_url}},
                 ],
             },
-        ],
-        temperature=0.2,
-        response_format={"type": "json_object"},
-    )
     text = _openai_message_text(response.choices[0].message.content)
     if not text:
         raise RuntimeError("OpenRouter vision returned empty response")
@@ -1376,12 +1376,13 @@ def infer_layout_instructions(
         sid = s.scene_id
         if not s.keyframe_path:
             logger.warning("No keyframe for %s; using sit_center.", sid)
-            out[sid] = LayoutInstruction(clip_id=sid, layout=LayoutKind.SIT_CENTER)
-            raw_by_clip[sid] = {"error": "no keyframe", "layout": "sit_center"}
-            continue
-        try:
-            data = _call_gemini_vision(s.keyframe_path, model_name)
-            image_size = _keyframe_dimensions(s.keyframe_path)
             instr = _instruction_from_gemini_json(
                 sid,
                 data,

 LAYOUT_VISION_CACHE_VERSION = 8
 LAYOUT_VISION_META = "layout_vision.meta.json"
 LAYOUT_VISION_JSON = "layout_vision.json"
+TRACKING_SAMPLE_FRACTIONS = tuple(i / 10.0 for i in range(1, 10))
+TRACKING_MIN_SPREAD_NORM = 0.08
 TRACKING_OUTLIER_DELTA_NORM = 0.16
 TRACKING_OUTLIER_NEIGHBOR_MAX_NORM = 0.10
 TRACKING_DEADBAND_NORM = 0.025
     provider = resolve_llm_provider()
     resolved_model = model_name_for_provider(model_name, provider)
+    if provider == "google":
+        client = genai.Client(api_key=resolve_gemini_api_key())
+        response = client.models.generate_content(
             model=resolved_model,
             contents=[
                 types.Part.from_text(text=prompt),
                     {"type": "image_url", "image_url": {"url": data_url}},
                 ],
             },
+        ],
+        temperature=0.2,
+        response_format={"type": "json_object"},
+    )
     text = _openai_message_text(response.choices[0].message.content)
     if not text:
         raise RuntimeError("OpenRouter vision returned empty response")
         sid = s.scene_id
         if not s.keyframe_path:
             logger.warning("No keyframe for %s; using sit_center.", sid)
+            out[sid] = LayoutInstruction(clip_id=sid, layout=LayoutKind.SIT_CENTER)
+            raw_by_clip[sid] = {"error": "no keyframe", "layout": "sit_center"}
+            continue
+        try:
+            logger.info("Layout vision for %s (model=%s)...", sid, model_name)
+            data = _call_gemini_vision(s.keyframe_path, model_name)
+            image_size = _keyframe_dimensions(s.keyframe_path)
             instr = _instruction_from_gemini_json(
                 sid,
                 data,

src/humeo/pipeline.py CHANGED Viewed

@@ -69,13 +69,28 @@ _NATIVE_HIGHLIGHT_MAX_TOP_ANCHORED_PERSON_Y1 = 0.12
 _NATIVE_HIGHLIGHT_SPLIT_TO_CENTER_MIN_ZOOM = 1.20
 _PRESENTATION_REFERENCE_RE = re.compile(
     r"\b("
-    r"as you can see|you can see|what you can see|look at|take a look|shown here|"
     r"shown on|on the screen|on this slide|this chart|the chart|this graph|"
     r"the graph|this slide|this matrix|the matrix|red line|yellow line|"
     r"blue line|green line|top there|bottom there|x-axis|y-axis"
     r")\b",
     flags=re.IGNORECASE,
 )
 def _rerun_config(config: PipelineConfig, steering_notes: list[str]) -> PipelineConfig:
@@ -286,34 +301,15 @@ def _normalize_layout_for_render(
 ) -> LayoutInstruction:
     if render_theme != RenderTheme.NATIVE_HIGHLIGHT:
         return instruction
-    if instruction.layout != LayoutKind.SPLIT_CHART_PERSON:
-        return instruction
-    chart = instruction.split_chart_region
-    person = instruction.split_person_region
-    if chart is None or person is None:
-        return instruction
-    chart_dominates = chart.y2 >= _NATIVE_HIGHLIGHT_CHART_DOMINANCE_Y2
-    person_too_small = person.width <= _NATIVE_HIGHLIGHT_MIN_PERSON_WIDTH
-    # Keep Bryan's newer head-and-shoulders presenter crops in split mode even
-    # when the speaker strip is narrow; the older fallback-to-center rule was
-    # written for lower-anchored full-body crops that rendered badly here.
-    person_is_top_anchored = person.y1 <= _NATIVE_HIGHLIGHT_MAX_TOP_ANCHORED_PERSON_Y1
-    if not (chart_dominates and person_too_small and not person_is_top_anchored):
-        return instruction
-    if clip is not None and _clip_references_presentation(clip):
         return instruction
-    return instruction.model_copy(
-        update={
-            "layout": LayoutKind.SIT_CENTER,
-            "zoom": max(float(instruction.zoom), _NATIVE_HIGHLIGHT_SPLIT_TO_CENTER_MIN_ZOOM),
-            "split_chart_region": None,
-            "split_person_region": None,
-            "split_second_chart_region": None,
-            "split_second_person_region": None,
-            "chart_x_norm": 0.0,
-            "top_band_ratio": 0.5,
-        }
-    )
 def _load_layout_raw_by_clip(work_dir: Path) -> dict[str, dict]:

 _NATIVE_HIGHLIGHT_SPLIT_TO_CENTER_MIN_ZOOM = 1.20
 _PRESENTATION_REFERENCE_RE = re.compile(
     r"\b("
+    r"as you can(?: also)? see|you can(?: also)? see|what you can(?: also)? see|look at|take a look|shown here|"
     r"shown on|on the screen|on this slide|this chart|the chart|this graph|"
     r"the graph|this slide|this matrix|the matrix|red line|yellow line|"
     r"blue line|green line|top there|bottom there|x-axis|y-axis"
     r")\b",
     flags=re.IGNORECASE,
 )
+def _split_chart_person_to_center(instruction: LayoutInstruction) -> LayoutInstruction:
+    return instruction.model_copy(
+        update={
+            "layout": LayoutKind.SIT_CENTER,
+            "zoom": max(float(instruction.zoom), _NATIVE_HIGHLIGHT_SPLIT_TO_CENTER_MIN_ZOOM),
+            "split_chart_region": None,
+            "split_person_region": None,
+            "split_second_chart_region": None,
+            "split_second_person_region": None,
+            "chart_x_norm": 0.0,
+            "top_band_ratio": 0.5,
+        }
+    )
 def _rerun_config(config: PipelineConfig, steering_notes: list[str]) -> PipelineConfig:
 ) -> LayoutInstruction:
     if render_theme != RenderTheme.NATIVE_HIGHLIGHT:
         return instruction
+    if instruction.layout != LayoutKind.SPLIT_CHART_PERSON:
         return instruction
+    if clip is None or not _clip_references_presentation(clip):
+        return _split_chart_person_to_center(instruction)
+    chart = instruction.split_chart_region
+    person = instruction.split_person_region
+    if chart is None or person is None:
+        return _split_chart_person_to_center(instruction)
+    return instruction
 def _load_layout_raw_by_clip(work_dir: Path) -> dict[str, dict]: