Spaces:

Rafii
/

videovoice-dramabox

Running on Zero

App Files Files Community

github-actions[bot] committed 8 days ago

Commit

0422215

1 Parent(s): 10b6cf0

deploy: switch to dramabox requirements @ a95fda4

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.env.example +35 -0
.gitattributes +14 -35
.gitignore +31 -0
CLAUDE.md +13 -0
LICENSE +21 -0
README.md +330 -8
app.py +80 -0
graphify-out/.graphify_python +1 -0
graphify-out/.graphify_root +1 -0
graphify-out/GRAPH_REPORT.md +465 -0
graphify-out/graph.html +0 -0
packages.txt +4 -0
pipeline.py +363 -0
pyproject.toml +59 -0
requirements-cbox.txt +51 -0
requirements-omni.txt +157 -0
requirements-qwen3.txt +55 -0
requirements.txt +62 -0
scripts/prefetch_models.py +47 -0
server.py +929 -0
social_distributor/.env.example +16 -0
social_distributor/.gitignore +8 -0
social_distributor/README.md +205 -0
social_distributor/post.py +311 -0
social_distributor/poster/__init__.py +0 -0
social_distributor/poster/auth/__init__.py +0 -0
social_distributor/poster/auth/session.py +111 -0
social_distributor/poster/caption_gen.py +164 -0
social_distributor/poster/config.py +88 -0
social_distributor/poster/creator_extract.py +149 -0
social_distributor/poster/models.py +29 -0
social_distributor/poster/platforms/__init__.py +0 -0
social_distributor/poster/platforms/base.py +57 -0
social_distributor/poster/platforms/instagram.py +206 -0
social_distributor/poster/platforms/tiktok.py +155 -0
social_distributor/poster/platforms/youtube.py +165 -0
social_distributor/poster/post_log.py +45 -0
social_distributor/poster/video_loader.py +101 -0
social_distributor/pyproject.toml +20 -0
social_distributor/uv.lock +0 -0
steps/__init__.py +1 -0
steps/lang/__init__.py +38 -0
steps/lang/_shared.py +150 -0
steps/lang/omnivoice_languages.py +652 -0
steps/lang/qwen3_languages.py +15 -0
steps/lang/urdu.py +324 -0
steps/s1_extract_audio.py +68 -0
steps/s1b_separate.py +152 -0
steps/s2_transcribe.py +395 -0
steps/s3_translate.py +195 -0

.env.example ADDED Viewed

	@@ -0,0 +1,35 @@

+# VideoVoice — Environment Variables
+# Copy this to .env and fill in values
+# Server port (default 8000)
+PORT=8000
+# Where per-job artifact folders get written. On HF Spaces this is resolved
+# automatically (/data/jobs with persistent storage, /tmp/videovoice_jobs
+# without). For local dev, set this to ./data so jobs land next to the repo
+# — same layout the old `main` used.
+ARTIFACTS_ROOT=./data
+# OpenAI API key (for translation step)
+OPENAI_API_KEY=sk-...
+# Pollinations API key (optional, for Whisper transcription fallback)
+POLLINATIONS_API_KEY=
+POLLEN_TRANSCRIBE_MODEL=whisper-large-v3
+POLLEN_MODEL=gemini-search
+# Stripe (optional, for paid tiers)
+STRIPE_PUBLISHABLE_KEY=
+STRIPE_SECRET_KEY=
+# AWS S3 (optional, for cloud storage)
+AWS_ACCESS_KEY_ID=
+AWS_SECRET_ACCESS_KEY=
+AWS_S3_BUCKET=
+AWS_REGION=us-east-1
+# AWS Bedrock (optional, fallback translator for Urdu)
+AWS_BEDROCK_API_KEY=
+BEDROCK_MODEL=qwen.qwen3-next-80b-a3b
+HF_TOKEN=

.gitattributes CHANGED Viewed

@@ -1,35 +1,14 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+# Files in this repo that are dev-only and must NOT ship to the HF Spaces.
+# `deploy.sh` honors this via `git archive --worktree-attributes`.
+# Rule of thumb: if HF Spaces would never import/execute it, export-ignore it.
+# Do NOT export-ignore server.py — app.py imports from it at runtime on HF.
+.github/          export-ignore
+SPLIT_STRATEGY.md export-ignore
+deploy.sh         export-ignore
+Dockerfile        export-ignore
+.dockerignore     export-ignore
+social_distributor/ export-ignore
+frontend/         export-ignore
+batch_translate.py export-ignore
+client_insta_links.jsonl export-ignore

.gitignore ADDED Viewed

	@@ -0,0 +1,31 @@

+*.wav
+*.mp4
+*.mov
+*.webp
+*.ass
+*.txt
+!requirements.txt
+!requirements-cbox.txt
+!requirements-omni.txt
+!requirements-qwen3.txt
+!packages.txt
+!SPLIT_STRATEGY.md
+*.DS_Store
+.env
+.venv/
+__pycache__/
+**/__pycache__/
+*.py[cod]
+*$py.class
+*.json
+!data/showcase.json
+tmp/
+uploads/
+outputs/
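+# Ignore the contents of data/ (not the directory itself) so that
+# data/showcase.json can be re-included; git cannot un-ignore a file
+# whose parent directory is excluded.
+data/*
+!data/showcase.json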
+batch_outputs/
+# Subproject runtime artifacts (not for HF Space)
+social_distributor/.venv/
+social_distributor/poster/auth/storage/
+social_distributor/debug_*.png
+fine_tuning/

CLAUDE.md ADDED Viewed

	@@ -0,0 +1,13 @@

+## Deployment
+HF Spaces deployment is fully automated via `.github/workflows/deploy-hf.yml`. Pushing to `origin/main` triggers the workflow which runs `./deploy.sh --force` and pushes to all three Spaces (Chatterbox, OmniVoice, Qwen3). Do not run `./deploy.sh` locally after a push — it is redundant. To verify a deploy, use `gh run list --workflow=deploy-hf.yml`.
+## graphify
+This project has a graphify knowledge graph at graphify-out/.
+Rules:
+- Before answering architecture or codebase questions, read graphify-out/GRAPH_REPORT.md for god nodes and community structure
+- If graphify-out/wiki/index.md exists, navigate it instead of reading raw files
+- For cross-module "how does X relate to Y" questions, prefer `graphify query "<question>"`, `graphify path "<A>" "<B>"`, or `graphify explain "<concept>"` over grep — these traverse the graph's EXTRACTED + INFERRED edges instead of scanning files
+- After modifying code files in this session, run `graphify update .` to keep the graph current (AST-only, no API cost)

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2026 Raafi
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -1,13 +1,335 @@
 ---
-title: Videovoice Dramabox
-emoji: 👀
-colorFrom: indigo
-colorTo: purple
 sdk: gradio
-sdk_version: 6.14.0
-python_version: '3.12'
 app_file: app.py
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: VideoVoice API
 sdk: gradio
+sdk_version: 6.12.0
 app_file: app.py
+python_version: "3.10"
 ---
+<!--
+  ZeroGPU is enabled from the Space Settings UI (not via frontmatter).
+  PRO account required. `app.py` mounts the FastAPI pipeline onto Gradio
+  so the React client keeps calling `/api/*` over CORS unchanged.
+-->
+# VideoVoice
+**AI-powered short video translation with zero-shot voice cloning.**
+Translate any short video (≤60s) into 23+ languages while preserving the original speaker's voice. Paste an Instagram Reel, YouTube Short, or upload any video file.
+---
+## How It Works
+1. **Upload or Paste URL** — Drop a video file or paste a social media link
+2. **AI Translates & Clones** — Our 6-step pipeline transcribes, translates, and synthesizes new speech using a voice clone of the original speaker
+3. **Preview & Download** — Watch your translated video and download in full quality
+### Pipeline Architecture
+```
+Video → Extract Audio → Whisper Transcription → LLM Translation
+      → Chatterbox Voice Clone + TTS → Time-Sync → Final Merge
+```
+| Step | Component | Description |
+|------|-----------|-------------|
+| 1 | FFmpeg | Extract audio track from video |
+| 2 | Whisper Large V3 | Transcribe with word-level timestamps |
+| 3 | GPT-4o-mini | Context-aware subtitle translation |
+| 4 | Chatterbox Multilingual | Zero-shot voice cloning + TTS synthesis |
+| 5 | Dynamic Time-Stretch | Align translated audio to original timing |
+| 6 | FFmpeg | Merge new audio track back into video |
+---
+## Running Locally
+### Prerequisites
+- Python 3.10+ (`requires-python = ">=3.10,<3.13"`)
+- FFmpeg (`brew install ffmpeg` on macOS, `sudo apt install ffmpeg` on Ubuntu)
+- An OpenAI API key
+### First-time setup
+```bash
+# 1. Install uv (skip if you already have it)
+curl -LsSf https://astral.sh/uv/install.sh | sh
+# 2. Clone and enter the repo
+git clone https://github.com/Video-Voice/VideoVoice-be.git
+cd VideoVoice-be
+# 3. Install deps with the chatterbox TTS engine (default for local dev)
+#    Use `--extra omnivoice` instead if you want OmniVoice. The two extras
+#    are mutually exclusive — pick one.
+uv sync --extra chatterbox
+# 4. Configure env vars
+cp .env.example .env
+# Edit .env — at minimum set OPENAI_API_KEY and ARTIFACTS_ROOT=./data
+```
+### One-time: hide the vendored chatterbox folder
+The repo ships a vendored `./chatterbox/` folder that the HF Chatterbox Space needs (it has ZeroGPU-specific tweaks). Locally we want Python to import the PyPI `chatterbox-tts` package instead, so tell git to ignore the working-tree state for that folder and delete it locally:
+```bash
+git ls-files chatterbox/ | xargs git update-index --skip-worktree
+rm -rf chatterbox/
+```
+HEAD still contains the folder, so HF deploys are unaffected. Reverse with `git update-index --no-skip-worktree` + `git checkout HEAD -- chatterbox/`.
+### Run the server
+```bash
+uv run python server.py
+```
+Open [http://localhost:8000](http://localhost:8000). `/api/*` are the backend routes; `/` serves the legacy static UI in `frontend/`. If the port is in use, set `PORT=8001`.
+Per-job artifacts land in `$ARTIFACTS_ROOT/<job_id>/`. With `ARTIFACTS_ROOT=./data` (in `.env`) that's `./data/<job_id>/` next to the repo — same layout the repo has always used.
+### Run the pipeline headlessly
+```bash
+uv run python pipeline.py --input data/my_video.mp4 --target-lang Spanish
+```
+---
+## API Reference
+The following endpoints are available on the backend (FastAPI/Gradio Server). When running on Hugging Face, replace `localhost:8000` with your Space's API URL (e.g., `https://rafii-videovoice.hf.space`).
+### Core Endpoints
+#### `POST /api/jobs`
+Submit a video for translation. You can provide either a local file or a URL.
+**Form Data:**
+- `file`: (Optional) Video file upload (MP4, MOV, WebM, ≤90MB).
+- `url`: (Optional) Social media URL (Instagram, YouTube, TikTok).
+- `target_language`: (Required) Name of target language (e.g., "Spanish", "Hindi").
+- `source_language`: (Optional) ISO code of source (default: "en").
+- `voice_mode`: (Optional) `chatterbox` or `omnivoice` (must match Space engine).
+- `captions`: (Optional) "true" or "false" (default: "true").
+- `preserve_music`: (Optional) "true" or "false" (default: "false").
+**Example:**
+```bash
+curl -X POST http://localhost:8000/api/jobs \
+  -F "file=@my_video.mp4" \
+  -F "target_language=French"
+```
+#### `GET /api/jobs/{job_id}`
+Poll for the real-time status and progress messages of a specific job.
+**Query Parameters:**
+- `after`: (Optional) Index of the last message received to fetch only new ones.
+**Example:**
+```bash
+curl "http://localhost:8000/api/jobs/abc123_1?after=5"
+```
+#### `GET /api/jobs/{job_id}/result`
+Download the final translated video file.
+**Example:**
+```bash
+curl -O -L http://localhost:8000/api/jobs/abc123_1/result
+```
+---
+### Utility & Configuration
+#### `GET /api/config`
+Fetch server configuration, including supported languages, max file size, and the active TTS engine.
+#### `GET /api/health`
+Check if the server is alive and see GPU availability/queue depth.
+#### `GET /api/showcase`
+Retrieve curated "before & after" demo entries defined in `data/showcase.json`.
+#### `GET /api/demo-videos`
+List all whitelisted demo videos available for streaming from the `outputs/` and `data/` folders.
+#### `GET /api/demo-videos/{video_id}/stream`
+Stream a specific demo video by its opaque ID.
+---
+### Interactive / Preview Endpoints
+#### `GET /api/jobs/{job_id}/preview/{model_name}`
+Retrieve a short audio snippet of the cloned voice for a specific TTS model before proceeding with full synthesis.
+#### `POST /api/jobs/{job_id}/select-model`
+Confirm which TTS model to use after listening to previews (used in multi-model workflows).
+---
+### ZeroGPU / Gradio Internal API
+#### `POST /run_pipeline` (Gradio API)
+Internal endpoint used by ZeroGPU to trigger the heavy ML processing logic. Recommended for use via `gradio_client`.
+**Example (Python):**
+```python
+from gradio_client import Client
+client = Client("Rafii/videovoice")
+client.predict(job_id="abc123_1", api_name="/run_pipeline")
+```
+---
+## Testing the API (Hugging Face Spaces)
+When running on Hugging Face Spaces (using `app.py`), you can test the API using standard HTTP tools or the Gradio Client. Choose the Space corresponding to the desired TTS engine:
+| TTS Engine | Space ID | API Base URL |
+|------------|-----------|--------------|
+| **Chatterbox** | `Rafii/videovoice` | `https://rafii-videovoice.hf.space` |
+| **OmniVoice** | `Rafii/videovoice-omni` | `https://rafii-videovoice-omni.hf.space` |
+### 1. Using `curl` (FastAPI Routes)
+You can check the health of the API and verify that it's running:
+```bash
+# Chatterbox Space
+curl https://rafii-videovoice.hf.space/api/health
+# OmniVoice Space
+curl https://rafii-videovoice-omni.hf.space/api/health
+```
+To submit a job via the standard API:
+```bash
+curl -X POST https://rafii-videovoice.hf.space/api/jobs \
+  -F "url=https://www.instagram.com/reels/XYZ/" \
+  -F "target_language=Spanish"
+```
+### 2. Using `gradio_client` (Gradio API Routes)
+The `gradio.Server` endpoints are optimized for ZeroGPU and can be accessed using the Python `gradio_client`:
+```python
+from gradio_client import Client
+# Change to "Rafii/videovoice-omni" for OmniVoice
+client = Client("Rafii/videovoice")
+result = client.predict(
+    job_id="abc123",
+    api_name="/run_pipeline"
+)
+print(result)
+```
+### 3. Using JavaScript (Frontend)
+The new `gradio.Server` mode is designed for custom frontends. You can use the `@gradio/client` JS library:
+```javascript
+import { Client } from "@gradio/client";
+// Connect to the specific Space
+const client = await Client.connect("Rafii/videovoice");
+const result = await client.predict("/run_pipeline", {
+    job_id: "abc123",
+});
+```
+---
+## Supported Languages
+Spanish, French, German, Hindi, Portuguese, Italian, Japanese, Chinese, Arabic, Korean — and more.
+---
+## Project Structure
+```
+VideoVoice/
+├── server.py            # FastAPI backend
+├── pipeline.py          # Core translation pipeline
+├── steps/               # Pipeline step modules
+│   ├── s1_extract_audio.py
+│   ├── s2_transcribe.py
+│   ├── s3_translate.py
+│   ├── s4_tts.py
+│   ├── s5_sync.py
+│   └── s6_merge.py
+├── frontend/            # Static web UI
+│   ├── index.html
+│   ├── style.css
+│   └── app.js
+├── pyproject.toml       # Dependencies & project config
+├── uv.lock              # Lockfile (reproducible installs)
+├── .env.example
+└── README.md
+```
+---
+## Entrypoints
+Two files intentionally exist, run in different contexts, but **ship the same code**:
+| File | When it runs | What it does |
+|------|-------------|--------------|
+| `server.py` | Local dev (`uv run python server.py`) | Plain FastAPI app — defines every `/api/*` route. |
+| `app.py`    | Hugging Face Spaces               | Gradio Server that imports `server.py`'s router and wraps it with `@spaces.GPU` for ZeroGPU. |
+`app.py` depends on `server.py`, so server.py must ship to HF. Do not strip it.
+## Deployment
+### Hugging Face Spaces (production)
+Push to `main` → GitHub Actions runs `.github/workflows/deploy-hf.yml` → both Spaces (`Rafii/videovoice` and `Rafii/videovoice-omni`) redeploy automatically. No manual step.
+One-time CI setup:
+1. Create an HF access token with write access to both Spaces: https://huggingface.co/settings/tokens
+2. Add it as `HF_TOKEN` under **Settings → Secrets and variables → Actions** in the GitHub repo.
+Manual fallback (from a local clean checkout with `space` and `space-omni` remotes configured):
+```bash
+./deploy.sh          # skips if remote is already at HEAD
+./deploy.sh --force  # always redeploy
+```
+Files filtered out of every Space deploy are listed in `.gitattributes` (`export-ignore`).
+### Branching
+`main` is canonical. Use short-lived `feat/<thing>` branches, open a PR, merge, delete. Never maintain a parallel deploy branch — every change on main reaches both Spaces via CI.
+### AWS (alternative GPU host)
+```bash
+# On a g4dn.xlarge instance
+sudo apt update && sudo apt install -y ffmpeg
+curl -LsSf https://astral.sh/uv/install.sh | sh
+uv sync --extra chatterbox   # or --extra omnivoice; pick exactly one TTS engine extra
+uv run python server.py
+```
+Recommended: use `systemd` service for auto-restart, CloudFront for CDN, S3 for video storage with 24h auto-delete lifecycle policy.
+---
+## License
+MIT License — see [LICENSE](LICENSE).

app.py ADDED Viewed

	@@ -0,0 +1,80 @@

+"""
+ZeroGPU-compatible entrypoint using gradio.Server.
+Server extends FastAPI, so all your existing API routes work unchanged.
+"""
+from __future__ import annotations
+import os
+# 1. Lightweight imports only at top level
+import spaces
+import gradio as gr
+from gradio import Server
+from gradio.data_classes import FileData
+from fastapi import Request
+from slowapi.errors import RateLimitExceeded
+from slowapi import _rate_limit_exceeded_handler
+TTS_ENGINE = os.getenv("TTS_ENGINE", "chatterbox").lower()
+# 2. Create Server instead of FastAPI
+# Name it 'demo' so HF Space picks it up automatically
+demo = Server()
+# -----------------------------------------------------
+# INTEGRATE SERVER.PY ROUTES
+# -----------------------------------------------------
+from server import router, limiter, enforce_content_length_limit
+from tools_api import router as tools_router
+demo.include_router(router)
+demo.include_router(tools_router)
+demo.state.limiter = limiter
+demo.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
+# Apply content length middleware to the main app
+@demo.middleware("http")
+async def content_length_middleware(request: Request, call_next):
+    return await enforce_content_length_limit(request, call_next)
+@demo.get("/api/health")
+def health():
+    return {"status": "ok", "tts": TTS_ENGINE}
+# -----------------------------------------------------
+# ZERO GPU FUNCTION — lazy-loads torch/CUDA
+# -----------------------------------------------------
+@spaces.GPU(duration=60)
+def run_pipeline(job_id: str):
+    from pipeline import process_job
+    return process_job(job_id)
+# -----------------------------------------------------
+# GRADIO API INTEGRATION (this is what ZeroGPU detects)
+# -----------------------------------------------------
+@demo.api(name="run_pipeline")
+def api_run_pipeline(job_id: str):
+    """
+    Exposed through Gradio's API engine.
+    ZeroGPU will allocate a GPU when this endpoint is called.
+    """
+    return run_pipeline(job_id)
+# -----------------------------------------------------
+# OPTIONAL: Gradio UI (if you still want a basic UI)
+# -----------------------------------------------------
+with gr.Blocks(title="VideoVoice API") as ui:
+    gr.Markdown(f"# VideoVoice API ({TTS_ENGINE.upper()})")
+    job_id_box = gr.Textbox(label="Job ID")
+    output_box = gr.Textbox(label="Result")
+    btn = gr.Button("Run Pipeline")
+    btn.click(fn=run_pipeline, inputs=job_id_box, outputs=output_box)
+# Mount the UI onto the Server instance
+gr.mount_gradio_app(demo, ui, path="/ui")
+# -----------------------------------------------------
+# ENTRYPOINT
+# -----------------------------------------------------
+if __name__ == "__main__":
+    demo.launch(show_error=True)

graphify-out/.graphify_python ADDED Viewed

	@@ -0,0 +1 @@


1	+ /Users/rafa/.local/share/uv/tools/graphifyy/bin/python

graphify-out/.graphify_root ADDED Viewed

	@@ -0,0 +1 @@


1	+ /Users/rafa/MscAi/VideoVoice-be

graphify-out/GRAPH_REPORT.md ADDED Viewed

	@@ -0,0 +1,465 @@

+# Graph Report - VideoVoice-be  (2026-05-17)
+## Corpus Check
+- 60 files · ~254,726 words
+- Verdict: corpus is large enough that graph structure adds value.
+## Summary
+- 1065 nodes · 1859 edges · 64 communities detected
+- Extraction: 79% EXTRACTED · 21% INFERRED · 0% AMBIGUOUS · INFERRED: 397 edges (avg confidence: 0.62)
+- Token cost: 0 input · 0 output
+## Community Hubs (Navigation)
+- [[_COMMUNITY_Community 0|Community 0]]
+- [[_COMMUNITY_Community 1|Community 1]]
+- [[_COMMUNITY_Community 2|Community 2]]
+- [[_COMMUNITY_Community 3|Community 3]]
+- [[_COMMUNITY_Community 4|Community 4]]
+- [[_COMMUNITY_Community 5|Community 5]]
+- [[_COMMUNITY_Community 6|Community 6]]
+- [[_COMMUNITY_Community 7|Community 7]]
+- [[_COMMUNITY_Community 8|Community 8]]
+- [[_COMMUNITY_Community 9|Community 9]]
+- [[_COMMUNITY_Community 10|Community 10]]
+- [[_COMMUNITY_Community 11|Community 11]]
+- [[_COMMUNITY_Community 12|Community 12]]
+- [[_COMMUNITY_Community 13|Community 13]]
+- [[_COMMUNITY_Community 14|Community 14]]
+- [[_COMMUNITY_Community 15|Community 15]]
+- [[_COMMUNITY_Community 16|Community 16]]
+- [[_COMMUNITY_Community 17|Community 17]]
+- [[_COMMUNITY_Community 18|Community 18]]
+- [[_COMMUNITY_Community 19|Community 19]]
+- [[_COMMUNITY_Community 20|Community 20]]
+- [[_COMMUNITY_Community 21|Community 21]]
+- [[_COMMUNITY_Community 22|Community 22]]
+- [[_COMMUNITY_Community 23|Community 23]]
+- [[_COMMUNITY_Community 25|Community 25]]
+- [[_COMMUNITY_Community 33|Community 33]]
+- [[_COMMUNITY_Community 34|Community 34]]
+- [[_COMMUNITY_Community 35|Community 35]]
+- [[_COMMUNITY_Community 36|Community 36]]
+- [[_COMMUNITY_Community 37|Community 37]]
+- [[_COMMUNITY_Community 38|Community 38]]
+- [[_COMMUNITY_Community 39|Community 39]]
+- [[_COMMUNITY_Community 40|Community 40]]
+- [[_COMMUNITY_Community 41|Community 41]]
+- [[_COMMUNITY_Community 42|Community 42]]
+- [[_COMMUNITY_Community 43|Community 43]]
+- [[_COMMUNITY_Community 44|Community 44]]
+- [[_COMMUNITY_Community 45|Community 45]]
+- [[_COMMUNITY_Community 46|Community 46]]
+- [[_COMMUNITY_Community 47|Community 47]]
+- [[_COMMUNITY_Community 48|Community 48]]
+- [[_COMMUNITY_Community 49|Community 49]]
+- [[_COMMUNITY_Community 50|Community 50]]
+- [[_COMMUNITY_Community 51|Community 51]]
+- [[_COMMUNITY_Community 52|Community 52]]
+- [[_COMMUNITY_Community 53|Community 53]]
+- [[_COMMUNITY_Community 54|Community 54]]
+- [[_COMMUNITY_Community 55|Community 55]]
+- [[_COMMUNITY_Community 56|Community 56]]
+- [[_COMMUNITY_Community 57|Community 57]]
+- [[_COMMUNITY_Community 58|Community 58]]
+- [[_COMMUNITY_Community 59|Community 59]]
+- [[_COMMUNITY_Community 60|Community 60]]
+- [[_COMMUNITY_Community 61|Community 61]]
+- [[_COMMUNITY_Community 62|Community 62]]
+- [[_COMMUNITY_Community 63|Community 63]]
+- [[_COMMUNITY_Community 64|Community 64]]
+- [[_COMMUNITY_Community 65|Community 65]]
+- [[_COMMUNITY_Community 66|Community 66]]
+- [[_COMMUNITY_Community 67|Community 67]]
+- [[_COMMUNITY_Community 68|Community 68]]
+- [[_COMMUNITY_Community 69|Community 69]]
+- [[_COMMUNITY_Community 70|Community 70]]
+- [[_COMMUNITY_Community 71|Community 71]]
+## God Nodes (most connected - your core abstractions)
+1. `Qwen3TTSSpeakerEncoderConfig` - 49 edges
+2. `Qwen3TTSTalkerCodePredictorConfig` - 49 edges
+3. `Qwen3TTSTalkerConfig` - 49 edges
+4. `Qwen3TTSConfig` - 48 edges
+5. `Qwen3TTSModel` - 21 edges
+6. `PostResult` - 19 edges
+7. `Qwen3TTSTalkerForConditionalGeneration` - 19 edges
+8. `Qwen3TTSTalkerCodePredictorModelForConditionalGeneration` - 17 edges
+9. `generate()` - 15 edges
+10. `BasePoster` - 14 edges
+## Surprising Connections (you probably didn't know these)
+- `chatterbox-tts==0.1.7 --no-deps` --semantically_similar_to--> `omnivoice>=0.1.4`  [INFERRED] [semantically similar]
+  requirements.txt → requirements-omni.txt
+- `gradio==6.8.0` --semantically_similar_to--> `gradio==6.12.0 (omni)`  [INFERRED] [semantically similar]
+  requirements.txt → requirements-omni.txt
+- `enforce_content_length_limit()` --calls--> `content_length_middleware()`  [INFERRED]
+  server.py → app.py
+- `run_pipeline()` --calls--> `separate_audio()`  [INFERRED]
+  pipeline.py → steps/s1b_separate.py
+- `run_pipeline()` --calls--> `transcribe()`  [INFERRED]
+  pipeline.py → steps/s2_transcribe.py
+## Hyperedges (group relationships)
+- **Six-step translation pipeline** —  [EXTRACTED 1.00]
+- **TTS engine split (env, two reqs files, two spaces, conditional imports)** —  [EXTRACTED 1.00]
+- **Live pipeline run (s1b->s2->s3->s4->s5->s6)** —  [EXTRACTED 1.00]
+## Communities
+### Community 0 - "Community 0"
+Cohesion: 0.04
+Nodes (70): Qwen3TTSConfig, Qwen3TTSSpeakerEncoderConfig, Qwen3TTSTalkerCodePredictorConfig, Qwen3TTSTalkerConfig, r"""     This is the configuration class to store the configuration of a [`Qwen3, r"""     This is the configuration class to store the configuration of a [`Qwen3, This is the configuration class to store the configuration of a [`Qwen3TTSForCon, r"""     This is the configuration class to store the configuration of a [`Qwen3 (+62 more)
+### Community 1 - "Community 1"
+Cohesion: 0.02
+Nodes (118): api_run_pipeline(), content_length_middleware(), ZeroGPU-compatible entrypoint using gradio.Server. Server extends FastAPI, so al, Exposed through Gradio's API engine.     ZeroGPU will allocate a GPU when this e, run_pipeline(), BaseHTTPMiddleware, BaseModel, _artifact_reaper_loop() (+110 more)
+### Community 2 - "Community 2"
+Cohesion: 0.04
+Nodes (38): default(), DistributedGroupResidualVectorQuantization, DistributedResidualVectorQuantization, ema_inplace(), EuclideanCodebook, kmeans(), laplace_smoothing(), postprocess_emb() (+30 more)
+### Community 3 - "Community 3"
+Cohesion: 0.05
+Nodes (57): ABC, BasePoster, Abstract base class for platform posters., Save a debug screenshot on failure., BasePoster, _build_system_prompt(), _build_user_prompt(), format_caption() (+49 more)
+### Community 4 - "Community 4"
+Cohesion: 0.06
+Nodes (31): _audio_to_tuple(), _build_choices_and_map(), build_demo(), build_parser(), _collect_gen_kwargs(), _detect_model_kind(), _dtype_from_str(), main() (+23 more)
+### Community 5 - "Community 5"
+Cohesion: 0.06
+Nodes (59): post(), _assign_words_to_segments(), _extract_words(), _get_faster_whisper_model(), _get_local_whisper_backend(), _get_openai_whisper_model(), _normalise_segments(), Step 3: Transcribe audio with timestamps.  Primary local backend (device-depende (+51 more)
+### Community 6 - "Community 6"
+Cohesion: 0.07
+Nodes (50): forward(), generate(), generate_speaker_prompt(), from_pretrained(), _clip_audio(), _ensure_browser_wav(), _filter_preview_segments(), _free_memory() (+42 more)
+### Community 7 - "Community 7"
+Cohesion: 0.05
+Nodes (49): FFmpeg concat list (synced TTS), Try-Now app panel, app.js script ref, Comparison table (HeyGen, Rask, ElevenLabs, Synthesia), Hero section + 23+ languages, Frontend index.html, Source/target language selectors, Pricing tiers (Free/Starter/Creator) (+41 more)
+### Community 8 - "Community 8"
+Cohesion: 0.07
+Nodes (35): _collect_output(), _log_step_done(), main(), pipeline.py — Core pipeline: CLI entrypoint + importable run_pipeline() for Grad, Print duration + separator line for a completed step., Collect all yields and the return value from the generator., Run the full translation pipeline, yielding progress messages.      Args:, run_pipeline() (+27 more)
+### Community 9 - "Community 9"
+Cohesion: 0.09
+Nodes (27): $(), clearFile(), createDemoCard(), detectPlatform(), formatBytes(), formatDemoDate(), formatDemoTitle(), getUsedVideos() (+19 more)
+### Community 10 - "Community 10"
+Cohesion: 0.09
+Nodes (34): Step 4: Translate segment texts using Pollinations chat completions API (OpenAI-, Translate a batch of segments into target_language., _translate_batch(), bedrock_converse(), bedrock_fallback(), build_client(), log_llm_call(), parse_json_array() (+26 more)
+### Community 11 - "Community 11"
+Cohesion: 0.08
+Nodes (32): _apply_demucs(), _get_model(), _load_and_normalise(), Step 1b: Separate vocals from accompaniment using Demucs (Python API).  In-proce, Lazy-load htdemucs once per process. Module-level semantics; we load     on firs, GPU-bound inference call. `mix` shape: [1, channels, time]., Load WAV, resample/remix to match model requirements, z-normalise., Separate vocals from accompaniment using Demucs htdemucs (Python API).      Args (+24 more)
+### Community 12 - "Community 12"
+Cohesion: 0.1
+Nodes (28): tools_api — Standalone endpoints for creator quick tools.  Lives alongside the m, audio_cleanup_endpoint(), dramabox_endpoint(), _ext_to_media_type(), APIRouter for /api/tools/* endpoints.  Each endpoint is sync request-response (n, Serve a generated artifact. Run dirs auto-expire after RUN_TTL_SECONDS., Manual reap trigger (mostly for testing). Auto-reap runs on a timer., Serve a generated artifact. Run dirs auto-expire after RUN_TTL_SECONDS. (+20 more)
+### Community 13 - "Community 13"
+Cohesion: 0.12
+Nodes (27): build_for_job(), ensure_transcription(), extract_audio_hq(), extract_reference_audio(), get_audio_duration(), get_device(), load_chatterbox(), main() (+19 more)
+### Community 14 - "Community 14"
+Cohesion: 0.12
+Nodes (23): build_t3_cond(), main(), prepare_sample(), prepare_sample.py — Turn one dataset.jsonl row into the exact tensors T3.loss(), Build the speaker conditioning (frozen during training)., MTLTokenizer + SOT/EOT padding (mirrors what generate() does internally)., S3Tokenizer on the target dubbed audio → speech tokens (the LABEL).      Critica, Turn one dataset row into ready-to-train tensors. (+15 more)
+### Community 15 - "Community 15"
+Cohesion: 0.13
+Nodes (26): _compress_silences(), _detect_pauses(), _distribute_padding(), _find_tts_silences(), _generate_silence(), _get_wav_duration(), _pad_silence(), _pause_aware_sync() (+18 more)
+### Community 16 - "Community 16"
+Cohesion: 0.19
+Nodes (18): _burn_in(), _clamp(), _extract_audio(), _force_style_for(), _format_timestamp_srt(), _format_timestamp_vtt(), generate_subtitles(), _is_video() (+10 more)
+### Community 17 - "Community 17"
+Cohesion: 0.22
+Nodes (12): download_result(), _is_noise(), main(), Batch translate Instagram reels to English via the VideoVoice server API.  Usage, Extract the Instagram reel shortcode from a URL, e.g. 'DWn_yPoDsYw'., Submit a single video URL and return the job_id., Return True if a log line is internal noise we don't want in the log., Poll job status until complete or error. Returns final messages and collected lo (+4 more)
+### Community 18 - "Community 18"
+Cohesion: 0.23
+Nodes (12): evaluate(), load_baseline(), load_with_lora(), main(), pick_held_out_samples(), print_summary(), eval.py — Evaluate the fine-tuned LoRA against the un-tuned baseline.  Picks N s, Return overshoot samples (duration_diff > 0.2) — these are NOT in the     asymme (+4 more)
+### Community 19 - "Community 19"
+Cohesion: 0.24
+Nodes (11): extract_creator(), _extract_instagram(), _extract_tiktok(), _extract_youtube(), _load_cache(), Extract original creator @username from video URLs., YouTube: visit video page, extract channel name from meta tags., Extract the @username of the original creator from the video URL.      Uses Play (+3 more)
+### Community 20 - "Community 20"
+Cohesion: 0.27
+Nodes (9): get_fallback_mode(), _get_handler(), get_translation_prompt(), post_translate(), Language-specific handlers for the translation pipeline.  Each language that nee, Return a language-specific translation prompt, or the default., Return 'bedrock' or 'google' depending on the language., Run any language-specific post-processing after translation. (+1 more)
+### Community 21 - "Community 21"
+Cohesion: 0.38
+Nodes (6): _ensure_server(), _generate_impl(), generate_scene(), Dramabox — Resemble AI directable speech engine.  Single-Space tool: generates a, Lazy-import the Dramabox model + load checkpoints once. Raises a clean     Runti, Run Dramabox on `prompt` and write the resulting WAV under `out_dir`.      Retur
+### Community 22 - "Community 22"
+Cohesion: 0.53
+Nodes (5): main(), _prefetch_chatterbox(), _prefetch_demucs(), _prefetch_faster_whisper(), Prefetch model weights into HF_HOME for faster cold starts on Spaces.
+### Community 23 - "Community 23"
+Cohesion: 0.33
+Nodes (6): app.py validation, pipeline.py simplified, steps/s4_preview.py, steps/s4_tts.py conditional imports, server.py /api/config, TTS_ENGINE env var
+### Community 25 - "Community 25"
+Cohesion: 1.0
+Nodes (2): gradio==6.8.0, gradio==6.12.0 (omni)
+### Community 33 - "Community 33"
+Cohesion: 1.0
+Nodes (1): Load a Qwen3 TTS model and its processor in HuggingFace `from_pretrained` style.
+### Community 34 - "Community 34"
+Cohesion: 1.0
+Nodes (1): Build voice-clone prompt items from reference audio (and optionally reference te
+### Community 35 - "Community 35"
+Cohesion: 1.0
+Nodes (1): Voice clone speech using the Base model.          You can provide either:
+### Community 36 - "Community 36"
+Cohesion: 1.0
+Nodes (1): Generate speech with the VoiceDesign model using natural-language style instruct
+### Community 37 - "Community 37"
+Cohesion: 1.0
+Nodes (1): Generate speech with the CustomVoice model using a predefined speaker id, option
+### Community 38 - "Community 38"
+Cohesion: 1.0
+Nodes (1): Delete stale per-job artifact directories from ARTIFACTS_ROOT.
+### Community 39 - "Community 39"
+Cohesion: 1.0
+Nodes (1): Reject oversized uploads before body parsing.
+### Community 40 - "Community 40"
+Cohesion: 1.0
+Nodes (1): Run the translation pipeline in a background thread, pushing progress to the job
+### Community 41 - "Community 41"
+Cohesion: 1.0
+Nodes (1): List whitelisted MP4 demo videos from outputs/ and data/.
+### Community 42 - "Community 42"
+Cohesion: 1.0
+Nodes (1): Return curated showcase entries with resolved streaming URLs.
+### Community 43 - "Community 43"
+Cohesion: 1.0
+Nodes (1): Submit a video for translation.
+### Community 44 - "Community 44"
+Cohesion: 1.0
+Nodes (1): Poll endpoint returning new messages since index `after`, plus live wait status.
+### Community 45 - "Community 45"
+Cohesion: 1.0
+Nodes (1): User selects a TTS model after previewing.
+### Community 46 - "Community 46"
+Cohesion: 1.0
+Nodes (1): Serve a preview audio WAV file.
+### Community 47 - "Community 47"
+Cohesion: 1.0
+Nodes (1): Download the translated video.
+### Community 48 - "Community 48"
+Cohesion: 1.0
+Nodes (1): Create artifact directories and start background cleanup.
+### Community 49 - "Community 49"
+Cohesion: 1.0
+Nodes (1): Sync TTS audio using pause-aware strategy: compress silences first, then atempo.
+### Community 50 - "Community 50"
+Cohesion: 1.0
+Nodes (1): Rewrite WAV with silence regions compressed to keep_ratio of their original dura
+### Community 51 - "Community 51"
+Cohesion: 1.0
+Nodes (1): Insert extra silence distributed across detected pause points.
+### Community 52 - "Community 52"
+Cohesion: 1.0
+Nodes (1): Generate a silent WAV file of given duration.
+### Community 53 - "Community 53"
+Cohesion: 1.0
+Nodes (1): Sync each TTS segment to its original timestamp window and stitch into a single
+### Community 54 - "Community 54"
+Cohesion: 1.0
+Nodes (1): Translate the text of each segment into target_language in batches.      Args:
+### Community 55 - "Community 55"
+Cohesion: 1.0
+Nodes (1): Load + run Chatterbox inside a single GPU-decorated scope.      ZeroGPU only int
+### Community 56 - "Community 56"
+Cohesion: 1.0
+Nodes (1): Remove trailing noise/artifacts after speech ends.
+### Community 57 - "Community 57"
+Cohesion: 1.0
+Nodes (1): Hard-trim TTS output to orig_dur * headroom, with a short fade-out.
+### Community 58 - "Community 58"
+Cohesion: 1.0
+Nodes (1): Clip audio to max_sec to prevent excessively slow voice cloning.
+### Community 59 - "Community 59"
+Cohesion: 1.0
+Nodes (1): Numpy variant of _trim_trailing_noise for engines returning np.ndarray.
+### Community 60 - "Community 60"
+Cohesion: 1.0
+Nodes (1): Perform full OmniVoice processing (load + generate batch) inside a GPU-decorated
+### Community 61 - "Community 61"
+Cohesion: 1.0
+Nodes (1): Generate speech for all segments using OmniVoice voice cloning.
+### Community 62 - "Community 62"
+Cohesion: 1.0
+Nodes (1): Synthesise translated text for each segment using voice cloned from reference au
+### Community 63 - "Community 63"
+Cohesion: 1.0
+Nodes (1): torch==2.6.0
+### Community 64 - "Community 64"
+Cohesion: 1.0
+Nodes (1): fastapi
+### Community 65 - "Community 65"
+Cohesion: 1.0
+Nodes (1): yt-dlp
+### Community 66 - "Community 66"
+Cohesion: 1.0
+Nodes (1): diffusers==0.29.0
+### Community 67 - "Community 67"
+Cohesion: 1.0
+Nodes (1): ARTIFACTS_ROOT env
+### Community 68 - "Community 68"
+Cohesion: 1.0
+Nodes (1): AWS g4dn.xlarge alternative
+### Community 69 - "Community 69"
+Cohesion: 1.0
+Nodes (1): nodejs (system pkg)
+### Community 70 - "Community 70"
+Cohesion: 1.0
+Nodes (1): fonts-noto-core / cjk
+### Community 71 - "Community 71"
+Cohesion: 1.0
+Nodes (1): graphify project rules
+## Knowledge Gaps
+- **329 isolated node(s):** `server.py — FastAPI backend for VideoVoice.  Endpoints:   POST /api/jobs`, `Download video from Instagram/YouTube using yt-dlp.`, `Allow only trusted social platforms for yt-dlp.`, `Read media duration from ffprobe.`, `Report CUDA/MPS availability.` (+324 more)
+  These have ≤1 connection - possible missing edges or undocumented components.
+- **Thin community `Community 25`** (2 nodes): `gradio==6.8.0`, `gradio==6.12.0 (omni)`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 33`** (1 nodes): `Load a Qwen3 TTS model and its processor in HuggingFace `from_pretrained` style.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 34`** (1 nodes): `Build voice-clone prompt items from reference audio (and optionally reference te`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 35`** (1 nodes): `Voice clone speech using the Base model.          You can provide either:`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 36`** (1 nodes): `Generate speech with the VoiceDesign model using natural-language style instruct`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 37`** (1 nodes): `Generate speech with the CustomVoice model using a predefined speaker id, option`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 38`** (1 nodes): `Delete stale per-job artifact directories from ARTIFACTS_ROOT.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 39`** (1 nodes): `Reject oversized uploads before body parsing.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 40`** (1 nodes): `Run the translation pipeline in a background thread, pushing progress to the job`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 41`** (1 nodes): `List whitelisted MP4 demo videos from outputs/ and data/.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 42`** (1 nodes): `Return curated showcase entries with resolved streaming URLs.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 43`** (1 nodes): `Submit a video for translation.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 44`** (1 nodes): `Poll endpoint returning new messages since index `after`, plus live wait status.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 45`** (1 nodes): `User selects a TTS model after previewing.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 46`** (1 nodes): `Serve a preview audio WAV file.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 47`** (1 nodes): `Download the translated video.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 48`** (1 nodes): `Create artifact directories and start background cleanup.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 49`** (1 nodes): `Sync TTS audio using pause-aware strategy: compress silences first, then atempo.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 50`** (1 nodes): `Rewrite WAV with silence regions compressed to keep_ratio of their original dura`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 51`** (1 nodes): `Insert extra silence distributed across detected pause points.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 52`** (1 nodes): `Generate a silent WAV file of given duration.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 53`** (1 nodes): `Sync each TTS segment to its original timestamp window and stitch into a single`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 54`** (1 nodes): `Translate the text of each segment into target_language in batches.      Args:`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 55`** (1 nodes): `Load + run Chatterbox inside a single GPU-decorated scope.      ZeroGPU only int`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 56`** (1 nodes): `Remove trailing noise/artifacts after speech ends.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 57`** (1 nodes): `Hard-trim TTS output to orig_dur * headroom, with a short fade-out.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 58`** (1 nodes): `Clip audio to max_sec to prevent excessively slow voice cloning.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 59`** (1 nodes): `Numpy variant of _trim_trailing_noise for engines returning np.ndarray.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 60`** (1 nodes): `Perform full OmniVoice processing (load + generate batch) inside a GPU-decorated`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 61`** (1 nodes): `Generate speech for all segments using OmniVoice voice cloning.`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 62`** (1 nodes): `Synthesise translated text for each segment using voice cloned from reference au`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 63`** (1 nodes): `torch==2.6.0`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 64`** (1 nodes): `fastapi`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 65`** (1 nodes): `yt-dlp`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 66`** (1 nodes): `diffusers==0.29.0`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 67`** (1 nodes): `ARTIFACTS_ROOT env`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 68`** (1 nodes): `AWS g4dn.xlarge alternative`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 69`** (1 nodes): `nodejs (system pkg)`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 70`** (1 nodes): `fonts-noto-core / cjk`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+- **Thin community `Community 71`** (1 nodes): `graphify project rules`
+  Too small to be a meaningful cluster - may be noise or needs more connections extracted.
+## Suggested Questions
+_Questions this graph is uniquely positioned to answer:_
+- **Why does `synthesise_segments()` connect `Community 6` to `Community 8`, `Community 11`?**
+  _High betweenness centrality (0.324) - this node is a cross-community bridge._
+- **Why does `generate()` connect `Community 6` to `Community 0`, `Community 4`?**
+  _High betweenness centrality (0.200) - this node is a cross-community bridge._
+- **Are the 44 inferred relationships involving `Qwen3TTSSpeakerEncoderConfig` (e.g. with `Res2NetBlock` and `SqueezeExcitationBlock`) actually correct?**
+  _`Qwen3TTSSpeakerEncoderConfig` has 44 INFERRED edges - model-reasoned connections that need verification._
+- **Are the 44 inferred relationships involving `Qwen3TTSTalkerCodePredictorConfig` (e.g. with `Res2NetBlock` and `SqueezeExcitationBlock`) actually correct?**
+  _`Qwen3TTSTalkerCodePredictorConfig` has 44 INFERRED edges - model-reasoned connections that need verification._
+- **Are the 44 inferred relationships involving `Qwen3TTSTalkerConfig` (e.g. with `Res2NetBlock` and `SqueezeExcitationBlock`) actually correct?**
+  _`Qwen3TTSTalkerConfig` has 44 INFERRED edges - model-reasoned connections that need verification._
+- **Are the 44 inferred relationships involving `Qwen3TTSConfig` (e.g. with `Res2NetBlock` and `SqueezeExcitationBlock`) actually correct?**
+  _`Qwen3TTSConfig` has 44 INFERRED edges - model-reasoned connections that need verification._
+- **What connects `server.py — FastAPI backend for VideoVoice.  Endpoints:   POST /api/jobs`, `Download video from Instagram/YouTube using yt-dlp.`, `Allow only trusted social platforms for yt-dlp.` to the rest of the system?**
+  _329 weakly-connected nodes found - possible documentation gaps or missing edges._

graphify-out/graph.html ADDED Viewed

The diff for this file is too large to render. See raw diff

packages.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+ffmpeg
+nodejs
+fonts-noto-core
+fonts-noto-cjk

pipeline.py ADDED Viewed

	@@ -0,0 +1,363 @@

+"""
+pipeline.py — Core pipeline: CLI entrypoint + importable run_pipeline() for Gradio.
+Usage:
+    python pipeline.py --input data/test_video_3.mp4 --target-lang Spanish
+"""
+import argparse
+import os
+import io
+import logging
+import os
+import shutil
+import sys
+import threading
+import time
+from pathlib import Path
+from typing import Generator
+from steps.s1_extract_audio import extract_audio, extract_audio_hq
+from steps.s2_transcribe import transcribe, POLLEN_TRANSCRIBE_MODEL
+from steps.s3_translate import translate
+from steps.s4_tts import synthesise_segments
+from steps.s5_sync import sync_and_stitch
+from steps.s6_captions import generate_captions
+from steps.s6_merge import merge_audio_video
+def _log_step_done(label: str, start: float) -> None:
+    """Print how long a pipeline step took, followed by a separator line.
+    Args:
+        label: Short step tag shown in brackets (e.g. "s1", "s4_tts").
+        start: time.time() timestamp captured when the step began.
+    The duration is truncated to whole seconds; from 60 seconds upward it is
+    shown as "<m>m <s>s", otherwise as "<s>s".
+    """
+    elapsed = time.time() - start
+    if elapsed >= 60:
+        mins, secs = divmod(elapsed, 60)
+        print(f"[{label}] Duration: {int(mins)}m {int(secs)}s")
+    else:
+        print(f"[{label}] Duration: {int(elapsed)}s")
+    # Fixed-width rule so step boundaries are easy to spot in the log.
+    print("=" * 40)
+# Target language display name -> short language code.
+# The keys are the accepted --target-lang CLI choices; the value is what
+# run_pipeline() passes to the TTS step as language_id.
+LANGUAGE_CODES = {
+    "Arabic": "ar",
+    "Chinese": "zh",
+    "Danish": "da",
+    "Dutch": "nl",
+    "English": "en",
+    "Finnish": "fi",
+    "French": "fr",
+    "German": "de",
+    "Greek": "el",
+    "Hebrew": "he",
+    "Hindi": "hi",
+    "Italian": "it",
+    "Japanese": "ja",
+    "Korean": "ko",
+    "Malay": "ms",
+    "Norwegian": "no",
+    "Polish": "pl",
+    "Portuguese": "pt",
+    "Russian": "ru",
+    "Spanish": "es",
+    "Swahili": "sw",
+    "Swedish": "sv",
+    "Turkish": "tr",
+    # NOTE(review): Urdu reuses Hindi's code instead of ISO 639-1 "ur" —
+    # presumably a deliberate TTS-engine workaround; confirm before changing.
+    "Urdu": "hi",
+}
+def run_pipeline(
+    video_path: str,
+    target_language: str = "Spanish",
+    source_language: str = "auto",
+    output_path: str | None = None,
+    voice_mode: str = "chatterbox",
+    preview_event: threading.Event | None = None,
+    job_state: dict | None = None,
+    captions: bool = True,
+    preserve_music: bool = False,
+    data_dir: str | None = None,
+    video_link: str | None = None,
+) -> Generator[str | dict, None, str]:
+    """
+    Run the full translation pipeline, yielding progress messages.
+    Stages, in order: extract audio, (optionally) separate vocals from music,
+    transcribe, translate, synthesise speech, sync to the original timestamps,
+    (optionally) generate captions, and merge everything back into the video.
+    Args:
+        video_path: Path to the input video file.
+        target_language: Target language name (e.g. "Spanish"). Names missing
+            from LANGUAGE_CODES fall back to the Spanish code "es" for TTS.
+        source_language: ISO-639-1 code of the source language, or "auto" for
+            Whisper to auto-detect (default "auto"). Forcing a wrong code makes
+            Whisper silently translate-and-transcribe instead of transcribing.
+        output_path: Where to save the output video. When None it becomes
+            "<data_dir>/output.mp4" if data_dir is set, otherwise
+            "output_<input stem>_<target language lowercased>.mp4".
+        voice_mode: TTS engine name, forwarded to the TTS step as model_name.
+            If the TTS_ENGINE env var is set and differs, a warning is yielded
+            and TTS_ENGINE wins.
+        preview_event: Deprecated - kept for compatibility, but unused in single-engine mode.
+        job_state: Shared dict with the server. Not read by this function.
+        captions: When True, captions are written to tmp/captions.ass and
+            handed to the merge step.
+        preserve_music: When True, an extra separation step splits vocals from
+            accompaniment; the vocals feed transcription/TTS and the
+            accompaniment is handed to the merge step.
+        data_dir: Optional directory that receives copies of the run's log and
+            JSON artifacts once the run ends (success or failure).
+        video_link: Optional source URL recorded in tmp/transcription.json with
+            its query string and fragment removed.
+    Yields:
+        str: Progress messages for each step.
+        dict: Any non-result dict emitted by the TTS step is forwarded as-is;
+            the "__TTS_RESULT__" dict is consumed here and never yielded.
+    Returns:
+        str: Path to the translated output video (the generator's return
+        value, available to callers via StopIteration.value).
+    Side effects:
+        Deletes and recreates the relative "tmp" directory, and replaces
+        sys.stdout / sys.stderr with a tee for the duration of the run (both
+        are restored in the finally block).
+        NOTE(review): both are process-wide, so overlapping runs in one
+        process would interfere with each other — confirm callers serialise.
+    """
+    # Single-engine mode: voice_mode must match TTS_ENGINE if set
+    space_engine = os.getenv("TTS_ENGINE")
+    if space_engine and voice_mode != space_engine:
+        yield f"⚠️ Warning: voice_mode='{voice_mode}' but Space TTS_ENGINE='{space_engine}'. Using {space_engine}.\n"
+        voice_mode = space_engine
+    # Fixed step count (no more preview_both mode)
+    total_steps = 6 + (1 if preserve_music else 0)
+    # Prepare output path
+    if output_path is None:
+        if data_dir:
+            output_path = str(Path(data_dir) / "output.mp4")
+        else:
+            stem = Path(video_path).stem
+            output_path = f"output_{stem}_{target_language.lower()}.mp4"
+    # Clean tmp dir
+    shutil.rmtree("tmp", ignore_errors=True)
+    os.makedirs("tmp/audio/source", exist_ok=True)
+    # Set up logging to tmp/logs.txt (clean logs only, no torch/chatterbox noise)
+    log_path = "tmp/logs.txt"
+    _log_file = open(log_path, "w", encoding="utf-8")
+    _orig_stdout = sys.stdout
+    _orig_stderr = sys.stderr
+    # Patterns to filter out of log file (still shown in terminal)
+    _NOISE = (
+        "Sampling:", "sampling", "UserWarning", "FutureWarning", "DeprecationWarning",
+        "torch.backends", "torch.functional", "torch.fft", "torchaudio/compliance",
+        "sdp_kernel", "LoRACompatible", "pkg_resources", "Fetching",
+        "output_attentions", "TRANSFORMERS_VERBOSITY",
+        "istft", "stft", "resize_", "inverse_transform",
+        "PerthNet", "loaded Perth", "diffusers/models",
+        "chatterbox/models/s3gen", "alignment_stream_analyzer",
+        "WARNING:chatterbox",
+    )
+    class _Tee(io.TextIOBase):
+        """Write to both the original stream and the log file (filtered)."""
+        def __init__(self, original, filter_noise=False):
+            self._original = original
+            self._filter = filter_noise
+        def write(self, s):
+            # The terminal always receives the text; only the log copy is filtered.
+            self._original.write(s)
+            if self._filter and any(p in s for p in _NOISE):
+                return len(s)
+            # Guard against writes that arrive after the finally block closed the log.
+            if not _log_file.closed:
+                _log_file.write(s)
+                _log_file.flush()
+            return len(s)
+        def flush(self):
+            self._original.flush()
+            if not _log_file.closed:
+                _log_file.flush()
+    sys.stdout = _Tee(_orig_stdout, filter_noise=True)
+    sys.stderr = _Tee(_orig_stderr, filter_noise=True)
+    try:
+        yield f"🎬 Starting pipeline: {video_path} → {target_language}\n"
+        # Step 1: Extract audio
+        yield f"🔊 Step 1/{total_steps}: Extracting audio...\n"
+        _t0 = time.time()
+        audio_path = extract_audio(video_path, "tmp/audio/source/extracted_audio.wav")
+        yield f"   ✓ Audio extracted: {audio_path}\n"
+        # Step 1b: Source separation (conditional)
+        vocals_path = audio_path  # default: use full mix
+        music_path = None
+        if preserve_music:
+            # Imported lazily so the separation module is only loaded when requested.
+            from steps.s1b_separate import separate_audio
+            audio_hq = extract_audio_hq(video_path, "tmp/audio/source/extracted_audio_hq.wav")
+            _log_step_done("s1", _t0)
+            yield f"🎵 Step 2/{total_steps}: Separating vocals from background music...\n"
+            _t0 = time.time()
+            vocals_path, music_path = separate_audio(audio_hq, "tmp/audio/source")
+            yield f"   ✓ Vocals and accompaniment separated\n"
+            _log_step_done("s1b", _t0)
+        else:
+            _log_step_done("s1", _t0)
+        # Step offset: steps after separation shift by 1 when preserve_music is on
+        step_offset = 1 if preserve_music else 0
+        # Step 2: Transcribe
+        yield f"📝 Step {2 + step_offset}/{total_steps}: Transcribing (Pollinations Whisper / mlx-whisper)...\n"
+        _t0 = time.time()
+        segments = transcribe(vocals_path, language=source_language)
+        yield f"   ✓ {len(segments)} segments transcribed\n"
+        for seg in segments:
+            yield f"   [{seg['start']:.1f}s–{seg['end']:.1f}s] {seg['text']}\n"
+        # Dump transcription to tmp for inspection
+        import json as _json
+        from urllib.parse import urlparse, urlunparse
+        with open("tmp/transcription.json", "w", encoding="utf-8") as _tf:
+            # NOTE(review): "model_provider" is hard-coded to "pollinations" even
+            # though the step message also mentions mlx-whisper — confirm it is
+            # accurate for every transcription backend.
+            out_data = {
+                "model_provider": "pollinations",
+                "model_name": POLLEN_TRANSCRIBE_MODEL,
+                "source_language": source_language,
+                "audio_path": vocals_path,
+                "segment_count": len(segments),
+                "total_duration": round(segments[-1]["end"], 2) if segments else 0,
+                "segments": [
+                    {
+                        "index": i,
+                        "start": seg["start"],
+                        "end": seg["end"],
+                        "duration": round(seg["end"] - seg["start"], 2),
+                        "text": seg["text"],
+                        **({"words": seg["words"]} if "words" in seg else {}),
+                    }
+                    for i, seg in enumerate(segments)
+                ],
+            }
+            if video_link:
+                # Drop query string and fragment, then put the link first in the JSON.
+                parsed = urlparse(video_link)
+                clean_link = urlunparse(parsed._replace(query="", fragment=""))
+                out_data = {"video_link": clean_link, **out_data}
+            _json.dump(out_data, _tf, indent=2, ensure_ascii=False)
+        _log_step_done("s2", _t0)
+        # Step 3: Translate
+        yield f"🌍 Step {3 + step_offset}/{total_steps}: Translating to {target_language}...\n"
+        _t0 = time.time()
+        segments = translate(segments, target_language)
+        yield f"   ✓ Translation complete\n"
+        for seg in segments:
+            yield f"   → {seg['translated_text']}\n"
+        # Unknown language names silently fall back to the Spanish code.
+        target_lang_code = LANGUAGE_CODES.get(target_language, "es")
+        _log_step_done("s3", _t0)
+        # ── Step 4: TTS Synthesis ───────────────────────────────
+        model_name = voice_mode  # Uses TTS_ENGINE env var in Space deployments
+        yield f"🗣️ Step {4 + step_offset}/{total_steps}: Synthesising speech ({model_name})...\n"
+        _t0 = time.time()
+        tts_gen = synthesise_segments(
+            segments, vocals_path,
+            language_id=target_lang_code,
+            output_dir="tmp/audio/tts",
+            model_name=model_name,
+        )
+        # The TTS step streams messages; the dict carrying "__TTS_RESULT__"
+        # replaces `segments`, every other message is forwarded to the caller.
+        for msg in tts_gen:
+            if isinstance(msg, dict) and "__TTS_RESULT__" in msg:
+                segments = msg["__TTS_RESULT__"]
+            else:
+                yield msg
+        yield f"   ✓ {len(segments)} segments synthesised\n"
+        _log_step_done("s4_tts", _t0)
+        # Step 5: Sync
+        yield f"⏱️ Step {5 + step_offset}/{total_steps}: Syncing audio to original timestamps...\n"
+        _t0 = time.time()
+        final_audio = sync_and_stitch(segments, "tmp/audio/final_audio.wav", "tmp/audio/tts_synced")
+        yield f"   ✓ Audio synced: {final_audio}\n"
+        _log_step_done("s5", _t0)
+        # Captions + Merge
+        captions_path = None
+        # The "s6" timer starts here, so it covers caption generation as well as the merge.
+        _t0 = time.time()
+        if captions:
+            captions_path = generate_captions(segments, "tmp/captions.ass", target_language=target_language)
+            yield f"   ✓ Captions generated: {captions_path}\n"
+        # Step 6: Merge
+        music_label = " + music" if music_path else ""
+        yield f"🎞️ Step {6 + step_offset}/{total_steps}: Merging translated audio{' + captions' if captions_path else ''}{music_label} into video...\n"
+        result = merge_audio_video(video_path, final_audio, output_path, captions_path=captions_path, music_path=music_path)
+        _log_step_done("s6", _t0)
+        yield f"\n✅ Done! Output saved to: {result}\n"
+    finally:
+        # Restore the real streams before anything else so later prints bypass the tee.
+        sys.stdout = _orig_stdout
+        sys.stderr = _orig_stderr
+        if not _log_file.closed:
+            _log_file.close()
+        if data_dir:
+            # Copy whichever artifacts exist; a failed run may have produced only some.
+            def _safe_copy(src, dst_name):
+                if os.path.exists(src):
+                    shutil.copy2(src, os.path.join(data_dir, dst_name))
+            _safe_copy(log_path, "logs.txt")
+            _safe_copy("tmp/transcription.json", "transcription.json")
+            _safe_copy("tmp/llm_calls.json", "llm_calls.json")
+            _safe_copy("tmp/audio/tts/tts_manifest.json", "tts_manifest.json")
+            _safe_copy("tmp/audio/tts/segment_comparison.json", "segment_comparison.json")
+        print(f"[pipeline] Logs saved → {log_path}")
+    # Only reached on success: if a step raised, the exception propagates out
+    # of the finally block and `result` is never read.
+    return result
+def _collect_output(gen: Generator) -> tuple[list[str], str | None]:
+    """Drain a pipeline generator, echoing progress and capturing its return value.
+    Args:
+        gen: Generator to exhaust (main() passes the one from run_pipeline()).
+    Returns:
+        (messages, output_path): every non-dict item yielded, in order, and the
+        generator's return value taken from StopIteration.value (None when the
+        generator returns nothing).
+    Each collected message is also printed immediately without an extra newline.
+    """
+    messages = []
+    output_path = None
+    try:
+        # next() is called by hand (rather than a for loop) so the generator's
+        # return value can be read off the StopIteration exception.
+        while True:
+            msg = next(gen)
+            if isinstance(msg, dict):
+                # Ignore preview sentinels in CLI mode (deprecated preview_both flow)
+                continue
+            messages.append(msg)
+            print(msg, end="", flush=True)
+    except StopIteration as e:
+        output_path = e.value
+    return messages, output_path
+def main() -> None:
+    """CLI entry point: parse arguments, run the pipeline, print the output path.
+    Flags: --input (required), --target-lang, --source-lang, --output,
+    --voice-mode and --preserve-music. Captions are not configurable here, so
+    run_pipeline() uses its own default for them.
+    """
+    parser = argparse.ArgumentParser(description="Video Translation Pipeline")
+    parser.add_argument("--input", required=True, help="Input video path")
+    parser.add_argument(
+        "--target-lang",
+        default="Spanish",
+        choices=list(LANGUAGE_CODES.keys()),
+        help="Target language (default: Spanish)",
+    )
+    parser.add_argument(
+        "--source-lang",
+        default="auto",
+        help="Source language ISO-639-1 code or 'auto' to let Whisper detect (default: auto)",
+    )
+    parser.add_argument("--output", default=None, help="Output video path")
+    parser.add_argument(
+        "--voice-mode",
+        default="chatterbox",
+        choices=["chatterbox", "omnivoice", "qwen3"],
+        help="TTS engine to use (default: chatterbox). Must match TTS_ENGINE env var in Space deployments.",
+    )
+    parser.add_argument(
+        "--preserve-music",
+        action="store_true",
+        help="Separate and preserve background music using Demucs",
+    )
+    args = parser.parse_args()
+    gen = run_pipeline(
+        video_path=args.input,
+        target_language=args.target_lang,
+        source_language=args.source_lang,
+        output_path=args.output,
+        voice_mode=args.voice_mode,
+        preserve_music=args.preserve_music,
+    )
+    # Progress is already printed while draining; only the final path is needed here.
+    _, output = _collect_output(gen)
+    print(f"\nFinal output: {output}")
+if __name__ == "__main__":
+    main()

pyproject.toml ADDED Viewed

	@@ -0,0 +1,59 @@

+[project]
+name = "videovoice"
+version = "1.0.0"
+description = "AI-powered short video translation with zero-shot voice cloning"
+readme = "README.md"
+license = "MIT"
+requires-python = ">=3.10,<3.13"
+dependencies = [
+    "openai>=2.30.0",
+    "requests>=2.33.0",
+    "python-dotenv>=1.2.2",
+    "pydub>=0.25.1",
+    "ffmpeg-python>=0.2.0",
+    "mlx-whisper>=0.4.3",
+    "tqdm>=4.67.3",
+    "fastapi>=0.135.2",
+    "uvicorn[standard]>=0.42.0",
+    "python-multipart>=0.0.22",
+    "yt-dlp>=2026.3.17",
+    "sse-starlette>=3.3.4",
+    "soundfile>=0.13.1",
+    "deep-translator>=1.11.4",
+    "demucs>=4.0.1",
+    "boto3>=1.42.82",
+    "torch==2.6.0",
+    "torchaudio==2.6.0",
+    "slowapi>=0.1.9",
+    "faster-whisper>=1.2.1",
+    "spaces>=0.48.3",
+    "openai-whisper>=20240930",
+    "gradio>=6.12.0",
+    "accelerate>=1.12.0",
+    "transformers>=4.57.3",
+]
+[project.optional-dependencies]
+# HF Spaces install from requirements-{cbox,omni}.txt and ignore these.
+# Locally: `uv sync --extra chatterbox` installs the PyPI chatterbox-tts
+# (we skip-worktree the vendored ./chatterbox/ folder so it doesn't shadow
+# the PyPI package). `--extra omnivoice` is heavier and optional.
+chatterbox = ["chatterbox-tts>=0.1.7"]
+omnivoice = ["omnivoice>=0.1.4"]
+[tool.uv]
+# Declare chatterbox and omnivoice extras as mutually exclusive so uv
+# doesn't try to resolve them into one lockfile view.
+conflicts = [
+    [{ extra = "chatterbox" }, { extra = "omnivoice" }],
+]
+override-dependencies = [
+    # onnxruntime 1.24.x metadata claims py3.10 support but no 3.10 wheels
+    # ship on PyPI — force resolution to the last version that has 3.10 wheels.
+    "onnxruntime<1.24",
+    # chatterbox-tts==0.1.7 pins gradio==6.8.0, but app.py needs >=6.12.0
+    # for gradio.Server. Override so the extras can coexist in a lockfile;
+    # gradio is only loaded by app.py (HF), so the local chatterbox install
+    # never exercises gradio code.
+    "gradio>=6.12.0",
+]

requirements-cbox.txt ADDED Viewed

	@@ -0,0 +1,51 @@

+setuptools<70.0.0
+# Core ML
+torch==2.8.0
+torchaudio==2.8.0
+accelerate==1.12.0
+transformers>=4.57.3
+diffusers==0.29.0
+safetensors==0.5.3
+# Audio processing
+librosa==0.11.0
+soundfile
+pydub
+demucs==4.0.1
+openunmix
+pyloudnorm
+# Transcription
+faster-whisper
+# Translation
+deep-translator
+# TTS
+conformer==0.3.2
+omegaconf
+pykakasi==2.3.0
+resemble-perth>=1.0.0
+s3tokenizer
+spacy-pkuseg
+# API / server
+fastapi
+uvicorn
+slowapi
+sse-starlette
+python-multipart
+python-dotenv
+pydantic
+# HuggingFace
+huggingface-hub
+spaces
+# Utilities
+openai
+boto3
+yt-dlp
+ffmpeg-python
+numpy<2.0.0
+pandas<2.3.0

requirements-omni.txt ADDED Viewed

	@@ -0,0 +1,157 @@

+# Requirements for OmniVoice TTS Space (ZeroGPU / Python 3.10)
+# TTS Engine: OmniVoice (set TTS_ENGINE=omnivoice in Space Secrets)
+#
+# This Space serves only the OmniVoice TTS engine, avoiding dependency
+# conflicts with chatterbox-tts (which pins transformers==5.2.0).
+accelerate==1.12.0
+aiofiles
+annotated-types
+anyio
+audioread
+av
+beautifulsoup4
+boto3
+botocore
+brotli
+catalogue
+certifi
+cffi
+cfgv
+charset-normalizer
+click
+cloudpickle
+coloredlogs
+conformer
+ctranslate2
+decorator
+deep-translator
+demucs==4.0.1
+deprecated
+diffusers
+distlib
+distro
+dora-search
+einops
+fastapi
+faster-whisper
+ffmpeg-python
+ffmpy
+filelock
+flatbuffers
+fsspec
+future
+gradio==6.12.0
+gradio-client
+h11
+httpcore
+httptools
+httpx
+huggingface-hub
+humanfriendly
+identify
+idna
+importlib-metadata
+jaconv
+jinja2
+jiter
+jmespath
+joblib
+julius
+lameenc
+lazy-loader
+librosa
+limits
+llvmlite
+markdown-it-py
+markupsafe
+mdurl
+ml-dtypes
+mlx; sys_platform == 'darwin'
+mlx-whisper; sys_platform == 'darwin'
+more-itertools
+mpmath
+msgpack
+networkx
+nodeenv
+numba
+numpy<2.0.0
+omegaconf
+onnx
+onnxruntime
+openai
+openai-whisper
+openunmix
+orjson
+packaging
+pandas<2.3.0
+pillow
+platformdirs
+pooch
+pre-commit
+protobuf
+psutil
+pycparser
+pydantic
+pydantic-core
+pydub
+pygments
+pykakasi
+pyloudnorm
+python-dateutil
+python-discovery
+python-dotenv
+python-multipart
+pytz
+pyyaml
+regex
+resemble-perth
+retrying
+rich
+s3tokenizer
+s3transfer
+safehttpx
+safetensors
+scikit-learn
+scipy
+semantic-version
+setuptools
+shellingham
+six
+slowapi
+sniffio
+soundfile
+soupsieve
+soxr
+spaces
+spacy-pkuseg
+srsly
+sse-starlette
+starlette
+submitit
+sympy
+threadpoolctl
+tiktoken
+tokenizers
+tomlkit
+torch==2.8.0
+torchaudio==2.8.0
+tqdm
+transformers>=4.57.3
+treetable
+typer
+typing-extensions
+typing-inspection
+tzdata
+urllib3
+uvicorn
+uvloop; sys_platform != 'win32'
+virtualenv
+watchfiles
+websockets
+wrapt
+yt-dlp
+zipp
+# OmniVoice TTS
+omnivoice>=0.1.4

requirements-qwen3.txt ADDED Viewed

	@@ -0,0 +1,55 @@

+# Requirements for Qwen3-TTS Space (ZeroGPU / Python 3.10)
+# TTS Engine: Qwen3-TTS Base 1.7B (set TTS_ENGINE=qwen3 in Space Secrets)
+#
+# This Space serves only the Qwen3-TTS engine, mirroring the chatterbox/
+# omnivoice split. Pins are derived from the official Qwen/Qwen3-TTS Space
+# (torch 2.8, transformers 4.57.3) plus the VideoVoice pipeline's
+# transcription/translation/audio dependencies.
+# ── Qwen3-TTS core (matches Qwen/Qwen3-TTS Space) ────────────
+# NOTE: `qwen_tts` is NOT a PyPI package. The Qwen3TTSModel class is loaded
+# from a vendored `qwen_tts/` directory at the repo root, mirroring the
+# vendored `chatterbox/` folder pattern. Copy that directory from
+# https://huggingface.co/spaces/Qwen/Qwen3-TTS/tree/main/qwen_tts into this
+# repo before deploying.
+torch==2.8.0
+torchaudio==2.8.0
+transformers==4.57.3
+accelerate==1.12.0
+einops
+librosa
+soundfile
+sox
+onnxruntime
+kernels
+spaces
+# ── VideoVoice pipeline (transcription + translation + IO) ──
+fastapi
+uvicorn
+slowapi
+sse-starlette
+python-multipart
+python-dotenv
+pydantic
+faster-whisper
+openai-whisper
+mlx; sys_platform == 'darwin'
+mlx-whisper; sys_platform == 'darwin'
+deep-translator
+openai
+demucs==4.0.1
+openunmix
+pyloudnorm
+pydub
+ffmpeg-python
+huggingface-hub
+boto3
+yt-dlp
+gradio==6.12.0
+numpy<2.0.0
+pandas<2.3.0

requirements.txt ADDED Viewed

	@@ -0,0 +1,62 @@

+# Requirements for Dramabox Space (ZeroGPU / Python 3.10)
+# TTS Engine: Resemble Dramabox (set TTS_ENGINE=dramabox in Space Secrets)
+#
+# This Space serves the Dramabox "directable speech" model via the
+# /api/tools/dramabox tools endpoint. The dub pipeline is reachable but
+# rejects voice_mode != "dramabox" (server.py), and the frontend never
+# routes dub requests here.
+#
+# NOTE: The Dramabox inference glue (TTSServer, model_downloader) is NOT
+# a PyPI package. Vendor it from
+#   https://huggingface.co/spaces/ResembleAI/Dramabox/tree/main/src
+# into this repo as `dramabox_src/` before deploying. The tools_api/dramabox
+# worker adds that path to sys.path on first request.
+# ── Dramabox core (verbatim from upstream ResembleAI/Dramabox Space) ──
+torch==2.8.0
+torchaudio==2.8.0
+# pydantic 2.11+ emits bool-shorthand `additionalProperties: True` which
+# crashes gradio_client's get_type. 2.10.6 is the last version emitting
+# the dict form — Dramabox requires this pin.
+pydantic==2.10.6
+safetensors>=0.4.0
+accelerate>=0.25.0
+peft>=0.7.0
+av>=12.0.0
+einops>=0.7.0
+PyYAML>=6.0
+sentencepiece>=0.1.99
+transformers>=4.45.0
+huggingface_hub>=0.20.0,<1.0
+bitsandbytes>=0.45.0
+gradio==5.7.1
+spaces>=0.30.0
+soundfile>=0.12.0
+resemble-perth @ git+https://github.com/resemble-ai/Perth.git@master
+# ── VideoVoice pipeline (server.py / app.py imports these at startup) ──
+fastapi
+uvicorn
+slowapi
+sse-starlette
+python-multipart
+python-dotenv
+faster-whisper
+openai-whisper
+mlx; sys_platform == 'darwin'
+mlx-whisper; sys_platform == 'darwin'
+deep-translator
+openai
+demucs==4.0.1
+openunmix
+pyloudnorm
+pydub
+ffmpeg-python
+boto3
+yt-dlp
+numpy<2.0.0
+pandas<2.3.0

scripts/prefetch_models.py ADDED Viewed

	@@ -0,0 +1,47 @@

+"""Prefetch model weights into HF_HOME for faster cold starts on Spaces."""
+import os
def _prefetch_chatterbox() -> None:
    """Load the Chatterbox multilingual TTS model once on CPU.

    The model object is discarded; the call is made only for the download
    that ``from_pretrained`` performs.
    """
    # Imported inside the function, so importing this module does not
    # require the chatterbox package.
    from chatterbox import mtl_tts

    print("[prefetch] Chatterbox Multilingual TTS")
    mtl_tts.ChatterboxMultilingualTTS.from_pretrained("cpu")
def _faster_whisper_model_names() -> list[str]:
    """Return the faster-whisper model names to prefetch.

    ``FASTER_WHISPER_MODELS`` (comma-separated) wins when it names at least
    one model. A value that contains only separators/whitespace is treated
    as unset so the script never silently prefetches nothing. Otherwise the
    single ``FASTER_WHISPER_MODEL`` is used, defaulting to ``large-v3``.
    """
    raw = os.getenv("FASTER_WHISPER_MODELS", "")
    names = [name.strip() for name in raw.split(",") if name.strip()]
    if names:
        return names
    single = os.getenv("FASTER_WHISPER_MODEL", "").strip()
    return [single or "large-v3"]


def _prefetch_faster_whisper() -> None:
    """Instantiate each configured faster-whisper model on CPU (int8) to fetch it."""
    from faster_whisper import WhisperModel

    for model_name in _faster_whisper_model_names():
        print(f"[prefetch] faster-whisper {model_name}")
        _ = WhisperModel(model_name, device="cpu", compute_type="int8")
def _prefetch_demucs() -> None:
    """Request the pretrained ``htdemucs`` model from Demucs and discard it."""
    from demucs import pretrained

    print("[prefetch] Demucs htdemucs")
    pretrained.get_model("htdemucs")
def main() -> None:
    """Prefetch the models needed by the configured TTS engine.

    Chatterbox weights are fetched only when ``TTS_ENGINE`` is
    ``chatterbox`` (the default); faster-whisper and Demucs are always
    fetched.
    """
    engine = os.getenv("TTS_ENGINE", "chatterbox").lower()
    hf_home = os.getenv("HF_HOME", "<unset>")
    print(f"[prefetch] HF_HOME={hf_home}")

    if engine != "chatterbox":
        print(f"[prefetch] skipping chatterbox prefetch for TTS_ENGINE={engine}")
    else:
        _prefetch_chatterbox()

    for prefetch in (_prefetch_faster_whisper, _prefetch_demucs):
        prefetch()
    print("[prefetch] done")


if __name__ == "__main__":
    main()

server.py ADDED Viewed

	@@ -0,0 +1,929 @@

+"""
+server.py — FastAPI backend for VideoVoice.
+Endpoints:
+  POST /api/jobs          — Submit a video for translation (file upload or URL)
+  GET  /api/jobs/{id}     — SSE stream of pipeline progress
+  GET  /api/jobs/{id}/result — Download the translated video
+  POST /api/jobs/{id}/select-model — Select TTS model after preview
+  GET  /api/jobs/{id}/preview/{model} — Stream preview audio
+  GET  /api/demo-videos   — List available demo videos (outputs + data)
+  GET  /api/demo-videos/{video_id}/stream — Stream demo video by ID
+  GET  /api/showcase      — Curated before/after showcase entries
+"""
+import asyncio
+import hashlib
+import json
+import os
+import subprocess
+import shutil
+import threading
+import time
+import uuid
+import re
+from pathlib import Path
+from urllib.parse import urlparse
+from typing import Optional
+from dotenv import load_dotenv
+from fastapi import FastAPI, APIRouter, File, Form, HTTPException, Request, UploadFile, Header
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi import Request
+from fastapi.responses import FileResponse, JSONResponse
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
+from slowapi import Limiter, _rate_limit_exceeded_handler
+from slowapi.errors import RateLimitExceeded
+from slowapi.middleware import SlowAPIMiddleware
+from slowapi.util import get_remote_address
+from sse_starlette.sse import EventSourceResponse
load_dotenv()

# TTS_ENGINE selects the single TTS backend this Space serves. Anything
# outside the supported set aborts startup with a ValueError.
TTS_ENGINE = os.environ.get("TTS_ENGINE", "chatterbox").lower()
if TTS_ENGINE not in {"chatterbox", "omnivoice", "qwen3", "dramabox"}:
    raise ValueError(f"Invalid TTS_ENGINE: {TTS_ENGINE}. Use 'chatterbox', 'omnivoice', 'qwen3', or 'dramabox'.")

# ── Config ────────────────────────────────────────────────
PORT = int(os.environ.get("PORT", "7860"))
MAX_DURATION_SEC = 90
MAX_FILE_SIZE_MB = 90
# Byte form of MAX_FILE_SIZE_MB, compared against the Content-Length header.
MAX_UPLOAD_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024
def _default_artifacts_root() -> Path:
    """Choose the default directory for per-job artifacts.

    Returns ``/data/jobs`` when ``/data`` can be created (or already exists)
    and is writable, which is the persistent-storage case. Otherwise returns
    ``/tmp/videovoice_jobs``, e.g. on Zero GPU / Gradio SDK Spaces without
    the persistent-storage add-on.
    """
    fallback = Path("/tmp/videovoice_jobs")
    persistent = Path("/data/jobs")
    mount_point = persistent.parent

    try:
        mount_point.mkdir(parents=True, exist_ok=True)
        writable = os.access(mount_point, os.W_OK)
    except (PermissionError, OSError):
        return fallback

    return persistent if writable else fallback
# An explicit ARTIFACTS_ROOT env var wins; otherwise probe for a writable
# default location.
ARTIFACTS_ROOT = Path(os.environ.get("ARTIFACTS_ROOT") or _default_artifacts_root())

# Hosts accepted for yt-dlp downloads (subdomains are matched separately
# in _is_allowed_video_host).
ALLOWED_YTDLP_HOSTS = {
    "youtube.com",
    "youtu.be",
    "instagram.com",
    "tiktok.com",
    "vm.tiktok.com",
}

# Directory names directly under ARTIFACTS_ROOT that the reaper never deletes.
PERSISTENT_ARTIFACT_DIRS = {"data", "outputs", "tmp", "tools", "uploads"}

REAPER_INTERVAL_SECONDS = 600  # sweep every 10 minutes
REAPER_MAX_AGE_SECONDS = 7200  # reap artifacts older than 2 hours
def _parse_allowed_origins(value: str) -> list[str]:
    """Split a comma-separated origin list, dropping blanks.

    Falls back to the local dev origin when no usable entry remains.
    """
    parsed: list[str] = []
    for chunk in value.split(","):
        candidate = chunk.strip()
        if candidate:
            parsed.append(candidate)
    if not parsed:
        return ["http://localhost:5173"]
    return parsed
ALLOWED_ORIGINS = _parse_allowed_origins(os.environ.get("ALLOWED_ORIGINS", "http://localhost:5173"))

# ── App ────────────────────────────────────────────────
router = APIRouter()

# Rate limiting is on unless DISABLE_RATE_LIMIT is set to 1/true/yes.
_RATE_LIMIT_ENABLED = os.environ.get("DISABLE_RATE_LIMIT", "").lower() not in ("1", "true", "yes")
limiter = Limiter(key_func=get_remote_address, enabled=_RATE_LIMIT_ENABLED)
# Note: app.state.limiter, exception handlers, and SlowAPIMiddleware
# are now configured on the main Server instance in app.py.

# ── In-memory job store ────────────────────────────────
# Structure: { job_id: { status, messages[], result_path, error, created_at,
#              voice_mode, preview_paths, preview_event, selected_model } }
jobs: dict = {}

# ── GPU job queue ─────────────────────────────────────
# Only 1 GPU job at a time — others wait in FIFO order
gpu_semaphore = threading.Semaphore(1)
# Ordered list of queued job_ids waiting for the GPU.
gpu_queue: list[str] = []
# Live info about the job currently holding the GPU.
gpu_active: dict = dict(
    job_id=None,
    started_at=None,
    step=0,
    total_steps=6,
    step_label="",
)
# Per-step timing history: { step_num: [durations] } — learns real per-step costs
step_durations: dict[int, list[float]] = {}
# Maps a client session id to the job id it currently owns.
session_active_jobs: dict[str, str] = {}
artifact_reaper_task: Optional[asyncio.Task] = None

# Well-known directories and files beneath ARTIFACTS_ROOT.
UPLOAD_DIR = ARTIFACTS_ROOT / "uploads"
OUTPUT_DIR = ARTIFACTS_ROOT / "outputs"
SHOWCASE_DIR = ARTIFACTS_ROOT / "data" / "showcase"
SHOWCASE_FILE = ARTIFACTS_ROOT / "data" / "showcase.json"
# Folder label -> directory scanned for demo videos.
DEMO_VIDEO_DIRS = {
    "outputs": OUTPUT_DIR,
    "data": ARTIFACTS_ROOT / "data",
    "showcase": SHOWCASE_DIR,
}
+# ── Helpers ────────────────────────────────────────────
def _download_url(url: str, dest: str) -> str:
    """Download a video from a supported social platform using yt-dlp.

    Args:
        url: Source URL; callers are expected to have validated the host.
        dest: Output file path handed to yt-dlp via ``-o``.

    Returns:
        ``dest`` once the file exists on disk.

    Raises:
        RuntimeError: yt-dlp exited non-zero, or exited cleanly without
            writing ``dest``.
        subprocess.TimeoutExpired: the download took longer than 120 seconds.
    """
    result = subprocess.run(
        [
            "yt-dlp",
            "--no-playlist",
            "--max-filesize", "100M",
            "--js-runtimes", "node",
            "--extractor-args", "youtube:player_client=android,ios,web_safari",
            "-f", "mp4/best[ext=mp4]/best",
            "-o", dest,
            # End of options: the user-supplied URL can never be parsed as
            # a yt-dlp flag, whatever it starts with.
            "--",
            url,
        ],
        capture_output=True,
        text=True,
        timeout=120,
    )
    if result.returncode != 0:
        raise RuntimeError(f"yt-dlp failed: {result.stderr[:300]}")
    # Exit status alone does not prove a file was written (e.g. a download
    # skipped by --max-filesize), so check before reporting success.
    if not os.path.isfile(dest):
        raise RuntimeError("yt-dlp failed: no video file was produced (it may exceed the size limit)")
    return dest
def _is_allowed_video_host(url: str) -> bool:
    """Return True only for http(s) URLs on the trusted video platforms.

    A URL passes when its scheme is ``http`` or ``https`` and its hostname
    is listed in ``ALLOWED_YTDLP_HOSTS`` or is a subdomain of instagram.com,
    youtube.com or tiktok.com. Malformed URLs are rejected, not raised.
    """
    try:
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
    except ValueError:
        # urlparse raises on inputs such as an unterminated IPv6 literal.
        return False
    # Reject file://, ftp:// and similar even when the host part looks valid.
    if parsed.scheme not in ("http", "https"):
        return False
    if not host:
        return False
    return host in ALLOWED_YTDLP_HOSTS or host.endswith(
        (".instagram.com", ".youtube.com", ".tiktok.com")
    )
def _probe_duration_seconds(path: str) -> float:
    """Return the container duration of ``path`` in seconds, as reported by ffprobe.

    Raises:
        RuntimeError: ffprobe exited non-zero or printed a non-numeric duration.
    """
    command = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "csv=p=0",
        path,
    ]
    probe = subprocess.run(command, capture_output=True, text=True, timeout=30)
    if probe.returncode != 0:
        raise RuntimeError(f"ffprobe failed: {probe.stderr[:300]}")

    raw_duration = probe.stdout.strip()
    try:
        return float(raw_duration)
    except ValueError as exc:
        raise RuntimeError("ffprobe returned an invalid duration value") from exc
def _gpu_available() -> bool:
    """Tell whether torch reports a CUDA or MPS device; False on any failure."""
    try:
        import torch

        mps_backend = getattr(torch.backends, "mps", None)
        has_mps = mps_backend is not None and mps_backend.is_available()
        has_cuda = torch.cuda.is_available()
        return bool(has_cuda or has_mps)
    except Exception:
        # Missing torch or a broken backend both count as "no GPU".
        return False
def _queue_depth() -> int:
    """Count the jobs waiting for the GPU plus the one running, if any."""
    running = 1 if gpu_active["job_id"] else 0
    return running + len(gpu_queue)
def _is_job_active(job_id: str) -> bool:
    """Return True when the job exists and its status is queued or running."""
    record = jobs.get(job_id)
    return bool(record) and record.get("status") in {"queued", "running"}
def _release_session_lock(job: dict) -> None:
    """Drop the session -> job mapping, but only if it still points at this job."""
    session_id = job.get("session_id")
    owns_lock = bool(session_id) and session_active_jobs.get(session_id) == job.get("job_id")
    if owns_lock:
        session_active_jobs.pop(session_id, None)
def _demo_video_id(folder: str, filename: str) -> str:
    """Derive a stable opaque ID: the first 20 hex chars of SHA-256("folder/filename")."""
    digest = hashlib.sha256()
    digest.update(f"{folder}/{filename}".encode("utf-8"))
    return digest.hexdigest()[:20]
def _collect_demo_videos():
    """Scan the demo directories for ``.mp4`` files.

    Returns:
        A ``(videos, lookup)`` tuple. ``videos`` is a list of metadata dicts
        sorted by lowercase name, then folder, then URL. ``lookup`` maps
        each opaque video ID to its file path.
    """
    catalog = []
    paths_by_id = {}

    for folder, directory in DEMO_VIDEO_DIRS.items():
        if not (directory.exists() and directory.is_dir()):
            continue
        for candidate in directory.iterdir():
            if not candidate.is_file():
                continue
            if candidate.suffix.lower() != ".mp4":
                continue
            info = candidate.stat()
            video_id = _demo_video_id(folder, candidate.name)
            entry = {
                "id": video_id,
                "name": candidate.name,
                "url": f"/api/demo-videos/{video_id}/stream",
                "folder": folder,
                "size_bytes": info.st_size,
                "modified_at": int(info.st_mtime),
            }
            catalog.append(entry)
            paths_by_id[video_id] = candidate

    def _sort_key(item):
        return (item["name"].lower(), item["folder"].lower(), item["url"].lower())

    return sorted(catalog, key=_sort_key), paths_by_id
def _queue_status_for(job_id: str) -> str | None:
    """Describe a waiting job's place in the GPU queue.

    Returns None when the job is not queued. While a job is running, the
    message names its current step and, once step timings exist, an ETA:
    the estimated time left in the running job multiplied by this job's
    1-based queue position. Steps with no history are assumed to take 15s.
    """
    try:
        position = gpu_queue.index(job_id) + 1  # 1-based
    except ValueError:
        return None

    if not gpu_active["job_id"]:
        return f"Queue position: {position} — GPU starting up..."

    current = gpu_active["step"]
    total = gpu_active["total_steps"]
    label = gpu_active["step_label"]

    def _expected_seconds(step_no):
        samples = step_durations.get(step_no, [])
        return (sum(samples) / len(samples)) if samples else 15

    eta_part = ""
    if current > 0 and step_durations:
        per_job = sum(_expected_seconds(n) for n in range(current, total + 1))
        remaining = int(per_job * position)
        if remaining >= 60:
            minutes, seconds = divmod(remaining, 60)
            eta_part = f" — ~{minutes}m {seconds:02d}s remaining"
        elif remaining > 0:
            eta_part = f" — ~{remaining}s remaining"

    noun = "job" if position == 1 else "jobs"
    progress = f"Step {current}/{total} — {label}" if label else f"Step {current}/{total}"
    return f"{position} {noun} ahead ({progress}){eta_part}"
def _config_languages() -> list[str]:
    """List the language names (keys) of the pipeline's ``LANGUAGE_CODES`` mapping."""
    import pipeline

    return [name for name in pipeline.LANGUAGE_CODES]
def _chatterbox_language_options() -> list[dict]:
    """Return the pipeline's ``LANGUAGE_CODES`` as ``{"name", "code"}`` dicts."""
    import pipeline

    options = []
    for name, code in pipeline.LANGUAGE_CODES.items():
        options.append({"name": name, "code": code})
    return options
def _omnivoice_language_options() -> list[dict]:
    """Return ``OMNIVOICE_LANGUAGE_CODES`` as ``{"name", "code"}`` dicts."""
    from steps.lang import omnivoice_languages

    options = []
    for name, code in omnivoice_languages.OMNIVOICE_LANGUAGE_CODES.items():
        options.append({"name": name, "code": code})
    return options
def _qwen3_language_options() -> list[dict]:
    """Return ``QWEN3_LANGUAGE_CODES`` as ``{"name", "code"}`` dicts."""
    from steps.lang import qwen3_languages

    options = []
    for name, code in qwen3_languages.QWEN3_LANGUAGE_CODES.items():
        options.append({"name": name, "code": code})
    return options
async def _artifact_reaper_loop():
    """Periodically delete stale job artifacts and forget finished jobs.

    Every ``REAPER_INTERVAL_SECONDS`` the loop:

    1. Removes directories directly under ``ARTIFACTS_ROOT`` whose mtime is
       older than ``REAPER_MAX_AGE_SECONDS``. It skips the names in
       ``PERSISTENT_ARTIFACT_DIRS`` and the working directory of any job
       that is still queued or running.
    2. Drops ``complete``/``error`` entries older than the same age from
       the in-memory job store.

    The two passes fail independently, so a filesystem error cannot stop
    the job store from being pruned. The loop never returns.
    """
    while True:
        now = time.time()

        try:
            if ARTIFACTS_ROOT.is_dir():
                for path in ARTIFACTS_ROOT.iterdir():
                    if not path.is_dir() or path.name in PERSISTENT_ARTIFACT_DIRS:
                        continue
                    # Job directories are named after the job id. A job that
                    # waited a long time in the queue must keep its input.
                    if _is_job_active(path.name):
                        continue
                    if now - path.stat().st_mtime > REAPER_MAX_AGE_SECONDS:
                        shutil.rmtree(path, ignore_errors=True)
        except Exception as exc:
            print(f"[reaper] cleanup error: {exc}")

        try:
            stale_jobs = [
                job_id
                for job_id, state in jobs.items()
                if state.get("status") in {"complete", "error"}
                and (now - state.get("created_at", now)) > REAPER_MAX_AGE_SECONDS
            ]
            for job_id in stale_jobs:
                jobs.pop(job_id, None)
        except Exception as exc:
            print(f"[reaper] cleanup error: {exc}")

        await asyncio.sleep(REAPER_INTERVAL_SECONDS)
async def enforce_content_length_limit(request: Request, call_next):
    """Middleware: reject oversized ``POST /api/jobs`` bodies by declared length.

    Only the Content-Length header is inspected. Requests without the header
    and requests to other routes pass straight through. Responds 413 when
    the declared size exceeds ``MAX_UPLOAD_BYTES`` and 400 when the header
    is not an integer.
    """
    is_job_upload = request.method.upper() == "POST" and request.url.path == "/api/jobs"
    declared = request.headers.get("content-length") if is_job_upload else None

    if declared:
        try:
            too_large = int(declared) > MAX_UPLOAD_BYTES
        except ValueError:
            return JSONResponse(
                status_code=400,
                content={"detail": "Invalid Content-Length header."},
            )
        if too_large:
            return JSONResponse(
                status_code=413,
                content={"detail": f"File too large (max {MAX_FILE_SIZE_MB}MB)."},
            )

    return await call_next(request)
def _record_step_duration(step: int, started_at: float) -> None:
    """Add the elapsed time of ``step`` to its rolling history (last 10 samples)."""
    if step <= 0:
        return
    history = step_durations.setdefault(step, [])
    history.append(time.time() - started_at)
    if len(history) > 10:
        history.pop(0)


def _free_accelerator_memory() -> None:
    """Best-effort release of cached GPU memory between jobs; never raises."""
    try:
        import gc
        import torch

        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        mps_backend = getattr(torch.backends, "mps", None)
        if hasattr(torch, "mps") and mps_backend is not None and mps_backend.is_available():
            torch.mps.empty_cache()
    except Exception as exc:
        # Cleanup must never prevent the GPU slot from being handed on.
        print(f"[pipeline] GPU memory cleanup skipped: {exc}")


async def _run_pipeline_async(
    job_id: str, video_path: str, target_lang: str, source_lang: str, voice_mode: str, captions: bool = True, preserve_music: bool = True, video_link: Optional[str] = None
):
    """Wait for the single GPU slot, run the pipeline in a worker thread, and publish progress.

    The job is appended to ``gpu_queue`` and polls ``gpu_semaphore`` once a
    second, refreshing ``job["_wait_status"]``. Once the slot is acquired,
    the ``run_pipeline`` generator is drained in the default executor. Each
    message it yields is appended to ``job["messages"]``, and messages that
    look like ``Step X/<total>`` update ``gpu_active`` and the per-step
    timing history.

    On exit, including failure or cancellation, the job is removed from the
    queue and its session lock is released. If this call actually acquired
    the GPU slot, it is also released.

    Args:
        job_id: Key of an existing entry in ``jobs``; also the name of the
            job's artifact directory under ``ARTIFACTS_ROOT``.
        video_path: Input video handed to the pipeline.
        target_lang: Target language passed through to the pipeline.
        source_lang: Source language passed through to the pipeline.
        voice_mode: TTS engine name passed through to the pipeline.
        captions: Forwarded to the pipeline unchanged.
        preserve_music: Forwarded to the pipeline; also adds one step to
            the expected step count.
        video_link: Forwarded to the pipeline unchanged.
    """
    from pipeline import run_pipeline
    job = jobs[job_id]
    job["status"] = "queued"
    # Join the queue
    gpu_queue.append(job_id)
    job["_wait_status"] = _queue_status_for(job_id) or "Waiting for GPU..."
    # Tracks whether THIS call owns the semaphore, so the cleanup below never
    # releases a slot (or resets gpu_active) that belongs to another job.
    gpu_acquired = False
    try:
        # Wait for GPU without blocking the event loop — update queue status each tick
        while not gpu_semaphore.acquire(blocking=False):
            job["_wait_status"] = _queue_status_for(job_id) or "Waiting for GPU..."
            await asyncio.sleep(1)
        gpu_acquired = True
        # Leave the queue, mark as running
        if job_id in gpu_queue:
            gpu_queue.remove(job_id)
        job["_wait_status"] = None
        job["status"] = "running"
        # Baseline of 6 pipeline steps, plus one for music restoration when
        # preserve_music is set.
        total_steps = 6 + (1 if preserve_music else 0)
        gpu_active["job_id"] = job_id
        gpu_active["started_at"] = time.time()
        gpu_active["step"] = 0
        gpu_active["total_steps"] = total_steps
        gpu_active["step_label"] = ""
        job["messages"].append({"type": "progress", "message": "GPU acquired — starting pipeline...", "step": 0})
        start = time.time()
        step_start = time.time()
        data_dir = str(ARTIFACTS_ROOT / job_id)
        os.makedirs(data_dir, exist_ok=True)
        output_path = str(Path(data_dir) / "output.mp4")
        # Note: preview_both mode removed in single-engine Spaces
        # Each Space only serves one TTS engine (TTS_ENGINE env var)
        preview_event = None
        gen = run_pipeline(
            video_path=video_path,
            target_language=target_lang,
            source_language=source_lang,
            output_path=output_path,
            voice_mode=voice_mode,
            preview_event=preview_event,
            job_state=job,
            captions=captions,
            preserve_music=preserve_music,
            data_dir=data_dir,
            video_link=video_link,
        )
        step = 0

        def _run_gen():
            """Drain the pipeline generator in the worker thread; return its final value."""
            nonlocal step, step_start
            output = None
            try:
                while True:
                    msg = next(gen)
                    # Handle preview-ready sentinel dict
                    if isinstance(msg, dict) and msg.get("__PREVIEW_READY__"):
                        preview_paths = msg["paths"]
                        job["preview_paths"] = preview_paths
                        # Build preview URLs
                        preview_urls = {}
                        for model_name, path in preview_paths.items():
                            if path:
                                preview_urls[model_name] = (
                                    f"/api/jobs/{job_id}/preview/{model_name}"
                                )
                        job["messages"].append({
                            "type": "voice_preview",
                            "step": 4,
                            "previews": preview_urls,
                        })
                        continue
                    # Regular string message
                    if isinstance(msg, str):
                        # Detect step transitions and record per-step timing
                        if "Step" in msg and f"/{total_steps}" in msg:
                            try:
                                new_step = int(
                                    msg.split("Step")[1].split("/")[0].strip()
                                )
                                # Record duration of the step that just ended
                                _record_step_duration(step, step_start)
                                step = new_step
                                step_start = time.time()
                                # Extract step label (text after "Step X/Y: ")
                                label = msg.split(":", 1)[1].strip() if ":" in msg else ""
                                # Remove emoji prefix
                                label = label.lstrip("🔊📝🌍🗣️⏱️🎞️🎧 ")
                                gpu_active["step"] = step
                                gpu_active["step_label"] = label
                            except (ValueError, IndexError):
                                pass
                        job["messages"].append({
                            "type": "progress",
                            "message": msg.strip(),
                            "step": step,
                        })
            except StopIteration as e:
                output = e.value
            except Exception as e:
                # Pipeline crashed — set error status directly from
                # the thread so the frontend sees it immediately,
                # rather than relying on exception propagation through
                # run_in_executor (which can silently swallow errors
                # when stdout/stderr are in a broken state).
                import traceback
                tb = traceback.format_exc()
                print(f"[pipeline] CRASH in job {job_id}: {e}\n{tb}")
                job["status"] = "error"
                job["messages"].append({
                    "type": "error",
                    "message": f"Pipeline crashed: {e}",
                })
                return None
            # Record the final step's duration
            _record_step_duration(step, step_start)
            return output

        # get_running_loop() is the supported call inside a coroutine.
        loop = asyncio.get_running_loop()
        result_path = await loop.run_in_executor(None, _run_gen)
        # An "error" status here was already reported by _run_gen.
        if job["status"] != "error":
            elapsed = round(time.time() - start)
            job["status"] = "complete"
            job["result_path"] = result_path or output_path
            job["messages"].append({"type": "complete", "elapsed": elapsed})
    except asyncio.CancelledError:
        # A cancelled job must not stay "queued"/"running" forever.
        job["status"] = "error"
        job["messages"].append({"type": "error", "message": "Job was cancelled."})
        raise
    except Exception as e:
        job["status"] = "error"
        job["messages"].append({"type": "error", "message": str(e)})
    finally:
        if job_id in gpu_queue:
            gpu_queue.remove(job_id)
        _release_session_lock(job)
        if gpu_acquired:
            # Free GPU memory between jobs
            _free_accelerator_memory()
            gpu_active["job_id"] = None
            gpu_active["started_at"] = None
            gpu_active["step"] = 0
            gpu_active["step_label"] = ""
            gpu_semaphore.release()
+# ── Routes ─────────────────────────────────────────────
@router.get("/api/health")
async def health():
    """Report liveness, GPU availability, queue pressure and the running job id."""
    payload = {
        "status": "ok",
        "gpu_available": _gpu_available(),
        "queue_depth": _queue_depth(),
        "active_job_id": gpu_active["job_id"],
    }
    return JSONResponse(payload)
@router.get("/api/config")
async def config():
    """Expose upload limits, per-engine language lists and this Space's TTS engine."""
    payload = {
        "max_file_size_mb": MAX_FILE_SIZE_MB,
        "max_duration_sec": MAX_DURATION_SEC,
        "languages": _config_languages(),
        "chatterbox_languages": _chatterbox_language_options(),
        "omnivoice_languages": _omnivoice_language_options(),
        "qwen3_languages": _qwen3_language_options(),
        # A Space serves exactly one engine, so the model list has one entry.
        "tts_models": [TTS_ENGINE],
        "tts_engine": TTS_ENGINE,
    }
    return JSONResponse(payload)
@router.get("/api/demo-videos")
async def list_demo_videos():
    """List the MP4 demo videos found in the configured demo directories."""
    catalog = _collect_demo_videos()[0]
    return JSONResponse({"videos": catalog})
@router.get("/api/demo-videos/{video_id}/stream")
async def stream_demo_video(video_id: str):
    """Serve a demo video looked up by opaque ID; the client never supplies a path.

    Raises:
        HTTPException: 404 when the ID matches no discovered video.
    """
    paths_by_id = _collect_demo_videos()[1]
    target = paths_by_id.get(video_id)
    if target is None:
        raise HTTPException(404, "Demo video not found.")
    return FileResponse(str(target), media_type="video/mp4", filename=target.name)
@router.get("/api/showcase")
async def get_showcase():
    """Return curated showcase entries with resolved streaming URLs.

    Reads ``SHOWCASE_FILE`` and, for every ``their_dub`` / ``our_dub`` entry
    of type ``local`` with a filename, fills in ``url`` with the demo-video
    stream route for that file in the ``showcase`` folder.

    A missing, unreadable, undecodable or unexpectedly shaped file yields an
    empty showcase list instead of a server error. Entries that are not
    objects are passed through untouched.
    """
    empty = {"showcases": []}
    if not SHOWCASE_FILE.exists():
        return JSONResponse(empty)
    try:
        data = json.loads(SHOWCASE_FILE.read_text(encoding="utf-8"))
    except (ValueError, OSError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return JSONResponse(empty)

    showcases = data.get("showcases", []) if isinstance(data, dict) else None
    if not isinstance(showcases, list):
        return JSONResponse(empty)

    for entry in showcases:
        if not isinstance(entry, dict):
            continue
        for key in ("their_dub", "our_dub"):
            dub = entry.get(key)
            if isinstance(dub, dict) and dub.get("type") == "local" and dub.get("filename"):
                video_id = _demo_video_id("showcase", dub["filename"])
                dub["url"] = f"/api/demo-videos/{video_id}/stream"
    return JSONResponse({"showcases": showcases})
+@router.post("/api/jobs")
+@limiter.limit("3/hour")
+async def create_job(
+    request: Request,
+    file: Optional[UploadFile] = File(None),
+    url: Optional[str] = Form(None),
+    target_language: str = Form("Spanish"),
+    source_language: str = Form("auto"),
+    voice_mode: str = Form("chatterbox"),
+    captions: str = Form("true"),
+    preserve_music: str = Form("false"),
+    x_session_id: Optional[str] = Header(default=None, alias="X-Session-Id"),
+):
+    """Submit a video for translation."""
+    if not file and not url:
+        raise HTTPException(400, "Provide either a file upload or a URL.")
+    if x_session_id:
+        existing_job_id = session_active_jobs.get(x_session_id)
+        if existing_job_id and _is_job_active(existing_job_id):
+            return JSONResponse(
+                status_code=409,
+                content={"existing_job_id": existing_job_id},
+            )
+        if existing_job_id and not _is_job_active(existing_job_id):
+            session_active_jobs.pop(x_session_id, None)
+    # Validate voice_mode - only TTS_ENGINE is valid for this Space
+    # "preview_both" is disabled in single-engine mode (no way to choose between engines)
+    valid_modes = (TTS_ENGINE,)
+    if voice_mode not in valid_modes:
+        voice_mode = TTS_ENGINE
+    job_id = None
+    if url:
+        if not _is_allowed_video_host(url):
+            raise HTTPException(400, "Unsupported URL host.")
+        # Instagram
+        m = re.search(r'/(?:reel|reels|p)/([A-Za-z0-9_-]+)', url)
+        if m:
+            job_id = m.group(1)
+        # YouTube
+        if not job_id:
+            m = re.search(r'(?:v=|youtu\.be/)([\w-]+)', url)
+            if m:
+                job_id = m.group(1)
+        # TikTok (vm.tiktok.com)
+        if not job_id:
+            m = re.search(r'vm\.tiktok\.com/([\w-]+)', url)
+            if m:
+                job_id = m.group(1)
+        # TikTok (standard /video/xxx)
+        if not job_id:
+            m = re.search(r'/video/(\d+)', url)
+            if m:
+                job_id = m.group(1)
+    if not job_id:
+        job_id = str(uuid.uuid4())[:12]
+    base_job_id = job_id
+    counter = 1
+    job_dir = ARTIFACTS_ROOT / job_id
+    while job_dir.exists():
+        job_id = f"{base_job_id}_{counter}"
+        job_dir = ARTIFACTS_ROOT / job_id
+        counter += 1
+    job_dir.mkdir(parents=True, exist_ok=True)
+    video_path = ""
+    if file:
+        # Save uploaded file
+        ext = Path(file.filename or "video.mp4").suffix or ".mp4"
+        video_path = str(job_dir / f"input{ext}")
+        with open(video_path, "wb") as f:
+            content = await file.read()
+            f.write(content)
+    elif url:
+        # Download from URL
+        video_path = str(job_dir / "input.mp4")
+        try:
+            _download_url(url, video_path)
+        except Exception as e:
+            shutil.rmtree(job_dir, ignore_errors=True)
+            raise HTTPException(400, f"Failed to download video: {e}")
+    try:
+        duration_seconds = _probe_duration_seconds(video_path)
+    except Exception as exc:
+        shutil.rmtree(job_dir, ignore_errors=True)
+        raise HTTPException(400, f"Could not validate video duration: {exc}")
+    if duration_seconds > MAX_DURATION_SEC:
+        shutil.rmtree(job_dir, ignore_errors=True)
+        raise HTTPException(400, f"Video exceeds {MAX_DURATION_SEC} seconds limit.")
+    # Initialize job
+    jobs[job_id] = {
+        "job_id": job_id,
+        "status": "queued",
+        "messages": [],
+        "result_path": None,
+        "error": None,
+        "created_at": time.time(),
+        "voice_mode": voice_mode,
+        "preview_paths": None,
+        "preview_event": None,
+        "selected_model": None,
+        "session_id": x_session_id,
+    }
+    if x_session_id:
+        session_active_jobs[x_session_id] = job_id
+    # Start pipeline in background
+    enable_captions = captions.lower() == "true"
+    enable_music = preserve_music.lower() == "true"
+    asyncio.create_task(
+        _run_pipeline_async(job_id, video_path, target_language, source_language, voice_mode, enable_captions, enable_music, url)
+    )
+    return JSONResponse({"job_id": job_id, "status": "queued"})
+@router.get("/api/jobs/{job_id}")
+@limiter.limit("20/second")
+async def job_status_poll(request: Request, job_id: str, after: int = 0):
+    """Poll endpoint returning new messages since index `after`, plus live wait status."""
+    if job_id not in jobs:
+        raise HTTPException(404, "Job not found.")
+    job = jobs[job_id]
+    messages = job["messages"][after:]
+    # Include live wait ETA (updated in-place, not a queued message)
+    wait_status = job.get("_wait_status")
+    return JSONResponse(
+        {"messages": messages, "next": after + len(messages), "wait_status": wait_status},
+        headers={"Cache-Control": "no-cache, no-store"},
+    )
+class ModelSelection(BaseModel):  # JSON request body accepted by POST /api/jobs/{job_id}/select-model
+    model: str  # engine name picked by the user; select_model rejects anything other than TTS_ENGINE
+@router.post("/api/jobs/{job_id}/select-model")
+async def select_model(job_id: str, selection: ModelSelection):
+    """User selects a TTS model after previewing."""
+    job = jobs.get(job_id)
+    if not job:
+        raise HTTPException(404, "Job not found.")
+    if selection.model != TTS_ENGINE:
+        raise HTTPException(400, f"Invalid model. This Space only serves {TTS_ENGINE}.")
+    job["selected_model"] = selection.model
+    # Unblock the pipeline
+    if job.get("preview_event"):
+        job["preview_event"].set()
+    return JSONResponse({"status": "ok", "selected": selection.model})
+@router.get("/api/jobs/{job_id}/preview/{model_name}")
+async def get_preview_audio(job_id: str, model_name: str):
+    """Serve a preview audio WAV file."""
+    job = jobs.get(job_id)
+    if not job:
+        raise HTTPException(404, "Job not found.")
+    if model_name != TTS_ENGINE:
+        raise HTTPException(400, f"Invalid model name. This Space serves {TTS_ENGINE} only.")
+    preview_paths = job.get("preview_paths")
+    if not preview_paths:
+        raise HTTPException(404, "Previews not yet generated.")
+    path = preview_paths.get(model_name)
+    if not path or not Path(path).exists():
+        raise HTTPException(404, f"Preview for '{model_name}' not available.")
+    return FileResponse(
+        path,
+        media_type="audio/wav",
+        filename=f"preview_{model_name}.wav",
+    )
+@router.get("/api/jobs/{job_id}/result")
+async def job_result(job_id: str):
+    """Download the translated video."""
+    job = jobs.get(job_id)
+    if not job:
+        raise HTTPException(404, "Job not found.")
+    if job["status"] != "complete":
+        raise HTTPException(400, f"Job is {job['status']}, not complete.")
+    if not job["result_path"] or not Path(job["result_path"]).exists():
+        raise HTTPException(404, "Result file not found.")
+    return FileResponse(
+        job["result_path"],
+        media_type="video/mp4",
+        filename=f"videovoice_{job_id}.mp4",
+    )
+@router.on_event("startup")
+async def startup_event():
+    """Create artifact directories and start background cleanup."""
+    global artifact_reaper_task
+    ARTIFACTS_ROOT.mkdir(parents=True, exist_ok=True)
+    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+    (ARTIFACTS_ROOT / "data").mkdir(parents=True, exist_ok=True)
+    (ARTIFACTS_ROOT / "tmp").mkdir(parents=True, exist_ok=True)
+    if os.getenv("DISABLE_CLEANUP", "").lower() in ("1", "true", "yes"):
+        print("[reaper] DISABLE_CLEANUP is set — artifact reaper will not run")
+    elif artifact_reaper_task is None or artifact_reaper_task.done():
+        artifact_reaper_task = asyncio.create_task(_artifact_reaper_loop())
+@router.on_event("shutdown")
+async def shutdown_event():
+    global artifact_reaper_task
+    if artifact_reaper_task is not None and not artifact_reaper_task.done():
+        artifact_reaper_task.cancel()
+        try:
+            await artifact_reaper_task
+        except asyncio.CancelledError:
+            pass
+# ── No-cache headers for dev/tunnel (ensures Cloudflare serves fresh files) ──
+from starlette.middleware.base import BaseHTTPMiddleware
+# Phase 1.7 marker: remove legacy static middleware when React FE fully owns UI.
+class NoCacheStaticMiddleware(BaseHTTPMiddleware):
+    async def dispatch(self, request: Request, call_next):
+        response = await call_next(request)
+        if request.url.path.endswith(('.css', '.js', '.html')) or request.url.path == '/':
+            response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
+            response.headers['Pragma'] = 'no-cache'
+        return response
+# Standalone middleware and static mounts removed (now handled in app.py/main app)
+# ── Local dev entrypoint ──────────────────────────────
+# On HF Spaces `app.py` creates its own Server and imports this router, so
+# the block below is skipped. Locally, `python server.py` builds a minimal
+# FastAPI wrapper around the router so there's something for uvicorn to run.
+if __name__ == "__main__":
+    local_app = FastAPI(title="VideoVoice API (local)")
+    local_app.state.limiter = limiter  # the same limiter object the route decorators in this module use
+    local_app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
+    local_app.add_middleware(SlowAPIMiddleware)
+    local_app.add_middleware(NoCacheStaticMiddleware)
+    local_app.add_middleware(
+        CORSMiddleware,
+        allow_origins=ALLOWED_ORIGINS,
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+    @local_app.middleware("http")
+    async def _local_content_length(request: Request, call_next):
+        return await enforce_content_length_limit(request, call_next)  # helper defined outside this block; presumably rejects oversized bodies — confirm
+    local_app.include_router(router)
+    # Tools API — independent of pipeline; safe to include here too.
+    from tools_api import router as tools_router
+    local_app.include_router(tools_router)
+    # Serve the legacy static frontend at / so `python server.py` keeps the
+    # old dev UX (open http://localhost:8000 to hit frontend/index.html).
+    # The React SPA in production is deployed separately to S3.
+    frontend_dir = Path(__file__).parent / "frontend"
+    if frontend_dir.exists():  # the static mount is skipped when no frontend/ directory sits next to this file
+        local_app.mount("/", StaticFiles(directory=str(frontend_dir), html=True), name="frontend")
+    import uvicorn
+    port = int(os.getenv("PORT", 8000))  # PORT env var overrides the default of 8000
+    uvicorn.run(local_app, host="0.0.0.0", port=port)  # 0.0.0.0 binds to all network interfaces

social_distributor/.env.example ADDED Viewed

	@@ -0,0 +1,16 @@

+# VideoVoice data directory (default: ../data relative to poster/)
+VIDEOVOICE_DATA_DIR=/Users/rafa/MscAi/VideoVoice/data
+# Pollinations LLM (for caption generation)
+POLLEN_MODEL=gemini-search
+POLLEN_API_KEY=pollinations
+# AWS Bedrock fallback (for caption generation)
+AWS_REGION=us-east-1
+BEDROCK_MODEL=qwen.qwen3-next-80b-a3b
+# AWS_ACCESS_KEY_ID=
+# AWS_SECRET_ACCESS_KEY=
+# Posting settings
+POST_DELAY=30
+HEADLESS=true

social_distributor/.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+.venv/
+__pycache__/
+poster/auth/storage/
+*.pyc
+.env
+post_history.json
+creator_cache.json
+errors/

social_distributor/README.md ADDED Viewed

	@@ -0,0 +1,205 @@

+# Social Media Distributor
+Automated social media posting for VideoVoice dubbed videos. Posts AI-dubbed videos to Instagram, TikTok, and YouTube with AI-generated captions.
+## Features
+- **Multi-platform posting**: Instagram, TikTok, YouTube
+- **AI-generated captions**: Uses Pollinations LLM with AWS Bedrock fallback
+- **Creator handle extraction**: Automatically pulls creator info from source videos
+- **Smart scheduling**: Configurable delays between posts to avoid rate limits
+- **Session management**: Persistent browser sessions (no repeated logins)
+- **Post tracking**: Tracks what was posted to avoid duplicates
+## Setup
+### 1. Install Dependencies
+```bash
+# Using uv (recommended)
+uv sync
+# Or using pip (installs the dependencies declared in pyproject.toml)
+pip install .
+playwright install
+```
+### 2. Configure Environment
+Copy the example environment file and edit:
+```bash
+cp .env.example .env
+```
+Edit `.env`:
+```env
+# VideoVoice data directory (where dubbed video folders are)
+VIDEOVOICE_DATA_DIR=/path/to/VideoVoice/data
+# LLM for caption generation (Pollinations)
+POLLEN_MODEL=gemini-search
+POLLEN_API_KEY=pollinations
+# Optional: AWS Bedrock fallback
+AWS_REGION=us-east-1
+BEDROCK_MODEL=qwen.qwen3-next-80b-a3b
+AWS_ACCESS_KEY_ID=...
+AWS_SECRET_ACCESS_KEY=...
+# Posting behavior
+POST_DELAY=30          # Seconds between posts
+HEADLESS=true          # Run browser headlessly
+```
+### 3. Login to Platforms
+You need to authenticate with each platform once. This opens a browser window for you to log in:
+```bash
+# Login to Instagram
+python post.py login instagram
+# Login to TikTok
+python post.py login tiktok
+# Login to YouTube
+python post.py login youtube
+```
+Sessions are saved in `poster/auth/storage/` — you won't need to log in again.
+## Usage
+### Post Videos
+Post all videos from a data folder:
+```bash
+# Post to all platforms (default)
+python post.py post /path/to/VideoVoice/data/Dxxxxxxxxx
+# Post to specific platforms only
+python post.py post /path/to/data/Folder1 -p instagram,tiktok
+# Post multiple folders
+python post.py post Folder1 Folder2 Folder3
+# Dry run (generate captions but don't post)
+python post.py post Folder1 --dry-run
+# Force re-post even if already posted
+python post.py post Folder1 --force
+# Override language (e.g., if auto-detection is wrong)
+python post.py post Folder1 --lang-override "Urdu"
+# Customize delay between posts
+python post.py post Folder1 --delay 60
+# Run with visible browser (for debugging)
+python post.py post Folder1 --no-headless
+```
+### Preview Captions
+Generate and preview captions without posting:
+```bash
+# Preview captions for all platforms
+python post.py caption /path/to/data/Folder1
+# Preview for specific platforms
+python post.py caption Folder1 -p youtube
+# Preview multiple folders
+python post.py caption Folder1 Folder2 Folder3
+```
+### Check Posting History
+```bash
+python post.py status
+```
+Shows a table of all posted videos with timestamps and status.
+## Command Reference
+| Command | Description |
+|---------|-------------|
+| `python post.py login <platform>` | Authenticate with a platform |
+| `python post.py post <folders...>` | Post videos to social media |
+| `python post.py caption <folders...>` | Preview generated captions |
+| `python post.py status` | View posting history |
+### Post Options
+| Option | Description |
+|--------|-------------|
+| `-p, --platforms` | Comma-separated platforms (default: instagram,tiktok,youtube) |
+| `--force` | Re-post even if already posted |
+| `--dry-run` | Generate captions but don't post |
+| `--delay <seconds>` | Seconds between posts |
+| `--headless / --no-headless` | Run browser headlessly |
+| `--lang-override <name>` | Override target language (e.g., "Urdu") |
+## How It Works
+1. **Loads videos** from VideoVoice data folders
+2. **Extracts creator info** from the original video link
+3. **Generates captions** using AI (Pollinations LLM)
+4. **Posts to each platform** with platform-optimized formatting
+5. **Tracks posts** in `post_history.json`
+## File Structure
+```
+social_distributor/
+├── post.py              # CLI entry point
+├── poster/
+│   ├── auth/
+│   │   ├── session.py   # Browser session management
+│   │   └── storage/     # Saved session files
+│   ├── platforms/
+│   │   ├── base.py      # Base poster class
+│   │   ├── instagram.py # Instagram automation
+│   │   ├── tiktok.py    # TikTok automation
+│   │   └── youtube.py   # YouTube automation
+│   ├── caption_gen.py   # AI caption generation
+│   ├── creator_extract.py # Creator handle extraction
+│   ├── video_loader.py  # Video metadata loading
+│   ├── post_log.py      # Post history tracking
+│   ├── config.py        # Configuration & constants
+│   └── models.py        # Data models
+├── .env                 # Your environment config
+└── post_history.json    # Auto-generated post log
+```
+## Troubleshooting
+**Login fails / session expires:**
+```bash
+# Re-login to the platform
+python post.py login instagram
+```
+**Caption generation fails:**
+- Check your `POLLEN_API_KEY` in `.env`
+- Or configure AWS Bedrock credentials as fallback
+**Post fails on specific platform:**
+- Use `--no-headless` to see the browser and debug
+- Check `post_history.json` for error messages
+- Platforms may require re-authentication periodically
+**Videos not found:**
+- Ensure `VIDEOVOICE_DATA_DIR` points to your VideoVoice `data/` folder
+- Folder names should match VideoVoice video IDs (e.g., `Dxxxxxxxxx`)
+## Notes
+- Instagram and TikTok use browser automation (Playwright)
+- YouTube posts via web upload (requires logged-in session)
+- First login for each platform opens a real browser window
+- Headless mode runs faster but hides the browser (use `--no-headless` to debug)

social_distributor/post.py ADDED Viewed

	@@ -0,0 +1,311 @@

+#!/usr/bin/env python3
+"""VideoVoice Social Media Poster — CLI entrypoint."""
+from __future__ import annotations
+import asyncio
+import time
+import click
+from rich.console import Console
+from rich.table import Table
+console = Console()
+ALL_PLATFORMS = ["instagram", "tiktok", "youtube"]
+@click.group()
+def cli():
+    """VideoVoice Social Media Poster — post dubbed videos to Instagram, TikTok, and YouTube."""
+    pass
+# ── Login command ────────────────────────────────────────────────────────
+@cli.command()
+@click.argument("platform", type=click.Choice(ALL_PLATFORMS))
+def login(platform: str):
+    """Interactively log in to a platform (opens a browser window)."""
+    from poster.auth.session import interactive_login
+    asyncio.run(interactive_login(platform))
+# ── Caption preview command ──────────────────────────────────────────────
+@cli.command()
+@click.argument("folders", nargs=-1, required=True)
+@click.option("--platforms", "-p", default="instagram,tiktok,youtube", help="Comma-separated platforms")
+@click.option("--lang-override", default=None, help="Override target language name (e.g. 'Urdu')")
+def caption(folders: tuple[str, ...], platforms: str, lang_override: str | None):
+    """Preview generated captions without posting."""
+    from poster.caption_gen import format_caption, generate_caption
+    from poster.video_loader import load_videos
+    target_platforms = [p.strip() for p in platforms.split(",")]
+    videos = load_videos(list(folders), lang_override)
+    if not videos:
+        console.print("[red]No valid videos found.[/red]")
+        return
+    for video in videos:
+        console.print(f"\n[bold]{'=' * 60}[/bold]")
+        console.print(f"[bold]Video:[/bold] {video.video_id}")
+        console.print(f"[bold]Source:[/bold] {video.source_language} -> {video.target_language_name}")
+        console.print(f"[bold]Link:[/bold] {video.video_link or 'N/A'}")
+        for platform in target_platforms:
+            console.print(f"\n[cyan]--- {platform.upper()} ---[/cyan]")
+            try:
+                caption_data = generate_caption(video, platform)
+                result = format_caption(caption_data, video, platform)
+                if platform == "youtube":
+                    title, desc = result
+                    console.print(f"[bold]Title:[/bold] {title}")
+                    console.print(f"[bold]Description:[/bold]\n{desc}")
+                else:
+                    console.print(f"[bold]Caption:[/bold]\n{result}")
+            except Exception as e:
+                console.print(f"[red]Caption generation failed: {e}[/red]")
+# ── Post command ─────────────────────────────────────────────────────────
+@cli.command()
+@click.argument("folders", nargs=-1, required=True)
+@click.option("--platforms", "-p", default="instagram,tiktok,youtube", help="Comma-separated platforms")
+@click.option("--force", is_flag=True, help="Re-post even if already posted")
+@click.option("--dry-run", is_flag=True, help="Generate captions but don't post")
+@click.option("--delay", default=None, type=int, help="Seconds between posts (default: from env)")
+@click.option("--headless/--no-headless", default=None, help="Run browser headlessly")
+@click.option("--lang-override", default=None, help="Override target language name (e.g. 'Urdu')")
+def post(
+    folders: tuple[str, ...],
+    platforms: str,
+    force: bool,
+    dry_run: bool,
+    delay: int | None,
+    headless: bool | None,
+    lang_override: str | None,
+):
+    """Post dubbed videos to social media platforms."""
+    asyncio.run(
+        _post_async(list(folders), platforms, force, dry_run, delay, headless, lang_override)
+    )
+async def _post_async(
+    folders: list[str],
+    platforms_str: str,
+    force: bool,
+    dry_run: bool,
+    delay: int | None,
+    headless: bool | None,
+    lang_override: str | None,
+):
+    from playwright.async_api import async_playwright
+    from poster import post_log
+    from poster.auth.session import get_context, has_session
+    from poster.caption_gen import format_caption, generate_caption
+    from poster.config import POST_DELAY
+    from poster.creator_extract import extract_creator
+    from poster.models import PostResult
+    from poster.platforms.instagram import InstagramPoster
+    from poster.platforms.tiktok import TikTokPoster
+    from poster.platforms.youtube import YouTubePoster
+    from poster.video_loader import load_videos
+    target_platforms = [p.strip() for p in platforms_str.split(",")]
+    post_delay = delay if delay is not None else POST_DELAY
+    # Validate sessions exist
+    for platform in target_platforms:
+        if not has_session(platform):
+            console.print(
+                f"[red]No session for {platform}. "
+                f"Run: python post.py login {platform}[/red]"
+            )
+            return
+    # Load videos
+    videos = load_videos(folders, lang_override)
+    if not videos:
+        console.print("[red]No valid videos found.[/red]")
+        return
+    console.print(f"\n[bold]Posting {len(videos)} video(s) to {', '.join(target_platforms)}[/bold]")
+    if dry_run:
+        console.print("[yellow]DRY RUN — captions will be generated but nothing will be posted[/yellow]")
+    results: list[PostResult] = []
+    async with async_playwright() as pw:
+        # Create browser contexts for each platform
+        contexts = {}
+        posters = {}
+        poster_classes = {
+            "instagram": InstagramPoster,
+            "tiktok": TikTokPoster,
+            "youtube": YouTubePoster,
+        }
+        for platform in target_platforms:
+            ctx = await get_context(pw, platform, headless=headless)
+            contexts[platform] = ctx
+            posters[platform] = poster_classes[platform](ctx)
+        # Use first available context for creator extraction
+        extract_ctx = next(iter(contexts.values()))
+        for i, video in enumerate(videos):
+            console.print(f"\n[bold]{'=' * 60}[/bold]")
+            console.print(f"[bold]Video {i + 1}/{len(videos)}:[/bold] {video.video_id}")
+            # Extract creator handle
+            creator_handle = await extract_creator(video.video_link, extract_ctx)
+            for platform in target_platforms:
+                console.print(f"\n[cyan]--- {platform.upper()} ---[/cyan]")
+                # Check if already posted
+                if not force and post_log.is_posted(video.video_id, platform):
+                    console.print(f"[yellow]Already posted — skipping (use --force to re-post)[/yellow]")
+                    results.append(PostResult(
+                        video_id=video.video_id,
+                        platform=platform,
+                        status="skipped",
+                        timestamp="",
+                    ))
+                    continue
+                # Generate caption
+                try:
+                    caption_data = generate_caption(video, platform, creator_handle)
+                    formatted = format_caption(caption_data, video, platform, creator_handle)
+                except Exception as e:
+                    console.print(f"[red]Caption generation failed: {e}[/red]")
+                    continue
+                if platform == "youtube":
+                    title, description = formatted
+                    console.print(f"[dim]Title: {title}[/dim]")
+                    console.print(f"[dim]Description: {description[:150]}...[/dim]")
+                else:
+                    description = formatted
+                    title = None
+                    console.print(f"[dim]Caption: {description[:150]}...[/dim]")
+                if dry_run:
+                    console.print("[yellow]DRY RUN — skipping actual post[/yellow]")
+                    continue
+                # Post
+                poster = posters[platform]
+                if platform == "youtube":
+                    result = await poster.post(
+                        video.output_path, description,
+                        video_id=video.video_id, title=title,
+                    )
+                else:
+                    result = await poster.post(
+                        video.output_path, description,
+                        video_id=video.video_id,
+                    )
+                result.caption_used = description if isinstance(description, str) else str(description)
+                results.append(result)
+                post_log.record(result)
+                if result.status == "success":
+                    console.print(f"[green]Posted to {platform}![/green]")
+                else:
+                    console.print(f"[red]Failed: {result.error}[/red]")
+                # Delay between posts
+                if post_delay > 0:
+                    console.print(f"[dim]Waiting {post_delay}s before next post...[/dim]")
+                    await asyncio.sleep(post_delay)
+        # Close all browser contexts
+        for ctx in contexts.values():
+            await ctx.browser.close()
+    # Print summary
+    _print_summary(results)
+def _print_summary(results: list):
+    if not results:
+        return
+    table = Table(title="Posting Summary")
+    table.add_column("Video", style="bold")
+    table.add_column("Platform")
+    table.add_column("Status")
+    table.add_column("Error")
+    for r in results:
+        status_style = {
+            "success": "green",
+            "failed": "red",
+            "skipped": "yellow",
+        }.get(r.status, "white")
+        table.add_row(
+            r.video_id,
+            r.platform,
+            f"[{status_style}]{r.status}[/{status_style}]",
+            r.error or "",
+        )
+    console.print()
+    console.print(table)
+# ── Status command ───────────────────────────────────────────────────────
+@cli.command()
+def status():
+    """Show posting history."""
+    from poster import post_log
+    data = post_log.get_all()
+    if not data:
+        console.print("[yellow]No posting history yet.[/yellow]")
+        return
+    table = Table(title="Posting History")
+    table.add_column("Video ID", style="bold")
+    table.add_column("Platform")
+    table.add_column("Status")
+    table.add_column("Timestamp")
+    table.add_column("Error")
+    for video_id, platforms in data.items():
+        for platform, info in platforms.items():
+            status_style = {
+                "success": "green",
+                "failed": "red",
+            }.get(info.get("status", ""), "white")
+            table.add_row(
+                video_id,
+                platform,
+                f"[{status_style}]{info.get('status', 'unknown')}[/{status_style}]",
+                info.get("timestamp", "")[:19],
+                info.get("error", "") or "",
+            )
+    console.print(table)
+if __name__ == "__main__":  # run the click command group only when executed as a script
+    cli()

social_distributor/poster/__init__.py ADDED Viewed

File without changes

social_distributor/poster/auth/__init__.py ADDED Viewed

File without changes

social_distributor/poster/auth/session.py ADDED Viewed

	@@ -0,0 +1,111 @@

+"""Browser session management — persistent login via Playwright storage state."""
+from __future__ import annotations
+from pathlib import Path
+from playwright.async_api import BrowserContext, Playwright, async_playwright
+from rich.console import Console
+from ..config import AUTH_STORAGE_DIR, HEADLESS
+console = Console()
+PLATFORM_LOGIN_URLS = {  # page that interactive_login() opens for each supported platform
+    "instagram": "https://www.instagram.com/accounts/login/",
+    "tiktok": "https://www.tiktok.com/login",
+    "youtube": "https://studio.youtube.com/",
+}
+# Mobile UA for Instagram (required for mobile web Reels upload)
+MOBILE_USER_AGENT = (  # paired with a 414x896 touch-enabled mobile viewport when platform == "instagram"
+    "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) "
+    "AppleWebKit/605.1.15 (KHTML, like Gecko) "
+    "Version/17.0 Mobile/15E148 Safari/604.1"
+)
+DESKTOP_USER_AGENT = (  # used with a 1280x800 viewport for every platform other than Instagram
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
+    "AppleWebKit/537.36 (KHTML, like Gecko) "
+    "Chrome/120.0.0.0 Safari/537.36"
+)
+def _state_path(platform: str) -> Path:
+    AUTH_STORAGE_DIR.mkdir(parents=True, exist_ok=True)
+    return AUTH_STORAGE_DIR / f"{platform}_state.json"
+def has_session(platform: str) -> bool:
+    return _state_path(platform).exists()
+async def interactive_login(platform: str) -> None:
+    """Launch a headed browser for the user to log in manually.
+    After login, saves the browser storage state for future use.
+    """
+    login_url = PLATFORM_LOGIN_URLS.get(platform)
+    if not login_url:
+        console.print(f"[red]Unknown platform: {platform}[/red]")
+        return
+    console.print(f"\n[bold]Opening {platform.title()} login page...[/bold]")
+    console.print("[yellow]Please log in manually in the browser window.[/yellow]")
+    console.print("[yellow]Press Enter here when you're done logging in.[/yellow]\n")
+    use_mobile = platform == "instagram"
+    async with async_playwright() as pw:
+        browser = await pw.chromium.launch(headless=False)
+        context = await browser.new_context(
+            user_agent=MOBILE_USER_AGENT if use_mobile else DESKTOP_USER_AGENT,
+            viewport={"width": 414, "height": 896} if use_mobile else {"width": 1280, "height": 800},
+            is_mobile=use_mobile,
+            has_touch=use_mobile,
+        )
+        page = await context.new_page()
+        await page.goto(login_url, wait_until="domcontentloaded")
+        # Wait for user to finish logging in
+        input(">>> Press Enter after you've logged in... ")
+        # Save state
+        state_file = _state_path(platform)
+        await context.storage_state(path=str(state_file))
+        console.print(f"[green]Session saved for {platform.title()}![/green]")
+        await browser.close()
+async def get_context(
+    pw: Playwright,
+    platform: str,
+    headless: bool | None = None,
+) -> BrowserContext:
+    """Get a browser context with saved session state.
+    Raises FileNotFoundError if no session exists — user must run login first.
+    """
+    state_file = _state_path(platform)
+    if not state_file.exists():
+        raise FileNotFoundError(
+            f"No saved session for {platform}. Run: python post.py login {platform}"
+        )
+    if headless is None:
+        headless = HEADLESS
+    use_mobile = platform == "instagram"
+    browser = await pw.chromium.launch(headless=headless)
+    context = await browser.new_context(
+        storage_state=str(state_file),
+        user_agent=MOBILE_USER_AGENT if use_mobile else DESKTOP_USER_AGENT,
+        viewport={"width": 414, "height": 896} if use_mobile else {"width": 1280, "height": 800},
+        is_mobile=use_mobile,
+        has_touch=use_mobile,
+    )
+    return context

social_distributor/poster/caption_gen.py ADDED Viewed

	@@ -0,0 +1,164 @@

+"""LLM-based caption generation for social media posts."""
+from __future__ import annotations
+import json
+from rich.console import Console
+from .config import (
+    INSTAGRAM_CAPTION_LIMIT,
+    POLLEN_MODEL,
+    TIKTOK_CAPTION_LIMIT,
+    YOUTUBE_DESCRIPTION_LIMIT,
+    YOUTUBE_TITLE_LIMIT,
+    bedrock_converse,
+    build_pollinations_client,
+)
+from .models import VideoData
+console = Console()
+PLATFORM_LIMITS = {  # per-platform character budget quoted to the LLM in the system prompt
+    "instagram": INSTAGRAM_CAPTION_LIMIT,
+    "tiktok": TIKTOK_CAPTION_LIMIT,
+    "youtube": YOUTUBE_DESCRIPTION_LIMIT,
+}
+PLATFORM_HASHTAGS = {  # platform-specific hashtags appended to the hashtag rule in the system prompt
+    "instagram": "#Reels #ReelsViral #ExplorePage",
+    "tiktok": "#fyp #foryou #foryoupage",
+    "youtube": "#Shorts #YouTubeShorts",
+}
+def _build_system_prompt(platform: str) -> str:
+    """Build the system prompt that tells the LLM how to write the caption.
+    Args:
+        platform: Platform name. It is looked up in PLATFORM_LIMITS and
+            PLATFORM_HASHTAGS and title-cased for display inside the prompt.
+    Returns:
+        The prompt text. For "youtube" the prompt asks for a JSON object with
+        `title` and `description`; for every other platform it asks for a
+        JSON object with a single `caption` field.
+    """
+    # Platforms missing from PLATFORM_LIMITS fall back to 2200 characters.
+    char_limit = PLATFORM_LIMITS.get(platform, 2200)
+    is_youtube = platform == "youtube"
+    return f"""You are a social media caption writer for VideoVoice, an AI voice-cloning video dubbing tool.
+Your job: write a catchy, engaging caption for a dubbed video posted on {platform.title()}.
+VideoVoice's key differentiator: platform tools give you an option (subtitle overlay). We give you a BRAND NEW video with cloned voice — same speaker, new language. Background music preserved. 23+ languages. "2x Reach, Same Effort."
+Rules:
+1. Highlight the magic of hearing this content in the target language with the SAME voice (AI voice cloning, not just subtitles)
+2. Be conversational, create curiosity, make people want to watch
+3. ALWAYS include the original video link to credit the original creator
+4. If a creator handle is provided, tag them with @
+5. Stay within {char_limit} characters total
+6. Include relevant hashtags: #VideoVoice #AIDubbing #VoiceCloning + language-specific + {PLATFORM_HASHTAGS.get(platform, "")}
+7. Write the caption primarily in English
+{"Return a JSON object with two fields: `title` (under " + str(YOUTUBE_TITLE_LIMIT) + " chars, punchy) and `description` (the full caption)." if is_youtube else "Return a JSON object with one field: `caption` (the full caption text)."}
+Example tone: "What's more interesting than hearing the power of English motivation but in the magic of Turkish words? Same voice. Same energy. New language."
+IMPORTANT: Return ONLY valid JSON, no markdown fences."""
+def _build_user_prompt(video: VideoData, creator_handle: str | None) -> str:
+    # Truncate original text to avoid token limits
+    original_excerpt = video.original_text[:500]
+    translated_excerpt = video.translated_text[:300]
+    parts = [
+        f"Source language: {video.source_language}",
+        f"Target language: {video.target_language_name} ({video.target_language_code})",
+        f"Original transcript (excerpt): {original_excerpt}",
+        f"Translated text (excerpt): {translated_excerpt}",
+    ]
+    if video.video_link:
+        parts.append(f"Original video link: {video.video_link}")
+    if creator_handle:
+        parts.append(f"Original creator: @{creator_handle}")
+    return "\n".join(parts)
+def _parse_response(raw: str, platform: str) -> dict:
+    """Parse the LLM JSON response, with fallback for markdown fences."""
+    raw = raw.strip()
+    # Strip markdown code fences if present
+    if raw.startswith("```"):
+        lines = raw.split("\n")
+        lines = [l for l in lines if not l.strip().startswith("```")]
+        raw = "\n".join(lines)
+    try:
+        return json.loads(raw)
+    except json.JSONDecodeError:
+        # If JSON parsing fails, treat the whole thing as a caption
+        if platform == "youtube":
+            return {"title": "Dubbed with AI Voice Cloning", "description": raw}
+        return {"caption": raw}
+def generate_caption(
+    video: VideoData,
+    platform: str,
+    creator_handle: str | None = None,
+) -> dict:
+    """Generate a caption using Pollinations LLM, with Bedrock fallback.
+    Returns dict with 'caption' key (or 'title' + 'description' for YouTube).
+    """
+    system_prompt = _build_system_prompt(platform)
+    user_prompt = _build_user_prompt(video, creator_handle)
+    # Primary: Pollinations
+    try:
+        client = build_pollinations_client()
+        response = client.chat.completions.create(
+            model=POLLEN_MODEL,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt},
+            ],
+            temperature=0.7,
+        )
+        raw = response.choices[0].message.content
+        console.print(f"[green]Caption generated via Pollinations[/green] ({platform})")
+        return _parse_response(raw, platform)
+    except Exception as e:
+        console.print(f"[yellow]Pollinations failed: {e}. Trying Bedrock...[/yellow]")
+    # Fallback: AWS Bedrock
+    try:
+        raw = bedrock_converse(system_prompt, user_prompt, temperature=0.7)
+        console.print(f"[green]Caption generated via Bedrock[/green] ({platform})")
+        return _parse_response(raw, platform)
+    except Exception as e:
+        console.print(f"[red]Bedrock also failed: {e}[/red]")
+        raise RuntimeError(f"Caption generation failed for {video.video_id} on {platform}") from e
+def format_caption(
+    caption_data: dict,
+    video: VideoData,
+    platform: str,
+    creator_handle: str | None = None,
+) -> str | tuple[str, str]:
+    """Ensure the final caption always contains the original link and creator credit.
+    Returns a string for Instagram/TikTok, or (title, description) tuple for YouTube.
+    """
+    if platform == "youtube":
+        title = caption_data.get("title", "AI Voice Dubbed")
+        desc = caption_data.get("description", caption_data.get("caption", ""))
+    else:
+        desc = caption_data.get("caption", "")
+    # Ensure original link is present
+    if video.video_link and video.video_link not in desc:
+        desc += f"\n\nOriginal: {video.video_link}"
+    # Ensure creator tag is present
+    if creator_handle and f"@{creator_handle}" not in desc:
+        desc += f"\nCredit: @{creator_handle}"
+    if platform == "youtube":
+        return title, desc
+    return desc

social_distributor/poster/config.py ADDED Viewed

	@@ -0,0 +1,88 @@

+"""Central configuration — env loading, constants, language maps."""
+import os
+from pathlib import Path
+from dotenv import load_dotenv
+from openai import OpenAI
+# Load environment variables from the ".env" file in the directory that
+# contains this module's directory. This runs at import time, before the
+# os.getenv() calls below.
+load_dotenv(Path(__file__).resolve().parent.parent / ".env")
+# ── Paths ────────────────────────────────────────────────────────────────
+# Directory that contains this module's directory; the other paths derive from it.
+POSTER_ROOT = Path(__file__).resolve().parent.parent
+# Overridable through VIDEOVOICE_DATA_DIR; defaults to a "data" directory
+# that sits next to POSTER_ROOT.
+VIDEOVOICE_DATA_DIR = Path(
+    os.getenv("VIDEOVOICE_DATA_DIR", str(POSTER_ROOT.parent / "data"))
+)
+AUTH_STORAGE_DIR = POSTER_ROOT / "poster" / "auth" / "storage"
+POST_LOG_PATH = POSTER_ROOT / "post_history.json"
+CREATOR_CACHE_PATH = POSTER_ROOT / "creator_cache.json"
+# ── Pollinations LLM (primary) ───────────────────────────────────────────
+# Base URL handed to the OpenAI client in build_pollinations_client().
+POLLINATIONS_BASE = "https://gen.pollinations.ai/v1"
+# Model name; overridable through the POLLEN_MODEL environment variable.
+POLLEN_MODEL = os.getenv("POLLEN_MODEL", "gemini-search")
+def build_pollinations_client() -> OpenAI:
+    api_key = (
+        os.getenv("POLLEN_API_KEY_SECONDARY")
+        or os.getenv("POLLEN_API_KEY")
+        or os.getenv("POLLINATIONS_API_KEY")
+        or "pollinations"
+    )
+    return OpenAI(base_url=POLLINATIONS_BASE, api_key=api_key)
+# ── Bedrock fallback ─────────────────────────────────────────────────────
+# Region and model ID used by bedrock_converse(); both can be overridden
+# through the AWS_REGION and BEDROCK_MODEL environment variables.
+BEDROCK_REGION = os.getenv("AWS_REGION", "us-east-1")
+BEDROCK_MODEL = os.getenv("BEDROCK_MODEL", "qwen.qwen3-next-80b-a3b")
+def bedrock_converse(system_prompt: str, user_text: str, temperature: float = 0.3) -> str:
+    import boto3
+    client = boto3.client("bedrock-runtime", region_name=BEDROCK_REGION)
+    response = client.converse(
+        modelId=BEDROCK_MODEL,
+        messages=[{"role": "user", "content": [{"text": user_text}]}],
+        system=[{"text": system_prompt}],
+        inferenceConfig={"temperature": temperature},
+    )
+    return response["output"]["message"]["content"][0]["text"].strip()
+# ── Language code → name (reversed from pipeline.py LANGUAGE_CODES) ──────
+# Maps two-letter language codes to English language names.
+# NOTE(review): pipeline.py is not visible from here, so this table has to be
+# kept in sync with LANGUAGE_CODES by hand — confirm when adding a language.
+LANGUAGE_CODE_TO_NAME: dict[str, str] = {
+    "ar": "Arabic",
+    "zh": "Chinese",
+    "da": "Danish",
+    "nl": "Dutch",
+    "en": "English",
+    "fi": "Finnish",
+    "fr": "French",
+    "de": "German",
+    "el": "Greek",
+    "he": "Hebrew",
+    "hi": "Hindi",
+    "it": "Italian",
+    "ja": "Japanese",
+    "ko": "Korean",
+    "ms": "Malay",
+    "no": "Norwegian",
+    "pl": "Polish",
+    "pt": "Portuguese",
+    "ru": "Russian",
+    "es": "Spanish",
+    "sw": "Swahili",
+    "sv": "Swedish",
+    "tr": "Turkish",
+}
+# ── Platform caption limits ──────────────────────────────────────────────
+# Character limits; the caption generator quotes them in its LLM prompt.
+INSTAGRAM_CAPTION_LIMIT = 2200
+TIKTOK_CAPTION_LIMIT = 4000
+YOUTUBE_TITLE_LIMIT = 100
+YOUTUBE_DESCRIPTION_LIMIT = 5000
+# ── Posting settings ─────────────────────────────────────────────────────
+# Parsed with int() at import time, so a non-numeric POST_DELAY value raises
+# ValueError on import. Presumably seconds between posts — TODO confirm
+# against the caller.
+POST_DELAY = int(os.getenv("POST_DELAY", "30"))
+# True only when HEADLESS equals "true" (case-insensitive); any other value,
+# including "1" or "yes", turns headless mode off.
+HEADLESS = os.getenv("HEADLESS", "true").lower() == "true"

social_distributor/poster/creator_extract.py ADDED Viewed

	@@ -0,0 +1,149 @@

+"""Extract original creator @username from video URLs."""
+from __future__ import annotations
+import json
+import re
+from rich.console import Console
+from .config import CREATOR_CACHE_PATH
+console = Console()
+def _load_cache() -> dict[str, str]:
+    if CREATOR_CACHE_PATH.exists():
+        with open(CREATOR_CACHE_PATH) as f:
+            return json.load(f)
+    return {}
+def _save_cache(cache: dict[str, str]) -> None:
+    with open(CREATOR_CACHE_PATH, "w") as f:
+        json.dump(cache, f, indent=2)
+async def extract_creator(video_link: str | None, browser_context=None) -> str | None:
+    """Extract the @username of the original creator from the video URL.
+    Uses Playwright browser context to visit the page and extract metadata.
+    Results are cached to avoid repeated page visits.
+    """
+    if not video_link:
+        return None
+    cache = _load_cache()
+    if video_link in cache:
+        return cache[video_link]
+    username = None
+    try:
+        if "instagram.com" in video_link:
+            username = await _extract_instagram(video_link, browser_context)
+        elif "tiktok.com" in video_link:
+            username = await _extract_tiktok(video_link, browser_context)
+        elif "youtube.com" in video_link or "youtu.be" in video_link:
+            username = await _extract_youtube(video_link, browser_context)
+    except Exception as e:
+        console.print(f"[yellow]Creator extraction failed: {e}[/yellow]")
+    if username:
+        # Clean up username
+        username = username.strip().lstrip("@")
+        cache[video_link] = username
+        _save_cache(cache)
+        console.print(f"[green]Creator found:[/green] @{username}")
+    return username
+async def _extract_instagram(url: str, ctx) -> str | None:
+    """Instagram: visit reel, extract username from og:title or page URL.
+    Args:
+        url: Link to the Instagram video.
+        ctx: Playwright browser context used to open the page; when falsy the
+            function returns None without visiting anything.
+    Returns:
+        The extracted name, or None when neither source yields one.
+    """
+    if not ctx:
+        return None
+    page = await ctx.new_page()
+    try:
+        await page.goto(url, wait_until="domcontentloaded", timeout=15000)
+        # Fixed 2 s pause after DOMContentLoaded before reading the page.
+        await page.wait_for_timeout(2000)
+        # Try og:title meta tag: "Username on Instagram: ..."
+        og_title = await page.query_selector('meta[property="og:title"]')
+        if og_title:
+            content = await og_title.get_attribute("content")
+            if content:
+                # Captures the leading run of word characters and dots (at
+                # least two characters), skipping an optional "@".
+                # NOTE(review): for "Display Name on Instagram" this yields
+                # only the first word of the display name — confirm it is the
+                # account handle.
+                match = re.match(r"^@?(\w[\w.]+)", content)
+                if match:
+                    return match.group(1)
+        # Try the final URL which may contain /username/reel/ID
+        final_url = page.url
+        match = re.search(r"instagram\.com/([^/]+)/reel", final_url)
+        if match:
+            return match.group(1)
+    finally:
+        # Always close the tab, including on early return or error.
+        await page.close()
+    return None
+async def _extract_tiktok(url: str, ctx) -> str | None:
+    """TikTok: follow redirect from short URL, parse /@username from final URL.
+    Args:
+        url: Link to the TikTok video (short or full form).
+        ctx: Playwright browser context used to open the page; when falsy the
+            function returns None without visiting anything.
+    Returns:
+        The username without the "@", or None when it cannot be found.
+    """
+    if not ctx:
+        return None
+    page = await ctx.new_page()
+    try:
+        await page.goto(url, wait_until="domcontentloaded", timeout=15000)
+        # Fixed 2 s pause after DOMContentLoaded before reading the page.
+        await page.wait_for_timeout(2000)
+        final_url = page.url
+        # Takes everything after "/@" up to the next "/".
+        # NOTE(review): if the final URL ends right after the username, a
+        # query string would be captured too — confirm against real URLs.
+        match = re.search(r"/@([^/]+)", final_url)
+        if match:
+            return match.group(1)
+        # Fallback: check meta tags
+        og_title = await page.query_selector('meta[property="og:title"]')
+        if og_title:
+            content = await og_title.get_attribute("content")
+            if content:
+                match = re.search(r"@(\w[\w.]+)", content)
+                if match:
+                    return match.group(1)
+    finally:
+        # Always close the tab, including on early return or error.
+        await page.close()
+    return None
+async def _extract_youtube(url: str, ctx) -> str | None:
+    """YouTube: visit video page, extract channel name from meta tags.
+    Args:
+        url: Link to the YouTube video.
+        ctx: Playwright browser context used to open the page; when falsy the
+            function returns None without visiting anything.
+    Returns:
+        The channel name, or None when it cannot be found.
+        NOTE(review): this is read from name/title metadata, so it is
+        presumably the channel's display name rather than its @handle —
+        confirm before using it in an "@" mention.
+    """
+    if not ctx:
+        return None
+    page = await ctx.new_page()
+    try:
+        await page.goto(url, wait_until="domcontentloaded", timeout=15000)
+        # Fixed 2 s pause after DOMContentLoaded before reading the page.
+        await page.wait_for_timeout(2000)
+        # Try link[itemprop="name"] inside the channel section
+        author = await page.query_selector('link[itemprop="name"]')
+        if author:
+            name = await author.get_attribute("content")
+            if name:
+                return name
+        # Fallback: og:title often has "Video Title - Channel Name"
+        og_title = await page.query_selector('meta[property="og:title"]')
+        if og_title:
+            content = await og_title.get_attribute("content")
+            # Only the text after the last " - " is returned.
+            if content and " - " in content:
+                return content.rsplit(" - ", 1)[-1].strip()
+    finally:
+        # Always close the tab, including on early return or error.
+        await page.close()
+    return None

social_distributor/poster/models.py ADDED Viewed

	@@ -0,0 +1,29 @@

+"""Data models for the poster pipeline."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+@dataclass
+class VideoData:
+    """One dubbed video plus the text and language metadata used for captions."""
+    video_id: str
+    # Presumably the filesystem path of the dubbed video file — TODO confirm.
+    output_path: str
+    # URL of the original video; None when no link is known.
+    video_link: str | None
+    source_language: str
+    target_language_code: str
+    target_language_name: str
+    # Full transcript and translation; the caption prompt only uses excerpts.
+    original_text: str
+    translated_text: str
+    platform_type: str | None  # "instagram" | "tiktok" | "youtube" | None
+@dataclass
+class PostResult:
+    """Outcome of one attempt to post a video to one platform."""
+    video_id: str
+    platform: str
+    status: str  # "success" | "failed" | "skipped"
+    # BasePoster._make_result fills this with a UTC ISO-8601 timestamp.
+    timestamp: str
+    caption_used: str = ""
+    # Failure description; None when there is nothing to report.
+    error: str | None = None
+    url: str | None = None

social_distributor/poster/platforms/__init__.py ADDED Viewed

File without changes

social_distributor/poster/platforms/base.py ADDED Viewed

	@@ -0,0 +1,57 @@

+"""Abstract base class for platform posters."""
+from __future__ import annotations
+import asyncio
+import random
+from abc import ABC, abstractmethod
+from datetime import datetime, timezone
+from playwright.async_api import BrowserContext, Page
+from ..models import PostResult
+class BasePoster(ABC):
+    platform: str = ""
+    def __init__(self, context: BrowserContext):
+        self.context = context
+    @abstractmethod
+    async def post(self, video_path: str, caption: str, **kwargs) -> PostResult:
+        ...
+    @abstractmethod
+    async def is_logged_in(self) -> bool:
+        ...
+    async def _human_delay(self, min_s: float = 1.0, max_s: float = 3.0) -> None:
+        await asyncio.sleep(random.uniform(min_s, max_s))
+    async def _screenshot_on_error(self, page: Page, video_id: str) -> None:
+        """Save a debug screenshot on failure."""
+        from ..config import POSTER_ROOT
+        errors_dir = POSTER_ROOT / "errors"
+        errors_dir.mkdir(exist_ok=True)
+        ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
+        path = errors_dir / f"{self.platform}_{video_id}_{ts}.png"
+        await page.screenshot(path=str(path))
+    def _make_result(
+        self,
+        video_id: str,
+        status: str,
+        caption: str = "",
+        error: str | None = None,
+        url: str | None = None,
+    ) -> PostResult:
+        return PostResult(
+            video_id=video_id,
+            platform=self.platform,
+            status=status,
+            timestamp=datetime.now(timezone.utc).isoformat(),
+            caption_used=caption,
+            error=error,
+            url=url,
+        )

social_distributor/poster/platforms/instagram.py ADDED Viewed

	@@ -0,0 +1,206 @@

+"""Instagram Reel posting via Playwright (mobile web viewport)."""
+from __future__ import annotations
+from rich.console import Console
+from .base import BasePoster
+from ..models import PostResult
+console = Console()
+class InstagramPoster(BasePoster):
+    """Posts a video as an Instagram Reel by driving instagram.com with Playwright."""
+    platform = "instagram"
+    async def is_logged_in(self) -> bool:
+        """Return True when the Instagram home page shows navigation and no login form.
+        Any exception while loading or querying the page counts as not logged in.
+        """
+        page = await self.context.new_page()
+        try:
+            await page.goto("https://www.instagram.com/", wait_until="domcontentloaded", timeout=15000)
+            await page.wait_for_timeout(3000)
+            # A visible username field means the login form is being served.
+            login_form = await page.query_selector('input[name="username"]')
+            if login_form:
+                return False
+            nav = await page.query_selector('nav, div[role="navigation"]')
+            return nav is not None
+        except Exception:
+            return False
+        finally:
+            await page.close()
+    async def _dismiss_popups(self, page) -> None:
+        """Dismiss common Instagram popups (notifications, app switch, cookies).
+        Best effort: each known button is clicked if it is present and
+        visible, and any error for one button is ignored so the rest are
+        still tried.
+        """
+        dismiss_selectors = [
+            'button:has-text("Not Now")',
+            'button:has-text("Cancel")',
+            'button:has-text("Accept All")',
+            'button:has-text("Allow All Cookies")',
+            'button:has-text("Decline")',
+        ]
+        for selector in dismiss_selectors:
+            try:
+                btn = await page.query_selector(selector)
+                if btn and await btn.is_visible():
+                    await btn.click()
+                    await self._human_delay(0.5, 1)
+            except Exception:
+                pass
+    async def post(self, video_path: str, caption: str, **kwargs) -> PostResult:
+        """Upload `video_path` as a Reel with `caption` and report the outcome.
+        Args:
+            video_path: Path of the video file handed to the page's file input.
+            caption: Caption text typed into the caption field.
+            **kwargs: `video_id` (default "unknown") labels the result and
+                any error screenshot.
+        Returns:
+            A PostResult with status "success" or "failed". Failures are
+            returned, not raised: any exception is converted into a failed
+            result carrying the exception text.
+        """
+        video_id = kwargs.get("video_id", "unknown")
+        page = await self.context.new_page()
+        try:
+            console.print(f"[cyan]Instagram:[/cyan] Navigating to Instagram...")
+            await page.goto("https://www.instagram.com/", wait_until="domcontentloaded", timeout=20000)
+            await page.wait_for_timeout(3000)
+            await self._dismiss_popups(page)
+            # The login form appearing means the saved session is no longer valid.
+            login_form = await page.query_selector('input[name="username"]')
+            if login_form:
+                return self._make_result(video_id, "failed", caption, error="Not logged in — session expired")
+            await self._human_delay(1, 2)
+            # Click the create/new post button
+            console.print(f"[cyan]Instagram:[/cyan] Opening create dialog...")
+            # Several selector variants are tried in order; the first match is clicked.
+            create_selectors = [
+                'svg[aria-label="New post"]',
+                'a[href="/create/"]',
+                'div[role="menuitem"] svg[aria-label*="New"]',
+                'a[href="/create/select/"]',
+                '[aria-label="New post"]',
+                'svg[aria-label="New Post"]',
+            ]
+            create_clicked = False
+            for selector in create_selectors:
+                el = await page.query_selector(selector)
+                if el:
+                    await el.click()
+                    create_clicked = True
+                    break
+            # No create button found: open the create page directly instead.
+            if not create_clicked:
+                await page.goto("https://www.instagram.com/create/select/", wait_until="domcontentloaded")
+                await page.wait_for_timeout(2000)
+            await self._human_delay(2, 3)
+            await self._dismiss_popups(page)
+            # ── FIX: Instagram's file input is hidden by default.
+            # Wait for it to be *attached* to the DOM (not visible),
+            # then call set_input_files() which works on hidden inputs.
+            console.print(f"[cyan]Instagram:[/cyan] Uploading video...")
+            try:
+                file_input = await page.wait_for_selector(
+                    'input[type="file"]',
+                    state="attached",   # <-- was default "visible", which timed out
+                    timeout=15000,
+                )
+            except Exception:
+                # Fallback: query directly without waiting
+                file_input = await page.query_selector('input[type="file"]')
+            if not file_input:
+                await self._screenshot_on_error(page, video_id)
+                return self._make_result(video_id, "failed", caption, error="File input not found in DOM")
+            # Unhide the input via JS as a safety measure, then set the file
+            await page.evaluate(
+                """el => {
+                    el.style.display   = 'block';
+                    el.style.opacity   = '1';
+                    el.style.visibility = 'visible';
+                }""",
+                file_input,
+            )
+            await file_input.set_input_files(video_path)
+            await self._human_delay(3, 5)
+            # Instagram may show aspect ratio / crop screen — look for Reel tab
+            reel_tab = await page.query_selector('div:has-text("Reel"), button:has-text("Reel")')
+            if reel_tab:
+                await reel_tab.click()
+                await self._human_delay(1, 2)
+            # Click through editing steps (crop, filters, etc.)
+            # At most three "Next" clicks; stops early once no such button is found.
+            for _ in range(3):
+                next_btn = await page.query_selector(
+                    'button:has-text("Next"), div[role="button"]:has-text("Next")'
+                )
+                if next_btn:
+                    await next_btn.click()
+                    await self._human_delay(2, 3)
+                    await self._dismiss_popups(page)
+                else:
+                    break
+            # Fill in the caption
+            console.print(f"[cyan]Instagram:[/cyan] Adding caption...")
+            caption_selectors = [
+                'textarea[aria-label*="Write a caption"]',
+                'textarea[placeholder*="Write a caption"]',
+                'div[contenteditable="true"][role="textbox"]',
+                'div[aria-label*="Write a caption"]',
+            ]
+            caption_filled = False
+            for selector in caption_selectors:
+                editor = await page.query_selector(selector)
+                if editor:
+                    await editor.click()
+                    await self._human_delay(0.5, 1)
+                    # Typed key by key with a 10 ms delay between keystrokes.
+                    await page.keyboard.type(caption, delay=10)
+                    caption_filled = True
+                    break
+            # A missing caption field is only a warning: the flow continues
+            # and the reel is shared without the caption.
+            if not caption_filled:
+                console.print("[yellow]Instagram: Could not find caption field[/yellow]")
+            await self._human_delay(2, 3)
+            # Click Share
+            console.print(f"[cyan]Instagram:[/cyan] Sharing...")
+            share_btn = await page.query_selector(
+                'button:has-text("Share"), div[role="button"]:has-text("Share")'
+            )
+            if share_btn:
+                await share_btn.click()
+            else:
+                await self._screenshot_on_error(page, video_id)
+                return self._make_result(video_id, "failed", caption, error="Could not find Share button")
+            # Wait for upload to complete
+            console.print(f"[cyan]Instagram:[/cyan] Waiting for upload to complete...")
+            await page.wait_for_timeout(10000)
+            # Check for success
+            try:
+                await page.wait_for_selector(
+                    'div:has-text("Your reel has been shared"), '
+                    'div:has-text("Reel shared"), '
+                    'span:has-text("Your reel has been shared"), '
+                    'img[alt="Animated checkmark"]',
+                    timeout=60000,
+                )
+                console.print(f"[green]Instagram: Reel shared successfully![/green]")
+                return self._make_result(video_id, "success", caption)
+            except Exception:
+                # No confirmation element appeared: fall back to the URL.
+                # NOTE(review): the first comparison is redundant (that URL
+                # contains no "/create"), and any URL without "/create" is
+                # reported as success — confirm this cannot mask a failed share.
+                if page.url == "https://www.instagram.com/" or "/create" not in page.url:
+                    console.print(f"[green]Instagram: Likely posted (redirected to feed)[/green]")
+                    return self._make_result(video_id, "success", caption)
+                await self._screenshot_on_error(page, video_id)
+                return self._make_result(video_id, "failed", caption, error="Share confirmation not detected")
+        except Exception as e:
+            # The screenshot is best effort; its own failure must not hide
+            # the original error.
+            try:
+                await self._screenshot_on_error(page, video_id)
+            except Exception:
+                pass
+            return self._make_result(video_id, "failed", caption, error=str(e))
+        finally:
+            await page.close()

social_distributor/poster/platforms/tiktok.py ADDED Viewed

	@@ -0,0 +1,155 @@

+"""TikTok video posting via Playwright (tiktok.com/upload)."""
+from __future__ import annotations
+from rich.console import Console
+from .base import BasePoster
+from ..models import PostResult
+console = Console()
+class TikTokPoster(BasePoster):
+    platform = "tiktok"
+    async def is_logged_in(self) -> bool:
+        page = await self.context.new_page()
+        try:
+            await page.goto("https://www.tiktok.com/upload", wait_until="domcontentloaded", timeout=15000)
+            await page.wait_for_timeout(3000)
+            # If redirected to login, we're not logged in
+            if "/login" in page.url:
+                return False
+            # Look for upload area
+            upload_area = await page.query_selector('input[type="file"]')
+            return upload_area is not None
+        except Exception:
+            return False
+        finally:
+            await page.close()
+    async def post(self, video_path: str, caption: str, **kwargs) -> PostResult:
+        video_id = kwargs.get("video_id", "unknown")
+        page = await self.context.new_page()
+        try:
+            console.print(f"[cyan]TikTok:[/cyan] Navigating to upload page...")
+            await page.goto("https://www.tiktok.com/upload", wait_until="domcontentloaded", timeout=20000)
+            await page.wait_for_timeout(3000)
+            if "/login" in page.url:
+                return self._make_result(video_id, "failed", caption, error="Not logged in — session expired")
+            # Upload video via file input
+            console.print(f"[cyan]TikTok:[/cyan] Uploading video...")
+            file_input = await page.wait_for_selector('input[type="file"]', timeout=10000)
+            await file_input.set_input_files(video_path)
+            # Wait for video to process (the upload indicator / thumbnail appears)
+            await self._human_delay(3, 5)
+            # Wait for video processing — look for the editor/preview to appear
+            # TikTok shows a video preview once upload is complete
+            try:
+                await page.wait_for_selector(
+                    'div[class*="editor"], div[class*="preview"], div[class*="video-card"]',
+                    timeout=60000,
+                )
+            except Exception:
+                console.print("[yellow]TikTok: Waiting for upload processing...[/yellow]")
+                await page.wait_for_timeout(10000)
+            await self._human_delay(2, 4)
+            # Fill in the caption
+            console.print(f"[cyan]TikTok:[/cyan] Adding caption...")
+            # TikTok uses a contenteditable div for the caption
+            # Try multiple selectors for the caption editor
+            caption_selectors = [
+                'div[contenteditable="true"]',
+                'div[data-placeholder*="caption"]',
+                'div[class*="caption"] div[contenteditable="true"]',
+                '.public-DraftEditor-content',
+            ]
+            caption_editor = None
+            for selector in caption_selectors:
+                caption_editor = await page.query_selector(selector)
+                if caption_editor:
+                    break
+            if caption_editor:
+                await caption_editor.click()
+                await self._human_delay(0.5, 1)
+                # Clear existing text and type new caption
+                await page.keyboard.press("Meta+a")
+                await page.keyboard.press("Backspace")
+                await self._human_delay(0.3, 0.5)
+                await page.keyboard.type(caption, delay=10)
+            else:
+                console.print("[yellow]TikTok: Could not find caption editor[/yellow]")
+            await self._human_delay(2, 3)
+            # Click Post button
+            console.print(f"[cyan]TikTok:[/cyan] Posting...")
+            post_button_selectors = [
+                'button:has-text("Post")',
+                'button[class*="post-button"]',
+                'div[class*="btn-post"] button',
+            ]
+            posted = False
+            for selector in post_button_selectors:
+                btn = await page.query_selector(selector)
+                if btn and await btn.is_enabled():
+                    await btn.click()
+                    posted = True
+                    break
+            if not posted:
+                # Fallback: try pressing the button by text
+                try:
+                    await page.get_by_role("button", name="Post").click()
+                    posted = True
+                except Exception:
+                    pass
+            if not posted:
+                await self._screenshot_on_error(page, video_id)
+                return self._make_result(video_id, "failed", caption, error="Could not find Post button")
+            # Wait for upload to complete
+            console.print(f"[cyan]TikTok:[/cyan] Waiting for upload to complete...")
+            await page.wait_for_timeout(10000)
+            # Check for success indicators
+            success = False
+            try:
+                await page.wait_for_selector(
+                    'div:has-text("uploaded"), div:has-text("Your video"), div[class*="success"]',
+                    timeout=30000,
+                )
+                success = True
+            except Exception:
+                # If URL changed away from upload page, likely success
+                if "/upload" not in page.url:
+                    success = True
+            if success:
+                console.print(f"[green]TikTok: Posted successfully![/green]")
+                return self._make_result(video_id, "success", caption)
+            else:
+                await self._screenshot_on_error(page, video_id)
+                return self._make_result(video_id, "failed", caption, error="Upload may not have completed")
+        except Exception as e:
+            try:
+                await self._screenshot_on_error(page, video_id)
+            except Exception:
+                pass
+            return self._make_result(video_id, "failed", caption, error=str(e))
+        finally:
+            await page.close()

social_distributor/poster/platforms/youtube.py ADDED Viewed

	@@ -0,0 +1,165 @@

+"""YouTube Shorts posting via Playwright (studio.youtube.com)."""
+from __future__ import annotations
+from rich.console import Console
+from .base import BasePoster
+from ..models import PostResult
+console = Console()
+class YouTubePoster(BasePoster):
+    platform = "youtube"
+    async def is_logged_in(self) -> bool:
+        page = await self.context.new_page()
+        try:
+            await page.goto("https://studio.youtube.com/", wait_until="domcontentloaded", timeout=15000)
+            await page.wait_for_timeout(3000)
+            if "accounts.google.com" in page.url:
+                return False
+            # Look for the Create button in YouTube Studio
+            create_btn = await page.query_selector('#create-icon, button[aria-label="Create"]')
+            return create_btn is not None
+        except Exception:
+            return False
+        finally:
+            await page.close()
+    async def post(self, video_path: str, caption: str, **kwargs) -> PostResult:
+        video_id = kwargs.get("video_id", "unknown")
+        title = kwargs.get("title", "AI Voice Dubbed")
+        page = await self.context.new_page()
+        try:
+            console.print(f"[cyan]YouTube:[/cyan] Navigating to YouTube Studio...")
+            await page.goto("https://studio.youtube.com/", wait_until="domcontentloaded", timeout=20000)
+            await page.wait_for_timeout(3000)
+            if "accounts.google.com" in page.url:
+                return self._make_result(video_id, "failed", caption, error="Not logged in — session expired")
+            # Click Create button
+            console.print(f"[cyan]YouTube:[/cyan] Opening upload dialog...")
+            create_btn = await page.wait_for_selector(
+                '#create-icon, button[aria-label="Create"]', timeout=10000
+            )
+            await create_btn.click()
+            await self._human_delay(1, 2)
+            # Click "Upload videos"
+            upload_option = await page.wait_for_selector(
+                'tp-yt-paper-item:has-text("Upload videos"), #text-item-0', timeout=5000
+            )
+            await upload_option.click()
+            await self._human_delay(1, 2)
+            # Upload video file
+            console.print(f"[cyan]YouTube:[/cyan] Uploading video...")
+            file_input = await page.wait_for_selector('input[type="file"]', timeout=10000)
+            await file_input.set_input_files(video_path)
+            # Wait for upload to start processing
+            await self._human_delay(3, 5)
+            # Wait for the details form to appear
+            try:
+                await page.wait_for_selector(
+                    '#textbox[aria-label*="title"], div[id="textbox"]',
+                    timeout=60000,
+                )
+            except Exception:
+                console.print("[yellow]YouTube: Waiting for upload form...[/yellow]")
+                await page.wait_for_timeout(10000)
+            await self._human_delay(1, 2)
+            # Fill in title
+            console.print(f"[cyan]YouTube:[/cyan] Setting title and description...")
+            title_input = await page.query_selector('#textbox[aria-label*="title"]')
+            if title_input:
+                await title_input.click()
+                await page.keyboard.press("Meta+a")
+                await page.keyboard.type(title[:100], delay=10)
+            await self._human_delay(1, 2)
+            # Fill in description
+            desc_input = await page.query_selector(
+                '#textbox[aria-label*="description"], '
+                'div[aria-label*="Tell viewers about your video"]'
+            )
+            if desc_input:
+                await desc_input.click()
+                await page.keyboard.type(caption, delay=5)
+            await self._human_delay(1, 2)
+            # Handle "Made for kids" — select "No, it's not made for kids"
+            not_for_kids = await page.query_selector(
+                'tp-yt-paper-radio-button[name="NOT_MADE_FOR_KIDS"], '
+                '#radioLabel:has-text("not made for kids")'
+            )
+            if not_for_kids:
+                await not_for_kids.click()
+                await self._human_delay(0.5, 1)
+            # Click Next through the wizard steps (Elements, Checks, Visibility)
+            for step_name in ["Elements", "Checks", "Visibility"]:
+                console.print(f"[cyan]YouTube:[/cyan] Step: {step_name}...")
+                next_btn = await page.query_selector('#next-button, button:has-text("Next")')
+                if next_btn:
+                    await next_btn.click()
+                    await self._human_delay(2, 3)
+            # Set visibility to Public
+            public_radio = await page.query_selector(
+                'tp-yt-paper-radio-button[name="PUBLIC"], '
+                '#radioLabel:has-text("Public")'
+            )
+            if public_radio:
+                await public_radio.click()
+                await self._human_delay(1, 2)
+            # Click Publish / Done
+            console.print(f"[cyan]YouTube:[/cyan] Publishing...")
+            publish_btn = await page.query_selector(
+                '#done-button, button:has-text("Publish"), button:has-text("Done")'
+            )
+            if publish_btn:
+                await publish_btn.click()
+            else:
+                await self._screenshot_on_error(page, video_id)
+                return self._make_result(video_id, "failed", caption, error="Could not find Publish button")
+            # Wait for publish confirmation
+            await page.wait_for_timeout(10000)
+            # Check for success — dialog may show "Video published" or close
+            try:
+                await page.wait_for_selector(
+                    'div:has-text("Video published"), a[href*="youtu"]',
+                    timeout=30000,
+                )
+                # Try to extract the video URL
+                link_el = await page.query_selector('a[href*="youtu.be"], a[href*="youtube.com/watch"]')
+                video_url = None
+                if link_el:
+                    video_url = await link_el.get_attribute("href")
+                console.print(f"[green]YouTube: Published successfully![/green]")
+                return self._make_result(video_id, "success", caption, url=video_url)
+            except Exception:
+                await self._screenshot_on_error(page, video_id)
+                return self._make_result(video_id, "failed", caption, error="Publish confirmation not detected")
+        except Exception as e:
+            try:
+                await self._screenshot_on_error(page, video_id)
+            except Exception:
+                pass
+            return self._make_result(video_id, "failed", caption, error=str(e))
+        finally:
+            await page.close()

social_distributor/poster/post_log.py ADDED Viewed

	@@ -0,0 +1,45 @@

+"""JSON-based posting history for deduplication."""
+from __future__ import annotations
+import json
+from datetime import datetime, timezone
+from .config import POST_LOG_PATH
+from .models import PostResult
+def _load() -> dict:
+    if POST_LOG_PATH.exists():
+        with open(POST_LOG_PATH) as f:
+            return json.load(f)
+    return {}
+def _save(data: dict) -> None:
+    with open(POST_LOG_PATH, "w") as f:
+        json.dump(data, f, indent=2)
+def is_posted(video_id: str, platform: str) -> bool:
+    data = _load()
+    entry = data.get(video_id, {}).get(platform, {})
+    return entry.get("status") == "success"
+def record(result: PostResult) -> None:
+    data = _load()
+    if result.video_id not in data:
+        data[result.video_id] = {}
+    data[result.video_id][result.platform] = {
+        "status": result.status,
+        "timestamp": result.timestamp,
+        "caption": result.caption_used,
+        "error": result.error,
+        "url": result.url,
+    }
+    _save(data)
+def get_all() -> dict:
+    return _load()

social_distributor/poster/video_loader.py ADDED Viewed

	@@ -0,0 +1,101 @@

+"""Load and validate VideoVoice data folders into VideoData objects."""
+from __future__ import annotations
+import json
+import re
+from pathlib import Path
+from rich.console import Console
+from .config import LANGUAGE_CODE_TO_NAME, VIDEOVOICE_DATA_DIR
+from .models import VideoData
+console = Console()
+def _detect_platform(video_link: str | None) -> str | None:
+    if not video_link:
+        return None
+    if re.search(r"/reels?/", video_link):
+        return "instagram"
+    if "tiktok.com" in video_link:
+        return "tiktok"
+    if "youtube.com" in video_link or "youtu.be" in video_link:
+        return "youtube"
+    return None
+def load_video(folder_name: str, lang_override: str | None = None) -> VideoData | None:
+    """Load a single video folder. Returns None if the folder is invalid."""
+    folder = VIDEOVOICE_DATA_DIR / folder_name
+    if not folder.is_dir():
+        console.print(f"[red]Folder not found:[/red] {folder}")
+        return None
+    output_mp4 = folder / "output.mp4"
+    if not output_mp4.exists():
+        console.print(f"[red]No output.mp4 in:[/red] {folder_name}")
+        return None
+    # Read transcription.json
+    transcription_path = folder / "transcription.json"
+    if not transcription_path.exists():
+        console.print(f"[red]No transcription.json in:[/red] {folder_name}")
+        return None
+    with open(transcription_path) as f:
+        transcription = json.load(f)
+    video_link = transcription.get("video_link")
+    source_language = transcription.get("source_language", "en")
+    original_text = " ".join(
+        seg.get("text", "") for seg in transcription.get("segments", [])
+    )
+    # Read segment_comparison.json
+    seg_comp_path = folder / "segment_comparison.json"
+    target_lang_code = "en"
+    translated_text = ""
+    if seg_comp_path.exists():
+        with open(seg_comp_path) as f:
+            segments = json.load(f)
+        if segments and isinstance(segments, list):
+            target_lang_code = segments[0].get("language_id", "en")
+            translated_text = " ".join(
+                seg.get("tts_text", "") or seg.get("translated_text", "")
+                for seg in segments
+            )
+    target_lang_name = lang_override or LANGUAGE_CODE_TO_NAME.get(
+        target_lang_code, target_lang_code
+    )
+    return VideoData(
+        video_id=folder_name,
+        output_path=str(output_mp4),
+        video_link=video_link,
+        source_language=source_language,
+        target_language_code=target_lang_code,
+        target_language_name=target_lang_name,
+        original_text=original_text,
+        translated_text=translated_text,
+        platform_type=_detect_platform(video_link),
+    )
+def load_videos(
+    folder_names: list[str], lang_override: str | None = None
+) -> list[VideoData]:
+    """Load multiple video folders, skipping invalid ones."""
+    videos = []
+    for name in folder_names:
+        video = load_video(name, lang_override)
+        if video:
+            videos.append(video)
+            console.print(
+                f"[green]Loaded:[/green] {name} "
+                f"({video.source_language} -> {video.target_language_name})"
+            )
+    return videos

social_distributor/pyproject.toml ADDED Viewed

	@@ -0,0 +1,20 @@

+[project]
+name = "videovoice-poster"
+version = "0.1.0"
+description = "Automated social media posting for VideoVoice dubbed videos"
+requires-python = ">=3.10"
+dependencies = [
+    "playwright>=1.40",
+    "openai>=1.0",
+    "boto3>=1.34",
+    "click>=8.0",
+    "rich>=13.0",
+    "python-dotenv>=1.0",
+]
+[tool.hatch.build.targets.wheel]
+packages = ["poster"]
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"

social_distributor/uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

steps/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Video Translation Pipeline — steps package

steps/lang/__init__.py ADDED Viewed

	@@ -0,0 +1,38 @@

+"""Language-specific handlers for the translation pipeline.
+Each language that needs special handling gets its own module (e.g. urdu.py).
+This package provides a simple dispatcher so s3_translate.py stays language-agnostic.
+"""
+def _get_handler(target_language: str):
+    """Lazy-import language handler module if it exists."""
+    lang = target_language.lower()
+    if lang == "urdu":
+        from . import urdu
+        return urdu
+    return None
+def get_translation_prompt(target_language: str, default_prompt: str) -> str:
+    """Return a language-specific translation prompt, or the default."""
+    handler = _get_handler(target_language)
+    if handler and hasattr(handler, 'get_translation_prompt'):
+        return handler.get_translation_prompt()
+    return default_prompt
+def get_fallback_mode(target_language: str) -> str:
+    """Return 'bedrock' or 'google' depending on the language."""
+    handler = _get_handler(target_language)
+    if handler and hasattr(handler, 'get_fallback_mode'):
+        return handler.get_fallback_mode()
+    return "google"
+def post_translate(segments: list[dict], target_language: str) -> list[dict]:
+    """Run any language-specific post-processing after translation."""
+    handler = _get_handler(target_language)
+    if handler and hasattr(handler, 'post_translate'):
+        return handler.post_translate(segments)
+    return segments

steps/lang/_shared.py ADDED Viewed

	@@ -0,0 +1,150 @@

+"""Shared utilities for language-specific translation handlers."""
+import json
+import os
+import re
+from datetime import datetime, timezone
+from openai import OpenAI
+from dotenv import load_dotenv
+# Runs at import time, before the os.getenv() lookups below (python-dotenv
+# reads a .env file into the process environment).
+load_dotenv()
+# Base URL handed to the OpenAI-compatible client in build_client().
+POLLINATIONS_BASE = "https://gen.pollinations.ai/v1"
+# Model name; the POLLEN_MODEL environment variable overrides the default.
+MODEL = os.getenv("POLLEN_MODEL", "openai-large")
+def build_client() -> OpenAI:
+    """Build an OpenAI-compatible client pointing at Pollinations."""
+    api_key = (
+        os.getenv("POLLEN_API_KEY_SECONDARY")
+        or os.getenv("POLLEN_API_KEY")
+        or os.getenv("POLLINATIONS_API_KEY")
+        or "pollinations"
+    )
+    return OpenAI(base_url=POLLINATIONS_BASE, api_key=api_key)
+_LLM_LOG_PATH = "tmp/llm_calls.json"
+def log_llm_call(
+    step: str,
+    provider: str,
+    model: str,
+    system_prompt: str,
+    user_prompt: str,
+    response: str,
+    temperature: float,
+) -> None:
+    """Append an LLM call record to tmp/llm_calls.json."""
+    entry = {
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+        "step": step,
+        "provider": provider,
+        "model": model,
+        "temperature": temperature,
+        "system_prompt": system_prompt,
+        "user_prompt": user_prompt,
+        "response": response,
+    }
+    try:
+        with open(_LLM_LOG_PATH, "r", encoding="utf-8") as f:
+            calls = json.load(f)
+    except (FileNotFoundError, json.JSONDecodeError):
+        calls = []
+    calls.append(entry)
+    os.makedirs(os.path.dirname(_LLM_LOG_PATH) or ".", exist_ok=True)
+    with open(_LLM_LOG_PATH, "w", encoding="utf-8") as f:
+        json.dump(calls, f, indent=2, ensure_ascii=False)
+def parse_json_array(raw: str) -> list:
+    """Parse a JSON array from LLM output, with regex fallback for markdown fences etc."""
+    raw = raw.strip()
+    # Direct parse
+    try:
+        result = json.loads(raw)
+        if isinstance(result, dict):
+            return list(result.values())
+        if isinstance(result, list):
+            return [item[0] if isinstance(item, list) and len(item) > 0 else str(item) for item in result]
+        return result
+    except json.JSONDecodeError:
+        pass
+    # Fallback: extract [...] with regex
+    match = re.search(r'\[.*\]', raw, re.DOTALL)
+    if match:
+        result = json.loads(match.group())
+        if isinstance(result, list):
+            return [item[0] if isinstance(item, list) and len(item) > 0 else str(item) for item in result]
+        return result
+    # Fallback: extract {...} and convert dict values
+    match_dict = re.search(r'\{.*\}', raw, re.DOTALL)
+    if match_dict:
+        result = json.loads(match_dict.group())
+        if isinstance(result, dict):
+            return list(result.values())
+        return result
+    raise ValueError(f"Could not parse JSON array from LLM response:\n{raw[:200]}")
+def bedrock_converse(system_prompt: str, user_text: str, temperature: float = 0.1, step: str = "bedrock") -> str:
+    """Make a single Bedrock converse call and return the raw response text."""
+    import boto3
+    region = os.getenv("AWS_REGION", "us-east-1")
+    model_id = os.getenv("BEDROCK_MODEL", "qwen.qwen3-next-80b-a3b")
+    client = boto3.client("bedrock-runtime", region_name=region)
+    response = client.converse(
+        modelId=model_id,
+        messages=[{"role": "user", "content": [{"text": user_text}]}],
+        system=[{"text": system_prompt}],
+        inferenceConfig={"temperature": temperature},
+    )
+    result = response["output"]["message"]["content"][0]["text"].strip()
+    log_llm_call(
+        step=step, provider="bedrock", model=model_id,
+        system_prompt=system_prompt, user_prompt=user_text,
+        response=result, temperature=temperature,
+    )
+    return result
+def bedrock_fallback(segments: list[dict], numbered: str, system_prompt: str, max_retries: int = 2) -> list[dict]:
+    """Fallback translator using AWS Bedrock. Retries on count mismatch."""
+    expected = len(segments)
+    strict_prompt = (
+        system_prompt
+        + f"\n\nCRITICAL: You MUST return exactly {expected} items in the JSON array "
+        f"— one per input line. Do NOT merge, skip, or split any lines."
+    )
+    print(f"[lang] Bedrock fallback: translating {expected} segments")
+    for attempt in range(1, max_retries + 1):
+        raw = bedrock_converse(strict_prompt, numbered, step="s3_translate_bedrock")
+        translated_list = parse_json_array(raw)
+        if len(translated_list) == expected:
+            break
+        print(f"[lang] Bedrock returned {len(translated_list)}/{expected} items (attempt {attempt}/{max_retries})")
+        if attempt == max_retries:
+            raise ValueError(
+                f"Bedrock translation returned {len(translated_list)} items but expected {expected} after {max_retries} attempts"
+            )
+    cleaned = [re.sub(r'^\d+[\.\)\-]\s*', '', t) for t in translated_list]
+    result = [{**seg, "translated_text": t} for seg, t in zip(segments, cleaned)]
+    print("[lang] Bedrock fallback translation complete ✓")
+    return result

steps/lang/omnivoice_languages.py ADDED Viewed

	@@ -0,0 +1,652 @@

+# AUTO-GENERATED from k2-fsa/OmniVoice omnivoice/utils/lang_map.py
+# Source: https://github.com/k2-fsa/OmniVoice/blob/master/omnivoice/utils/lang_map.py
+"""Omnivoice-supported languages (display name -> Omnivoice language id)."""
+OMNIVOICE_LANGUAGE_CODES: dict[str, str] = {
+    "Abadi": "kbt",
+    "Abkhazian": "ab",
+    "Abron": "abr",
+    "Abua": "abn",
+    "Adamawa Fulfulde": "fub",
+    "Adyghe": "ady",
+    "Afade": "aal",
+    "Afrikaans": "af",
+    "Agwagwune": "yay",
+    "Aja (Benin)": "ajg",
+    "Akebu": "keu",
+    "Alago": "ala",
+    "Albanian": "sq",
+    "Algerian Arabic": "arq",
+    "Algerian Saharan Arabic": "aao",
+    "Ambo-Pasco Quechua": "qva",
+    "Ambonese Malay": "abs",
+    "Amdo Tibetan": "adx",
+    "Amharic": "am",
+    "Anaang": "anw",
+    "Angika": "anp",
+    "Antankarana Malagasy": "xmv",
+    "Aragonese": "an",
+    "Arbëreshë Albanian": "aae",
+    "Arequipa-La Unión Quechua": "qxu",
+    "Armenian": "hy",
+    "Ashe": "ahs",
+    "Ashéninka Perené": "prq",
+    "Askopan": "eiv",
+    "Assamese": "as",
+    "Asturian": "ast",
+    "Atayal": "tay",
+    "Awak": "awo",
+    "Ayacucho Quechua": "quy",
+    "Azerbaijani": "az",
+    "Baatonum": "bba",
+    "Bacama": "bcy",
+    "Bade": "bde",
+    "Bafia": "ksf",
+    "Bafut": "bfd",
+    "Bagirmi Fulfulde": "fui",
+    "Bago-Kusuntu": "bqg",
+    "Baharna Arabic": "abv",
+    "Bakoko": "bkh",
+    "Balanta-Ganja": "bjt",
+    "Balti": "bft",
+    "Bamenyam": "bce",
+    "Bamun": "bax",
+    "Bangwinji": "bsj",
+    "Banjar": "bjn",
+    "Bankon": "abb",
+    "Baoulé": "bci",
+    "Bara Malagasy": "bhr",
+    "Barok": "bjk",
+    "Basa (Cameroon)": "bas",
+    "Basa (Nigeria)": "bzw",
+    "Bashkir": "ba",
+    "Basque": "eu",
+    "Batak Mandailing": "btm",
+    "Batanga": "bnm",
+    "Bateri": "btv",
+    "Bats": "bbl",
+    "Bayot": "bda",
+    "Bebele": "beb",
+    "Belarusian": "be",
+    "Bengali": "bn",
+    "Betawi": "bew",
+    "Bhili": "bhb",
+    "Bhojpuri": "bho",
+    "Bilur": "bxf",
+    "Bima": "bhp",
+    "Bodo": "brx",
+    "Boghom": "bux",
+    "Bokyi": "bky",
+    "Bomu": "bmq",
+    "Bondei": "bou",
+    "Borgu Fulfulde": "fue",
+    "Bosnian": "bs",
+    "Brahui": "brh",
+    "Braj": "bra",
+    "Breton": "br",
+    "Buduma": "bdm",
+    "Buginese": "bug",
+    "Bukharic": "bhh",
+    "Bulgarian": "bg",
+    "Bulu (Cameroon)": "bum",
+    "Bundeli": "bns",
+    "Bunun": "bnn",
+    "Bura-Pabir": "bwr",
+    "Burak": "bys",
+    "Burmese": "my",
+    "Burushaski": "bsk",
+    "Cacaloxtepec Mixtec": "miu",
+    "Cajatambo North Lima Quechua": "qvl",
+    "Cakfem-Mushere": "cky",
+    "Cameroon Pidgin": "wes",
+    "Campidanese Sardinian": "sro",
+    "Cantonese": "yue",
+    "Catalan": "ca",
+    "Cebuano": "ceb",
+    "Cen": "cen",
+    "Central Kurdish": "ckb",
+    "Central Nahuatl": "nhn",
+    "Central Pame": "pbs",
+    "Central Pashto": "pst",
+    "Central Puebla Nahuatl": "ncx",
+    "Central Tarahumara": "tar",
+    "Central Yupik": "esu",
+    "Central-Eastern Niger Fulfulde": "fuq",
+    "Chadian Arabic": "shu",
+    "Chichewa": "ny",
+    "Chichicapan Zapotec": "zpv",
+    "Chiga": "cgg",
+    "Chimalapa Zoque": "zoh",
+    "Chimborazo Highland Quichua": "qug",
+    "Chinese": "zh",
+    "Chiquián Ancash Quechua": "qxa",
+    "Chitwania Tharu": "the",
+    "Chokwe": "cjk",
+    "Chuvash": "cv",
+    "Cibak": "ckl",
+    "Coastal Konjo": "kjc",
+    "Copainalá Zoque": "zoc",
+    "Cornish": "kw",
+    "Corongo Ancash Quechua": "qwa",
+    "Croatian": "hr",
+    "Cross River Mbembe": "mfn",
+    "Cuyamecalco Mixtec": "xtu",
+    "Czech": "cs",
+    "Dadiya": "dbd",
+    "Dagbani": "dag",
+    "Dameli": "dml",
+    "Danish": "da",
+    "Dargwa": "dar",
+    "Dazaga": "dzg",
+    "Deccan": "dcc",
+    "Degema": "deg",
+    "Dera (Nigeria)": "kna",
+    "Dghwede": "dgh",
+    "Dhatki": "mki",
+    "Dhivehi": "dv",
+    "Dhofari Arabic": "adf",
+    "Dijim-Bwilim": "cfa",
+    "Dogri": "dgo",
+    "Domaaki": "dmk",
+    "Dotyali": "dty",
+    "Duala": "dua",
+    "Dutch": "nl",
+    "DũYa": "ldb",
+    "Dyula": "dyu",
+    "Eastern Balochi": "bgp",
+    "Eastern Bolivian Guaraní": "gui",
+    "Eastern Egyptian Bedawi Arabic": "avl",
+    "Eastern Krahn": "kqo",
+    "Eastern Mari": "mhr",
+    "Eastern Yiddish": "ydd",
+    "Ebrié": "ebr",
+    "Eggon": "ego",
+    "Egyptian Arabic": "arz",
+    "Ejagham": "etu",
+    "Eleme": "elm",
+    "Eloyi": "afo",
+    "Embu": "ebu",
+    "English": "en",
+    "Erzya": "myv",
+    "Esan": "ish",
+    "Esperanto": "eo",
+    "Estonian": "et",
+    "Eton (Cameroon)": "eto",
+    "Ewondo": "ewo",
+    "Extremaduran": "ext",
+    "Fang (Equatorial Guinea)": "fan",
+    "Fanti": "fat",
+    "Farefare": "gur",
+    "Fe'fe'": "fmp",
+    "Filipino": "fil",
+    "Filomena Mata-Coahuitlán Totonac": "tlp",
+    "Finnish": "fi",
+    "Fipa": "fip",
+    "French": "fr",
+    "Fulah": "ff",
+    "Galician": "gl",
+    "Gambian Wolof": "wof",
+    "Ganda": "lg",
+    "Garhwali": "gbm",
+    "Gawar-Bati": "gwt",
+    "Gawri": "gwc",
+    "Gbagyi": "gbr",
+    "Gbari": "gby",
+    "Geji": "gyz",
+    "Gen": "gej",
+    "Georgian": "ka",
+    "German": "de",
+    "Geser-Gorom": "ges",
+    "Gheg Albanian": "aln",
+    "Ghomálá'": "bbj",
+    "Gidar": "gid",
+    "Glavda": "glw",
+    "Goan Konkani": "gom",
+    "Goaria": "gig",
+    "Goemai": "ank",
+    "Gola": "gol",
+    "Greek": "el",
+    "Guarani": "gn",
+    "Guduf-Gava": "gdf",
+    "Guerrero Amuzgo": "amu",
+    "Gujarati": "gu",
+    "Gujari": "gju",
+    "Gulf Arabic": "afb",
+    "Gurgula": "ggg",
+    "Gusii": "guz",
+    "Gusilay": "gsl",
+    "Gweno": "gwe",
+    "Güilá Zapotec": "ztu",
+    "Hadothi": "hoj",
+    "Hahon": "hah",
+    "Haitian": "ht",
+    "Hakha Chin": "cnh",
+    "Hakö": "hao",
+    "Halia": "hla",
+    "Hausa": "ha",
+    "Hawaiian": "haw",
+    "Hazaragi": "haz",
+    "Hebrew": "he",
+    "Hemba": "hem",
+    "Herero": "hz",
+    "Highland Konjo": "kjk",
+    "Hijazi Arabic": "acw",
+    "Hindi": "hi",
+    "Huarijio": "var",
+    "Huautla Mazatec": "mau",
+    "Huaxcaleca Nahuatl": "nhq",
+    "Huba": "hbb",
+    "Huitepec Mixtec": "mxs",
+    "Hula": "hul",
+    "Hungarian": "hu",
+    "Hunjara-Kaina Ke": "hkk",
+    "Hwana": "hwo",
+    "Ibibio": "ibb",
+    "Icelandic": "is",
+    "Idakho-Isukha-Tiriki": "ida",
+    "Idoma": "idu",
+    "Igbo": "ig",
+    "Igo": "ahl",
+    "Ikposo": "kpo",
+    "Ikwere": "ikw",
+    "Imbabura Highland Quichua": "qvi",
+    "Indonesian": "id",
+    "Indus Kohistani": "mvy",
+    "Interlingua (International Auxiliary Language Association)": "ia",
+    "Inupiaq": "ik",
+    "Irish": "ga",
+    "Iron Ossetic": "os",
+    "Isekiri": "its",
+    "Isoko": "iso",
+    "Italian": "it",
+    "Ito": "itw",
+    "Itzá": "itz",
+    "Ixtayutla Mixtec": "vmj",
+    "Izon": "ijc",
+    "Jambi Malay": "jax",
+    "Japanese": "ja",
+    "Jaqaru": "jqr",
+    "Jauja Wanca Quechua": "qxw",
+    "Jaunsari": "jns",
+    "Javanese": "jv",
+    "Jiba": "juo",
+    "Jju": "kaj",
+    "Judeo-Moroccan Arabic": "aju",
+    "Juxtlahuaca Mixtec": "vmc",
+    "Kabardian": "kbd",
+    "Kabras": "lkb",
+    "Kabuverdianu": "kea",
+    "Kabyle": "kab",
+    "Kachi Koli": "gjk",
+    "Kairak": "ckr",
+    "Kalabari": "ijn",
+    "Kalasha": "kls",
+    "Kalenjin": "kln",
+    "Kalkoti": "xka",
+    "Kamba": "kam",
+    "Kamo": "kcq",
+    "Kanauji": "bjj",
+    "Kanembu": "kbl",
+    "Kannada": "kn",
+    "Karekare": "kai",
+    "Kashmiri": "ks",
+    "Kathoriya Tharu": "tkt",
+    "Kati": "bsh",
+    "Kazakh": "kk",
+    "Keiyo": "eyo",
+    "Khams Tibetan": "khg",
+    "Khana": "ogo",
+    "Khetrani": "xhe",
+    "Khmer": "km",
+    "Khowar": "khw",
+    "Kinga": "zga",
+    "Kinnauri": "kfk",
+    "Kinyarwanda": "rw",
+    "Kirghiz": "ky",
+    "Kirya-Konzəl": "fkk",
+    "Kochila Tharu": "thq",
+    "Kohistani Shina": "plk",
+    "Kohumono": "bcs",
+    "Kok Borok": "trp",
+    "Kol (Papua New Guinea)": "kol",
+    "Kom (Cameroon)": "bkm",
+    "Koma": "kmy",
+    "Konkani": "knn",
+    "Konzo": "koo",
+    "Korean": "ko",
+    "Korwa": "kfp",
+    "Kota (India)": "kfe",
+    "Koti": "eko",
+    "Kuanua": "ksd",
+    "Kuanyama": "kj",
+    "Kui (India)": "uki",
+    "Kulung (Nigeria)": "bbu",
+    "Kuot": "kto",
+    "Kushi": "kuh",
+    "Kwambi": "kwm",
+    "Kwasio": "nmg",
+    "Lala-Roba": "lla",
+    "Lamang": "hia",
+    "Lao": "lo",
+    "Larike-Wakasihu": "alo",
+    "Lasi": "lss",
+    "Latgalian": "ltg",
+    "Latvian": "lv",
+    "Levantine Arabic": "apc",
+    "Liana-Seti": "ste",
+    "Liberia Kpelle": "xpe",
+    "Liberian English": "lir",
+    "Libyan Arabic": "ayl",
+    "Ligurian": "lij",
+    "Lijili": "mgi",
+    "Lingala": "ln",
+    "Lithuanian": "lt",
+    "Loarki": "lrk",
+    "Logooli": "rag",
+    "Logudorese Sardinian": "src",
+    "Loja Highland Quichua": "qvj",
+    "Loloda": "loa",
+    "Longuda": "lnu",
+    "Loxicha Zapotec": "ztp",
+    "Luba-Lulua": "lua",
+    "Luo": "luo",
+    "Lushai": "lus",
+    "Luxembourgish": "lb",
+    "Maasina Fulfulde": "ffm",
+    "Maba (Chad)": "mde",
+    "Macedo-Romanian": "rup",
+    "Macedonian": "mk",
+    "Mada (Cameroon)": "mxu",
+    "Mafa": "maf",
+    "Maithili": "mai",
+    "Malay": "ms",
+    "Malayalam": "ml",
+    "Mali": "gcc",
+    "Malinaltepec Me'phaa": "tcf",
+    "Maltese": "mt",
+    "Mandara": "tbf",
+    "Mandjak": "mfv",
+    "Manggarai": "mqy",
+    "Manipuri": "mni",
+    "Mansoanka": "msw",
+    "Manx": "gv",
+    "Maori": "mi",
+    "Marathi": "mr",
+    "Marghi Central": "mrt",
+    "Marghi South": "mfm",
+    "Maria (India)": "mrr",
+    "Marwari (Pakistan)": "mve",
+    "Masana": "mcn",
+    "Masikoro Malagasy": "msh",
+    "Matsés": "mcf",
+    "Mazaltepec Zapotec": "zpy",
+    "Mazatlán Mazatec": "vmz",
+    "Mazatlán Mixe": "mzl",
+    "Mbe": "mfo",
+    "Mbo (Cameroon)": "mbo",
+    "Mbum": "mdd",
+    "Medumba": "byv",
+    "Mekeo": "mek",
+    "Meru": "mer",
+    "Mesopotamian Arabic": "acm",
+    "Mewari": "mtr",
+    "Min Nan Chinese": "nan",
+    "Mingrelian": "xmf",
+    "Mitlatongo Mixtec": "vmm",
+    "Miya": "mkf",
+    "Mokpwe": "bri",
+    "Moksha": "mdf",
+    "Mom Jango": "ver",
+    "Mongolian": "mn",
+    "Moroccan Arabic": "ary",
+    "Motu": "meu",
+    "Mpiemo": "mcx",
+    "Mpumpong": "mgg",
+    "Mundang": "mua",
+    "Mungaka": "mhk",
+    "Musey": "mse",
+    "Musgu": "mug",
+    "Musi": "mui",
+    "Naba": "mne",
+    "Najdi Arabic": "ars",
+    "Nalik": "nal",
+    "Nawdm": "nmz",
+    "Ndonga": "ng",
+    "Neapolitan": "nap",
+    "Nepali": "npi",
+    "Ngamo": "nbh",
+    "Ngas": "anc",
+    "Ngiemboon": "nnh",
+    "Ngizim": "ngi",
+    "Ngomba": "jgo",
+    "Ngombale": "nla",
+    "Nigerian Fulfulde": "fuv",
+    "Nigerian Pidgin": "pcm",
+    "Nimadi": "noe",
+    "Nobiin": "fia",
+    "North Mesopotamian Arabic": "ayp",
+    "North Moluccan Malay": "max",
+    "Northern Betsimisaraka Malagasy": "bmm",
+    "Northern Hindko": "hno",
+    "Northern Kurdish": "kmr",
+    "Northern Pame": "pmq",
+    "Northern Pashto": "pbu",
+    "Northern Uzbek": "uzn",
+    "Northwest Gbaya": "gya",
+    "Norwegian": "no",
+    "Norwegian Bokmål": "nb",
+    "Norwegian Nynorsk": "nn",
+    "Notsi": "ncf",
+    "Nyankpa": "yes",
+    "Nyungwe": "nyu",
+    "Nzanyi": "nja",
+    "Nüpode Huitoto": "hux",
+    "Occitan": "oc",
+    "Od": "odk",
+    "Odia": "ory",
+    "Odual": "odu",
+    "Omani Arabic": "acx",
+    "Orizaba Nahuatl": "nlv",
+    "Orma": "orc",
+    "Ormuri": "oru",
+    "Oromo": "om",
+    "Pahari-Potwari": "phr",
+    "Paiwan": "pwn",
+    "Panjabi": "pa",
+    "Papuan Malay": "pmy",
+    "Parkari Koli": "kvx",
+    "Pedi": "nso",
+    "Pero": "pip",
+    "Persian": "fa",
+    "Petats": "pex",
+    "Phalura": "phl",
+    "Piemontese": "pms",
+    "Piya-Kwonci": "piy",
+    "Plateau Malagasy": "plt",
+    "Polish": "pl",
+    "Poqomam": "poc",
+    "Portuguese": "pt",
+    "Pulaar": "fuc",
+    "Pular": "fuf",
+    "Puno Quechua": "qxp",
+    "Pushto": "ps",
+    "Pökoot": "pko",
+    "Qaqet": "byx",
+    "Quiotepec Chinantec": "chq",
+    "Rana Tharu": "thr",
+    "Rangi": "lag",
+    "Rapoisi": "kyx",
+    "Ratahan": "rth",
+    "Rayón Zoque": "zor",
+    "Romanian": "ro",
+    "Romansh": "rm",
+    "Rombo": "rof",
+    "Rotokas": "roo",
+    "Rukai": "dru",
+    "Russian": "ru",
+    "Sacapulteco": "quv",
+    "Saidi Arabic": "aec",
+    "Sakalava Malagasy": "skg",
+    "Sakizaya": "szy",
+    "Saleman": "sau",
+    "Samba Daka": "ccg",
+    "Samba Leko": "ndi",
+    "San Felipe Otlaltepec Popoloca": "pow",
+    "San Francisco Del Mar Huave": "hue",
+    "San Juan Atzingo Popoloca": "poe",
+    "San Martín Itunyoso Triqui": "trq",
+    "San Miguel El Grande Mixtec": "mig",
+    "Sansi": "ssi",
+    "Sanskrit": "sa",
+    "Santa Ana de Tusi Pasco Quechua": "qxt",
+    "Santa Catarina Albarradas Zapotec": "ztn",
+    "Santali": "sat",
+    "Santiago del Estero Quichua": "qus",
+    "Saposa": "sps",
+    "Saraiki": "skr",
+    "Sardinian": "sc",
+    "Saya": "say",
+    "Sediq": "trv",
+    "Serbian": "sr",
+    "Seri": "sei",
+    "Shina": "scl",
+    "Shona": "sn",
+    "Siar-Lak": "sjr",
+    "Sibe": "nco",
+    "Sicilian": "scn",
+    "Sihuas Ancash Quechua": "qws",
+    "Sikkimese": "sip",
+    "Sinaugoro": "snc",
+    "Sindhi": "sd",
+    "Sindhi Bhil": "sbn",
+    "Sinhala": "si",
+    "Sinicahua Mixtec": "xti",
+    "Sipacapense": "qum",
+    "Siwai": "siw",
+    "Slovak": "sk",
+    "Slovenian": "sl",
+    "Solos": "sol",
+    "Somali": "so",
+    "Soninke": "snk",
+    "South Giziga": "giz",
+    "South Ucayali Ashéninka": "cpy",
+    "Southeastern Nochixtlán Mixtec": "mxy",
+    "Southern Betsimisaraka Malagasy": "bzc",
+    "Southern Pashto": "pbt",
+    "Southern Pastaza Quechua": "qup",
+    "Soyaltepec Mazatec": "vmp",
+    "Spanish": "es",
+    "Standard Arabic": "arb",
+    "Standard Moroccan Tamazight": "zgh",
+    "Sudanese Arabic": "apd",
+    "Sulka": "sua",
+    "Svan": "sva",
+    "Swahili": "sw",
+    "Swedish": "sv",
+    "Tae'": "rob",
+    "Tahaggart Tamahaq": "thv",
+    "Taita": "dav",
+    "Tajik": "tg",
+    "Tamil": "ta",
+    "Tandroy-Mahafaly Malagasy": "tdx",
+    "Tangale": "tan",
+    "Tanosy Malagasy": "txy",
+    "Tarok": "yer",
+    "Tatar": "tt",
+    "Tedaga": "tuq",
+    "Telugu": "te",
+    "Tem": "kdh",
+    "Teop": "tio",
+    "Tepeuxila Cuicatec": "cux",
+    "Tepinapa Chinantec": "cte",
+    "Tera": "ttr",
+    "Terei": "buo",
+    "Termanu": "twu",
+    "Tesaka Malagasy": "tkg",
+    "Tetelcingo Nahuatl": "nhg",
+    "Teutila Cuicatec": "cut",
+    "Thai": "th",
+    "Tibetan": "bo",
+    "Tidaá Mixtec": "mtx",
+    "Tidore": "tvo",
+    "Tigak": "tgc",
+    "Tigre": "tig",
+    "Tigrinya": "ti",
+    "Tilquiapan Zapotec": "zts",
+    "Tinputz": "tpz",
+    "Tlacoapa Me'phaa": "tpl",
+    "Tlacoatzintepec Chinantec": "ctl",
+    "Tlingit": "tli",
+    "Toki Pona": "tok",
+    "Tomoip": "tqp",
+    "Tondano": "tdn",
+    "Tonsea": "txs",
+    "Tooro": "ttj",
+    "Torau": "ttu",
+    "Torwali": "trw",
+    "Tsimihety Malagasy": "xmw",
+    "Tsotso": "lto",
+    "Tswana": "tn",
+    "Tugen": "tuy",
+    "Tuki": "bag",
+    "Tula": "tul",
+    "Tulu": "tcy",
+    "Tunen": "tvu",
+    "Tungag": "lcm",
+    "Tunisian Arabic": "aeb",
+    "Tupuri": "tui",
+    "Turkana": "tuv",
+    "Turkish": "tr",
+    "Turkmen": "tk",
+    "Tututepec Mixtec": "mtu",
+    "Twi": "tw",
+    "Ubaghara": "byc",
+    "Uighur": "ug",
+    "Ukrainian": "uk",
+    "Umbundu": "umb",
+    "Upper Sorbian": "hsb",
+    "Urdu": "ur",
+    "Ushojo": "ush",
+    "Uzbek": "uz",
+    "Vai": "vai",
+    "Vietnamese": "vi",
+    "Votic": "vot",
+    "Võro": "vro",
+    "Waci Gbe": "wci",
+    "Wadiyara Koli": "kxp",
+    "Waja": "wja",
+    "Wakhi": "wbl",
+    "Wanga": "lwg",
+    "Wapan": "juk",
+    "Warji": "wji",
+    "Welsh": "cy",
+    "Wemale": "weo",
+    "Western Frisian": "fy",
+    "Western Highland Purepecha": "pua",
+    "Western Juxtlahuaca Mixtec": "jmx",
+    "Western Maninkakan": "mlq",
+    "Western Mari": "mrj",
+    "Western Niger Fulfulde": "fuh",
+    "Western Panjabi": "pnb",
+    "Wolof": "wo",
+    "Wuzlam": "udl",
+    "Xanaguía Zapotec": "ztg",
+    "Xhosa": "xh",
+    "Yace": "ekr",
+    "Yakut": "sah",
+    "Yalahatan": "jal",
+    "Yanahuanca Pasco Quechua": "qur",
+    "Yangben": "yav",
+    "Yaqui": "yaq",
+    "Yauyos Quechua": "qux",
+    "Yekhee": "ets",
+    "Yiddish": "yi",
+    "Yidgha": "ydg",
+    "Yoruba": "yo",
+    "Yutanduchi Mixtec": "mab",
+    "Zacatlán-Ahuacatlán-Tepetzintla Nahuatl": "nhi",
+    "Zarma": "dje",
+    "Zaza": "zza",
+    "Zulu": "zu",
+    "Ömie": "aom",
+}

steps/lang/qwen3_languages.py ADDED Viewed

	@@ -0,0 +1,15 @@

+# Qwen3-TTS supported target languages.
+# Source: https://huggingface.co/spaces/Qwen/Qwen3-TTS (LANGUAGES constant in app.py)
+"""Qwen3-supported languages (display name -> ISO-639-1 code)."""
+# Keys are the English display names; values are the two-letter language codes.
+QWEN3_LANGUAGE_CODES: dict[str, str] = {
+    "Chinese": "zh",
+    "English": "en",
+    "French": "fr",
+    "German": "de",
+    "Japanese": "ja",
+    "Korean": "ko",
+    "Portuguese": "pt",
+    "Russian": "ru",
+    "Spanish": "es",
+}

steps/lang/urdu.py ADDED Viewed

	@@ -0,0 +1,324 @@

+"""Urdu-specific translation handlers.
+Handles:
+- Urdu-specific translation prompt (Nastaliq script, spoken Urdu vocabulary)
+- Urdu → Devanagari transliteration for TTS (Chatterbox needs Devanagari)
+- Devanagari → Urdu script conversion for captions
+"""
+import json
+import re
+from ._shared import build_client, parse_json_array, bedrock_converse, MODEL, log_llm_call
+# ── Public dispatcher hooks ──────────────────────────────────────────────────
+def get_translation_prompt() -> str:
+    """Return the Urdu-specific system prompt for translation.
+    The prompt tells the model to translate numbered, duration-tagged lines
+    into everyday spoken Urdu written in Urdu (Nastaliq/Arabic) script, to use
+    fewer words than the English source so the result fits the bracketed
+    duration, and to answer with only a JSON array of translated strings.
+    """
+    return (
+        "You are a professional voice-over translator for commonly spoken Urdu. "
+        "Translate the following numbered lines into Urdu (Nastaliq/Arabic script).\n\n"
+        "LANGUAGE RULES:\n"
+        "- Use ONLY everyday spoken Urdu — the kind heard on Pakistani news, dramas, and streets.\n"
+        "- Use Urdu, Persian, and Arabic-origin vocabulary ONLY. "
+        "NEVER use Sanskrit-origin Hindi words (e.g. use محبت not پیار, زندگی not جیون, "
+        "وقت not سمے, لیکن not پرنتو, اگر not یدی).\n"
+        "- Keep it natural and conversational, not literary or formal.\n"
+        "- NEVER insert English words, interjections, or filler sounds (Oh, Ah, Hmm, Well, So). "
+        "Translate ALL such expressions into Urdu equivalents.\n\n"
+        "CRITICAL — DURATION CONSTRAINT:\n"
+        "Each line shows its spoken duration in brackets (e.g. [4.6s]). "
+        "The translation will be spoken by TTS and MUST fit within that duration.\n"
+        "STRICT RULE: Your translation MUST have FEWER words than the original English. "
+        "If the English has 10 words, aim for 7-8 Urdu words maximum.\n"
+        "Every word must earn its place — if removing a word doesn't lose core meaning, remove it. "
+        "Paraphrase aggressively. Use shorter synonyms. Merge clauses. "
+        "A concise translation that fits the time is ALWAYS better than a complete one that overflows.\n\n"
+        "TTS COMPATIBILITY — IMPORTANT:\n"
+        "The TTS model struggles with long sentences that have multiple commas or clauses. "
+        "Restructure into short, direct sentences — but the TOTAL text must still fit the duration shown in brackets. "
+        "Do NOT add extra words or content when restructuring. The goal is simpler phrasing, not more text.\n"
+        "Each output line is still ONE item in the array (one per input line). "
+        "You may use multiple short sentences within that single line, but it must all fit the original duration.\n\n"
+        "Write ONLY in Urdu script (Nastaliq/Arabic script). "
+        "Return ONLY a JSON array of translated strings, in order, no extra text. "
+        "Do NOT include the duration prefix or numbering in the output — only the translated text itself. "
+        'Example input: 1. [3.0s] Hello\n2. [2.5s] Goodbye '
+        'Example output: ["سلام", "خدا حافظ"]'
+    )
+def get_fallback_mode() -> str:
+    """Return the fallback translation mode for Urdu.
+    Always returns the string "bedrock": Urdu uses Bedrock instead of Google
+    Translate as fallback.
+    """
+    return "bedrock"
+_ENGLISH_FILLERS = re.compile(
+    r'\b(Oh|Ah|Hmm|Well|So|Right|Okay|OK|Um|Uh|Hey|Wow|Ooh|Aah)[\.\!\,]?\s*',
+    re.IGNORECASE,
+)
+def post_translate(segments: list[dict]) -> list[dict]:
+    """Run Urdu-specific post-processing after translation.
+    - Strips leaked English fillers.
+    - Transliterates Urdu script → Devanagari for TTS (sets 'tts_text').
+    - Captions use translated_text directly (already Urdu/Nastaliq script).
+    """
+    for seg in segments:
+        text = seg.get("translated_text", "")
+        # Strip leaked English fillers
+        clean_text = _ENGLISH_FILLERS.sub("", text).strip()
+        seg["translated_text"] = clean_text
+    return transliterate_to_devanagari(segments)
+# ── Transliteration: Urdu → Devanagari (for TTS) ────────────────────────────
+# Character-level Urdu → Devanagari lookup used by _urdu_to_rough_devanagari().
+# Plain alif ('ا') is not in this table; that function handles it by position.
+# Entries mapped to '' are dropped from the output.
+_URDU_TO_DEVA = {
+    'آ': 'आ', 'ب': 'ब', 'پ': 'प', 'ت': 'त', 'ٹ': 'ट', 'ث': 'स',
+    'ج': 'ज', 'چ': 'च', 'ح': 'ह', 'خ': 'ख़', 'د': 'द', 'ڈ': 'ड',
+    'ذ': 'ज़', 'ر': 'र', 'ڑ': 'ड़', 'ز': 'ज़', 'ژ': 'झ', 'س': 'स',
+    'ش': 'श', 'ص': 'स', 'ض': 'ज़', 'ط': 'त', 'ظ': 'ज़', 'ع': 'अ',
+    'غ': 'ग़', 'ف': 'फ़', 'ق': 'क़', 'ک': 'क', 'ك': 'क', 'گ': 'ग',
+    'ل': 'ल', 'م': 'म', 'ن': 'न', 'ں': 'ं', 'و': 'व', 'ہ': 'ह',
+    'ه': 'ह', 'ھ': '्ह', 'ی': 'य', 'ي': 'य', 'ے': 'े', 'ئ': 'इ',
+    'َ': 'ा', 'ِ': 'ि', 'ُ': 'ु', 'ٰ': 'ा', 'ّ': '्', 'ً': 'न',
+    'ٔ': '', 'ء': '', 'ؓ': '', '۔': '।', '،': ',', '؟': '?', '؛': ';',
+}
+def _urdu_to_rough_devanagari(text: str) -> str:
+    """Deterministic character mapping from Urdu to Devanagari.
+    Consonants are mapped correctly, but short vowels are omitted/incorrect
+    because Urdu script doesn't explicitly mark them."""
+    result = []
+    for i, ch in enumerate(text):
+        if ch == 'ا':
+            # Word-initial alif is 'अ', otherwise 'ा'
+            result.append('अ' if i == 0 or text[i - 1] == ' ' else 'ा')
+        elif ch in _URDU_TO_DEVA:
+            result.append(_URDU_TO_DEVA[ch])
+        else:
+            result.append(ch)
+    # Fix a common edge case: ئ + ے  (e.g., in بروئے)
+    rough = ''.join(result)
+    rough = rough.replace('इे', 'ए')
+    return rough
+def _polish_devanagari_vowels(client, model, numbered, expected_count, max_attempts=2):
+    """Use an LLM to ONLY fix vowels in the rough Devanagari conversion, preserving exact vocabulary.
+    Args:
+        client: Chat client; only client.chat.completions.create(...) is used.
+        model: Model name passed to the chat call and to log_llm_call.
+        numbered: User message holding the numbered Urdu / rough Devanagari pairs.
+        expected_count: Number of items the returned JSON array must contain.
+        max_attempts: Maximum number of LLM calls before giving up.
+    Returns:
+        The list of polished strings, or None if no attempt produced a valid result.
+    """
+    prompt = (
+        "You are a Devanagari spelling corrector for Urdu text. Below are Urdu sentences with ROUGH "
+        "character-by-character Devanagari conversions. Consonants are correct but vowels are wrong/missing.\n\n"
+        "YOUR ONLY JOB: Fix vowels to make readable Urdu in Devanagari.\n\n"
+        "STRICT RULES:\n"
+        "- Do NOT change, replace, or translate ANY word. Keep every single Urdu word exactly.\n"
+        "- Only add or fix vowel matras (ा ि ी ु ू े ै ो ौ ं ँ)\n"
+        "- Add nuqta dots where needed: क़ ख़ ग़ ज़ फ़\n"
+        "- Add halant (्) for conjuncts where needed\n\n"
+        "EXAMPLES:\n"
+        "Urdu: محبت | rough: महबत | fixed: मोहब्बत\n"
+        "Urdu: استعمال | rough: असतअमाल | fixed: इस्तेमाल\n"
+        "Urdu: حکمت | rough: हकमत | fixed: हिकमत\n"
+        "Urdu: طاقت | rough: ताक़त | fixed: ताक़त\n"
+        "Urdu: ہمدردی | rough: हमदरदय | fixed: हमदर्दी\n"
+        "Urdu: پیروی | rough: पयरवय | fixed: पैरवी\n"
+        "Urdu: کریم | rough: करयम | fixed: करीम\n\n"
+        "Return ONLY a JSON array of corrected Devanagari strings, in order, one per input."
+    )
+    for attempt in range(1, max_attempts + 1):
+        try:
+            response = client.chat.completions.create(
+                model=model,
+                messages=[
+                    {"role": "system", "content": prompt},
+                    {"role": "user", "content": numbered},
+                ],
+                temperature=0.1,
+            )
+            raw = response.choices[0].message.content.strip()
+            log_llm_call(
+                step="urdu_vowel_polish", provider="pollinations", model=model,
+                system_prompt=prompt, user_prompt=numbered,
+                response=raw, temperature=0.1,
+            )
+            try:
+                polished_list = parse_json_array(raw)
+            except (json.JSONDecodeError, ValueError):
+                print(f"[urdu] Attempt {attempt}: Could not parse response as JSON")
+                continue
+            if len(polished_list) != expected_count:
+                print(f"[urdu] Attempt {attempt}: Got {len(polished_list)} items, expected {expected_count}")
+                continue
+            # Quick check if it's returning Arabic/Urdu script instead
+            # (only the first three items are sampled; U+0600–U+06FF is the Arabic block)
+            sample = " ".join(polished_list[:3])
+            bad_chars = sum(1 for ch in sample if '\u0600' <= ch <= '\u06FF')
+            if bad_chars > 0:
+                print(f"[urdu] Attempt {attempt}: Output still contains Urdu script — retrying")
+                # Strengthen the system prompt for the next attempt.
+                prompt = "CRITICAL: OUTPUT MUST BE DEVANAGARI ONLY. NO ARABIC/URDU SCRIPT.\n\n" + prompt
+                continue
+            return polished_list
+        except Exception as e:
+            # Any other failure is logged and the loop moves on to the next attempt.
+            print(f"[urdu] LLM error on attempt {attempt}: {e}")
+    return None
+def transliterate_to_devanagari(segments: list[dict]) -> list[dict]:
+    """Convert Urdu script translations to Devanagari for TTS.
+    Adds 'tts_text' field to each segment.
+    Uses a hybrid approach: Deterministic char mapping + LLM vowel polishing."""
+    if not segments:
+        return segments
+    print("[urdu] Starting Hybrid Urdu → Devanagari conversion...")
+    # Step 1: Deterministic mapping to rough Devanagari
+    rough_texts = []
+    for seg in segments:
+        urdu_text = seg.get("translated_text", "")
+        rough_deva = _urdu_to_rough_devanagari(urdu_text)
+        rough_texts.append(rough_deva)
+    expected = len(segments)
+    numbered = "\n".join(
+        f"{i + 1}. Urdu: {seg.get('translated_text', '')}\n   Rough: {rough_texts[i]}"
+        for i, seg in enumerate(segments)
+    )
+    # Try Pollinations
+    client = build_client()
+    polished_list = _polish_devanagari_vowels(client, MODEL, numbered, expected)
+    if polished_list:
+        for seg, deva_text in zip(segments, polished_list):
+            seg["tts_text"] = deva_text
+        print("[urdu] Urdu → Devanagari hybrid transliteration complete ✓")
+        return segments
+    print("[urdu] Pollinations Polish failed ��� trying Bedrock fallback...")
+    # Bedrock Fallback
+    try:
+        system_prompt = (
+            "You are a Devanagari spelling corrector for Urdu text. Below are Urdu sentences with ROUGH "
+            "character-by-character Devanagari conversions. Consonants are correct but vowels are wrong/missing.\n\n"
+            "YOUR ONLY JOB: Fix vowels to make readable Urdu in Devanagari. Do NOT change/replace/translate ANY word.\n\n"
+            "EXAMPLES:\nمحبت | rough: महबत | fixed: मोहब्बत\nاستعمال | rough: असतअमाल | fixed: इस्तेमाल\n"
+            "حکمت | rough: हकमत | fixed: हिकमत\nहमदरदی | rough: हमदरदय | fixed: हमदर्दी\n\n"
+            "Return ONLY a JSON array of corrected Devanagari strings."
+        )
+        for attempt in range(1, 3):
+            raw = bedrock_converse(system_prompt, numbered, step="urdu_vowel_polish_bedrock")
+            try:
+                polished_list = parse_json_array(raw)
+            except (json.JSONDecodeError, ValueError):
+                print(f"[urdu] Bedrock attempt {attempt}: Could not parse response")
+                continue
+            if len(polished_list) != expected:
+                print(f"[urdu] Bedrock attempt {attempt}: Got {len(polished_list)} items, expected {expected}")
+                continue
+            sample = " ".join(polished_list[:3])
+            bad_chars = sum(1 for ch in sample if '\u0600' <= ch <= '\u06FF')
+            if bad_chars > 0:
+                print(f"[urdu] Bedrock attempt {attempt}: Output contains Urdu script — retrying")
+                system_prompt = "CRITICAL: OUTPUT MUST BE DEVANAGARI ONLY. NO ARABIC/URDU SCRIPT.\n\n" + system_prompt
+                continue
+            for seg, deva_text in zip(segments, polished_list):
+                seg["tts_text"] = deva_text
+            print("[urdu] Urdu → Devanagari transliteration (Bedrock) complete ✓")
+            return segments
+    except Exception as e:
+        print(f"[urdu] WARNING: Bedrock fallback failed ({e})")
+    print("[urdu] WARNING: All polishing failed. Falling back to rough Devanagari.")
+    for seg, r_text in zip(segments, rough_texts):
+        seg["tts_text"] = r_text
+    return segments
+# ── Transliteration: Devanagari → Urdu script (for captions) ────────────────
+def transliterate_to_urdu_script(segments: list[dict]) -> list[dict]:
+    """Convert Devanagari Urdu translations to Urdu (Nastaliq/Arabic) script for subtitles.
+    Adds 'caption_text' field to each segment."""
+    if not segments:
+        return segments
+    texts = [seg.get("translated_text", "") for seg in segments]
+    numbered = "\n".join(f"{i + 1}. {t}" for i, t in enumerate(texts))
+    system_prompt = (
+        "You are a script converter. Convert the following Devanagari Urdu text into Urdu script (Nastaliq/Arabic script). "
+        "This is NOT translation — the language is already Urdu, just written in Devanagari. "
+        "Convert it to proper Urdu script preserving every word exactly.\n\n"
+        "Return ONLY a JSON array of converted strings, in order, no extra text. "
+        "Do NOT include numbering in the output."
+    )
+    client = build_client()
+    try:
+        response = client.chat.completions.create(
+            model=MODEL,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": numbered},
+            ],
+            temperature=0.1,
+        )
+        raw = response.choices[0].message.content.strip()
+        log_llm_call(
+            step="urdu_script_convert", provider="pollinations", model=MODEL,
+            system_prompt=system_prompt, user_prompt=numbered,
+            response=raw, temperature=0.1,
+        )
+        urdu_list = parse_json_array(raw)
+        if len(urdu_list) != len(segments):
+            print(f"[urdu] WARNING: Urdu script returned {len(urdu_list)} items, expected {len(segments)}. Using Devanagari for captions")
+            return segments
+        for seg, urdu_text in zip(segments, urdu_list):
+            seg["caption_text"] = urdu_text
+        print("[urdu] Urdu script transliteration complete ✓")
+        return segments
+    except Exception as e:
+        print(f"[urdu] Pollinations transliteration failed ({e}) — trying Bedrock...")
+        try:
+            raw = bedrock_converse(system_prompt, numbered, step="urdu_script_convert_bedrock")
+            urdu_list = parse_json_array(raw)
+            if len(urdu_list) != len(segments):
+                print(f"[urdu] WARNING: Bedrock Urdu script returned {len(urdu_list)} items, expected {len(segments)}. Using Devanagari for captions")
+                return segments
+            for seg, urdu_text in zip(segments, urdu_list):
+                seg["caption_text"] = urdu_text
+            print("[urdu] Urdu script transliteration (Bedrock) complete ✓")
+            return segments
+        except Exception as e2:
+            print(f"[urdu] WARNING: Bedrock transliteration also failed ({e2}), using Devanagari for captions")
+            return segments

steps/s1_extract_audio.py ADDED Viewed

	@@ -0,0 +1,68 @@

+"""
+Step 1-2: Extract audio track from input video.
+Outputs a 16 kHz mono WAV suitable for Whisper + Chatterbox.
+"""
+import subprocess
+from pathlib import Path
+def extract_audio(video_path: str, output_path: str = "tmp/audio/source/extracted_audio.wav") -> str:
+    """
+    Extract audio from video using ffmpeg.
+    Args:
+        video_path: Path to the input video file.
+        output_path: Where to save the extracted audio (WAV).
+    Returns:
+        Absolute path to the extracted audio file.
+    """
+    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+    cmd = [
+        "ffmpeg", "-y",
+        "-i", video_path,
+        "-vn",                  # no video
+        "-acodec", "pcm_s16le", # PCM 16-bit
+        "-ar", "16000",         # 16 kHz (Whisper standard)
+        "-ac", "1",             # mono
+        output_path,
+    ]
+    result = subprocess.run(cmd, capture_output=True, text=True)
+    if result.returncode != 0:
+        raise RuntimeError(f"FFmpeg audio extraction failed:\n{result.stderr}")
+    print(f"[s1] Audio extracted → {output_path}")
+    return output_path
+def extract_audio_hq(video_path: str, output_path: str = "tmp/audio/source/extracted_audio_hq.wav") -> str:
+    """
+    Extract high-quality 44.1 kHz stereo audio for source separation (Demucs).
+    Args:
+        video_path: Path to the input video file.
+        output_path: Where to save the HQ audio (WAV).
+    Returns:
+        Absolute path to the extracted HQ audio file.
+    """
+    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+    cmd = [
+        "ffmpeg", "-y",
+        "-i", video_path,
+        "-vn",
+        "-acodec", "pcm_s16le",
+        "-ar", "44100",         # 44.1 kHz for Demucs
+        "-ac", "2",             # stereo
+        output_path,
+    ]
+    result = subprocess.run(cmd, capture_output=True, text=True)
+    if result.returncode != 0:
+        raise RuntimeError(f"FFmpeg HQ audio extraction failed:\n{result.stderr}")
+    print(f"[s1] HQ audio extracted → {output_path}")
+    return output_path

steps/s1b_separate.py ADDED Viewed

	@@ -0,0 +1,152 @@

+"""
+Step 1b: Separate vocals from accompaniment using Demucs (Python API).
+In-process inference so ZeroGPU can intercept the GPU allocation via
+`@spaces.GPU`. Works on CUDA, MPS, and CPU without code changes.
+Only runs when preserve_music=True.
+"""
+import shutil
+import subprocess
+from pathlib import Path
+import torch
+import torchaudio
+import spaces
+# Process-wide cache for the htdemucs model; filled on first use by _get_model().
+_MODEL = None
+def _select_device() -> str:
+    if torch.cuda.is_available():
+        return "cuda"
+    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+        return "mps"
+    return "cpu"
+def _get_model():
+    """Lazy-load htdemucs once per process. Module-level semantics; we load
+    on first call so the import itself stays cheap on non-GPU envs."""
+    global _MODEL
+    if _MODEL is None:
+        from demucs.pretrained import get_model
+        print("[s1b] Loading htdemucs on cpu...")
+        model = get_model("htdemucs")
+        model.eval()
+        model.to("cpu")
+        _MODEL = model
+    return _MODEL
+@spaces.GPU(duration=120)
+def _apply_demucs(mix: torch.Tensor, device: str) -> torch.Tensor:
+    """GPU-bound inference call. `mix` shape: [1, channels, time].
+    Args:
+        mix: Input mixture tensor.
+        device: Device name the model and the input are moved to for inference.
+    Returns:
+        The separated sources returned by apply_model, moved back to the CPU.
+    """
+    from demucs.apply import apply_model
+    model = _get_model()
+    # _get_model() loads the model on the CPU; move it only when it is not
+    # already on the requested device type.
+    if next(model.parameters()).device.type != device:
+        print(f"[s1b] Moving htdemucs to {device} inside GPU scope...")
+        model = model.to(device)
+    with torch.no_grad():
+        # apply_model returns [batch, sources, channels, time]
+        sources = apply_model(
+            model,
+            mix.to(device),
+            shifts=1,
+            split=True,
+            overlap=0.25,
+            device=device,
+        )
+    return sources.cpu()
+def _load_and_normalise(audio_hq_path: str, target_sr: int, target_ch: int) -> tuple[torch.Tensor, float, float]:
+    """Load WAV, resample/remix to match model requirements, z-normalise."""
+    wav, sr = torchaudio.load(audio_hq_path)
+    if sr != target_sr:
+        wav = torchaudio.functional.resample(wav, sr, target_sr)
+    if wav.shape[0] == 1 and target_ch == 2:
+        wav = wav.repeat(2, 1)
+    elif wav.shape[0] > target_ch:
+        wav = wav[:target_ch]
+    mean = wav.mean()
+    std = wav.std().clamp_min(1e-8)
+    wav_norm = (wav - mean) / std
+    return wav_norm.unsqueeze(0), mean.item(), std.item()
+def separate_audio(
+    audio_hq_path: str,
+    output_dir: str = "tmp",
+) -> tuple[str, str]:
+    """
+    Separate vocals from accompaniment using Demucs htdemucs (Python API).
+    Writes vocals.wav, accompaniment.wav and vocals_16k.wav into output_dir;
+    the accompaniment is the sum of every non-vocal source.
+    Args:
+        audio_hq_path: Path to input audio (any sample rate / channels).
+        output_dir: Directory to write output stems.
+    Returns:
+        (vocals_16k_path, accompaniment_path)
+    Raises:
+        RuntimeError: If the model has no 'vocals' source or the ffmpeg resample fails.
+    """
+    out = Path(output_dir)
+    out.mkdir(parents=True, exist_ok=True)
+    model = _get_model()
+    device = _select_device()
+    target_sr = model.samplerate
+    target_ch = model.audio_channels
+    source_names = list(model.sources)
+    print(f"[s1b] Running Demucs htdemucs on {device} (Python API)...")
+    mix, mean, std = _load_and_normalise(audio_hq_path, target_sr, target_ch)
+    sources = _apply_demucs(mix, device)
+    # Undo the z-normalisation applied by _load_and_normalise().
+    sources = sources * std + mean
+    sources = sources[0]  # drop batch dim → [num_sources, channels, time]
+    try:
+        vocals_idx = source_names.index("vocals")
+    except ValueError as e:
+        raise RuntimeError(f"htdemucs is missing 'vocals' source: {source_names}") from e
+    vocals = sources[vocals_idx]
+    # Everything that is not the vocals stem is summed into the accompaniment.
+    no_vocals = sum(
+        sources[i] for i in range(sources.shape[0]) if i != vocals_idx
+    )
+    vocals_path = str(out / "vocals.wav")
+    accompaniment_path = str(out / "accompaniment.wav")
+    vocals_16k_path = str(out / "vocals_16k.wav")
+    torchaudio.save(vocals_path, vocals, target_sr)
+    torchaudio.save(accompaniment_path, no_vocals, target_sr)
+    print(f"[s1b] Vocals saved → {vocals_path}")
+    print(f"[s1b] Accompaniment saved → {accompaniment_path}")
+    # Resample vocals to 16 kHz mono for Whisper/TTS via ffmpeg
+    # (torchaudio resample works but ffmpeg is more predictable for downstream)
+    cmd = [
+        "ffmpeg", "-y",
+        "-i", vocals_path,
+        "-ar", "16000",
+        "-ac", "1",
+        vocals_16k_path,
+    ]
+    result = subprocess.run(cmd, capture_output=True, text=True)
+    if result.returncode != 0:
+        raise RuntimeError(f"FFmpeg vocals resample failed:\n{result.stderr}")
+    print(f"[s1b] Vocals (16 kHz) saved → {vocals_16k_path}")
+    # Leftover cleanup for any previously-shelled-out demucs runs
+    old_demucs_dir = out / "demucs"
+    if old_demucs_dir.exists():
+        shutil.rmtree(str(old_demucs_dir), ignore_errors=True)
+    return vocals_16k_path, accompaniment_path

steps/s2_transcribe.py ADDED Viewed

	@@ -0,0 +1,395 @@

+"""
+Step 3: Transcribe audio with timestamps.
+Primary local backend (device-dependent):
+  - Apple MPS: mlx-whisper
+  - CUDA: openai-whisper (PyTorch-based, so @spaces.GPU can intercept it)
+  - CPU: faster-whisper
+Outermost fallback:
+  - Pollinations Whisper API (verbose_json)
+"""
+import os
+import requests
+import torch
+from dotenv import load_dotenv
+import spaces
+load_dotenv()
+# Endpoint of the Pollinations transcription API used by _segments_from_pollinations().
+POLLINATIONS_URL = "https://gen.pollinations.ai/v1/audio/transcriptions"
+# Model identifiers; each can be overridden via the environment variable named in its getenv call.
+POLLEN_TRANSCRIBE_MODEL = os.getenv("POLLEN_TRANSCRIBE_MODEL", "whisper-large-v3")
+MLX_MODEL = os.getenv("MLX_WHISPER_MODEL", "mlx-community/whisper-large-mlx")
+FASTER_WHISPER_MODEL = os.getenv("FASTER_WHISPER_MODEL", "large-v3")
+OPENAI_WHISPER_MODEL = os.getenv("OPENAI_WHISPER_MODEL", "large-v3")
+# Name of the environment variable that forces a specific local backend.
+LOCAL_WHISPER_BACKEND_ENV = "VIDEOVOICE_WHISPER_BACKEND"
+# Accepted values for that override.
+_VALID_LOCAL_BACKENDS = {
+    "mlx-whisper",
+    "openai-whisper-cuda",
+    "faster-whisper-cpu",
+}
+# Cache of faster-whisper models keyed by (device, compute_type).
+_FASTER_WHISPER_MODELS = {}
+# Cached openai-whisper model, loaded on first use.
+_OPENAI_WHISPER_MODEL = None
+def _running_on_hf_space() -> bool:
+    return bool(
+        os.getenv("SPACE_ID")
+        or os.getenv("SPACE_HOST")
+        or os.getenv("HF_SPACE_ID")
+    )
+def _get_local_whisper_backend() -> str:
+    """
+    Resolve the local transcription backend lazily.
+    On HF Spaces, default to CPU faster-whisper unless explicitly overridden.
+    ZeroGPU can report CUDA availability outside an active @spaces.GPU call,
+    which makes import-time backend selection unreliable.
+    Resolution order: explicit environment override, Apple MPS, HF Space,
+    CUDA, then CPU.
+    Returns:
+        One of the names in _VALID_LOCAL_BACKENDS.
+    Raises:
+        ValueError: If the override variable is set to an unknown backend name.
+    """
+    # The override is compared case-insensitively and ignoring surrounding whitespace.
+    override = os.getenv(LOCAL_WHISPER_BACKEND_ENV, "").strip().lower()
+    if override:
+        if override not in _VALID_LOCAL_BACKENDS:
+            raise ValueError(
+                f"Invalid {LOCAL_WHISPER_BACKEND_ENV}={override!r}. "
+                f"Expected one of: {', '.join(sorted(_VALID_LOCAL_BACKENDS))}."
+            )
+        return override
+    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+        return "mlx-whisper"
+    # Checked before CUDA on purpose: see the ZeroGPU note in the docstring.
+    if _running_on_hf_space():
+        return "faster-whisper-cpu"
+    if torch.cuda.is_available():
+        # PyTorch-based path so @spaces.GPU can intercept the CUDA allocation.
+        # faster-whisper uses CTranslate2 which bypasses PyTorch and breaks ZeroGPU.
+        return "openai-whisper-cuda"
+    return "faster-whisper-cpu"
+def _extract_words(raw_words: list[dict]) -> list[dict]:
+    """Normalise word timestamps into {word, start, end}."""
+    output = []
+    for raw in raw_words or []:
+        start = raw.get("start")
+        end = raw.get("end")
+        if start is None or end is None:
+            continue
+        output.append(
+            {
+                "word": str(raw.get("word", "")).strip(),
+                "start": float(start),
+                "end": float(end),
+            }
+        )
+    return output
+def _normalise_segments(segments: list[dict]) -> list[dict]:
+    """Return canonical segment schema with word-level timestamps."""
+    output = []
+    for seg in segments:
+        start = seg.get("start")
+        end = seg.get("end")
+        if start is None or end is None:
+            continue
+        words = _extract_words(seg.get("words", []))
+        output.append(
+            {
+                "start": float(start),
+                "end": float(end),
+                "text": str(seg.get("text", "")).strip(),
+                "words": words,
+            }
+        )
+    return output
+# Max duration (seconds) before a segment is considered oversized and needs splitting.
+_MAX_SEGMENT_DURATION = 15.0
+# Preferred pause gap (seconds) to use as a split point.
+_PAUSE_THRESHOLD = 0.4
+def _split_oversized_segments(segments: list[dict]) -> list[dict]:
+    """Split segments longer than _MAX_SEGMENT_DURATION using word timings.
+    Segments within the limit, or with fewer than two non-empty words, are
+    passed through unchanged. Oversized segments are rebuilt from their
+    non-empty words: each chunk takes its start/end from its first/last word
+    and its text is the chunk's words joined by single spaces.
+    """
+    output = []
+    for seg in segments:
+        duration = seg["end"] - seg["start"]
+        words = seg.get("words", [])
+        # Words whose text is empty are ignored when looking for split points.
+        real_words = [w for w in words if w["word"]]
+        if duration <= _MAX_SEGMENT_DURATION or len(real_words) < 2:
+            output.append(seg)
+            continue
+        chunks = []
+        chunk_start_idx = 0
+        chunk_start_time = real_words[0]["start"]
+        for i in range(len(real_words) - 1):
+            elapsed = real_words[i]["end"] - chunk_start_time
+            gap = real_words[i + 1]["start"] - real_words[i]["end"]
+            # Split after word i when the chunk has reached the max duration and
+            # a gap of at least 0.15 s follows, or when it has reached half the
+            # max duration and the gap is at least _PAUSE_THRESHOLD.
+            should_split = (
+                (elapsed >= _MAX_SEGMENT_DURATION and gap >= 0.15)
+                or (elapsed >= _MAX_SEGMENT_DURATION * 0.5 and gap >= _PAUSE_THRESHOLD)
+            )
+            if should_split:
+                chunks.append(real_words[chunk_start_idx : i + 1])
+                chunk_start_idx = i + 1
+                chunk_start_time = real_words[i + 1]["start"]
+        # Whatever remains after the last split point becomes the final chunk.
+        if chunk_start_idx < len(real_words):
+            chunks.append(real_words[chunk_start_idx:])
+        for chunk_words in chunks:
+            output.append(
+                {
+                    "start": chunk_words[0]["start"],
+                    "end": chunk_words[-1]["end"],
+                    "text": " ".join(w["word"] for w in chunk_words).strip(),
+                    "words": chunk_words,
+                }
+            )
+    return output
+def _assign_words_to_segments(segments: list[dict], words: list[dict]) -> None:
+    """Distribute top-level word list into segments by timestamp overlap."""
+    normalised = _extract_words(words)
+    for seg in segments:
+        seg["words"] = [
+            w for w in normalised if w["start"] >= seg["start"] and w["end"] <= seg["end"]
+        ]
+def _segments_from_pollinations(audio_path: str, language: str) -> list[dict]:
+    """Call Pollinations Whisper API and return canonical segments.
+    Args:
+        audio_path: Path of the audio file to upload.
+        language: Language code; "auto" or an empty value lets Whisper auto-detect.
+    Returns:
+        Segments in the schema produced by _normalise_segments().
+    Raises:
+        Whatever response.raise_for_status() raises for an HTTP error status.
+    """
+    # First non-empty key wins; an empty string is used if none is set.
+    api_key = (
+        os.getenv("POLLEN_API_KEY_SECONDARY")
+        or os.getenv("POLLEN_API_KEY")
+        or os.getenv("POLLINATIONS_API_KEY", "")
+    )
+    headers = {"Authorization": f"Bearer {api_key}"}
+    with open(audio_path, "rb") as audio_file:
+        files = {"file": (os.path.basename(audio_path), audio_file, "audio/wav")}
+        # When the caller passes "auto" (or empty), omit the `language` field so
+        # Whisper auto-detects. Forcing a wrong language code makes Whisper
+        # silently switch to translate-mode (e.g. Hindi audio + language="en"
+        # produces an English translation, not a Hindi transcript).
+        data = {
+            "model": POLLEN_TRANSCRIBE_MODEL,
+            "response_format": "verbose_json",
+            "temperature": 0,
+            "timestamp_granularities[]": "word",
+        }
+        if language and language.lower() not in ("auto", ""):
+            data["language"] = language
+        response = requests.post(
+            POLLINATIONS_URL,
+            headers=headers,
+            files=files,
+            data=data,
+            timeout=120,
+        )
+    response.raise_for_status()
+    result = response.json()
+    segments = _normalise_segments(result.get("segments", []))
+    # If no segment carries its own words but the response has a top-level
+    # word list, distribute those words into the segments.
+    if not any(seg.get("words") for seg in segments) and "words" in result:
+        _assign_words_to_segments(segments, result["words"])
+    return _normalise_segments(segments)
+def _segments_from_mlx(audio_path: str, language: str) -> list[dict]:
+    """Run mlx-whisper locally."""
+    print("[s2] Using mlx-whisper backend...")
+    try:
+        import mlx_whisper
+    except ImportError:
+        raise ImportError("mlx-whisper is not installed. Run: uv add mlx-whisper")
+    result = mlx_whisper.transcribe(
+        audio_path,
+        path_or_hf_repo=MLX_MODEL,
+        language=language if language != "auto" else None,
+        word_timestamps=True,
+    )
+    return _normalise_segments(result.get("segments", []))
+def _get_faster_whisper_model(device: str, compute_type: str):
+    """Load/cached faster-whisper model."""
+    from faster_whisper import WhisperModel
+    key = (device, compute_type)
+    if key not in _FASTER_WHISPER_MODELS:
+        _FASTER_WHISPER_MODELS[key] = WhisperModel(
+            FASTER_WHISPER_MODEL,
+            device=device,
+            compute_type=compute_type,
+        )
+    return _FASTER_WHISPER_MODELS[key]
def _segments_from_faster_whisper_impl(
    audio_path: str,
    language: str,
    device: str,
    compute_type: str,
) -> list[dict]:
    """Transcribe with faster-whisper and convert the result to plain dicts.

    ``language == "auto"`` is forwarded as ``None``. Each returned dict has
    ``start``, ``end``, ``text`` and ``words``; words that lack a start or
    end timestamp are left out.
    """
    whisper_model = _get_faster_whisper_model(device=device, compute_type=compute_type)
    lang_hint = language if language != "auto" else None
    raw_segments, _info = whisper_model.transcribe(
        audio_path,
        language=lang_hint,
        word_timestamps=True,
    )
    collected = []
    for raw in raw_segments:
        # Keep only words with both timestamps present.
        timed_words = [
            {
                "word": str(item.word or "").strip(),
                "start": float(item.start),
                "end": float(item.end),
            }
            for item in (raw.words or [])
            if item.start is not None and item.end is not None
        ]
        entry = {
            "start": float(raw.start),
            "end": float(raw.end),
            "text": str(raw.text or "").strip(),
            "words": timed_words,
        }
        collected.append(entry)
    return collected
def _segments_from_faster_whisper_cpu(
    audio_path: str,
    language: str,
) -> list[dict]:
    """Run faster-whisper on the CPU with int8 compute.

    Deliberately carries no GPU decorator, so it runs outside the ZeroGPU
    budget.
    """
    return _segments_from_faster_whisper_impl(
        audio_path=audio_path,
        language=language,
        device="cpu",
        compute_type="int8",
    )
def _get_openai_whisper_model():
    """Return the process-wide openai-whisper model, loading it on first use.

    The model is placed on CUDA when ``torch.cuda.is_available()`` is true,
    otherwise on the CPU.

    Raises:
        ImportError: If the ``whisper`` package cannot be imported.
    """
    global _OPENAI_WHISPER_MODEL
    if _OPENAI_WHISPER_MODEL is not None:
        return _OPENAI_WHISPER_MODEL
    try:
        import whisper as openai_whisper
    except ImportError as exc:
        raise ImportError("openai-whisper is not installed") from exc
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"
    print(f"[s2] Loading openai-whisper ({OPENAI_WHISPER_MODEL}) on {target_device}...")
    _OPENAI_WHISPER_MODEL = openai_whisper.load_model(
        OPENAI_WHISPER_MODEL, device=target_device
    )
    return _OPENAI_WHISPER_MODEL
@spaces.GPU(duration=60)
def _segments_from_openai_whisper(
    audio_path: str,
    language: str,
) -> list[dict]:
    """Transcribe with openai-whisper inside a ``spaces.GPU`` call.

    ``language == "auto"`` is forwarded as ``None``. The ``"segments"`` entry
    of the result (empty list if missing) is returned after
    ``_normalise_segments``.
    """
    whisper_model = _get_openai_whisper_model()
    lang_hint = language if language != "auto" else None
    outcome = whisper_model.transcribe(
        audio_path,
        language=lang_hint,
        word_timestamps=True,
        verbose=False,
    )
    raw_segments = outcome.get("segments", [])
    return _normalise_segments(raw_segments)
def _segments_from_local_backend(audio_path: str, language: str) -> list[dict]:
    """Run the local whisper backend reported by ``_get_local_whisper_backend``.

    ``"mlx-whisper"`` uses mlx-whisper. ``"openai-whisper-cuda"`` uses
    openai-whisper and drops to CPU faster-whisper when that raises
    ``ImportError``. Any other value uses CPU faster-whisper.
    """
    selected = _get_local_whisper_backend()
    if selected == "mlx-whisper":
        return _segments_from_mlx(audio_path, language)
    if selected != "openai-whisper-cuda":
        print("[s2] Using faster-whisper backend (cpu)...")
        return _segments_from_faster_whisper_cpu(audio_path, language)
    print("[s2] Using openai-whisper backend (cuda)...")
    try:
        return _segments_from_openai_whisper(audio_path, language)
    except ImportError:
        print("[s2] openai-whisper unavailable; falling back to faster-whisper (cpu).")
        return _segments_from_faster_whisper_cpu(audio_path, language)
def transcribe(audio_path: str, language: str = "en") -> list[dict]:
    """
    Transcribe ``audio_path`` and return segments in the canonical schema.

    Backends are tried in this order:
    1. Pollinations API
    2. Local backend selected by ``_get_local_whisper_backend``

    An empty Pollinations result counts as "no result" and triggers the local
    backend. The winning segments are passed through
    ``_split_oversized_segments`` and ``_normalise_segments``.

    Raises:
        RuntimeError: If no backend produced a segment list; the message
            includes the error recorded for each backend that failed.
    """
    print(f"[s2] Transcribing {audio_path} (lang={language})...")
    failures = {}  # backend label -> exception, in the order they occurred
    segments = None

    # 1. Try Pollinations API first
    try:
        print("[s2] Trying Pollinations API...")
        segments = _segments_from_pollinations(audio_path, language) or None
        if segments is not None:
            print(f"[s2] Pollinations returned {len(segments)} segments ✓")
    except Exception as exc:
        print(f"[s2] Pollinations error ({exc}) — falling back to local backend.")
        failures["Pollinations"] = exc
        segments = None

    # 2. Try Local Backend (GPU or CPU)
    if segments is None:
        try:
            backend_name = _get_local_whisper_backend()
            print(f"[s2] Trying local backend ({backend_name})...")
            segments = _segments_from_local_backend(audio_path, language)
            if segments:
                print(f"[s2] Local backend returned {len(segments)} segments ✓")
        except Exception as exc:
            print(f"[s2] Local backend error ({exc}).")
            failures["Local backend"] = exc
            segments = None

    if segments is None:
        detail_text = " | ".join(f"{label}: {err}" for label, err in failures.items())
        suffix = f" Details: {detail_text}" if failures else ""
        raise RuntimeError(f"Transcription failed on all available backends.{suffix}")

    count_before = len(segments)
    segments = _split_oversized_segments(segments)
    count_after = len(segments)
    if count_after != count_before:
        print(f"[s2] Split {count_before} oversized segment(s) → {count_after} segments")
    return _normalise_segments(segments)

steps/s3_translate.py ADDED Viewed

	@@ -0,0 +1,195 @@

+"""
+Step 4: Translate segment texts using Pollinations chat completions API
+(OpenAI-compatible endpoint, no extra API key needed beyond POLLEN_API_KEY).
+"""
+import re
+from .lang._shared import build_client, bedrock_fallback, parse_json_array, MODEL, log_llm_call
+from .lang import get_translation_prompt, get_fallback_mode, post_translate
+def _translate_batch(segments: list[dict], target_language: str) -> list[dict]:
+    """Translate a batch of segments into target_language."""
+    if not segments:
+        return segments
+    # Build single-shot batch: include duration so the LLM can match spoken length
+    numbered = "\n".join(
+        f"{i+1}. [{s['end'] - s['start']:.1f}s] {s['text']}"
+        for i, s in enumerate(segments)
+    )
+    # Default prompt (generic, works for most languages)
+    default_prompt = (
+    f"You are a voice-over dubbing writer — not a translator. "
+    f"Your job is to write what a native {target_language} speaker would *actually say out loud* "
+    f"in a casual, natural conversation. Forget the source words. Capture the meaning, tone, and energy.\n\n"
+    f"INPUT FORMAT:\n"
+    f"Numbered lines with a spoken duration in brackets, e.g.: 1. [4.6s] Hello there\n\n"
+    f"OUTPUT FORMAT:\n"
+    f"A JSON array of {target_language} strings — one per input line, in order. "
+    f"No numbering, no brackets, no extra text.\n"
+    f'Shape: ["<first line translated into {target_language}>", "<second line translated into {target_language}>"]\n\n'
+    f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
+    f"SCORING RUBRIC — evaluate every line against these before outputting:\n"
+    f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
+    f"[1] NATURALNESS — weight: HIGH\n"
+    f"  Would a native speaker actually say this in real life?\n"
+    f"  ✗ Fail: dictionary phrasing, formal register, textbook grammar\n"
+    f"  ✓ Pass: contractions, colloquial rhythm, everyday vocabulary\n"
+    f"  Ask yourself: 'Would I hear this in a TV show or on the street?' If no → rewrite.\n\n"
+    f"[2] SPOKEN FIT — weight: CRITICAL\n"
+    f"  The line will be read by TTS within the duration shown in brackets.\n"
+    f"  Fewer words is almost always safer. Aim for 70–80% of the original word count.\n"
+    f"  ✗ Fail: translation is longer or same length as the English\n"
+    f"  ✓ Pass: shorter, with no loss of core meaning or emotional tone\n"
+    f"  Trick: cut filler, merge ideas, use contractions and short-form spoken words.\n\n"
+    f"[3] TTS READABILITY — weight: HIGH\n"
+    f"  Long sentences with multiple commas trip up TTS engines.\n"
+    f"  ✗ Fail: 'She met him, her true love, on a rainy evening, in the city she once fled.'\n"
+    f"  ✓ Pass: 'She met him on a rainy evening. Her true love. In the city she once fled.'\n"
+    f"  Short beats. Natural pauses. Each sentence punches clean.\n\n"
+    f"[4] EMOTIONAL REGISTER — weight: HIGH\n"
+    f"  Match the tone of the original: casual, urgent, tender, funny, sarcastic — whatever it is.\n"
+    f"  ✗ Fail: a sarcastic line becomes polite; a tender moment becomes clinical\n"
+    f"  ✓ Pass: the emotional texture is preserved even if the words are completely different\n\n"
+    f"[5] TRANSLATION PURITY — weight: MEDIUM\n"
+    f"  Every word in the output must be {target_language}. No words from the original "
+    f"language should leak through.\n"
+    f"  This includes: filler words (Oh, Hmm, Well, So, Right when not native to "
+    f"{target_language}), names used as exclamations, brand-style interjections. "
+    f"Find the {target_language} equivalent every time.\n\n"
+    f"[6] WORD-FOR-WORD TRAP — weight: HIGH (avoid this)\n"
+    f"  Do NOT translate word by word. No one speaks that way.\n"
+    f"  ✗ Fail: a literal one-to-one rendering that preserves the source word order\n"
+    f"  ✓ Pass: a restructured line that reads naturally in {target_language} "
+    f"while keeping the same meaning\n"
+    f"  Restructure freely. {target_language} has its own natural word order — use it.\n\n"
+    f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
+    f"BEFORE RETURNING OUTPUT:\n"
+    f"For each line, silently run this checklist:\n"
+    f"  □ Would a native speaker say this naturally out loud?\n"
+    f"  □ Is it shorter than the English original?\n"
+    f"  □ Are there any commas that create awkward TTS pauses? → break into short sentences\n"
+    f"  □ Does the emotional tone match?\n"
+    f"  □ Are there any English words hiding in the output?\n"
+    f"If any box fails → rewrite that line. Then output.\n"
+    f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
+    f"Return ONLY the JSON array. No preamble, no explanation, no duration prefixes."
+    )
+    # Let language-specific handler override the prompt if needed
+    system_prompt = get_translation_prompt(target_language, default_prompt)
+    expected = len(segments)
+    strict_prompt = (
+        system_prompt
+        + f"\n\nCRITICAL: You MUST return exactly {expected} items in the JSON array "
+        f"— one per input line. Do NOT merge, skip, or split any lines."
+    )
+    client = build_client()
+    max_retries = 2
+    try:
+        for attempt in range(1, max_retries + 1):
+            response = client.chat.completions.create(
+                model=MODEL,
+                messages=[
+                    {"role": "system", "content": strict_prompt},
+                    {"role": "user", "content": numbered},
+                ],
+                temperature=0.2,
+            )
+            raw = response.choices[0].message.content.strip()
+            log_llm_call(
+                step="s3_translate", provider="pollinations", model=MODEL,
+                system_prompt=strict_prompt, user_prompt=numbered,
+                response=raw, temperature=0.2,
+            )
+            translated_list = parse_json_array(raw)
+            if len(translated_list) == expected:
+                break
+            print(f"[s3] Pollinations returned {len(translated_list)}/{expected} items (attempt {attempt}/{max_retries})")
+            if attempt == max_retries:
+                raise ValueError(
+                    f"Translation returned {len(translated_list)} items but expected {expected} after {max_retries} attempts"
+                )
+        cleaned = [re.sub(r'^\d+[\.\)\-]\s*', '', t) for t in translated_list]
+        result = []
+        for seg, translated_text in zip(segments, cleaned):
+            result.append({**seg, "translated_text": translated_text})
+        print(f"[s3] Translating via Pollinations complete ✓")
+        return result
+    except Exception as e:
+        print(f"[s3] Pollinations translation error ({e}) — using fallback.")
+        # Language-specific fallback routing
+        if get_fallback_mode(target_language) == "bedrock":
+            return bedrock_fallback(segments, numbered, system_prompt)
+        # Default: Google Translate
+        from deep_translator import GoogleTranslator
+        try:
+            translator = GoogleTranslator(source='auto', target=target_language.lower())
+        except Exception as e2:
+            print(f"[s3] Fallback failed to init translator ({e2})")
+            raise
+        result = []
+        for seg in segments:
+            translated_text = translator.translate(seg["text"])
+            result.append({**seg, "translated_text": translated_text})
+        print(f"[s3] Translation via fallback complete ✓")
+        return result
def translate(segments: list[dict], target_language: str) -> list[dict]:
    """
    Add a translation to every segment, working through the list in batches.

    Args:
        segments: List of {start, end, text} dicts.
        target_language: Full language name, e.g. "Spanish", "French", "Hindi".

    Returns:
        The translated segments, each carrying 'translated_text', after
        ``post_translate`` has run on them (which may add language-specific
        fields such as 'tts_text'). An empty input is returned unchanged.
    """
    if not segments:
        return segments
    total = len(segments)
    print(f"[s3] Translating {total} segments → {target_language} (in batches)...")
    BATCH_SIZE = 15
    # Per-batch progress is only announced when there is more than one batch.
    announce_batches = total > BATCH_SIZE
    translated = []
    for batch_no, offset in enumerate(range(0, total, BATCH_SIZE), start=1):
        chunk = segments[offset:offset + BATCH_SIZE]
        if announce_batches:
            print(f"[s3] Processing batch {batch_no} ({len(chunk)} items)...")
        translated.extend(_translate_batch(chunk, target_language))
    # Run language-specific post-processing (e.g., Urdu transliteration)
    return post_translate(translated, target_language)