JakgritB committed on
Commit · dbc3c35
1 Parent(s): 12a024d
feat(backend): add modular video processing API
Add FastAPI job endpoints, ROCm-aware configuration, Whisper and Qwen service boundaries, ffmpeg clip rendering, subtitle generation, timing logs, and file-backed MVP job storage. A sample client flow follows the file list below.
- backend/Dockerfile +17 -0
- backend/app/__init__.py +1 -0
- backend/app/core/__init__.py +1 -0
- backend/app/core/config.py +68 -0
- backend/app/core/timing.py +20 -0
- backend/app/main.py +122 -0
- backend/app/models/__init__.py +1 -0
- backend/app/models/schemas.py +92 -0
- backend/app/services/__init__.py +1 -0
- backend/app/services/clips.py +112 -0
- backend/app/services/highlight.py +139 -0
- backend/app/services/multimodal.py +18 -0
- backend/app/services/pipeline.py +157 -0
- backend/app/services/subtitles.py +44 -0
- backend/app/services/transcription.py +95 -0
- backend/app/services/video_input.py +80 -0
- backend/app/storage.py +58 -0
- backend/app/utils/__init__.py +1 -0
- backend/app/utils/rocm.py +33 -0
- backend/app/workers/__init__.py +1 -0
- backend/app/workers/celery_app.py +15 -0
- backend/pyproject.toml +42 -0
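A minimal end-to-end sketch of the new API, assuming the server runs on localhost:8000 and the dev extra's httpx is installed; the YouTube URL and profile values are illustrative, not from this commit:

import time
import httpx

with httpx.Client(base_url="http://localhost:8000") as client:
    job = client.post(
        "/api/jobs/youtube",
        json={
            "youtube_url": "https://www.youtube.com/watch?v=EXAMPLE",  # illustrative URL
            "profile": {"niche": "education", "clip_style": "informative"},
        },
    ).json()
    while job["status"] in {"queued", "running"}:  # poll the file-backed job snapshot
        time.sleep(2)
        job = client.get(f"/api/jobs/{job['id']}").json()
    print(job["status"], [clip["video_url"] for clip in job["clips"]])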
backend/Dockerfile
ADDED
@@ -0,0 +1,17 @@
+ARG ROCM_PYTORCH_IMAGE=rocm/pytorch:latest
+FROM ${ROCM_PYTORCH_IMAGE}
+
+WORKDIR /app
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ffmpeg git curl \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY pyproject.toml ./
+ARG INSTALL_EXTRAS=.
+RUN pip install --upgrade pip && pip install -e "${INSTALL_EXTRAS}"
+
+COPY app ./app
+
+EXPOSE 8000
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
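The INSTALL_EXTRAS build arg controls which optional dependency groups land in the image; a build that should include the model stack might pass --build-arg INSTALL_EXTRAS='.[ai,rocm-inference]' to docker build (illustrative invocation), while the default '.' installs only the core API dependencies.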
backend/app/__init__.py
ADDED
@@ -0,0 +1 @@
+"""AI Clip Studio backend."""
backend/app/core/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Core configuration and instrumentation."""
backend/app/core/config.py
ADDED
@@ -0,0 +1,68 @@
+from functools import lru_cache
+import os
+from pathlib import Path
+
+from pydantic import Field
+from pydantic import BaseModel
+
+
+class Settings(BaseModel):
+    app_name: str = "ElevenClip.AI"
+    demo_mode: bool = True
+    storage_dir: Path = Path("data")
+    frontend_origin: str = "http://localhost:5173"
+
+    whisper_model_id: str = "openai/whisper-large-v3"
+    qwen_text_model_id: str = "Qwen/Qwen2.5-7B-Instruct"
+    qwen_vl_model_id: str = "Qwen/Qwen2-VL-7B-Instruct"
+    hf_token: str | None = None
+    preferred_torch_dtype: str = "bfloat16"
+
+    target_clip_count: int = Field(default=5, ge=1, le=20)
+    max_clips: int = Field(default=10, ge=1, le=50)
+
+    ffmpeg_binary: str = "ffmpeg"
+    ffprobe_binary: str = "ffprobe"
+    ffmpeg_video_codec: str = "h264_amf"
+    ffmpeg_cpu_codec: str = "libx264"
+
+    redis_url: str = "redis://redis:6379/0"
+    celery_enabled: bool = False
+
+
+@lru_cache
+def get_settings() -> Settings:
+    settings = Settings(
+        demo_mode=_bool_env("DEMO_MODE", True),
+        storage_dir=Path(os.getenv("STORAGE_DIR", "data")),
+        frontend_origin=os.getenv("FRONTEND_ORIGIN", "http://localhost:5173"),
+        whisper_model_id=os.getenv("WHISPER_MODEL_ID", "openai/whisper-large-v3"),
+        qwen_text_model_id=os.getenv("QWEN_TEXT_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct"),
+        qwen_vl_model_id=os.getenv("QWEN_VL_MODEL_ID", "Qwen/Qwen2-VL-7B-Instruct"),
+        hf_token=os.getenv("HF_TOKEN") or None,
+        preferred_torch_dtype=os.getenv("TORCH_DTYPE", "bfloat16"),
+        target_clip_count=_int_env("TARGET_CLIP_COUNT", 5),
+        max_clips=_int_env("MAX_CLIPS", 10),
+        ffmpeg_binary=os.getenv("FFMPEG_BINARY", "ffmpeg"),
+        ffprobe_binary=os.getenv("FFPROBE_BINARY", "ffprobe"),
+        ffmpeg_video_codec=os.getenv("FFMPEG_VIDEO_CODEC", "h264_amf"),
+        ffmpeg_cpu_codec=os.getenv("FFMPEG_CPU_CODEC", "libx264"),
+        redis_url=os.getenv("REDIS_URL", "redis://redis:6379/0"),
+        celery_enabled=_bool_env("CELERY_ENABLED", False),
+    )
+    settings.storage_dir.mkdir(parents=True, exist_ok=True)
+    return settings
+
+
+def _bool_env(name: str, default: bool) -> bool:
+    value = os.getenv(name)
+    if value is None:
+        return default
+    return value.strip().lower() in {"1", "true", "yes", "on"}
+
+
+def _int_env(name: str, default: int) -> int:
+    value = os.getenv(name)
+    if value is None:
+        return default
+    return int(value)
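Because get_settings is wrapped in lru_cache, environment overrides must be in place before the first call; a minimal sketch of the override behavior (variable values illustrative):

import os

os.environ["DEMO_MODE"] = "off"        # anything outside {"1", "true", "yes", "on"} parses as False
os.environ["TARGET_CLIP_COUNT"] = "3"

from app.core.config import get_settings

settings = get_settings()              # first call builds and caches the Settings instance
assert settings.demo_mode is False
assert settings.target_clip_count == 3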
backend/app/core/timing.py
ADDED
@@ -0,0 +1,20 @@
+from collections.abc import Iterator
+from contextlib import contextmanager
+from time import perf_counter
+
+
+class TimingLog:
+    def __init__(self) -> None:
+        self._steps: dict[str, float] = {}
+
+    @contextmanager
+    def measure(self, name: str) -> Iterator[None]:
+        started = perf_counter()
+        try:
+            yield
+        finally:
+            self._steps[name] = round(perf_counter() - started, 3)
+
+    def to_dict(self) -> dict[str, float]:
+        total = round(sum(self._steps.values()), 3)
+        return {**self._steps, "total": total}
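Usage sketch for TimingLog (the step name and timed call are illustrative):

from time import sleep
from app.core.timing import TimingLog

timings = TimingLog()
with timings.measure("transcription"):  # records seconds spent in the block, even if it raises
    sleep(0.25)
timings.to_dict()                       # e.g. {"transcription": 0.25, "total": 0.25}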
backend/app/main.py
ADDED
@@ -0,0 +1,122 @@
+from fastapi import BackgroundTasks, FastAPI, File, Form, HTTPException, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+
+from app.core.config import get_settings
+from app.models.schemas import (
+    ChannelProfile,
+    ClipCandidate,
+    ClipPatch,
+    HealthResponse,
+    JobSnapshot,
+    RegenerateClipRequest,
+    YoutubeJobRequest,
+)
+from app.services.pipeline import VideoPipeline
+from app.services.video_input import save_upload
+from app.storage import JobStore
+from app.utils.rocm import detect_accelerator
+
+settings = get_settings()
+store = JobStore(settings)
+pipeline = VideoPipeline(settings, store)
+
+app = FastAPI(title=settings.app_name, version="0.1.0")
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=[settings.frontend_origin, "http://localhost:5173", "http://127.0.0.1:5173"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+app.mount("/media", StaticFiles(directory=settings.storage_dir), name="media")
+
+
+@app.get("/health", response_model=HealthResponse)
+async def health() -> HealthResponse:
+    return HealthResponse(
+        ok=True,
+        app=settings.app_name,
+        demo_mode=settings.demo_mode,
+        accelerator=detect_accelerator(),
+    )
+
+
+@app.post("/api/jobs/youtube", response_model=JobSnapshot)
+async def create_youtube_job(
+    request: YoutubeJobRequest, background_tasks: BackgroundTasks
+) -> JobSnapshot:
+    snapshot = store.create_job(
+        request.profile, {"kind": "youtube", "url": str(request.youtube_url)}
+    )
+    background_tasks.add_task(
+        pipeline.process_source, snapshot.id, "youtube", str(request.youtube_url), request.profile
+    )
+    return snapshot
+
+
+@app.post("/api/jobs/upload", response_model=JobSnapshot)
+async def create_upload_job(
+    background_tasks: BackgroundTasks,
+    profile_json: str = Form(...),
+    file: UploadFile = File(...),
+) -> JobSnapshot:
+    try:
+        profile = ChannelProfile.model_validate_json(profile_json)
+    except Exception as exc:
+        raise HTTPException(status_code=422, detail=f"Invalid profile JSON: {exc}") from exc
+
+    snapshot = store.create_job(profile, {"kind": "upload", "filename": file.filename})
+    source_path = await save_upload(file, store.job_dir(snapshot.id))
+    background_tasks.add_task(pipeline.process_source, snapshot.id, "upload", str(source_path), profile)
+    return snapshot
+
+
+@app.get("/api/jobs/{job_id}", response_model=JobSnapshot)
+async def get_job(job_id: str) -> JobSnapshot:
+    try:
+        return store.get_job(job_id)
+    except FileNotFoundError as exc:
+        raise HTTPException(status_code=404, detail="Job not found") from exc
+
+
+@app.patch("/api/jobs/{job_id}/clips/{clip_id}", response_model=ClipCandidate)
+async def update_clip(job_id: str, clip_id: str, patch: ClipPatch) -> ClipCandidate:
+    try:
+        return pipeline.patch_clip(job_id, clip_id, patch.model_dump())
+    except FileNotFoundError as exc:
+        raise HTTPException(status_code=404, detail="Job not found") from exc
+    except KeyError as exc:
+        raise HTTPException(status_code=404, detail="Clip not found") from exc
+
+
+@app.post("/api/jobs/{job_id}/clips/{clip_id}/regenerate", response_model=ClipCandidate)
+async def regenerate_clip(
+    job_id: str, clip_id: str, request: RegenerateClipRequest
+) -> ClipCandidate:
+    try:
+        return pipeline.regenerate_clip(
+            job_id,
+            clip_id,
+            clip_style=request.clip_style,
+            clip_length_seconds=request.clip_length_seconds,
+            subtitle_text=request.subtitle_text,
+        )
+    except FileNotFoundError as exc:
+        raise HTTPException(status_code=404, detail="Source video not found") from exc
+    except KeyError as exc:
+        raise HTTPException(status_code=404, detail="Clip not found") from exc
+
+
+@app.get("/api/jobs/{job_id}/clips/{clip_id}/download")
+async def download_clip(job_id: str, clip_id: str) -> FileResponse:
+    snapshot = store.get_job(job_id)
+    clip = next((item for item in snapshot.clips if item.id == clip_id), None)
+    if clip is None or clip.download_url is None:
+        raise HTTPException(status_code=404, detail="Clip not found")
+    filename = clip.download_url.rsplit("/", 1)[-1]
+    path = store.job_dir(job_id) / filename
+    if not path.exists():
+        raise HTTPException(status_code=404, detail="Clip file not found")
+    return FileResponse(path, media_type="video/mp4", filename=filename)
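The upload endpoint expects the profile as a JSON string in a multipart form field alongside the file; a client sketch (file name and profile values illustrative):

import json
import httpx

profile_json = json.dumps({"niche": "gaming", "clip_style": "funny", "target_platform": "tiktok"})
with httpx.Client(base_url="http://localhost:8000") as client, open("talk.mp4", "rb") as video:
    response = client.post(
        "/api/jobs/upload",
        data={"profile_json": profile_json},               # the Form(...) field
        files={"file": ("talk.mp4", video, "video/mp4")},  # the UploadFile field
    )
    snapshot = response.json()  # a JobSnapshot with status "queued"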
backend/app/models/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Pydantic models."""
backend/app/models/schemas.py
ADDED
@@ -0,0 +1,92 @@
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field, HttpUrl, field_validator
+
+
+def utc_now() -> datetime:
+    return datetime.now(timezone.utc)
+
+
+class TargetPlatform(str, Enum):
+    tiktok = "tiktok"
+    youtube_shorts = "youtube_shorts"
+    instagram_reels = "instagram_reels"
+
+
+class ChannelProfile(BaseModel):
+    niche: str = Field(default="education", min_length=2, max_length=80)
+    clip_style: str = Field(default="informative", min_length=2, max_length=80)
+    clip_length_seconds: int = Field(default=60, ge=15, le=180)
+    primary_language: str = Field(default="Thai", min_length=2, max_length=40)
+    target_platform: TargetPlatform = TargetPlatform.tiktok
+
+    @field_validator("niche", "clip_style", "primary_language")
+    @classmethod
+    def clean_text(cls, value: str) -> str:
+        return value.strip()
+
+
+class YoutubeJobRequest(BaseModel):
+    youtube_url: HttpUrl
+    profile: ChannelProfile
+
+
+class TranscriptSegment(BaseModel):
+    id: str
+    start_seconds: float = Field(ge=0)
+    end_seconds: float = Field(ge=0)
+    text: str
+    language: str | None = None
+
+
+class ClipCandidate(BaseModel):
+    id: str
+    start_seconds: float = Field(ge=0)
+    end_seconds: float = Field(ge=0)
+    title: str
+    reason: str
+    score: float = Field(ge=0, le=100)
+    subtitle_text: str = ""
+    video_url: str | None = None
+    download_url: str | None = None
+    approved: bool = False
+    deleted: bool = False
+    metadata: dict[str, Any] = Field(default_factory=dict)
+
+
+class ClipPatch(BaseModel):
+    start_seconds: float | None = Field(default=None, ge=0)
+    end_seconds: float | None = Field(default=None, ge=0)
+    subtitle_text: str | None = None
+    approved: bool | None = None
+    deleted: bool | None = None
+
+
+class RegenerateClipRequest(BaseModel):
+    clip_style: str | None = None
+    clip_length_seconds: int | None = Field(default=None, ge=15, le=180)
+    subtitle_text: str | None = None
+
+
+class JobSnapshot(BaseModel):
+    id: str
+    status: Literal["queued", "running", "completed", "failed"]
+    progress: float = Field(ge=0, le=1)
+    message: str
+    source: dict[str, Any]
+    profile: ChannelProfile
+    transcript: list[TranscriptSegment] = Field(default_factory=list)
+    clips: list[ClipCandidate] = Field(default_factory=list)
+    timings: dict[str, float] = Field(default_factory=dict)
+    error: str | None = None
+    created_at: datetime = Field(default_factory=utc_now)
+    updated_at: datetime = Field(default_factory=utc_now)
+
+
+class HealthResponse(BaseModel):
+    ok: bool
+    app: str
+    demo_mode: bool
+    accelerator: dict[str, Any]
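A short sketch of how the profile model behaves under validation (input values illustrative):

from app.models.schemas import ChannelProfile, TargetPlatform

profile = ChannelProfile.model_validate_json('{"niche": "  gaming  ", "clip_length_seconds": 45}')
assert profile.niche == "gaming"                         # clean_text strips surrounding whitespace
assert profile.clip_style == "informative"               # unset fields fall back to defaults
assert profile.target_platform is TargetPlatform.tiktok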
backend/app/services/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Pipeline services."""
backend/app/services/clips.py
ADDED
@@ -0,0 +1,112 @@
+import shutil
+import subprocess
+from pathlib import Path
+
+from app.core.config import Settings
+from app.models.schemas import ChannelProfile, ClipCandidate, TranscriptSegment
+from app.services.subtitles import write_single_caption_srt, write_srt
+from app.storage import JobStore
+
+
+class ClipGenerator:
+    def __init__(self, settings: Settings, store: JobStore) -> None:
+        self.settings = settings
+        self.store = store
+
+    def generate(
+        self,
+        job_id: str,
+        video_path: Path,
+        clips: list[ClipCandidate],
+        transcript: list[TranscriptSegment],
+        profile: ChannelProfile,
+    ) -> list[ClipCandidate]:
+        rendered: list[ClipCandidate] = []
+        for index, clip in enumerate(clips, start=1):
+            rendered.append(self.render_one(job_id, video_path, clip, transcript, profile, index))
+        return rendered
+
+    def render_one(
+        self,
+        job_id: str,
+        video_path: Path,
+        clip: ClipCandidate,
+        transcript: list[TranscriptSegment],
+        profile: ChannelProfile,
+        index: int = 1,
+    ) -> ClipCandidate:
+        job_dir = self.store.job_dir(job_id)
+        output_name = f"clip_{index:02}_{clip.id[:8]}.mp4"
+        subtitle_name = f"clip_{index:02}_{clip.id[:8]}.srt"
+        output_path = job_dir / output_name
+        subtitle_path = job_dir / subtitle_name
+
+        duration = max(1.0, clip.end_seconds - clip.start_seconds)
+        if clip.subtitle_text.strip():
+            write_single_caption_srt(subtitle_path, duration, clip.subtitle_text)
+        else:
+            write_srt(subtitle_path, clip.start_seconds, clip.end_seconds, transcript)
+        self._run_ffmpeg(video_path, output_path, subtitle_path, clip, profile)
+
+        clip.video_url = self.store.media_url(job_id, output_name)
+        clip.download_url = clip.video_url
+        clip.metadata["subtitle_file"] = self.store.media_url(job_id, subtitle_name)
+        return clip
+
+    def _run_ffmpeg(
+        self,
+        video_path: Path,
+        output_path: Path,
+        subtitle_path: Path,
+        clip: ClipCandidate,
+        profile: ChannelProfile,
+    ) -> None:
+        ffmpeg = shutil.which(self.settings.ffmpeg_binary)
+        if not ffmpeg or not video_path.exists() or video_path.stat().st_size == 0:
+            output_path.write_bytes(b"")
+            return
+
+        duration = max(1.0, clip.end_seconds - clip.start_seconds)
+        filters = [self._platform_filter(profile), self._subtitle_filter(subtitle_path)]
+        command = [
+            ffmpeg,
+            "-y",
+            "-ss",
+            f"{clip.start_seconds:.3f}",
+            "-i",
+            str(video_path),
+            "-t",
+            f"{duration:.3f}",
+            "-vf",
+            ",".join(filters),
+            "-c:v",
+            self.settings.ffmpeg_video_codec,
+            "-c:a",
+            "aac",
+            "-b:a",
+            "160k",
+            "-movflags",
+            "+faststart",
+            str(output_path),
+        ]
+        try:
+            subprocess.run(command, check=True, capture_output=True, text=True, timeout=180)
+            return
+        except Exception:
+            fallback = command.copy()
+            fallback[fallback.index(self.settings.ffmpeg_video_codec)] = self.settings.ffmpeg_cpu_codec
+            try:
+                subprocess.run(fallback, check=True, capture_output=True, text=True, timeout=180)
+                return
+            except Exception:
+                output_path.write_bytes(b"")
+
+    def _platform_filter(self, profile: ChannelProfile) -> str:
+        if profile.target_platform.value in {"tiktok", "youtube_shorts", "instagram_reels"}:
+            return "scale=1080:1920:force_original_aspect_ratio=increase,crop=1080:1920"
+        return "scale=1280:720:force_original_aspect_ratio=decrease,pad=1280:720:(ow-iw)/2:(oh-ih)/2"
+
+    def _subtitle_filter(self, subtitle_path: Path) -> str:
+        escaped = str(subtitle_path.resolve()).replace("\\", "/").replace(":", "\\:")
+        style = "Fontname=Arial,Fontsize=18,PrimaryColour=&H00FFFFFF,OutlineColour=&H00000000"
+        return f"subtitles='{escaped}':force_style='{style}'"
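Flattened, the argv that _run_ffmpeg assembles for a tiktok-profile clip looks roughly like this (paths and times illustrative):

ffmpeg -y -ss 12.000 -i source.mp4 -t 60.000 -vf "scale=1080:1920:force_original_aspect_ratio=increase,crop=1080:1920,subtitles='/abs/path/clip_01_abcdef12.srt':force_style='Fontname=Arial,Fontsize=18,...'" -c:v h264_amf -c:a aac -b:a 160k -movflags +faststart clip_01_abcdef12.mp4

If the AMD AMF encoder fails or is unavailable, the same command is retried with -c:v libx264 before the generator gives up and writes an empty placeholder file.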
backend/app/services/highlight.py
ADDED
@@ -0,0 +1,139 @@
+import json
+import re
+from uuid import uuid4
+
+from app.core.config import Settings
+from app.models.schemas import ChannelProfile, ClipCandidate, TranscriptSegment
+
+
+class QwenHighlightDetector:
+    def __init__(self, settings: Settings) -> None:
+        self.settings = settings
+        self._llm = None
+
+    def detect(
+        self, transcript: list[TranscriptSegment], profile: ChannelProfile
+    ) -> list[ClipCandidate]:
+        if self.settings.demo_mode:
+            return self._heuristic_detect(transcript, profile)
+
+        try:
+            return self._qwen_detect(transcript, profile)
+        except Exception:
+            return self._heuristic_detect(transcript, profile)
+
+    def _qwen_detect(
+        self, transcript: list[TranscriptSegment], profile: ChannelProfile
+    ) -> list[ClipCandidate]:
+        try:
+            from vllm import LLM, SamplingParams
+        except Exception as exc:
+            raise RuntimeError("vLLM with ROCm backend is required for Qwen inference") from exc
+
+        if self._llm is None:
+            self._llm = LLM(
+                model=self.settings.qwen_text_model_id,
+                dtype=self.settings.preferred_torch_dtype,
+                trust_remote_code=True,
+            )
+
+        transcript_text = "\n".join(
+            f"[{segment.start_seconds:.1f}-{segment.end_seconds:.1f}] {segment.text}"
+            for segment in transcript
+        )
+        prompt = f"""
+You are selecting short-form clips for a creator.
+Profile:
+- niche: {profile.niche}
+- style: {profile.clip_style}
+- target length seconds: {profile.clip_length_seconds}
+- language: {profile.primary_language}
+- platform: {profile.target_platform.value}
+
+Return strict JSON only. Shape:
+[
+{{
+"start_seconds": 12.0,
+"end_seconds": 72.0,
+"title": "short title",
+"reason": "why this will engage viewers",
+"score": 91,
+"subtitle_text": "clean subtitle text"
+}}
+]
+
+Transcript:
+{transcript_text}
+""".strip()
+        sampling = SamplingParams(temperature=0.2, max_tokens=1200)
+        outputs = self._llm.generate([prompt], sampling)
+        text = outputs[0].outputs[0].text
+        payload = self._parse_json_array(text)
+        clips = [
+            ClipCandidate(
+                id=uuid4().hex,
+                start_seconds=float(item["start_seconds"]),
+                end_seconds=float(item["end_seconds"]),
+                title=str(item.get("title") or "Highlight"),
+                reason=str(item.get("reason") or "High engagement potential"),
+                score=float(item.get("score") or 75),
+                subtitle_text=str(item.get("subtitle_text") or ""),
+                metadata={"model": self.settings.qwen_text_model_id},
+            )
+            for item in payload[: self.settings.max_clips]
+        ]
+        return clips or self._heuristic_detect(transcript, profile)
+
+    def _parse_json_array(self, text: str) -> list[dict]:
+        match = re.search(r"\[[\s\S]*\]", text)
+        if not match:
+            raise ValueError("No JSON array in Qwen response")
+        payload = json.loads(match.group(0))
+        if not isinstance(payload, list):
+            raise ValueError("Qwen response is not a list")
+        return payload
+
+    def _heuristic_detect(
+        self, transcript: list[TranscriptSegment], profile: ChannelProfile
+    ) -> list[ClipCandidate]:
+        style_terms = {
+            "funny": ["react", "punchy", "mistake", "surprising"],
+            "informative": ["important", "practical", "takeaway", "explanation"],
+            "dramatic": ["problem", "surprising", "before-and-after", "stop scrolling"],
+            "educational": ["question", "answer", "context", "takeaway"],
+        }
+        preferred_terms = style_terms.get(profile.clip_style.lower(), [])
+        scored: list[tuple[float, TranscriptSegment]] = []
+        for segment in transcript:
+            text = segment.text.lower()
+            score = 45.0
+            score += 12 if "?" in segment.text else 0
+            score += 8 if any(term in text for term in preferred_terms) else 0
+            score += 8 if any(term in text for term in ["mistake", "surprising", "stop scrolling"]) else 0
+            score += 6 if any(term in text for term in ["takeaway", "answer", "reacts"]) else 0
+            score += min(len(segment.text) / 12, 10)
+            scored.append((min(score, 100), segment))
+
+        scored.sort(key=lambda item: item[0], reverse=True)
+        clips: list[ClipCandidate] = []
+        for score, segment in scored[: self.settings.target_clip_count]:
+            start = max(0.0, segment.start_seconds - 5.0)
+            end = start + float(profile.clip_length_seconds)
+            clips.append(
+                ClipCandidate(
+                    id=uuid4().hex,
+                    start_seconds=start,
+                    end_seconds=end,
+                    title=self._title_for(segment.text),
+                    reason=f"Matches the {profile.clip_style} style for a {profile.niche} audience.",
+                    score=round(score, 1),
+                    subtitle_text=segment.text,
+                    metadata={"model": "heuristic-fallback"},
+                )
+            )
+        return sorted(clips, key=lambda clip: clip.start_seconds)
+
+    def _title_for(self, text: str) -> str:
+        words = re.sub(r"[^A-Za-z0-9 ]+", "", text).split()
+        title = " ".join(words[:7])
+        return title or "Highlight"
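_parse_json_array deliberately tolerates prose around the model output: the regex grabs the bracketed span, so a response like the one below still parses (response text illustrative):

from app.core.config import get_settings
from app.services.highlight import QwenHighlightDetector

detector = QwenHighlightDetector(get_settings())
detector._parse_json_array('Sure, here are the clips: [{"start_seconds": 12.0, "end_seconds": 72.0, "score": 91}] Enjoy!')
# -> [{"start_seconds": 12.0, "end_seconds": 72.0, "score": 91}]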
backend/app/services/multimodal.py
ADDED
@@ -0,0 +1,18 @@
+from app.core.config import Settings
+from app.models.schemas import ClipCandidate
+
+
+class QwenVisualAnalyzer:
+    def __init__(self, settings: Settings) -> None:
+        self.settings = settings
+
+    def enrich(self, video_path: str, clips: list[ClipCandidate]) -> list[ClipCandidate]:
+        if self.settings.demo_mode:
+            return clips
+
+        # Hook for Qwen2-VL frame/audio scoring on AMD ROCm.
+        # Keep this side-effect free until the hackathon demo has stable frame sampling assets.
+        for clip in clips:
+            clip.metadata["visual_model"] = self.settings.qwen_vl_model_id
+            clip.metadata["visual_status"] = "not_configured"
+        return clips
backend/app/services/pipeline.py
ADDED
@@ -0,0 +1,157 @@
+import asyncio
+from pathlib import Path
+
+from app.core.config import Settings
+from app.core.timing import TimingLog
+from app.models.schemas import ChannelProfile, ClipCandidate
+from app.services.clips import ClipGenerator
+from app.services.highlight import QwenHighlightDetector
+from app.services.multimodal import QwenVisualAnalyzer
+from app.services.transcription import WhisperTranscriber
+from app.services.video_input import resolve_youtube_url
+from app.storage import JobStore
+
+
+class VideoPipeline:
+    def __init__(self, settings: Settings, store: JobStore) -> None:
+        self.settings = settings
+        self.store = store
+        self.transcriber = WhisperTranscriber(settings)
+        self.highlight_detector = QwenHighlightDetector(settings)
+        self.visual_analyzer = QwenVisualAnalyzer(settings)
+        self.clip_generator = ClipGenerator(settings, store)
+
+    async def process_source(
+        self,
+        job_id: str,
+        source_kind: str,
+        source_value: str,
+        profile: ChannelProfile,
+    ) -> None:
+        timings = TimingLog()
+        try:
+            self.store.update_job(
+                job_id, status="running", progress=0.05, message="Preparing video input"
+            )
+            with timings.measure("input"):
+                if source_kind == "youtube":
+                    video_path = await resolve_youtube_url(
+                        source_value, self.store.job_dir(job_id), self.settings
+                    )
+                else:
+                    video_path = Path(source_value)
+
+            self.store.update_job(
+                job_id, progress=0.25, message="Transcribing with Whisper Large V3"
+            )
+            with timings.measure("transcription"):
+                transcript = await asyncio.to_thread(
+                    self.transcriber.transcribe, str(video_path), profile
+                )
+            self.store.write_json(
+                job_id,
+                "transcript.json",
+                [segment.model_dump(mode="json") for segment in transcript],
+            )
+            self.store.update_job(job_id, transcript=transcript, timings=timings.to_dict())
+
+            self.store.update_job(job_id, progress=0.55, message="Scoring highlights with Qwen")
+            with timings.measure("highlight_detection"):
+                clips = await asyncio.to_thread(self.highlight_detector.detect, transcript, profile)
+
+            self.store.update_job(job_id, progress=0.65, message="Checking visual highlights")
+            with timings.measure("multimodal_analysis"):
+                clips = await asyncio.to_thread(self.visual_analyzer.enrich, str(video_path), clips)
+
+            self.store.update_job(job_id, progress=0.78, message="Generating clips and subtitles")
+            with timings.measure("clip_generation"):
+                rendered = await asyncio.to_thread(
+                    self.clip_generator.generate, job_id, video_path, clips, transcript, profile
+                )
+
+            self.store.write_json(
+                job_id, "clips.json", [clip.model_dump(mode="json") for clip in rendered]
+            )
+            self.store.update_job(
+                job_id,
+                status="completed",
+                progress=1,
+                message="Clips ready",
+                transcript=transcript,
+                clips=rendered,
+                timings=timings.to_dict(),
+            )
+        except Exception as exc:
+            self.store.update_job(
+                job_id,
+                status="failed",
+                progress=1,
+                message="Processing failed",
+                error=str(exc),
+                timings=timings.to_dict(),
+            )
+
+    def patch_clip(self, job_id: str, clip_id: str, updates: dict) -> ClipCandidate:
+        snapshot = self.store.get_job(job_id)
+        patched: ClipCandidate | None = None
+        clips: list[ClipCandidate] = []
+        for clip in snapshot.clips:
+            if clip.id == clip_id:
+                clean_updates = {key: value for key, value in updates.items() if value is not None}
+                clip = clip.model_copy(update=clean_updates)
+                if clip.end_seconds <= clip.start_seconds:
+                    clip = clip.model_copy(update={"end_seconds": clip.start_seconds + 1})
+                patched = clip
+            clips.append(clip)
+        if patched is None:
+            raise KeyError(clip_id)
+        self.store.update_job(job_id, clips=clips)
+        return patched
+
+    def regenerate_clip(
+        self,
+        job_id: str,
+        clip_id: str,
+        clip_style: str | None = None,
+        clip_length_seconds: int | None = None,
+        subtitle_text: str | None = None,
+    ) -> ClipCandidate:
+        snapshot = self.store.get_job(job_id)
+        source_path = self._source_path(job_id)
+        clips: list[ClipCandidate] = []
+        regenerated: ClipCandidate | None = None
+        for index, clip in enumerate(snapshot.clips, start=1):
+            if clip.id == clip_id:
+                profile = snapshot.profile.model_copy(
+                    update={
+                        key: value
+                        for key, value in {
+                            "clip_style": clip_style,
+                            "clip_length_seconds": clip_length_seconds,
+                        }.items()
+                        if value is not None
+                    }
+                )
+                if clip_length_seconds is not None:
+                    clip = clip.model_copy(
+                        update={"end_seconds": clip.start_seconds + clip_length_seconds}
+                    )
+                if subtitle_text is not None:
+                    clip = clip.model_copy(update={"subtitle_text": subtitle_text})
+                clip = self.clip_generator.render_one(
+                    job_id, source_path, clip, snapshot.transcript, profile, index
+                )
+                clip.metadata["regenerated"] = True
+                regenerated = clip
+            clips.append(clip)
+        if regenerated is None:
+            raise KeyError(clip_id)
+        self.store.update_job(job_id, clips=clips)
+        return regenerated
+
+    def _source_path(self, job_id: str) -> Path:
+        job_dir = self.store.job_dir(job_id)
+        matches = sorted(job_dir.glob("source.*"))
+        if not matches:
+            raise FileNotFoundError("source video missing")
+        return matches[0]
backend/app/services/subtitles.py
ADDED
@@ -0,0 +1,44 @@
+from pathlib import Path
+
+from app.models.schemas import TranscriptSegment
+
+
+def seconds_to_srt_time(value: float) -> str:
+    millis = int(round(value * 1000))
+    hours, remainder = divmod(millis, 3_600_000)
+    minutes, remainder = divmod(remainder, 60_000)
+    seconds, millis = divmod(remainder, 1000)
+    return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"
+
+
+def write_srt(path: Path, clip_start: float, clip_end: float, segments: list[TranscriptSegment]) -> None:
+    rows: list[str] = []
+    index = 1
+    for segment in segments:
+        if segment.end_seconds < clip_start or segment.start_seconds > clip_end:
+            continue
+        start = max(0.0, segment.start_seconds - clip_start)
+        end = min(clip_end - clip_start, segment.end_seconds - clip_start)
+        rows.extend(
+            [
+                str(index),
+                f"{seconds_to_srt_time(start)} --> {seconds_to_srt_time(max(end, start + 0.8))}",
+                segment.text.strip(),
+                "",
+            ]
+        )
+        index += 1
+    if not rows:
+        rows = ["1", "00:00:00,000 --> 00:00:03,000", "", ""]
+    path.write_text("\n".join(rows), encoding="utf-8")
+
+
+def write_single_caption_srt(path: Path, duration: float, text: str) -> None:
+    safe_duration = max(duration, 1.0)
+    rows = [
+        "1",
+        f"00:00:00,000 --> {seconds_to_srt_time(safe_duration)}",
+        text.strip(),
+        "",
+    ]
+    path.write_text("\n".join(rows), encoding="utf-8")
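For a transcript segment running 62.5-66.0s inside a clip cut at 60-120s, write_srt rebases timestamps to the clip and emits (caption text illustrative):

1
00:00:02,500 --> 00:00:06,000
segment text here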
backend/app/services/transcription.py
ADDED
@@ -0,0 +1,95 @@
+from uuid import uuid4
+
+from app.core.config import Settings
+from app.models.schemas import ChannelProfile, TranscriptSegment
+from app.utils.rocm import torch_device_index
+
+
+class WhisperTranscriber:
+    def __init__(self, settings: Settings) -> None:
+        self.settings = settings
+        self._pipeline = None
+
+    def transcribe(self, video_path: str, profile: ChannelProfile) -> list[TranscriptSegment]:
+        if self.settings.demo_mode:
+            return self._demo_transcript(profile)
+
+        try:
+            from transformers import pipeline
+        except Exception as exc:
+            raise RuntimeError("transformers is required for Whisper transcription") from exc
+
+        if self._pipeline is None:
+            self._pipeline = pipeline(
+                task="automatic-speech-recognition",
+                model=self.settings.whisper_model_id,
+                device=torch_device_index(),
+                token=self.settings.hf_token,
+                chunk_length_s=30,
+                return_timestamps=True,
+            )
+
+        generate_kwargs = {"task": "transcribe"}
+        if profile.primary_language and profile.primary_language.lower() != "auto":
+            generate_kwargs["language"] = profile.primary_language.lower()
+
+        result = self._pipeline(str(video_path), generate_kwargs=generate_kwargs)
+        chunks = result.get("chunks") or []
+        if not chunks:
+            text = result.get("text", "").strip()
+            return [
+                TranscriptSegment(
+                    id=uuid4().hex,
+                    start_seconds=0,
+                    end_seconds=max(profile.clip_length_seconds, 15),
+                    text=text,
+                    language=profile.primary_language,
+                )
+            ]
+
+        segments: list[TranscriptSegment] = []
+        for chunk in chunks:
+            timestamp = chunk.get("timestamp") or (0, 0)
+            start = float(timestamp[0] or 0)
+            end = float(timestamp[1] or start + 5)
+            text = (chunk.get("text") or "").strip()
+            if text:
+                segments.append(
+                    TranscriptSegment(
+                        id=uuid4().hex,
+                        start_seconds=start,
+                        end_seconds=max(end, start + 1),
+                        text=text,
+                        language=profile.primary_language,
+                    )
+                )
+        return segments
+
+    def _demo_transcript(self, profile: ChannelProfile) -> list[TranscriptSegment]:
+        style = profile.clip_style.lower()
+        niche = profile.niche.lower()
+        lines = [
+            "This opening sets up the main problem creators face when a long video hides the best moments.",
+            "Here is the surprising mistake most teams make when they choose clips only by view count.",
+            "The important question is simple: which moment would make someone stop scrolling right now?",
+            f"For a {niche} channel, the answer changes because the audience expects a {style} rhythm.",
+            "This section has the clearest explanation and a strong before-and-after contrast.",
+            "Then the guest reacts with a punchy line that works well as a short hook.",
+            "A practical takeaway lands here, with enough context to stand alone as a sixty second clip.",
+            "The final segment wraps the idea with a direct callout that is easy to subtitle.",
+        ]
+        segments: list[TranscriptSegment] = []
+        cursor = 0.0
+        for line in lines:
+            end = cursor + 15.0
+            segments.append(
+                TranscriptSegment(
+                    id=uuid4().hex,
+                    start_seconds=cursor,
+                    end_seconds=end,
+                    text=line,
+                    language=profile.primary_language,
+                )
+            )
+            cursor = end
+        return segments
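The transformers ASR pipeline with return_timestamps=True yields the shape consumed above; an illustrative result:

result = {
    "text": " Full transcript text...",
    "chunks": [
        {"timestamp": (0.0, 5.2), "text": " First sentence."},
        {"timestamp": (5.2, 11.8), "text": " Second sentence."},
    ],
}
# Each chunk becomes one TranscriptSegment; a missing end timestamp falls back to start + 5.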
backend/app/services/video_input.py
ADDED
@@ -0,0 +1,80 @@
+import asyncio
+import shutil
+import subprocess
+from pathlib import Path
+
+from fastapi import UploadFile
+
+from app.core.config import Settings
+
+
+async def save_upload(upload: UploadFile, job_dir: Path) -> Path:
+    suffix = Path(upload.filename or "upload.mp4").suffix or ".mp4"
+    destination = job_dir / f"source{suffix.lower()}"
+    with destination.open("wb") as handle:
+        while chunk := await upload.read(1024 * 1024):
+            handle.write(chunk)
+    return destination
+
+
+async def resolve_youtube_url(url: str, job_dir: Path, settings: Settings) -> Path:
+    if settings.demo_mode:
+        return await asyncio.to_thread(create_demo_video, job_dir, settings)
+
+    try:
+        import yt_dlp
+    except Exception as exc:
+        raise RuntimeError("yt-dlp is required for YouTube ingestion") from exc
+
+    output_template = str(job_dir / "source.%(ext)s")
+    ydl_opts = {
+        "outtmpl": output_template,
+        "format": "bv*[ext=mp4]+ba[ext=m4a]/b[ext=mp4]/best",
+        "merge_output_format": "mp4",
+        "quiet": True,
+        "noprogress": True,
+    }
+
+    def download() -> Path:
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            ydl.download([url])
+        matches = sorted(job_dir.glob("source.*"))
+        if not matches:
+            raise RuntimeError("yt-dlp finished without producing a video")
+        return matches[0]
+
+    return await asyncio.to_thread(download)
+
+
+def create_demo_video(job_dir: Path, settings: Settings) -> Path:
+    destination = job_dir / "source.mp4"
+    ffmpeg = shutil.which(settings.ffmpeg_binary)
+    if not ffmpeg:
+        destination.write_bytes(b"")
+        return destination
+
+    command = [
+        ffmpeg,
+        "-y",
+        "-f",
+        "lavfi",
+        "-i",
+        "testsrc2=size=1280x720:rate=30:duration=120",
+        "-f",
+        "lavfi",
+        "-i",
+        "sine=frequency=660:sample_rate=48000:duration=120",
+        "-shortest",
+        "-c:v",
+        "libx264",
+        "-pix_fmt",
+        "yuv420p",
+        "-c:a",
+        "aac",
+        str(destination),
+    ]
+    try:
+        subprocess.run(command, check=True, capture_output=True, text=True, timeout=45)
+    except Exception:
+        destination.write_bytes(b"")
+    return destination
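The yt-dlp format selector reads left to right: prefer the best mp4 video stream plus the best m4a audio stream merged into mp4, fall back to the best single mp4 file, and finally to whatever format is best overall.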
backend/app/storage.py
ADDED
@@ -0,0 +1,58 @@
+import json
+from pathlib import Path
+from uuid import uuid4
+
+from app.core.config import Settings
+from app.models.schemas import ChannelProfile, JobSnapshot, utc_now
+
+
+class JobStore:
+    def __init__(self, settings: Settings) -> None:
+        self.settings = settings
+        self.root = settings.storage_dir
+        self.jobs_root = self.root / "jobs"
+        self.jobs_root.mkdir(parents=True, exist_ok=True)
+
+    def create_job(self, profile: ChannelProfile, source: dict) -> JobSnapshot:
+        job_id = uuid4().hex
+        job_dir = self.job_dir(job_id)
+        job_dir.mkdir(parents=True, exist_ok=True)
+        snapshot = JobSnapshot(
+            id=job_id,
+            status="queued",
+            progress=0,
+            message="Queued",
+            source=source,
+            profile=profile,
+        )
+        self.save_job(snapshot)
+        return snapshot
+
+    def job_dir(self, job_id: str) -> Path:
+        return self.jobs_root / job_id
+
+    def media_url(self, job_id: str, filename: str) -> str:
+        return f"/media/jobs/{job_id}/{filename}"
+
+    def save_job(self, snapshot: JobSnapshot) -> JobSnapshot:
+        snapshot.updated_at = utc_now()
+        path = self.job_dir(snapshot.id) / "job.json"
+        path.write_text(snapshot.model_dump_json(indent=2), encoding="utf-8")
+        return snapshot
+
+    def get_job(self, job_id: str) -> JobSnapshot:
+        path = self.job_dir(job_id) / "job.json"
+        if not path.exists():
+            raise FileNotFoundError(job_id)
+        data = json.loads(path.read_text(encoding="utf-8"))
+        return JobSnapshot.model_validate(data)
+
+    def update_job(self, job_id: str, **updates) -> JobSnapshot:
+        snapshot = self.get_job(job_id)
+        updated = snapshot.model_copy(update=updates)
+        return self.save_job(updated)
+
+    def write_json(self, job_id: str, filename: str, payload: object) -> Path:
+        path = self.job_dir(job_id) / filename
+        path.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8")
+        return path
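The resulting on-disk layout per job (file names follow the services above; contents illustrative):

data/
└── jobs/
    └── <job_id>/
        ├── job.json          # JobSnapshot, rewritten on every update_job call
        ├── source.mp4        # ingested or generated source video
        ├── transcript.json
        ├── clips.json
        └── clip_01_<id8>.mp4 / clip_01_<id8>.srt  # rendered outputs served under /media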
backend/app/utils/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Runtime helpers."""
backend/app/utils/rocm.py
ADDED
@@ -0,0 +1,33 @@
+from typing import Any
+
+
+def detect_accelerator() -> dict[str, Any]:
+    try:
+        import torch
+    except Exception as exc:
+        return {
+            "torch_available": False,
+            "cuda_api_available": False,
+            "rocm_hip_version": None,
+            "device_name": None,
+            "error": str(exc),
+        }
+
+    cuda_available = bool(torch.cuda.is_available())
+    device_name = torch.cuda.get_device_name(0) if cuda_available else None
+    return {
+        "torch_available": True,
+        "cuda_api_available": cuda_available,
+        "rocm_hip_version": getattr(torch.version, "hip", None),
+        "cuda_version": getattr(torch.version, "cuda", None),
+        "device_name": device_name,
+        "device_count": torch.cuda.device_count() if cuda_available else 0,
+    }
+
+
+def torch_device_index() -> int:
+    try:
+        import torch
+    except Exception:
+        return -1
+    return 0 if torch.cuda.is_available() else -1
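An illustrative accelerator payload from /health on a ROCm machine (field values vary by installation):

{
    "torch_available": True,
    "cuda_api_available": True,        # ROCm PyTorch reports GPUs through the torch.cuda API
    "rocm_hip_version": "6.1",         # illustrative
    "cuda_version": None,
    "device_name": "AMD Instinct MI300X",  # illustrative
    "device_count": 1,
}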
backend/app/workers/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Optional async workers."""
backend/app/workers/celery_app.py
ADDED
@@ -0,0 +1,15 @@
+from celery import Celery
+
+from app.core.config import get_settings
+
+settings = get_settings()
+
+celery_app = Celery("ai_clip_studio", broker=settings.redis_url, backend=settings.redis_url)
+celery_app.conf.task_serializer = "json"
+celery_app.conf.result_serializer = "json"
+celery_app.conf.accept_content = ["json"]
+
+
+@celery_app.task(name="pipeline.process_job")
+def process_job(job_id: str) -> str:
+    return f"Queued job {job_id}. FastAPI background tasks are active by default."
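Celery stays opt-in here (celery_enabled defaults to False) and this task is a placeholder; if the worker path is exercised, it would presumably be started with the standard celery -A app.workers.celery_app worker invocation against the configured Redis broker.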
backend/pyproject.toml
ADDED
@@ -0,0 +1,42 @@
+[project]
+name = "elevenclip-ai-backend"
+version = "0.1.0"
+description = "FastAPI backend for ElevenClip.AI on AMD ROCm"
+requires-python = ">=3.11"
+dependencies = [
+    "fastapi>=0.115.0",
+    "uvicorn[standard]>=0.30.0",
+    "pydantic>=2.8.0",
+    "python-multipart>=0.0.9",
+    "yt-dlp>=2025.1.15",
+    "celery[redis]>=5.4.0",
+    "redis>=5.0.0"
+]
+
+[project.optional-dependencies]
+ai = [
+    "transformers>=4.47.0",
+    "accelerate>=1.2.0",
+    "sentencepiece>=0.2.0",
+    "safetensors>=0.4.5"
+]
+rocm-inference = [
+    "vllm>=0.6.6",
+    "optimum-amd>=0.1.0; platform_system == 'Linux'"
+]
+dev = [
+    "pytest>=8.3.0",
+    "httpx>=0.27.0",
+    "ruff>=0.6.0"
+]
+
+[build-system]
+requires = ["setuptools>=69.0"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools.packages.find]
+include = ["app*"]
+
+[tool.ruff]
+line-length = 100
+target-version = "py311"
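A development install that exercises everything would presumably be pip install -e '.[ai,rocm-inference,dev]'; the base dependency list keeps the API runnable without any model weights.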