JakgritB committed
Commit dbc3c35 · 1 Parent(s): 12a024d

feat(backend): add modular video processing API

Add FastAPI job endpoints, ROCm-aware configuration, Whisper and Qwen service boundaries, ffmpeg clip rendering, subtitle generation, timing logs, and file-backed MVP job storage.
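
The endpoints land in backend/app/main.py below; here is a minimal usage sketch against a local dev server (the base URL, video URL, and polling cadence are assumptions for illustration, not part of the commit):

    import time

    import httpx

    BASE = "http://localhost:8000"  # assumed local uvicorn instance

    profile = {
        "niche": "education",
        "clip_style": "informative",
        "clip_length_seconds": 60,
        "primary_language": "Thai",
        "target_platform": "tiktok",
    }

    # Create a job; the pipeline then runs as a FastAPI background task.
    job = httpx.post(
        f"{BASE}/api/jobs/youtube",
        json={"youtube_url": "https://www.youtube.com/watch?v=example", "profile": profile},
    ).json()

    # Poll the job snapshot until the background pipeline settles.
    while job["status"] in {"queued", "running"}:
        time.sleep(2)
        job = httpx.get(f"{BASE}/api/jobs/{job['id']}").json()

    print(job["status"], [clip["title"] for clip in job["clips"]])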

backend/Dockerfile ADDED
@@ -0,0 +1,17 @@
+ ARG ROCM_PYTORCH_IMAGE=rocm/pytorch:latest
+ FROM ${ROCM_PYTORCH_IMAGE}
+
+ WORKDIR /app
+
+ RUN apt-get update \
+     && apt-get install -y --no-install-recommends ffmpeg git curl \
+     && rm -rf /var/lib/apt/lists/*
+
+ COPY pyproject.toml ./
+ ARG INSTALL_EXTRAS=.
+ RUN pip install --upgrade pip && pip install -e "${INSTALL_EXTRAS}"
+
+ COPY app ./app
+
+ EXPOSE 8000
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
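
The INSTALL_EXTRAS build arg controls which optional dependency groups from backend/pyproject.toml get baked into the image; the default `.` installs only the base API. A GPU-capable image would presumably be built with something like `docker build --build-arg INSTALL_EXTRAS=".[ai,rocm-inference]" -t elevenclip-backend backend/` (the tag name is illustrative; the extras names come from the pyproject.toml at the end of this commit).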
backend/app/__init__.py ADDED
@@ -0,0 +1 @@
+ """AI Clip Studio backend."""
backend/app/core/__init__.py ADDED
@@ -0,0 +1 @@
+ """Core configuration and instrumentation."""
backend/app/core/config.py ADDED
@@ -0,0 +1,68 @@
+ from functools import lru_cache
+ import os
+ from pathlib import Path
+
+ from pydantic import Field
+ from pydantic import BaseModel
+
+
+ class Settings(BaseModel):
+     app_name: str = "ElevenClip.AI"
+     demo_mode: bool = True
+     storage_dir: Path = Path("data")
+     frontend_origin: str = "http://localhost:5173"
+
+     whisper_model_id: str = "openai/whisper-large-v3"
+     qwen_text_model_id: str = "Qwen/Qwen2.5-7B-Instruct"
+     qwen_vl_model_id: str = "Qwen/Qwen2-VL-7B-Instruct"
+     hf_token: str | None = None
+     preferred_torch_dtype: str = "bfloat16"
+
+     target_clip_count: int = Field(default=5, ge=1, le=20)
+     max_clips: int = Field(default=10, ge=1, le=50)
+
+     ffmpeg_binary: str = "ffmpeg"
+     ffprobe_binary: str = "ffprobe"
+     ffmpeg_video_codec: str = "h264_amf"
+     ffmpeg_cpu_codec: str = "libx264"
+
+     redis_url: str = "redis://redis:6379/0"
+     celery_enabled: bool = False
+
+
+ @lru_cache
+ def get_settings() -> Settings:
+     settings = Settings(
+         demo_mode=_bool_env("DEMO_MODE", True),
+         storage_dir=Path(os.getenv("STORAGE_DIR", "data")),
+         frontend_origin=os.getenv("FRONTEND_ORIGIN", "http://localhost:5173"),
+         whisper_model_id=os.getenv("WHISPER_MODEL_ID", "openai/whisper-large-v3"),
+         qwen_text_model_id=os.getenv("QWEN_TEXT_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct"),
+         qwen_vl_model_id=os.getenv("QWEN_VL_MODEL_ID", "Qwen/Qwen2-VL-7B-Instruct"),
+         hf_token=os.getenv("HF_TOKEN") or None,
+         preferred_torch_dtype=os.getenv("TORCH_DTYPE", "bfloat16"),
+         target_clip_count=_int_env("TARGET_CLIP_COUNT", 5),
+         max_clips=_int_env("MAX_CLIPS", 10),
+         ffmpeg_binary=os.getenv("FFMPEG_BINARY", "ffmpeg"),
+         ffprobe_binary=os.getenv("FFPROBE_BINARY", "ffprobe"),
+         ffmpeg_video_codec=os.getenv("FFMPEG_VIDEO_CODEC", "h264_amf"),
+         ffmpeg_cpu_codec=os.getenv("FFMPEG_CPU_CODEC", "libx264"),
+         redis_url=os.getenv("REDIS_URL", "redis://redis:6379/0"),
+         celery_enabled=_bool_env("CELERY_ENABLED", False),
+     )
+     settings.storage_dir.mkdir(parents=True, exist_ok=True)
+     return settings
+
+
+ def _bool_env(name: str, default: bool) -> bool:
+     value = os.getenv(name)
+     if value is None:
+         return default
+     return value.strip().lower() in {"1", "true", "yes", "on"}
+
+
+ def _int_env(name: str, default: int) -> int:
+     value = os.getenv(name)
+     if value is None:
+         return default
+     return int(value)
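
Since get_settings() is wrapped in lru_cache, the environment is read once on first call and the same Settings instance is reused afterwards. A minimal sketch of the override semantics (env values illustrative):

    import os

    # Must be set before the first get_settings() call, since the result is cached.
    os.environ["DEMO_MODE"] = "off"          # not in {"1", "true", "yes", "on"} -> False
    os.environ["TARGET_CLIP_COUNT"] = "8"

    from app.core.config import get_settings

    settings = get_settings()
    assert settings.demo_mode is False
    assert settings.target_clip_count == 8
    assert get_settings() is settings  # cached singleton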
backend/app/core/timing.py ADDED
@@ -0,0 +1,20 @@
+ from collections.abc import Iterator
+ from contextlib import contextmanager
+ from time import perf_counter
+
+
+ class TimingLog:
+     def __init__(self) -> None:
+         self._steps: dict[str, float] = {}
+
+     @contextmanager
+     def measure(self, name: str) -> Iterator[None]:
+         started = perf_counter()
+         try:
+             yield
+         finally:
+             self._steps[name] = round(perf_counter() - started, 3)
+
+     def to_dict(self) -> dict[str, float]:
+         total = round(sum(self._steps.values()), 3)
+         return {**self._steps, "total": total}
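
Usage sketch, mirroring how the pipeline below wraps each stage (the sleeps stand in for real work):

    from time import sleep

    from app.core.timing import TimingLog

    timings = TimingLog()
    with timings.measure("transcription"):
        sleep(0.1)
    with timings.measure("clip_generation"):
        sleep(0.2)

    # to_dict() appends a synthetic "total" over all measured steps, e.g.
    # {"transcription": 0.1, "clip_generation": 0.2, "total": 0.3}
    print(timings.to_dict())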
backend/app/main.py ADDED
@@ -0,0 +1,122 @@
+ from fastapi import BackgroundTasks, FastAPI, File, Form, HTTPException, UploadFile
+ from fastapi.middleware.cors import CORSMiddleware
+ from fastapi.responses import FileResponse
+ from fastapi.staticfiles import StaticFiles
+
+ from app.core.config import get_settings
+ from app.models.schemas import (
+     ChannelProfile,
+     ClipCandidate,
+     ClipPatch,
+     HealthResponse,
+     JobSnapshot,
+     RegenerateClipRequest,
+     YoutubeJobRequest,
+ )
+ from app.services.pipeline import VideoPipeline
+ from app.services.video_input import save_upload
+ from app.storage import JobStore
+ from app.utils.rocm import detect_accelerator
+
+ settings = get_settings()
+ store = JobStore(settings)
+ pipeline = VideoPipeline(settings, store)
+
+ app = FastAPI(title=settings.app_name, version="0.1.0")
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=[settings.frontend_origin, "http://localhost:5173", "http://127.0.0.1:5173"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+ app.mount("/media", StaticFiles(directory=settings.storage_dir), name="media")
+
+
+ @app.get("/health", response_model=HealthResponse)
+ async def health() -> HealthResponse:
+     return HealthResponse(
+         ok=True,
+         app=settings.app_name,
+         demo_mode=settings.demo_mode,
+         accelerator=detect_accelerator(),
+     )
+
+
+ @app.post("/api/jobs/youtube", response_model=JobSnapshot)
+ async def create_youtube_job(
+     request: YoutubeJobRequest, background_tasks: BackgroundTasks
+ ) -> JobSnapshot:
+     snapshot = store.create_job(
+         request.profile, {"kind": "youtube", "url": str(request.youtube_url)}
+     )
+     background_tasks.add_task(
+         pipeline.process_source, snapshot.id, "youtube", str(request.youtube_url), request.profile
+     )
+     return snapshot
+
+
+ @app.post("/api/jobs/upload", response_model=JobSnapshot)
+ async def create_upload_job(
+     background_tasks: BackgroundTasks,
+     profile_json: str = Form(...),
+     file: UploadFile = File(...),
+ ) -> JobSnapshot:
+     try:
+         profile = ChannelProfile.model_validate_json(profile_json)
+     except Exception as exc:
+         raise HTTPException(status_code=422, detail=f"Invalid profile JSON: {exc}") from exc
+
+     snapshot = store.create_job(profile, {"kind": "upload", "filename": file.filename})
+     source_path = await save_upload(file, store.job_dir(snapshot.id))
+     background_tasks.add_task(pipeline.process_source, snapshot.id, "upload", str(source_path), profile)
+     return snapshot
+
+
+ @app.get("/api/jobs/{job_id}", response_model=JobSnapshot)
+ async def get_job(job_id: str) -> JobSnapshot:
+     try:
+         return store.get_job(job_id)
+     except FileNotFoundError as exc:
+         raise HTTPException(status_code=404, detail="Job not found") from exc
+
+
+ @app.patch("/api/jobs/{job_id}/clips/{clip_id}", response_model=ClipCandidate)
+ async def update_clip(job_id: str, clip_id: str, patch: ClipPatch) -> ClipCandidate:
+     try:
+         return pipeline.patch_clip(job_id, clip_id, patch.model_dump())
+     except FileNotFoundError as exc:
+         raise HTTPException(status_code=404, detail="Job not found") from exc
+     except KeyError as exc:
+         raise HTTPException(status_code=404, detail="Clip not found") from exc
+
+
+ @app.post("/api/jobs/{job_id}/clips/{clip_id}/regenerate", response_model=ClipCandidate)
+ async def regenerate_clip(
+     job_id: str, clip_id: str, request: RegenerateClipRequest
+ ) -> ClipCandidate:
+     try:
+         return pipeline.regenerate_clip(
+             job_id,
+             clip_id,
+             clip_style=request.clip_style,
+             clip_length_seconds=request.clip_length_seconds,
+             subtitle_text=request.subtitle_text,
+         )
+     except FileNotFoundError as exc:
+         raise HTTPException(status_code=404, detail="Source video not found") from exc
+     except KeyError as exc:
+         raise HTTPException(status_code=404, detail="Clip not found") from exc
+
+
+ @app.get("/api/jobs/{job_id}/clips/{clip_id}/download")
+ async def download_clip(job_id: str, clip_id: str) -> FileResponse:
+     snapshot = store.get_job(job_id)
+     clip = next((item for item in snapshot.clips if item.id == clip_id), None)
+     if clip is None or clip.download_url is None:
+         raise HTTPException(status_code=404, detail="Clip not found")
+     filename = clip.download_url.rsplit("/", 1)[-1]
+     path = store.job_dir(job_id) / filename
+     if not path.exists():
+         raise HTTPException(status_code=404, detail="Clip file not found")
+     return FileResponse(path, media_type="video/mp4", filename=filename)
backend/app/models/__init__.py ADDED
@@ -0,0 +1 @@
+ """Pydantic models."""
backend/app/models/schemas.py ADDED
@@ -0,0 +1,92 @@
+ from datetime import datetime, timezone
+ from enum import Enum
+ from typing import Any, Literal
+
+ from pydantic import BaseModel, Field, HttpUrl, field_validator
+
+
+ def utc_now() -> datetime:
+     return datetime.now(timezone.utc)
+
+
+ class TargetPlatform(str, Enum):
+     tiktok = "tiktok"
+     youtube_shorts = "youtube_shorts"
+     instagram_reels = "instagram_reels"
+
+
+ class ChannelProfile(BaseModel):
+     niche: str = Field(default="education", min_length=2, max_length=80)
+     clip_style: str = Field(default="informative", min_length=2, max_length=80)
+     clip_length_seconds: int = Field(default=60, ge=15, le=180)
+     primary_language: str = Field(default="Thai", min_length=2, max_length=40)
+     target_platform: TargetPlatform = TargetPlatform.tiktok
+
+     @field_validator("niche", "clip_style", "primary_language")
+     @classmethod
+     def clean_text(cls, value: str) -> str:
+         return value.strip()
+
+
+ class YoutubeJobRequest(BaseModel):
+     youtube_url: HttpUrl
+     profile: ChannelProfile
+
+
+ class TranscriptSegment(BaseModel):
+     id: str
+     start_seconds: float = Field(ge=0)
+     end_seconds: float = Field(ge=0)
+     text: str
+     language: str | None = None
+
+
+ class ClipCandidate(BaseModel):
+     id: str
+     start_seconds: float = Field(ge=0)
+     end_seconds: float = Field(ge=0)
+     title: str
+     reason: str
+     score: float = Field(ge=0, le=100)
+     subtitle_text: str = ""
+     video_url: str | None = None
+     download_url: str | None = None
+     approved: bool = False
+     deleted: bool = False
+     metadata: dict[str, Any] = Field(default_factory=dict)
+
+
+ class ClipPatch(BaseModel):
+     start_seconds: float | None = Field(default=None, ge=0)
+     end_seconds: float | None = Field(default=None, ge=0)
+     subtitle_text: str | None = None
+     approved: bool | None = None
+     deleted: bool | None = None
+
+
+ class RegenerateClipRequest(BaseModel):
+     clip_style: str | None = None
+     clip_length_seconds: int | None = Field(default=None, ge=15, le=180)
+     subtitle_text: str | None = None
+
+
+ class JobSnapshot(BaseModel):
+     id: str
+     status: Literal["queued", "running", "completed", "failed"]
+     progress: float = Field(ge=0, le=1)
+     message: str
+     source: dict[str, Any]
+     profile: ChannelProfile
+     transcript: list[TranscriptSegment] = Field(default_factory=list)
+     clips: list[ClipCandidate] = Field(default_factory=list)
+     timings: dict[str, float] = Field(default_factory=dict)
+     error: str | None = None
+     created_at: datetime = Field(default_factory=utc_now)
+     updated_at: datetime = Field(default_factory=utc_now)
+
+
+ class HealthResponse(BaseModel):
+     ok: bool
+     app: str
+     demo_mode: bool
+     accelerator: dict[str, Any]
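
A short sketch of what these models buy the endpoints; model_validate_json is exactly what the upload route calls on profile_json (the inputs here are illustrative):

    from pydantic import ValidationError

    from app.models.schemas import ChannelProfile

    # Whitespace is stripped by the clean_text validator; omitted fields take defaults.
    profile = ChannelProfile.model_validate_json(
        '{"niche": "  cooking  ", "clip_length_seconds": 45}'
    )
    assert profile.niche == "cooking"
    assert profile.clip_style == "informative"

    # Out-of-range lengths are rejected at the boundary (ge=15, le=180).
    try:
        ChannelProfile(clip_length_seconds=5)
    except ValidationError as exc:
        print(exc.errors()[0]["type"])  # "greater_than_equal"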
backend/app/services/__init__.py ADDED
@@ -0,0 +1 @@
+ """Pipeline services."""
backend/app/services/clips.py ADDED
@@ -0,0 +1,112 @@
+ import shutil
+ import subprocess
+ from pathlib import Path
+
+ from app.core.config import Settings
+ from app.models.schemas import ChannelProfile, ClipCandidate, TranscriptSegment
+ from app.services.subtitles import write_single_caption_srt, write_srt
+ from app.storage import JobStore
+
+
+ class ClipGenerator:
+     def __init__(self, settings: Settings, store: JobStore) -> None:
+         self.settings = settings
+         self.store = store
+
+     def generate(
+         self,
+         job_id: str,
+         video_path: Path,
+         clips: list[ClipCandidate],
+         transcript: list[TranscriptSegment],
+         profile: ChannelProfile,
+     ) -> list[ClipCandidate]:
+         rendered: list[ClipCandidate] = []
+         for index, clip in enumerate(clips, start=1):
+             rendered.append(self.render_one(job_id, video_path, clip, transcript, profile, index))
+         return rendered
+
+     def render_one(
+         self,
+         job_id: str,
+         video_path: Path,
+         clip: ClipCandidate,
+         transcript: list[TranscriptSegment],
+         profile: ChannelProfile,
+         index: int = 1,
+     ) -> ClipCandidate:
+         job_dir = self.store.job_dir(job_id)
+         output_name = f"clip_{index:02}_{clip.id[:8]}.mp4"
+         subtitle_name = f"clip_{index:02}_{clip.id[:8]}.srt"
+         output_path = job_dir / output_name
+         subtitle_path = job_dir / subtitle_name
+
+         duration = max(1.0, clip.end_seconds - clip.start_seconds)
+         if clip.subtitle_text.strip():
+             write_single_caption_srt(subtitle_path, duration, clip.subtitle_text)
+         else:
+             write_srt(subtitle_path, clip.start_seconds, clip.end_seconds, transcript)
+         self._run_ffmpeg(video_path, output_path, subtitle_path, clip, profile)
+
+         clip.video_url = self.store.media_url(job_id, output_name)
+         clip.download_url = clip.video_url
+         clip.metadata["subtitle_file"] = self.store.media_url(job_id, subtitle_name)
+         return clip
+
+     def _run_ffmpeg(
+         self,
+         video_path: Path,
+         output_path: Path,
+         subtitle_path: Path,
+         clip: ClipCandidate,
+         profile: ChannelProfile,
+     ) -> None:
+         ffmpeg = shutil.which(self.settings.ffmpeg_binary)
+         if not ffmpeg or not video_path.exists() or video_path.stat().st_size == 0:
+             output_path.write_bytes(b"")
+             return
+
+         duration = max(1.0, clip.end_seconds - clip.start_seconds)
+         filters = [self._platform_filter(profile), self._subtitle_filter(subtitle_path)]
+         command = [
+             ffmpeg,
+             "-y",
+             "-ss",
+             f"{clip.start_seconds:.3f}",
+             "-i",
+             str(video_path),
+             "-t",
+             f"{duration:.3f}",
+             "-vf",
+             ",".join(filters),
+             "-c:v",
+             self.settings.ffmpeg_video_codec,
+             "-c:a",
+             "aac",
+             "-b:a",
+             "160k",
+             "-movflags",
+             "+faststart",
+             str(output_path),
+         ]
+         try:
+             subprocess.run(command, check=True, capture_output=True, text=True, timeout=180)
+             return
+         except Exception:
+             fallback = command.copy()
+             fallback[fallback.index(self.settings.ffmpeg_video_codec)] = self.settings.ffmpeg_cpu_codec
+             try:
+                 subprocess.run(fallback, check=True, capture_output=True, text=True, timeout=180)
+                 return
+             except Exception:
+                 output_path.write_bytes(b"")
+
+     def _platform_filter(self, profile: ChannelProfile) -> str:
+         if profile.target_platform.value in {"tiktok", "youtube_shorts", "instagram_reels"}:
+             return "scale=1080:1920:force_original_aspect_ratio=increase,crop=1080:1920"
+         return "scale=1280:720:force_original_aspect_ratio=decrease,pad=1280:720:(ow-iw)/2:(oh-ih)/2"
+
+     def _subtitle_filter(self, subtitle_path: Path) -> str:
+         escaped = str(subtitle_path.resolve()).replace("\\", "/").replace(":", "\\:")
+         style = "Fontname=Arial,Fontsize=18,PrimaryColour=&H00FFFFFF,OutlineColour=&H00000000"
+         return f"subtitles='{escaped}':force_style='{style}'"
backend/app/services/highlight.py ADDED
@@ -0,0 +1,139 @@
+ import json
+ import re
+ from uuid import uuid4
+
+ from app.core.config import Settings
+ from app.models.schemas import ChannelProfile, ClipCandidate, TranscriptSegment
+
+
+ class QwenHighlightDetector:
+     def __init__(self, settings: Settings) -> None:
+         self.settings = settings
+         self._llm = None
+
+     def detect(
+         self, transcript: list[TranscriptSegment], profile: ChannelProfile
+     ) -> list[ClipCandidate]:
+         if self.settings.demo_mode:
+             return self._heuristic_detect(transcript, profile)
+
+         try:
+             return self._qwen_detect(transcript, profile)
+         except Exception:
+             return self._heuristic_detect(transcript, profile)
+
+     def _qwen_detect(
+         self, transcript: list[TranscriptSegment], profile: ChannelProfile
+     ) -> list[ClipCandidate]:
+         try:
+             from vllm import LLM, SamplingParams
+         except Exception as exc:
+             raise RuntimeError("vLLM with ROCm backend is required for Qwen inference") from exc
+
+         if self._llm is None:
+             self._llm = LLM(
+                 model=self.settings.qwen_text_model_id,
+                 dtype=self.settings.preferred_torch_dtype,
+                 trust_remote_code=True,
+             )
+
+         transcript_text = "\n".join(
+             f"[{segment.start_seconds:.1f}-{segment.end_seconds:.1f}] {segment.text}"
+             for segment in transcript
+         )
+         prompt = f"""
+ You are selecting short-form clips for a creator.
+ Profile:
+ - niche: {profile.niche}
+ - style: {profile.clip_style}
+ - target length seconds: {profile.clip_length_seconds}
+ - language: {profile.primary_language}
+ - platform: {profile.target_platform.value}
+
+ Return strict JSON only. Shape:
+ [
+   {{
+     "start_seconds": 12.0,
+     "end_seconds": 72.0,
+     "title": "short title",
+     "reason": "why this will engage viewers",
+     "score": 91,
+     "subtitle_text": "clean subtitle text"
+   }}
+ ]
+
+ Transcript:
+ {transcript_text}
+ """.strip()
+         sampling = SamplingParams(temperature=0.2, max_tokens=1200)
+         outputs = self._llm.generate([prompt], sampling)
+         text = outputs[0].outputs[0].text
+         payload = self._parse_json_array(text)
+         clips = [
+             ClipCandidate(
+                 id=uuid4().hex,
+                 start_seconds=float(item["start_seconds"]),
+                 end_seconds=float(item["end_seconds"]),
+                 title=str(item.get("title") or "Highlight"),
+                 reason=str(item.get("reason") or "High engagement potential"),
+                 score=float(item.get("score") or 75),
+                 subtitle_text=str(item.get("subtitle_text") or ""),
+                 metadata={"model": self.settings.qwen_text_model_id},
+             )
+             for item in payload[: self.settings.max_clips]
+         ]
+         return clips or self._heuristic_detect(transcript, profile)
+
+     def _parse_json_array(self, text: str) -> list[dict]:
+         match = re.search(r"\[[\s\S]*\]", text)
+         if not match:
+             raise ValueError("No JSON array in Qwen response")
+         payload = json.loads(match.group(0))
+         if not isinstance(payload, list):
+             raise ValueError("Qwen response is not a list")
+         return payload
+
+     def _heuristic_detect(
+         self, transcript: list[TranscriptSegment], profile: ChannelProfile
+     ) -> list[ClipCandidate]:
+         style_terms = {
+             "funny": ["react", "punchy", "mistake", "surprising"],
+             "informative": ["important", "practical", "takeaway", "explanation"],
+             "dramatic": ["problem", "surprising", "before-and-after", "stop scrolling"],
+             "educational": ["question", "answer", "context", "takeaway"],
+         }
+         preferred_terms = style_terms.get(profile.clip_style.lower(), [])
+         scored: list[tuple[float, TranscriptSegment]] = []
+         for segment in transcript:
+             text = segment.text.lower()
+             score = 45.0
+             score += 12 if "?" in segment.text else 0
+             score += 8 if any(term in text for term in preferred_terms) else 0
+             score += 8 if any(term in text for term in ["mistake", "surprising", "stop scrolling"]) else 0
+             score += 6 if any(term in text for term in ["takeaway", "answer", "reacts"]) else 0
+             score += min(len(segment.text) / 12, 10)
+             scored.append((min(score, 100), segment))
+
+         scored.sort(key=lambda item: item[0], reverse=True)
+         clips: list[ClipCandidate] = []
+         for score, segment in scored[: self.settings.target_clip_count]:
+             start = max(0.0, segment.start_seconds - 5.0)
+             end = start + float(profile.clip_length_seconds)
+             clips.append(
+                 ClipCandidate(
+                     id=uuid4().hex,
+                     start_seconds=start,
+                     end_seconds=end,
+                     title=self._title_for(segment.text),
+                     reason=f"Matches the {profile.clip_style} style for a {profile.niche} audience.",
+                     score=round(score, 1),
+                     subtitle_text=segment.text,
+                     metadata={"model": "heuristic-fallback"},
+                 )
+             )
+         return sorted(clips, key=lambda clip: clip.start_seconds)
+
+     def _title_for(self, text: str) -> str:
+         words = re.sub(r"[^A-Za-z0-9 ]+", "", text).split()
+         title = " ".join(words[:7])
+         return title or "Highlight"
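
The extraction in _parse_json_array leans on a greedy regex so chatty prose around the array is tolerated; a quick sketch (the response text is made up):

    import json
    import re

    text = (
        "Sure! Here are the clips:\n"
        '[{"start_seconds": 12.0, "end_seconds": 72.0, "title": "Hook", "score": 91}]\n'
        "Hope that helps."
    )
    match = re.search(r"\[[\s\S]*\]", text)  # greedy: first '[' to last ']'
    payload = json.loads(match.group(0))
    print(payload[0]["title"])  # Hook

Because the match is greedy, it spans from the first "[" to the last "]", which is fine for single-array responses but would swallow intervening text if the model ever emitted two arrays.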
backend/app/services/multimodal.py ADDED
@@ -0,0 +1,18 @@
+ from app.core.config import Settings
+ from app.models.schemas import ClipCandidate
+
+
+ class QwenVisualAnalyzer:
+     def __init__(self, settings: Settings) -> None:
+         self.settings = settings
+
+     def enrich(self, video_path: str, clips: list[ClipCandidate]) -> list[ClipCandidate]:
+         if self.settings.demo_mode:
+             return clips
+
+         # Hook for Qwen2-VL frame/audio scoring on AMD ROCm.
+         # Keep this side-effect free until the hackathon demo has stable frame sampling assets.
+         for clip in clips:
+             clip.metadata["visual_model"] = self.settings.qwen_vl_model_id
+             clip.metadata["visual_status"] = "not_configured"
+         return clips
backend/app/services/pipeline.py ADDED
@@ -0,0 +1,157 @@
+ import asyncio
+ from pathlib import Path
+
+ from app.core.config import Settings
+ from app.core.timing import TimingLog
+ from app.models.schemas import ChannelProfile, ClipCandidate
+ from app.services.clips import ClipGenerator
+ from app.services.highlight import QwenHighlightDetector
+ from app.services.multimodal import QwenVisualAnalyzer
+ from app.services.transcription import WhisperTranscriber
+ from app.services.video_input import resolve_youtube_url
+ from app.storage import JobStore
+
+
+ class VideoPipeline:
+     def __init__(self, settings: Settings, store: JobStore) -> None:
+         self.settings = settings
+         self.store = store
+         self.transcriber = WhisperTranscriber(settings)
+         self.highlight_detector = QwenHighlightDetector(settings)
+         self.visual_analyzer = QwenVisualAnalyzer(settings)
+         self.clip_generator = ClipGenerator(settings, store)
+
+     async def process_source(
+         self,
+         job_id: str,
+         source_kind: str,
+         source_value: str,
+         profile: ChannelProfile,
+     ) -> None:
+         timings = TimingLog()
+         try:
+             self.store.update_job(
+                 job_id, status="running", progress=0.05, message="Preparing video input"
+             )
+             with timings.measure("input"):
+                 if source_kind == "youtube":
+                     video_path = await resolve_youtube_url(
+                         source_value, self.store.job_dir(job_id), self.settings
+                     )
+                 else:
+                     video_path = Path(source_value)
+
+             self.store.update_job(
+                 job_id, progress=0.25, message="Transcribing with Whisper Large V3"
+             )
+             with timings.measure("transcription"):
+                 transcript = await asyncio.to_thread(
+                     self.transcriber.transcribe, str(video_path), profile
+                 )
+             self.store.write_json(
+                 job_id,
+                 "transcript.json",
+                 [segment.model_dump(mode="json") for segment in transcript],
+             )
+             self.store.update_job(job_id, transcript=transcript, timings=timings.to_dict())
+
+             self.store.update_job(job_id, progress=0.55, message="Scoring highlights with Qwen")
+             with timings.measure("highlight_detection"):
+                 clips = await asyncio.to_thread(self.highlight_detector.detect, transcript, profile)
+
+             self.store.update_job(job_id, progress=0.65, message="Checking visual highlights")
+             with timings.measure("multimodal_analysis"):
+                 clips = await asyncio.to_thread(self.visual_analyzer.enrich, str(video_path), clips)
+
+             self.store.update_job(job_id, progress=0.78, message="Generating clips and subtitles")
+             with timings.measure("clip_generation"):
+                 rendered = await asyncio.to_thread(
+                     self.clip_generator.generate, job_id, video_path, clips, transcript, profile
+                 )
+
+             self.store.write_json(
+                 job_id, "clips.json", [clip.model_dump(mode="json") for clip in rendered]
+             )
+             self.store.update_job(
+                 job_id,
+                 status="completed",
+                 progress=1,
+                 message="Clips ready",
+                 transcript=transcript,
+                 clips=rendered,
+                 timings=timings.to_dict(),
+             )
+         except Exception as exc:
+             self.store.update_job(
+                 job_id,
+                 status="failed",
+                 progress=1,
+                 message="Processing failed",
+                 error=str(exc),
+                 timings=timings.to_dict(),
+             )
+
+     def patch_clip(self, job_id: str, clip_id: str, updates: dict) -> ClipCandidate:
+         snapshot = self.store.get_job(job_id)
+         patched: ClipCandidate | None = None
+         clips: list[ClipCandidate] = []
+         for clip in snapshot.clips:
+             if clip.id == clip_id:
+                 clean_updates = {key: value for key, value in updates.items() if value is not None}
+                 clip = clip.model_copy(update=clean_updates)
+                 if clip.end_seconds <= clip.start_seconds:
+                     clip = clip.model_copy(update={"end_seconds": clip.start_seconds + 1})
+                 patched = clip
+             clips.append(clip)
+         if patched is None:
+             raise KeyError(clip_id)
+         self.store.update_job(job_id, clips=clips)
+         return patched
+
+     def regenerate_clip(
+         self,
+         job_id: str,
+         clip_id: str,
+         clip_style: str | None = None,
+         clip_length_seconds: int | None = None,
+         subtitle_text: str | None = None,
+     ) -> ClipCandidate:
+         snapshot = self.store.get_job(job_id)
+         source_path = self._source_path(job_id)
+         clips: list[ClipCandidate] = []
+         regenerated: ClipCandidate | None = None
+         for index, clip in enumerate(snapshot.clips, start=1):
+             if clip.id == clip_id:
+                 profile = snapshot.profile.model_copy(
+                     update={
+                         key: value
+                         for key, value in {
+                             "clip_style": clip_style,
+                             "clip_length_seconds": clip_length_seconds,
+                         }.items()
+                         if value is not None
+                     }
+                 )
+                 if clip_length_seconds is not None:
+                     clip = clip.model_copy(
+                         update={"end_seconds": clip.start_seconds + clip_length_seconds}
+                     )
+                 if subtitle_text is not None:
+                     clip = clip.model_copy(update={"subtitle_text": subtitle_text})
+                 clip = self.clip_generator.render_one(
+                     job_id, source_path, clip, snapshot.transcript, profile, index
+                 )
+                 clip.metadata["regenerated"] = True
+                 regenerated = clip
+             clips.append(clip)
+         if regenerated is None:
+             raise KeyError(clip_id)
+         self.store.update_job(job_id, clips=clips)
+         return regenerated
+
+     def _source_path(self, job_id: str) -> Path:
+         job_dir = self.store.job_dir(job_id)
+         matches = sorted(job_dir.glob("source.*"))
+         if not matches:
+             raise FileNotFoundError("source video missing")
+         return matches[0]
backend/app/services/subtitles.py ADDED
@@ -0,0 +1,44 @@
+ from pathlib import Path
+
+ from app.models.schemas import TranscriptSegment
+
+
+ def seconds_to_srt_time(value: float) -> str:
+     millis = int(round(value * 1000))
+     hours, remainder = divmod(millis, 3_600_000)
+     minutes, remainder = divmod(remainder, 60_000)
+     seconds, millis = divmod(remainder, 1000)
+     return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"
+
+
+ def write_srt(path: Path, clip_start: float, clip_end: float, segments: list[TranscriptSegment]) -> None:
+     rows: list[str] = []
+     index = 1
+     for segment in segments:
+         if segment.end_seconds < clip_start or segment.start_seconds > clip_end:
+             continue
+         start = max(0.0, segment.start_seconds - clip_start)
+         end = min(clip_end - clip_start, segment.end_seconds - clip_start)
+         rows.extend(
+             [
+                 str(index),
+                 f"{seconds_to_srt_time(start)} --> {seconds_to_srt_time(max(end, start + 0.8))}",
+                 segment.text.strip(),
+                 "",
+             ]
+         )
+         index += 1
+     if not rows:
+         rows = ["1", "00:00:00,000 --> 00:00:03,000", "", ""]
+     path.write_text("\n".join(rows), encoding="utf-8")
+
+
+ def write_single_caption_srt(path: Path, duration: float, text: str) -> None:
+     safe_duration = max(duration, 1.0)
+     rows = [
+         "1",
+         f"00:00:00,000 --> {seconds_to_srt_time(safe_duration)}",
+         text.strip(),
+         "",
+     ]
+     path.write_text("\n".join(rows), encoding="utf-8")
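
Timestamps are rebased to the clip's start, so a transcript segment at 65-72 s inside a clip starting at 60 s renders as 5-12 s. A quick sketch (segment values illustrative):

    from pathlib import Path

    from app.models.schemas import TranscriptSegment
    from app.services.subtitles import seconds_to_srt_time, write_srt

    assert seconds_to_srt_time(65.25) == "00:01:05,250"

    segment = TranscriptSegment(id="s1", start_seconds=65.0, end_seconds=72.0, text="Key takeaway here.")
    write_srt(Path("demo.srt"), clip_start=60.0, clip_end=120.0, segments=[segment])
    print(Path("demo.srt").read_text(encoding="utf-8"))
    # 1
    # 00:00:05,000 --> 00:00:12,000
    # Key takeaway here.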
backend/app/services/transcription.py ADDED
@@ -0,0 +1,95 @@
+ from uuid import uuid4
+
+ from app.core.config import Settings
+ from app.models.schemas import ChannelProfile, TranscriptSegment
+ from app.utils.rocm import torch_device_index
+
+
+ class WhisperTranscriber:
+     def __init__(self, settings: Settings) -> None:
+         self.settings = settings
+         self._pipeline = None
+
+     def transcribe(self, video_path: str, profile: ChannelProfile) -> list[TranscriptSegment]:
+         if self.settings.demo_mode:
+             return self._demo_transcript(profile)
+
+         try:
+             from transformers import pipeline
+         except Exception as exc:
+             raise RuntimeError("transformers is required for Whisper transcription") from exc
+
+         if self._pipeline is None:
+             self._pipeline = pipeline(
+                 task="automatic-speech-recognition",
+                 model=self.settings.whisper_model_id,
+                 device=torch_device_index(),
+                 token=self.settings.hf_token,
+                 chunk_length_s=30,
+                 return_timestamps=True,
+             )
+
+         generate_kwargs = {"task": "transcribe"}
+         if profile.primary_language and profile.primary_language.lower() != "auto":
+             generate_kwargs["language"] = profile.primary_language.lower()
+
+         result = self._pipeline(str(video_path), generate_kwargs=generate_kwargs)
+         chunks = result.get("chunks") or []
+         if not chunks:
+             text = result.get("text", "").strip()
+             return [
+                 TranscriptSegment(
+                     id=uuid4().hex,
+                     start_seconds=0,
+                     end_seconds=max(profile.clip_length_seconds, 15),
+                     text=text,
+                     language=profile.primary_language,
+                 )
+             ]
+
+         segments: list[TranscriptSegment] = []
+         for chunk in chunks:
+             timestamp = chunk.get("timestamp") or (0, 0)
+             start = float(timestamp[0] or 0)
+             end = float(timestamp[1] or start + 5)
+             text = (chunk.get("text") or "").strip()
+             if text:
+                 segments.append(
+                     TranscriptSegment(
+                         id=uuid4().hex,
+                         start_seconds=start,
+                         end_seconds=max(end, start + 1),
+                         text=text,
+                         language=profile.primary_language,
+                     )
+                 )
+         return segments
+
+     def _demo_transcript(self, profile: ChannelProfile) -> list[TranscriptSegment]:
+         style = profile.clip_style.lower()
+         niche = profile.niche.lower()
+         lines = [
+             "This opening sets up the main problem creators face when a long video hides the best moments.",
+             "Here is the surprising mistake most teams make when they choose clips only by view count.",
+             "The important question is simple: which moment would make someone stop scrolling right now?",
+             f"For a {niche} channel, the answer changes because the audience expects a {style} rhythm.",
+             "This section has the clearest explanation and a strong before-and-after contrast.",
+             "Then the guest reacts with a punchy line that works well as a short hook.",
+             "A practical takeaway lands here, with enough context to stand alone as a sixty second clip.",
+             "The final segment wraps the idea with a direct callout that is easy to subtitle.",
+         ]
+         segments: list[TranscriptSegment] = []
+         cursor = 0.0
+         for line in lines:
+             end = cursor + 15.0
+             segments.append(
+                 TranscriptSegment(
+                     id=uuid4().hex,
+                     start_seconds=cursor,
+                     end_seconds=end,
+                     text=line,
+                     language=profile.primary_language,
+                 )
+             )
+             cursor = end
+         return segments
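
The chunk-to-segment mapping assumes the return shape of the transformers ASR pipeline with return_timestamps=True, roughly as follows (values illustrative):

    # Shape the transcriber consumes (illustrative values):
    result = {
        "text": "Hello world. Second sentence.",
        "chunks": [
            {"timestamp": (0.0, 2.5), "text": " Hello world."},
            {"timestamp": (2.5, None), "text": " Second sentence."},  # open-ended final chunk
        ],
    }
    # For the second chunk: start=2.5, and `timestamp[1] or start + 5`
    # backfills the missing end as 7.5; max(end, start + 1) guards inverted spans.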
backend/app/services/video_input.py ADDED
@@ -0,0 +1,80 @@
+ import asyncio
+ import shutil
+ import subprocess
+ from pathlib import Path
+
+ from fastapi import UploadFile
+
+ from app.core.config import Settings
+
+
+ async def save_upload(upload: UploadFile, job_dir: Path) -> Path:
+     suffix = Path(upload.filename or "upload.mp4").suffix or ".mp4"
+     destination = job_dir / f"source{suffix.lower()}"
+     with destination.open("wb") as handle:
+         while chunk := await upload.read(1024 * 1024):
+             handle.write(chunk)
+     return destination
+
+
+ async def resolve_youtube_url(url: str, job_dir: Path, settings: Settings) -> Path:
+     if settings.demo_mode:
+         return await asyncio.to_thread(create_demo_video, job_dir, settings)
+
+     try:
+         import yt_dlp
+     except Exception as exc:
+         raise RuntimeError("yt-dlp is required for YouTube ingestion") from exc
+
+     output_template = str(job_dir / "source.%(ext)s")
+     ydl_opts = {
+         "outtmpl": output_template,
+         "format": "bv*[ext=mp4]+ba[ext=m4a]/b[ext=mp4]/best",
+         "merge_output_format": "mp4",
+         "quiet": True,
+         "noprogress": True,
+     }
+
+     def download() -> Path:
+         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+             ydl.download([url])
+         matches = sorted(job_dir.glob("source.*"))
+         if not matches:
+             raise RuntimeError("yt-dlp finished without producing a video")
+         return matches[0]
+
+     return await asyncio.to_thread(download)
+
+
+ def create_demo_video(job_dir: Path, settings: Settings) -> Path:
+     destination = job_dir / "source.mp4"
+     ffmpeg = shutil.which(settings.ffmpeg_binary)
+     if not ffmpeg:
+         destination.write_bytes(b"")
+         return destination
+
+     command = [
+         ffmpeg,
+         "-y",
+         "-f",
+         "lavfi",
+         "-i",
+         "testsrc2=size=1280x720:rate=30:duration=120",
+         "-f",
+         "lavfi",
+         "-i",
+         "sine=frequency=660:sample_rate=48000:duration=120",
+         "-shortest",
+         "-c:v",
+         "libx264",
+         "-pix_fmt",
+         "yuv420p",
+         "-c:a",
+         "aac",
+         str(destination),
+     ]
+     try:
+         subprocess.run(command, check=True, capture_output=True, text=True, timeout=45)
+     except Exception:
+         destination.write_bytes(b"")
+     return destination
backend/app/storage.py ADDED
@@ -0,0 +1,58 @@
+ import json
+ from pathlib import Path
+ from uuid import uuid4
+
+ from app.core.config import Settings
+ from app.models.schemas import ChannelProfile, JobSnapshot, utc_now
+
+
+ class JobStore:
+     def __init__(self, settings: Settings) -> None:
+         self.settings = settings
+         self.root = settings.storage_dir
+         self.jobs_root = self.root / "jobs"
+         self.jobs_root.mkdir(parents=True, exist_ok=True)
+
+     def create_job(self, profile: ChannelProfile, source: dict) -> JobSnapshot:
+         job_id = uuid4().hex
+         job_dir = self.job_dir(job_id)
+         job_dir.mkdir(parents=True, exist_ok=True)
+         snapshot = JobSnapshot(
+             id=job_id,
+             status="queued",
+             progress=0,
+             message="Queued",
+             source=source,
+             profile=profile,
+         )
+         self.save_job(snapshot)
+         return snapshot
+
+     def job_dir(self, job_id: str) -> Path:
+         return self.jobs_root / job_id
+
+     def media_url(self, job_id: str, filename: str) -> str:
+         return f"/media/jobs/{job_id}/{filename}"
+
+     def save_job(self, snapshot: JobSnapshot) -> JobSnapshot:
+         snapshot.updated_at = utc_now()
+         path = self.job_dir(snapshot.id) / "job.json"
+         path.write_text(snapshot.model_dump_json(indent=2), encoding="utf-8")
+         return snapshot
+
+     def get_job(self, job_id: str) -> JobSnapshot:
+         path = self.job_dir(job_id) / "job.json"
+         if not path.exists():
+             raise FileNotFoundError(job_id)
+         data = json.loads(path.read_text(encoding="utf-8"))
+         return JobSnapshot.model_validate(data)
+
+     def update_job(self, job_id: str, **updates) -> JobSnapshot:
+         snapshot = self.get_job(job_id)
+         updated = snapshot.model_copy(update=updates)
+         return self.save_job(updated)
+
+     def write_json(self, job_id: str, filename: str, payload: object) -> Path:
+         path = self.job_dir(job_id) / filename
+         path.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8")
+         return path
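
The store is deliberately file-backed for the MVP: each job is a directory under data/jobs/<id> holding a job.json snapshot, which the /media static mount also serves from. A minimal round-trip sketch (the storage directory is illustrative):

    from pathlib import Path

    from app.core.config import Settings
    from app.models.schemas import ChannelProfile
    from app.storage import JobStore

    store = JobStore(Settings(storage_dir=Path("/tmp/clipdata")))  # illustrative dir
    snapshot = store.create_job(ChannelProfile(), {"kind": "upload", "filename": "talk.mp4"})

    # State lives on disk at /tmp/clipdata/jobs/<id>/job.json ...
    assert (store.job_dir(snapshot.id) / "job.json").exists()

    # ... and updates are read-modify-write against that file.
    store.update_job(snapshot.id, status="running", progress=0.1)
    assert store.get_job(snapshot.id).status == "running"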
backend/app/utils/__init__.py ADDED
@@ -0,0 +1 @@
+ """Runtime helpers."""
backend/app/utils/rocm.py ADDED
@@ -0,0 +1,33 @@
+ from typing import Any
+
+
+ def detect_accelerator() -> dict[str, Any]:
+     try:
+         import torch
+     except Exception as exc:
+         return {
+             "torch_available": False,
+             "cuda_api_available": False,
+             "rocm_hip_version": None,
+             "device_name": None,
+             "error": str(exc),
+         }
+
+     cuda_available = bool(torch.cuda.is_available())
+     device_name = torch.cuda.get_device_name(0) if cuda_available else None
+     return {
+         "torch_available": True,
+         "cuda_api_available": cuda_available,
+         "rocm_hip_version": getattr(torch.version, "hip", None),
+         "cuda_version": getattr(torch.version, "cuda", None),
+         "device_name": device_name,
+         "device_count": torch.cuda.device_count() if cuda_available else 0,
+     }
+
+
+ def torch_device_index() -> int:
+     try:
+         import torch
+     except Exception:
+         return -1
+     return 0 if torch.cuda.is_available() else -1
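
On ROCm builds of PyTorch the CUDA API is backed by HIP, so torch.cuda.is_available() is the right probe on AMD hardware; torch.version.hip is set while torch.version.cuda is None. An illustrative accelerator block from GET /health on such a machine (exact version and device strings depend on the install):

    # Illustrative payload, not captured output:
    {
        "torch_available": True,
        "cuda_api_available": True,
        "rocm_hip_version": "6.1.40091",        # example value
        "cuda_version": None,                   # None on ROCm builds
        "device_name": "AMD Instinct MI300X",   # example device
        "device_count": 1,
    }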
backend/app/workers/__init__.py ADDED
@@ -0,0 +1 @@
+ """Optional async workers."""
backend/app/workers/celery_app.py ADDED
@@ -0,0 +1,15 @@
+ from celery import Celery
+
+ from app.core.config import get_settings
+
+ settings = get_settings()
+
+ celery_app = Celery("ai_clip_studio", broker=settings.redis_url, backend=settings.redis_url)
+ celery_app.conf.task_serializer = "json"
+ celery_app.conf.result_serializer = "json"
+ celery_app.conf.accept_content = ["json"]
+
+
+ @celery_app.task(name="pipeline.process_job")
+ def process_job(job_id: str) -> str:
+     return f"Queued job {job_id}. FastAPI background tasks are active by default."
backend/pyproject.toml ADDED
@@ -0,0 +1,42 @@
+ [project]
+ name = "elevenclip-ai-backend"
+ version = "0.1.0"
+ description = "FastAPI backend for ElevenClip.AI on AMD ROCm"
+ requires-python = ">=3.11"
+ dependencies = [
+     "fastapi>=0.115.0",
+     "uvicorn[standard]>=0.30.0",
+     "pydantic>=2.8.0",
+     "python-multipart>=0.0.9",
+     "yt-dlp>=2025.1.15",
+     "celery[redis]>=5.4.0",
+     "redis>=5.0.0"
+ ]
+
+ [project.optional-dependencies]
+ ai = [
+     "transformers>=4.47.0",
+     "accelerate>=1.2.0",
+     "sentencepiece>=0.2.0",
+     "safetensors>=0.4.5"
+ ]
+ rocm-inference = [
+     "vllm>=0.6.6",
+     "optimum-amd>=0.1.0; platform_system == 'Linux'"
+ ]
+ dev = [
+     "pytest>=8.3.0",
+     "httpx>=0.27.0",
+     "ruff>=0.6.0"
+ ]
+
+ [build-system]
+ requires = ["setuptools>=69.0"]
+ build-backend = "setuptools.build_meta"
+
+ [tool.setuptools.packages.find]
+ include = ["app*"]
+
+ [tool.ruff]
+ line-length = 100
+ target-version = "py311"