Spaces:

lablab-ai-amd-developer-hackathon
/

nemoflix-amd-video

Running

App Files Files Community

Rodrigo Ortega committed on 21 days ago

Commit

62ab980

1 Parent(s): 27377d7

Add AI Toolkit training setup and live job progress

Browse files

Files changed (12) hide show

app/nemoflix_amd/api.py +196 -3
requirements.txt +1 -0
scripts/install-ai-toolkit.sh +189 -0
scripts/startup-script.sh +1 -0
studio/src/App.tsx +30 -9
studio/src/components/JobCard.tsx +57 -0
studio/src/types.ts +16 -0
studio/tsconfig.json +21 -0
studio/tsconfig.node.json +10 -0
training/README.md +37 -0
training/flux2_identity_template.yaml +78 -0
training/wan22_i2v_character_template.yaml +77 -0

app/nemoflix_amd/api.py CHANGED Viewed

@@ -1,9 +1,15 @@
 from __future__ import annotations
 import tempfile
 from pathlib import Path
 from typing import Any, Literal
-from uuid import uuid4
 from fastapi import FastAPI, File, HTTPException, UploadFile
 from pydantic import BaseModel, Field
@@ -84,6 +90,143 @@ def comfy() -> ComfyClient:
     return ComfyClient(settings.comfy_url, settings.request_timeout_seconds)
 @app.get("/api/health")
 async def health() -> dict[str, Any]:
     client = comfy()
@@ -170,14 +313,16 @@ async def generate_video(body: VideoGenerateRequest) -> VideoGenerateResponse:
         return VideoGenerateResponse(ok=True, mode=body.mode, workflow=workflow)
     try:
-        result = await comfy().queue_prompt(workflow, client_id=str(uuid4()))
     except Exception as exc:  # noqa: BLE001
         raise HTTPException(status_code=502, detail=f"ComfyUI prompt submission failed: {exc}") from exc
     return VideoGenerateResponse(
         ok="prompt_id" in result,
         mode=body.mode,
-        prompt_id=result.get("prompt_id"),
         number=result.get("number"),
         node_errors=result.get("node_errors"),
     )
@@ -224,6 +369,54 @@ async def _queue_position(client: ComfyClient, prompt_id: str) -> int | None:
     return None
 @app.get("/api/jobs/{prompt_id}", response_model=JobStatusResponse)
 async def job(prompt_id: str) -> JobStatusResponse:
     client = comfy()

 from __future__ import annotations
+import asyncio
+import contextlib
+import json
 import tempfile
+from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any, Literal
+from urllib.parse import urlparse, urlunparse
+import websockets
 from fastapi import FastAPI, File, HTTPException, UploadFile
 from pydantic import BaseModel, Field
     return ComfyClient(settings.comfy_url, settings.request_timeout_seconds)
+# In-memory registry of jobs keyed by ComfyUI prompt id (plain module-level dict).
+_JOBS: dict[str, dict[str, Any]] = {}
+# Client id sent when queueing prompts and embedded in the websocket URL's clientId query.
+_COMFY_CLIENT_ID = "nemoflix-amd-gallery"
+# Handle of the background websocket bridge task; None until the startup hook creates it.
+_WS_TASK: asyncio.Task | None = None
+def _ws_url(base_url: str) -> str:
+    parsed = urlparse(base_url)
+    scheme = "wss" if parsed.scheme == "https" else "ws"
+    return urlunparse((scheme, parsed.netloc, "/ws", "", f"clientId={_COMFY_CLIENT_ID}", ""))
+def _register_submitted_job(prompt_id: str | None, body: VideoGenerateRequest, status: str = "pending") -> None:
+    if not prompt_id:
+        return
+    _JOBS[prompt_id] = {
+        "prompt_id": prompt_id,
+        "status": status,
+        "mode": body.mode,
+        "prompt": body.prompt,
+        "width": body.width,
+        "height": body.height,
+        "length": body.length,
+        "fps": body.fps,
+        "created_at": datetime.now(UTC).isoformat(),
+        "current_node": None,
+        "step_value": 0,
+        "step_max": 0,
+        "nodes_finished": 0,
+        "nodes_total": 0,
+        "progress_percent": None,
+    }
+def _update_job_from_progress_state(prompt_id: str, nodes: dict[str, Any]) -> None:
+    if not prompt_id:
+        return
+    job = _JOBS.setdefault(prompt_id, {"prompt_id": prompt_id, "status": "running", "created_at": None})
+    total = len(nodes)
+    finished = 0
+    running = 0
+    current_node = None
+    step_value = 0
+    step_max = 0
+    for node_id, node in nodes.items():
+        if not isinstance(node, dict):
+            continue
+        state = node.get("state")
+        if state == "finished":
+            finished += 1
+        elif state == "running":
+            running += 1
+            if current_node is None:
+                current_node = node.get("display_node_id") or node.get("node_id") or node_id
+                step_value = int(node.get("value") or 0)
+                step_max = int(node.get("max") or 0)
+    percent = round((finished / total) * 100, 1) if total else None
+    job.update({
+        "status": "running",
+        "nodes_total": total,
+        "nodes_finished": finished,
+        "nodes_running": running,
+        "current_node": current_node,
+        "step_value": step_value,
+        "step_max": step_max,
+        "progress_percent": percent,
+        "updated_at": datetime.now(UTC).isoformat(),
+    })
+async def _comfy_ws_bridge() -> None:
+    settings = get_settings()
+    url = _ws_url(settings.comfy_url)
+    while True:
+        try:
+            async with websockets.connect(url, ping_interval=20, ping_timeout=20) as ws:
+                async for raw in ws:
+                    if isinstance(raw, bytes):
+                        continue
+                    try:
+                        msg = json.loads(raw)
+                    except json.JSONDecodeError:
+                        continue
+                    msg_type = msg.get("type")
+                    data = msg.get("data", {}) if isinstance(msg.get("data"), dict) else {}
+                    prompt_id = data.get("prompt_id") or data.get("prompt")
+                    if msg_type == "execution_start" and isinstance(prompt_id, str):
+                        _JOBS.setdefault(prompt_id, {"prompt_id": prompt_id, "created_at": None}).update({"status": "running"})
+                    elif msg_type == "progress_state" and isinstance(prompt_id, str):
+                        nodes = data.get("nodes", {})
+                        if isinstance(nodes, dict):
+                            _update_job_from_progress_state(prompt_id, nodes)
+                    elif msg_type == "progress" and isinstance(prompt_id, str):
+                        job = _JOBS.setdefault(prompt_id, {"prompt_id": prompt_id, "status": "running", "created_at": None})
+                        value = int(data.get("value") or 0)
+                        max_value = int(data.get("max") or 0)
+                        job.update({
+                            "status": "running",
+                            "step_value": value,
+                            "step_max": max_value,
+                            "progress_percent": round((value / max_value) * 100, 1) if max_value else None,
+                            "updated_at": datetime.now(UTC).isoformat(),
+                        })
+                    elif msg_type == "execution_success" and isinstance(prompt_id, str):
+                        _JOBS.setdefault(prompt_id, {"prompt_id": prompt_id, "created_at": None}).update({
+                            "status": "completed",
+                            "progress_percent": 100,
+                            "updated_at": datetime.now(UTC).isoformat(),
+                        })
+                    elif msg_type in {"execution_error", "execution_interrupted"} and isinstance(prompt_id, str):
+                        _JOBS.setdefault(prompt_id, {"prompt_id": prompt_id, "created_at": None}).update({
+                            "status": "failed",
+                            "error": data.get("exception_message") or msg_type,
+                            "updated_at": datetime.now(UTC).isoformat(),
+                        })
+        except asyncio.CancelledError:
+            raise
+        except Exception:
+            await asyncio.sleep(3)
+@app.on_event("startup")
+async def start_comfy_bridge() -> None:
+    global _WS_TASK
+    if _WS_TASK is None or _WS_TASK.done():
+        _WS_TASK = asyncio.create_task(_comfy_ws_bridge())
+@app.on_event("shutdown")
+async def stop_comfy_bridge() -> None:
+    global _WS_TASK
+    if _WS_TASK:
+        _WS_TASK.cancel()
+        with contextlib.suppress(asyncio.CancelledError):
+            await _WS_TASK
+        _WS_TASK = None
 @app.get("/api/health")
 async def health() -> dict[str, Any]:
     client = comfy()
         return VideoGenerateResponse(ok=True, mode=body.mode, workflow=workflow)
     try:
+        result = await comfy().queue_prompt(workflow, client_id=_COMFY_CLIENT_ID)
     except Exception as exc:  # noqa: BLE001
         raise HTTPException(status_code=502, detail=f"ComfyUI prompt submission failed: {exc}") from exc
+    prompt_id = result.get("prompt_id")
+    _register_submitted_job(prompt_id, body, "pending")
     return VideoGenerateResponse(
         ok="prompt_id" in result,
         mode=body.mode,
+        prompt_id=prompt_id,
         number=result.get("number"),
         node_errors=result.get("node_errors"),
     )
     return None
+@app.get("/api/jobs")
+async def jobs() -> dict[str, Any]:
+    """Return jobs submitted through this API.
+    ComfyUI is still the execution engine, but this endpoint intentionally does
+    not list arbitrary Comfy queue entries. The gallery should only show jobs we
+    submitted and registered locally; completed media is discovered separately by
+    /api/listing.
+    """
+    client = comfy()
+    try:
+        queue = await client.get("/queue")
+    except Exception as exc:  # noqa: BLE001
+        jobs_list = sorted(_JOBS.values(), key=lambda j: j.get("created_at") or "", reverse=True)
+        return {"jobs": jobs_list, "count": len(jobs_list), "error": str(exc)}
+    running_ids: set[str] = set()
+    pending_positions: dict[str, int] = {}
+    for item in queue.get("queue_running", []) if isinstance(queue, dict) else []:
+        if isinstance(item, list) and len(item) > 1 and isinstance(item[1], str):
+            running_ids.add(item[1])
+    for position, item in enumerate(queue.get("queue_pending", []) if isinstance(queue, dict) else [], start=1):
+        if isinstance(item, list) and len(item) > 1 and isinstance(item[1], str):
+            pending_positions[item[1]] = position
+    for prompt_id, job in _JOBS.items():
+        if job.get("status") in {"completed", "failed"}:
+            continue
+        if prompt_id in running_ids:
+            job["status"] = "running"
+            job["queue_position"] = None
+        elif prompt_id in pending_positions:
+            job["status"] = "pending"
+            job["queue_position"] = pending_positions[prompt_id]
+        elif job.get("status") in {"pending", "running"}:
+            job["status"] = "unknown"
+            job["queue_position"] = None
+    jobs_list = sorted(
+        _JOBS.values(),
+        key=lambda j: (j.get("status") != "running", j.get("queue_position") or 0, j.get("created_at") or ""),
+    )
+    return {"jobs": jobs_list, "count": len(jobs_list)}
 @app.get("/api/jobs/{prompt_id}", response_model=JobStatusResponse)
 async def job(prompt_id: str) -> JobStatusResponse:
     client = comfy()

requirements.txt CHANGED Viewed

@@ -3,3 +3,4 @@ uvicorn[standard]==0.34.0
 httpx==0.28.1
 python-multipart==0.0.20
 pydantic-settings==2.7.1

 httpx==0.28.1
 python-multipart==0.0.20
 pydantic-settings==2.7.1
+websockets==16.0

scripts/install-ai-toolkit.sh ADDED Viewed

	@@ -0,0 +1,189 @@

+#!/bin/bash
+set -Eeuo pipefail
+set -x
+# Install Ostris AI Toolkit on a disposable AMD MI300X ROCm droplet.
+# Run after scripts/startup-script.sh so ROCm/system basics are already present.
+# This is intentionally idempotent: safe to rerun on a fresh or partially initialized box.
+APT_GET="apt-get -o DPkg::Lock::Timeout=300"
+TOOLKIT_DIR="${TOOLKIT_DIR:-/root/ai-toolkit}"
+TOOLKIT_VENV="${TOOLKIT_VENV:-/root/ai-toolkit-venv}"
+TRAINING_DIR="${TRAINING_DIR:-/root/nemoflix-training}"
+ROCM_INDEX_PRIMARY="${ROCM_INDEX_PRIMARY:-https://download.pytorch.org/whl/rocm7.2}"
+ROCM_INDEX_FALLBACK="${ROCM_INDEX_FALLBACK:-https://download.pytorch.org/whl/rocm7.0}"
+AI_TOOLKIT_REF="${AI_TOOLKIT_REF:-main}"
+# CLI training is the default. The UI pulls NodeSource apt repo + nodejs and can
+# trigger service restart/deferred-restart behavior on cloud images; keep it opt-in.
+INSTALL_UI_DEPS="${INSTALL_UI_DEPS:-0}"
+PYTHON_BIN="$TOOLKIT_VENV/bin/python"
+export DEBIAN_FRONTEND=noninteractive
+export NEEDRESTART_MODE=a
+trap 'echo "ERROR: AI Toolkit install failed at line $LINENO"' ERR
+if [ "$(id -u)" -ne 0 ]; then
+  echo "Run as root on the AMD droplet."
+  exit 1
+fi
+echo "=== Installing AI Toolkit prerequisites ==="
+$APT_GET update -y
+$APT_GET install -y git git-lfs python3-pip python3.12-venv python3-dev build-essential pkg-config curl wget ffmpeg libgl1 libglib2.0-0
+git lfs install --system || true
+if command -v /opt/rocm/bin/rocm-smi >/dev/null 2>&1; then
+  echo "=== ROCm GPU check ==="
+  /opt/rocm/bin/rocm-smi || true
+fi
+echo "=== Creating isolated AI Toolkit venv: $TOOLKIT_VENV ==="
+if [ ! -d "$TOOLKIT_VENV" ]; then
+  python3 -m venv "$TOOLKIT_VENV"
+fi
+"$PYTHON_BIN" -m pip install --upgrade pip setuptools wheel
+echo "=== Installing ROCm PyTorch into AI Toolkit venv ==="
+"$PYTHON_BIN" -m pip install torch torchvision torchaudio --index-url "$ROCM_INDEX_PRIMARY" || \
+"$PYTHON_BIN" -m pip install torch torchvision torchaudio --index-url "$ROCM_INDEX_FALLBACK"
+"$PYTHON_BIN" - <<'PY'
+import torch
+print('torch', torch.__version__)
+print('cuda api available', torch.cuda.is_available())
+if torch.cuda.is_available():
+    print('device', torch.cuda.get_device_name(0))
+PY
+echo "=== Cloning/updating Ostris AI Toolkit ==="
+if [ ! -d "$TOOLKIT_DIR/.git" ]; then
+  git clone https://github.com/ostris/ai-toolkit.git "$TOOLKIT_DIR"
+fi
+git -C "$TOOLKIT_DIR" fetch --depth 1 origin "$AI_TOOLKIT_REF"
+git -C "$TOOLKIT_DIR" checkout FETCH_HEAD
+git -C "$TOOLKIT_DIR" submodule update --init --recursive
+echo "=== Installing AI Toolkit Python requirements ==="
+# Keep the ROCm torch we installed above; do not allow requirements to swap in CUDA wheels.
+"$PYTHON_BIN" -m pip install -r "$TOOLKIT_DIR/requirements.txt" --extra-index-url "$ROCM_INDEX_PRIMARY"
+"$PYTHON_BIN" -m pip install --upgrade accelerate huggingface_hub hf_transfer
+mkdir -p \
+  "$TRAINING_DIR/datasets" \
+  "$TRAINING_DIR/output" \
+  "$TRAINING_DIR/samples" \
+  "$TRAINING_DIR/config" \
+  "$TOOLKIT_DIR/config"
+# If this script is run from a cloned Nemoflix repo, seed our checked-in config templates
+# into the disposable training workspace.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+if compgen -G "$REPO_DIR/training/*.yaml" >/dev/null; then
+  cp -f "$REPO_DIR"/training/*.yaml "$TRAINING_DIR/config/"
+fi
+cat > "$TRAINING_DIR/README.md" <<'EOF'
+# Nemoflix AI Toolkit Training Workspace
+Persistent-ish training layout for disposable AMD droplets.
+## Paths
+- AI Toolkit: `/root/ai-toolkit`
+- Venv: `/root/ai-toolkit-venv`
+- Datasets: `/root/nemoflix-training/datasets`
+- Configs: `/root/nemoflix-training/config`
+- Outputs/checkpoints: `/root/nemoflix-training/output`
+- Sample control images: `/root/nemoflix-training/samples`
+## Run a config
+```bash
+cd /root/ai-toolkit
+/root/ai-toolkit-venv/bin/python run.py /root/nemoflix-training/config/<config>.yaml
+```
+## Hugging Face token
+For gated models, create `/root/ai-toolkit/.env`:
+```bash
+HF_TOKEN=hf_xxx
+HF_HUB_ENABLE_HF_TRANSFER=1
+```
+## Character dataset conventions
+Image/FLUX LoRA:
+- 20-40 good face/body images
+- one `.txt` caption beside each image
+- include trigger word, e.g. `character_trigger, person, portrait, natural lighting`
+Wan I2V character LoRA:
+- 10-30 short clips, ideally 3-8 seconds
+- one `.txt` caption beside each clip
+- include trigger word, e.g. `character_trigger, person, walking outdoors, close-up face`
+- trim dead time; varied angles/lighting/backgrounds; avoid sunglasses
+EOF
+# Generate the CLI runner. The heredoc is unquoted on purpose so the configured
+# TOOLKIT_DIR / TOOLKIT_VENV / TRAINING_DIR values are baked in at install time;
+# anything that must expand when the runner itself executes is escaped with a backslash.
+# NOTE: assumes these paths contain no double quotes, dollar signs or backticks.
+cat > "$TRAINING_DIR/run-ai-toolkit.sh" <<EOF
+#!/bin/bash
+set -Eeuo pipefail
+CONFIG_PATH="\${1:?Usage: $TRAINING_DIR/run-ai-toolkit.sh $TRAINING_DIR/config/job.yaml}"
+cd "$TOOLKIT_DIR"
+export HF_HUB_ENABLE_HF_TRANSFER="\${HF_HUB_ENABLE_HF_TRANSFER:-1}"
+exec "$PYTHON_BIN" run.py "\$CONFIG_PATH"
+EOF
+chmod +x "$TRAINING_DIR/run-ai-toolkit.sh"
+if [ "$INSTALL_UI_DEPS" = "1" ]; then
+  if ! command -v node >/dev/null 2>&1 || ! node -e 'process.exit(Number(process.versions.node.split(".")[0]) >= 20 ? 0 : 1)' 2>/dev/null; then
+    echo "=== Installing Node.js 22 for optional AI Toolkit UI ==="
+    curl -fsSL https://deb.nodesource.com/setup_22.x | bash -
+    $APT_GET install -y nodejs
+  fi
+  if [ -f "$TOOLKIT_DIR/ui/package.json" ]; then
+    echo "=== Installing AI Toolkit UI dependencies ==="
+    (cd "$TOOLKIT_DIR/ui" && npm install)
+  fi
+fi
+if [ "$INSTALL_UI_DEPS" = "1" ]; then
+  # Do not ship the well-known placeholder credential. The UI token is kept in a
+  # root-only env file instead of the unit file: AI_TOOLKIT_AUTH from the caller's
+  # environment wins, otherwise a random token is generated once and reused on reruns.
+  # NOTE: a caller-supplied token is written verbatim; avoid whitespace and quotes in it.
+  UI_ENV_FILE="/etc/ai-toolkit-ui.env"
+  { set +x; } 2>/dev/null  # keep the token out of the xtrace output
+  if [ -n "${AI_TOOLKIT_AUTH:-}" ] || [ ! -s "$UI_ENV_FILE" ]; then
+    UI_AUTH="${AI_TOOLKIT_AUTH:-}"
+    if [ -z "$UI_AUTH" ]; then
+      UI_AUTH="$("$PYTHON_BIN" -c 'import secrets; print(secrets.token_urlsafe(32))')"
+    fi
+    (umask 077 && printf 'AI_TOOLKIT_AUTH=%s\n' "$UI_AUTH" > "$UI_ENV_FILE")
+    chmod 600 "$UI_ENV_FILE"
+  fi
+  set -x
+  cat > /etc/systemd/system/ai-toolkit-ui.service <<EOF
+[Unit]
+Description=Ostris AI Toolkit UI
+After=network-online.target
+Wants=network-online.target
+[Service]
+Type=simple
+User=root
+WorkingDirectory=$TOOLKIT_DIR/ui
+Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+EnvironmentFile=$UI_ENV_FILE
+ExecStart=/usr/bin/npm run build_and_start
+Restart=on-failure
+RestartSec=5
+[Install]
+WantedBy=multi-user.target
+EOF
+  systemctl daemon-reload
+  echo "AI Toolkit UI token is stored in $UI_ENV_FILE (root-only)."
+fi
+"$PYTHON_BIN" - <<'PY'
+import importlib
+mods = ['torch', 'accelerate', 'diffusers', 'transformers', 'huggingface_hub']
+for name in mods:
+    mod = importlib.import_module(name)
+    print(name, getattr(mod, '__version__', 'ok'))
+PY
+# Final summary; every path is printed from the configured variables so that
+# overridden locations are reported correctly.
+echo "=== AI Toolkit install complete ==="
+echo "Toolkit:  $TOOLKIT_DIR"
+echo "Venv:     $TOOLKIT_VENV"
+echo "Training: $TRAINING_DIR"
+echo "Run CLI:  $TRAINING_DIR/run-ai-toolkit.sh $TRAINING_DIR/config/job.yaml"
+echo "UI:       optional; rerun with INSTALL_UI_DEPS=1, then set AI_TOOLKIT_AUTH before starting ai-toolkit-ui.service"

scripts/startup-script.sh CHANGED Viewed

@@ -147,3 +147,4 @@ curl -sS --max-time 5 http://127.0.0.1:8190/api/health
 echo "=== Setup Complete ==="
 echo "Install Wan 2.2 video stack: $APP_DIR/scripts/install-video-stack.sh"

 echo "=== Setup Complete ==="
 echo "Install Wan 2.2 video stack: $APP_DIR/scripts/install-video-stack.sh"
+echo "Install AI Toolkit training stack: $APP_DIR/scripts/install-ai-toolkit.sh"

studio/src/App.tsx CHANGED Viewed

@@ -1,17 +1,24 @@
 import { useState, useEffect, useCallback } from "react";
-import type { MediaItem } from "./types";
 export default function App() {
   const [items, setItems] = useState<MediaItem[]>([]);
   const [loading, setLoading] = useState(true);
   const [selected, setSelected] = useState<string | null>(null);
   const load = useCallback(async () => {
     setLoading(true);
     try {
-      const res = await fetch("/api/listing");
-      const data = await res.json();
-      setItems(data.images || []);
     } catch (e) {
       console.error(e);
     } finally {
@@ -21,25 +28,39 @@ export default function App() {
   useEffect(() => {
     load();
   }, [load]);
   return (
     <div className="min-h-screen bg-black text-white">
       <header className="border-b border-gray-800 px-6 py-4 flex items-center justify-between">
-        <h1 className="text-xl font-semibold">Nemoflix AMD Gallery</h1>
-        <span className="text-sm text-gray-500">{items.length} items</span>
       </header>
       <main className="p-6">
-        {loading && items.length === 0 ? (
           <p className="text-gray-500">Loading...</p>
-        ) : items.length === 0 ? (
           <p className="text-gray-500">No media yet.</p>
         ) : (
           <div className="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-4 xl:grid-cols-5 gap-4">
             {items.map((item) => (
               <div
-                key={item.name}
                 onClick={() => setSelected(item.url)}
                 className="cursor-pointer rounded-lg overflow-hidden border border-gray-800 hover:border-rose-600 transition aspect-video bg-gray-900 relative group"
               >

 import { useState, useEffect, useCallback } from "react";
+import { JobCard } from "./components/JobCard";
+import type { JobItem, MediaItem } from "./types";
 export default function App() {
   const [items, setItems] = useState<MediaItem[]>([]);
+  const [jobs, setJobs] = useState<JobItem[]>([]);
   const [loading, setLoading] = useState(true);
   const [selected, setSelected] = useState<string | null>(null);
   const load = useCallback(async () => {
     setLoading(true);
     try {
+      const [listingRes, jobsRes] = await Promise.all([
+        fetch("/api/listing"),
+        fetch("/api/jobs"),
+      ]);
+      const listing = await listingRes.json();
+      const jobData = await jobsRes.json();
+      setItems(listing.images || []);
+      setJobs(jobData.jobs || []);
     } catch (e) {
       console.error(e);
     } finally {
   useEffect(() => {
     load();
+    const id = window.setInterval(load, 3000);
+    return () => window.clearInterval(id);
   }, [load]);
+  const hasContent = jobs.length > 0 || items.length > 0;
   return (
     <div className="min-h-screen bg-black text-white">
       <header className="border-b border-gray-800 px-6 py-4 flex items-center justify-between">
+        <div>
+          <h1 className="text-xl font-semibold">Nemoflix AMD Gallery</h1>
+          <p className="text-xs text-gray-500 mt-1">Live from the MI300X droplet</p>
+        </div>
+        <div className="text-sm text-gray-500">
+          {jobs.length > 0 && <span className="text-amber-400 mr-3">{jobs.length} generating</span>}
+          <span>{items.length} media</span>
+        </div>
       </header>
       <main className="p-6">
+        {loading && !hasContent ? (
           <p className="text-gray-500">Loading...</p>
+        ) : !hasContent ? (
           <p className="text-gray-500">No media yet.</p>
         ) : (
           <div className="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-4 xl:grid-cols-5 gap-4">
+            {jobs.map((job) => (
+              <JobCard key={job.prompt_id} job={job} />
+            ))}
             {items.map((item) => (
               <div
+                key={item.url}
                 onClick={() => setSelected(item.url)}
                 className="cursor-pointer rounded-lg overflow-hidden border border-gray-800 hover:border-rose-600 transition aspect-video bg-gray-900 relative group"
               >

studio/src/components/JobCard.tsx ADDED Viewed

	@@ -0,0 +1,57 @@

+import type { JobItem } from "../types";
+interface JobCardProps {
+  job: JobItem;
+}
+/**
+ * Resolve the percentage to display for a job.
+ * Uses the reported `progress_percent` when it is a number; otherwise derives a rounded
+ * percentage from the step counters, or returns null when no positive `step_max` exists.
+ */
+function getProgress(job: JobItem): number | null {
+  const { progress_percent: percent, step_value: done, step_max: total } = job;
+  if (typeof percent === "number") {
+    return percent;
+  }
+  if (!total || total <= 0) {
+    return null;
+  }
+  return Math.round(((done || 0) / total) * 100);
+}
+/**
+ * Gallery card for one generation job: status line, prompt, progress bar and prompt id.
+ * Finished jobs (completed or failed) stop pulsing, and a failed job shows its error
+ * text instead of the "waiting" placeholder.
+ */
+export function JobCard({ job }: JobCardProps) {
+  const progress = getProgress(job);
+  // Keep a visible sliver when there is no progress and never overflow the track.
+  const progressWidth = `${Math.min(100, Math.max(3, progress ?? 3))}%`;
+  const isFailed = job.status === "failed";
+  const isSettled = isFailed || job.status === "completed";
+  const dotClass = isFailed
+    ? "inline-block w-2 h-2 rounded-full bg-rose-500"
+    : isSettled
+      ? "inline-block w-2 h-2 rounded-full bg-amber-400"
+      : "inline-block w-2 h-2 rounded-full bg-amber-400 animate-pulse";
+  let detail: string;
+  if (isFailed) {
+    detail = job.error || "Generation failed";
+  } else if (job.current_node) {
+    detail = `Node ${job.current_node}`;
+  } else if (isSettled) {
+    detail = "Completed";
+  } else {
+    detail = "Waiting for Comfy progress event";
+  }
+  return (
+    <div className="rounded-lg overflow-hidden border border-amber-500/40 aspect-video bg-gray-950 relative p-4 flex flex-col justify-between">
+      <div className="absolute inset-0 bg-gradient-to-br from-amber-500/10 via-transparent to-rose-600/10" />
+      <div className="relative flex items-center justify-between gap-2 text-amber-300 text-xs font-medium uppercase tracking-wide">
+        <span className="flex items-center gap-2">
+          <span className={dotClass} />
+          {job.status === "running" ? "Generating" : job.status}
+          {job.queue_position ? ` · Queue ${job.queue_position}` : ""}
+        </span>
+        {progress !== null && <span>{progress}%</span>}
+      </div>
+      <div className="relative space-y-2">
+        <p className="text-sm font-medium line-clamp-2 text-white/90">
+          {job.prompt || "Video generation job"}
+        </p>
+        <div className="space-y-1">
+          <div className="h-1.5 rounded-full bg-gray-800 overflow-hidden">
+            <div className="h-full bg-amber-400 transition-all" style={{ width: progressWidth }} />
+          </div>
+          <p className="text-[11px] text-gray-400 truncate">
+            {detail}
+            {job.step_max ? ` · step ${job.step_value || 0}/${job.step_max}` : ""}
+            {job.nodes_total ? ` · nodes ${job.nodes_finished || 0}/${job.nodes_total}` : ""}
+          </p>
+        </div>
+      </div>
+      <p className="relative text-[10px] text-gray-500 font-mono truncate">{job.prompt_id}</p>
+    </div>
+  );
+}

studio/src/types.ts CHANGED Viewed

@@ -6,3 +6,19 @@ export interface MediaItem {
   mtime: number;
   url: string;
 }

   mtime: number;
   url: string;
 }
+/** One entry of the `jobs` array returned by `/api/jobs`. */
+export interface JobItem {
+  prompt_id: string;
+  status: "pending" | "running" | "unknown" | "completed" | "failed" | string;
+  mode?: string;
+  prompt?: string;
+  // Requested output parameters, stored when the job is submitted through the API.
+  width?: number;
+  height?: number;
+  length?: number;
+  fps?: number;
+  // NOTE: the API sends null here for jobs first seen on the websocket bridge.
+  created_at?: string;
+  // ISO timestamp of the most recent progress or status event.
+  updated_at?: string;
+  queue_position?: number | null;
+  current_node?: string | null;
+  step_value?: number;
+  step_max?: number;
+  nodes_finished?: number;
+  nodes_running?: number;
+  nodes_total?: number;
+  progress_percent?: number | null;
+  error?: string;
+}

studio/tsconfig.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "compilerOptions": {
+    "target": "ES2020",
+    "useDefineForClassFields": true,
+    "lib": ["DOM", "DOM.Iterable", "ES2020"],
+    "allowJs": false,
+    "skipLibCheck": true,
+    "esModuleInterop": true,
+    "allowSyntheticDefaultImports": true,
+    "strict": true,
+    "forceConsistentCasingInFileNames": true,
+    "module": "ESNext",
+    "moduleResolution": "Bundler",
+    "resolveJsonModule": true,
+    "isolatedModules": true,
+    "noEmit": true,
+    "jsx": "react-jsx"
+  },
+  "include": ["src"],
+  "references": [{ "path": "./tsconfig.node.json" }]
+}

studio/tsconfig.node.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "compilerOptions": {
+    "composite": true,
+    "skipLibCheck": true,
+    "module": "ESNext",
+    "moduleResolution": "Bundler",
+    "allowSyntheticDefaultImports": true
+  },
+  "include": ["vite.config.ts"]
+}

training/README.md ADDED Viewed

	@@ -0,0 +1,37 @@

+# Nemoflix AMD Training
+Scripts/config templates for disposable AMD MI300X droplets.
+## Install AI Toolkit on the droplet
+After `scripts/startup-script.sh` finishes on a fresh droplet:
+```bash
+cd /root/nemoflix
+bash scripts/install-ai-toolkit.sh
+```
+The installer creates:
+- `/root/ai-toolkit` — Ostris AI Toolkit checkout
+- `/root/ai-toolkit-venv` — isolated ROCm Python venv
+- `/root/nemoflix-training` — datasets/configs/output workspace
+- `/root/nemoflix-training/run-ai-toolkit.sh` — CLI runner
+## Train
+Copy a config into `/root/nemoflix-training/config/`, put media in `/root/nemoflix-training/datasets/...`, then:
+```bash
+/root/nemoflix-training/run-ai-toolkit.sh /root/nemoflix-training/config/<job>.yaml
+```
+## Optional UI
+When run with `INSTALL_UI_DEPS=1`, the installer creates but does not start:
+```bash
+ai-toolkit-ui.service
+```
+Before exposing it, set a real `AI_TOOLKIT_AUTH` value in the service or an override.

training/flux2_identity_template.yaml ADDED Viewed

	@@ -0,0 +1,78 @@

+# FLUX.2 [dev] identity/character LoRA template for AI Toolkit.
+# Source: Ostris AI Toolkit FLUX.2 UI defaults + RunComfy FLUX.2 LoRA guide.
+---
+job: extension
+config:
+  name: "identity_flux2_lora_v1"
+  process:
+    - type: "sd_trainer"
+      training_folder: "/root/nemoflix-training/output"
+      device: cuda:0
+      # Replace with the private trigger for the subject, e.g. a short unique token.
+      trigger_word: "character_trigger"
+      network:
+        type: "lora"
+        linear: 32
+        linear_alpha: 32
+      save:
+        dtype: bf16
+        save_every: 250
+        max_step_saves_to_keep: 4
+        push_to_hub: false
+      datasets:
+        - folder_path: "/root/nemoflix-training/datasets/identity-flux2"
+          caption_ext: "txt"
+          # Static captions + cached text embeddings: keep dropout at 0.
+          caption_dropout_rate: 0
+          shuffle_tokens: false
+          cache_latents_to_disk: true
+          resolution: [768, 896, 1024]
+      train:
+        batch_size: 1
+        steps: 1800
+        gradient_accumulation_steps: 1
+        train_unet: true
+        train_text_encoder: false
+        gradient_checkpointing: true
+        noise_scheduler: "flowmatch"
+        timestep_type: "weighted"
+        optimizer: "adamw8bit"
+        lr: 1e-4
+        optimizer_params:
+          weight_decay: 1e-4
+        dtype: bf16
+        # First pass: no DOP, static captions, cache embeddings for speed/VRAM.
+        unload_text_encoder: false
+        cache_text_embeddings: true
+        ema_config:
+          use_ema: false
+      model:
+        name_or_path: "black-forest-labs/FLUX.2-dev"
+        arch: "flux2"
+        quantize: true
+        qtype: "qfloat8"
+        quantize_te: true
+        qtype_te: "qfloat8"
+        low_vram: false
+        model_kwargs:
+          match_target_res: false
+      sample:
+        sampler: "flowmatch"
+        sample_every: 250
+        width: 1024
+        height: 1024
+        prompts:
+          - "character_trigger, man, realistic Instagram travel photo, standing on a cliff at golden hour, ocean in the background, natural pose, shot on a mirrorless camera"
+          - "character_trigger, man, lifestyle creator photo, sitting at an outdoor cafe with a laptop and coffee, warm afternoon light, candid social media photography"
+          - "character_trigger, man, action sports photo, snowboarding down a mountain slope, powder snow, dynamic pose, realistic telephoto shot"
+          - "character_trigger, man, fitness lifestyle photo, post-workout portrait outside a modern gym, athletic casual clothing, natural light, authentic Instagram content"
+          - "character_trigger, man, editorial streetwear photo, walking through a downtown city street at sunset, stylish outfit, shallow depth of field, realistic fashion photography"
+          - "photo of a man, realistic social media portrait, white background, medium shot, studio lighting"
+        neg: ""
+        seed: 42
+        walk_seed: false
+        guidance_scale: 1
+        sample_steps: 25
+meta:
+  name: "[name]"
+  version: "1.0"

training/wan22_i2v_character_template.yaml ADDED Viewed

	@@ -0,0 +1,77 @@

+# Wan 2.2 I2V 14B character-likeness LoRA template for AI Toolkit.
+# Source template: ostris/ai-toolkit config/examples/train_lora_wan22_14b_24gb.yaml
+# I2V-specific arch/options confirmed in ui/src/app/jobs/new/options.ts.
+---
+job: extension
+config:
+  name: "character_wan22_i2v_v1"
+  process:
+    - type: "sd_trainer"
+      training_folder: "/root/nemoflix-training/output"
+      device: cuda:0
+      trigger_word: "character_trigger"
+      network:
+        type: "lora"
+        linear: 32
+        linear_alpha: 32
+      save:
+        dtype: bf16
+        save_every: 250
+        max_step_saves_to_keep: 6
+      datasets:
+        - folder_path: "/root/nemoflix-training/datasets/character-wan-i2v"
+          caption_ext: "txt"
+          caption_dropout_rate: 0.05
+          num_frames: 41
+          resolution: [512, 768, 1024]
+      train:
+        batch_size: 1
+        steps: 2500
+        gradient_accumulation: 1
+        train_unet: true
+        train_text_encoder: false
+        gradient_checkpointing: true
+        noise_scheduler: "flowmatch"
+        timestep_type: "sigmoid"
+        optimizer: "adamw8bit"
+        lr: 1e-4
+        optimizer_params:
+          weight_decay: 1e-4
+        dtype: bf16
+        switch_boundary_every: 10
+        # Keep captions live for character training and DOP experiments.
+        # If memory is tight, switch to cache_text_embeddings and remove DOP.
+        cache_text_embeddings: false
+      model:
+        name_or_path: "ai-toolkit/Wan2.2-I2V-A14B-Diffusers-bf16"
+        arch: "wan22_14b_i2v"
+        quantize: true
+        qtype: "uint4|ostris/accuracy_recovery_adapters/wan22_14b_i2v_torchao_uint4.safetensors"
+        quantize_te: true
+        qtype_te: "qfloat8"
+        low_vram: false
+        model_kwargs:
+          train_high_noise: true
+          train_low_noise: true
+      sample:
+        sampler: "flowmatch"
+        sample_every: 250
+        width: 768
+        height: 768
+        num_frames: 41
+        fps: 16
+        # Wan I2V samples require a prompt + ctrl_img pair.
+        # Replace this once we have character control/reference frames.
+        ctrl_img: "/root/nemoflix-training/samples/character_control.jpg"
+        prompts:
+          - "character_trigger, person, cinematic portrait, walking in heavy rain, dramatic lighting"
+          - "character_trigger, person, wearing futuristic armor, rain-soaked city street, cinematic"
+          - "character_trigger, person, close-up face, intense expression, film still, shallow depth of field"
+        neg: ""
+        seed: 42
+        walk_seed: true
+        guidance_scale: 3.5
+        sample_steps: 25
+meta:
+  name: "[name]"
+  version: "1.0"