faizr206 committed on
Commit
ffc29c7
·
1 Parent(s): a8c3255

move repo

Browse files
Files changed (2) hide show
  1. Dockerfile +15 -0
  2. app/main.py +410 -0
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM manimcommunity/manim:stable

WORKDIR /app

# App deps only (no LaTeX). Installed BEFORE copying the source so that
# code-only edits do not invalidate the cached pip layer on rebuilds.
RUN python -m pip install --upgrade pip && \
    python -m pip install --no-cache-dir \
    fastapi "uvicorn[standard]" pydantic python-dotenv google-genai

# Source last: changes here only rebuild this cheap layer.
COPY app /app

ENV PORT=7860
EXPOSE 7860

# Run uvicorn via python -m so PATH is never an issue
CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
app/main.py ADDED
@@ -0,0 +1,410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os, re, uuid, subprocess, sys, time, traceback, threading
from collections import deque
from pathlib import Path
from typing import Optional, Tuple

from fastapi import FastAPI, HTTPException, Response
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# Optional .env for local testing
from dotenv import load_dotenv
load_dotenv()

# -------- Gemini (same SDK style as your Flask app) --------
from google import genai
from google.genai import types

# Gemini configuration, all overridable via environment variables.
API_KEY = os.getenv("GEMINI_API_KEY", "")  # empty -> LLM helpers fall back to canned output
# Switch to 2.5 Flash as requested
MODEL = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
PORT = int(os.getenv("PORT", "7860"))  # NOTE(review): not read below; uvicorn gets --port from the CLI

# None when no API key is configured; every LLM helper checks this first.
client = genai.Client(api_key=API_KEY) if API_KEY else None

# -------- FastAPI app --------
app = FastAPI(title="Manim Render API (error + visual refine)")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # tighten in prod
    allow_methods=["*"],
    allow_headers=["*"],
)


# Per-request scratch directories (scene.py + media output) live under here.
RUNS = Path("runs"); RUNS.mkdir(parents=True, exist_ok=True)
37
+
38
# ---------------- simple 10 RPM rate limiter ----------------
class RateLimiter:
    """Thread-safe sliding-window rate limiter.

    Allows at most `max_per_minute` acquisitions within any rolling
    60-second window; `acquire()` blocks until a slot is available.
    """

    def __init__(self, max_per_minute: int):
        self.max = max_per_minute
        self.lock = threading.Lock()
        self.events = deque()  # timestamps (time.time()) of granted slots

    def acquire(self) -> None:
        """Block until a slot is free, then record this acquisition.

        Rewritten as an iterative retry loop: the previous version recursed
        into acquire() from within the held non-reentrant Lock (deadlock
        risk) and slept while holding the lock, stalling other threads.
        """
        while True:
            with self.lock:
                now = time.time()
                # drop events older than 60s
                while self.events and now - self.events[0] >= 60:
                    self.events.popleft()
                if len(self.events) < self.max:
                    self.events.append(now)
                    return
                # must wait until the oldest event falls out of the window
                wait_for = 60 - (now - self.events[0])
            # Sleep OUTSIDE the lock so other threads are not blocked,
            # then retry (another thread may have taken the slot meanwhile).
            if wait_for > 0:
                time.sleep(wait_for + 0.01)

limiter = RateLimiter(10)
62
+
63
def gemini_call(*, system: str, contents):
    """Single entry point for Gemini requests.

    Enforces the process-wide RPM budget before issuing the request and
    normalizes the response down to plain text.
    """
    if not client:
        raise RuntimeError("Gemini client is not configured")
    limiter.acquire()
    generation_config = types.GenerateContentConfig(system_instruction=system)
    response = client.models.generate_content(
        model=MODEL,
        config=generation_config,
        contents=contents,
    )
    return getattr(response, "text", str(response))
74
+
75
# ---------------- prompts ----------------
# System instruction sent with every gemini_call: constrains output to pure
# Python defining AutoScene(Scene), with the capture-point contract and the
# Manim CE 0.19 API pitfalls spelled out.
SYSTEM_PROMPT = """You are a Manim CE (0.19.x) code generator/refiner.
Return ONLY valid Python code (no backticks, no prose).
Define exactly one class: AutoScene(Scene).
Keep it short (preferably ≤ ~60 s) and quickly renderable.

Use: from manim import *
Allowed imports: manim, math, numpy.
Forbidden: os, subprocess, sys, requests, pathlib, socket, shutil, psutil, any file/network/OS access.

# CAPTURE POLICY (must follow exactly)
- Insert a comment line `# CAPTURE_POINT` at the final, steady layout of the scene.
- Right after `# CAPTURE_POINT`, call self.wait(0.75) and then END THE SCENE.
- DO NOT add any outro animations, fades, or camera moves after `# CAPTURE_POINT`.
- Ensure all intended elements are visible and legible at `# CAPTURE_POINT` (adequate margins, no overlaps, font ≥ 32 px at 854x480).

# Common Manim CE 0.19 API constraints (must follow)
- Do NOT use `vertex=` with RightAngle(...). Choose the corner by line ordering or set quadrant=(±1, ±1).
- Do NOT call `.to_center()` (not a valid method). Use `.center()` or `.move_to(ORIGIN)`.
- Prefer `.move_to()`, `.align_to()`, `.to_edge()`, `.scale()`, `.next_to()` for layout/placement.
"""

# Fallback scene used whenever Gemini is unconfigured or returns unusable code.
DEFAULT_SCENE = """from manim import *

class AutoScene(Scene):
    def construct(self):
        t = Text("Hello from Manim").scale(1)
        self.play(Write(t))
        # CAPTURE_POINT
        self.wait(0.75)
"""
106
+
107
# ---------- NEW: carry full CLI error back to the refiner ----------
class RenderError(Exception):
    """Raised when a Manim CLI render fails; `log` carries the captured output."""

    def __init__(self, log: str):
        # Fixed message for str(exc); the full CLI log travels on .log.
        super().__init__("Manim render failed")
        self.log = log if log else ""
112
+
113
+ # ---------------- helpers ----------------
114
+ def _clean_code(text: str) -> str:
115
+ """Strip common Markdown fences like ```python ... ``` or ``` ..."""
116
+ if not text:
117
+ return ""
118
+ text = re.sub(r"^```(?:\s*python)?\s*", "", text.strip(), flags=re.IGNORECASE)
119
+ text = re.sub(r"\s*```$", "", text)
120
+ return text.strip()
121
+
122
+ def _preflight_sanitize(code: str) -> str:
123
+ """
124
+ Auto-correct a few frequent Manim CE 0.19 mistakes to reduce trivial crashes.
125
+ - .to_center() -> .center()
126
+ - Remove vertex=... from RightAngle(...), then normalize commas.
127
+ """
128
+ c = code
129
+ # 1) replace invalid method
130
+ c = re.sub(r"\.to_center\(\)", ".center()", c)
131
+
132
+ # 2) remove vertex=... kwarg inside RightAngle(...)
133
+ # Case A: middle of arg list with trailing comma
134
+ c = re.sub(
135
+ r"(RightAngle\s*\([^)]*?),\s*vertex\s*=\s*[^,)\s]+(\s*,)",
136
+ r"\1\2",
137
+ c,
138
+ flags=re.DOTALL,
139
+ )
140
+ # Case B: last kwarg before ')'
141
+ c = re.sub(
142
+ r"(RightAngle\s*\([^)]*?),\s*vertex\s*=\s*[^,)\s]+(\s*\))",
143
+ r"\1\2",
144
+ c,
145
+ flags=re.DOTALL,
146
+ )
147
+ # Normalize doubled commas or commas before ')'
148
+ c = re.sub(r",\s*,", ", ", c)
149
+ c = re.sub(r",\s*\)", ")", c)
150
+ return c
151
+
152
def _run_manim(scene_code: str, run_id: Optional[str] = None) -> Tuple[bytes, Optional[Path]]:
    """Render MP4 (fast) and also save a steady-state PNG (last frame).

    Runs the `manim` CLI twice in a fresh scratch directory under RUNS:
    once for the low-quality (-ql) video and once with -s to capture the
    final frame (relies on the CAPTURE_POINT prompt policy).

    Returns:
        (mp4_bytes, png_path): the rendered video bytes and the path to the
        last-frame PNG, or None when the frame render failed/produced none.

    Raises:
        RenderError: carrying the combined stdout/stderr log when the video
        render exits non-zero, or when no .mp4 can be located afterwards.
    """
    run_id = run_id or str(uuid.uuid4())[:8]
    work = RUNS / run_id; work.mkdir(parents=True, exist_ok=True)
    media = work / "media"; media.mkdir(parents=True, exist_ok=True)
    scene_path = work / "scene.py"

    # Write scene code (after sanitizer)
    safe_code = _preflight_sanitize(scene_code)
    scene_path.write_text(safe_code, encoding="utf-8")

    env = os.environ.copy()
    env["PYTHONPATH"] = str(work)  # let the scene resolve imports from its run dir

    # 1) Render video
    cmd_video = [
        "manim", "-ql", "--disable_caching",
        "--media_dir", str(media),
        "-o", f"{run_id}.mp4",
        str(scene_path), "AutoScene",
    ]
    # stderr folded into stdout so RenderError carries one combined log.
    proc_v = subprocess.run(
        cmd_video,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        env=env,
    )
    if proc_v.returncode != 0:
        log = proc_v.stdout or ""
        print("Manim stdout/stderr:\n", log, file=sys.stderr)
        raise RenderError(log)

    # Locate output mp4: prefer the exact -o name, else the first .mp4 found.
    mp4 = None
    for p in media.rglob(f"{run_id}.mp4"):
        mp4 = p; break
    if not mp4:
        for p in media.rglob("*.mp4"):
            mp4 = p; break
    if not mp4:
        raise RenderError("Rendered video not found")

    # 2) Save last frame PNG (leverages our CAPTURE_POINT rule)
    png_path = None
    cmd_png = [
        "manim", "-ql", "--disable_caching", "-s",  # -s saves the last frame as an image
        "--media_dir", str(media),
        str(scene_path), "AutoScene",
    ]
    proc_p = subprocess.run(
        cmd_png,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        env=env,
    )
    # Best-effort: a failed frame render simply leaves png_path as None.
    if proc_p.returncode == 0:
        cand = None
        # Keeps the LAST png the walk yields; rglob order is OS-dependent —
        # NOTE(review): assumes a single PNG per run, confirm if that changes.
        for p in media.rglob("*.png"):
            cand = p
        png_path = cand

    return mp4.read_bytes(), png_path
216
+
217
def _upload_image_to_gemini(png_path: Path):
    """Push a PNG to the Gemini Files API and return its file reference.

    Returns None when the client is unconfigured or the path is absent.
    """
    usable = client and png_path and png_path.exists()
    if not usable:
        return None
    limiter.acquire()
    with open(png_path, "rb") as handle:
        # `file=` must be passed by keyword; the mime type is optional but helpful.
        return client.files.upload(
            file=handle,
            config={"mime_type": "image/png"},
        )
228
+
229
+
230
def llm_generate_manim_code(prompt: str, previous_code: Optional[str] = None) -> str:
    """First-pass scene generation (capture-aware).

    Falls back to previous_code or DEFAULT_SCENE when Gemini is unavailable,
    errors out, or returns code without an AutoScene class.
    """
    if not client:
        return DEFAULT_SCENE
    fallback = previous_code or DEFAULT_SCENE
    try:
        contents = f"Create AutoScene for: {prompt}\nRemember the CAPTURE POLICY and Common API constraints."
        raw = gemini_call(system=SYSTEM_PROMPT, contents=contents)
        candidate = _clean_code(raw)
        return candidate if "class AutoScene" in candidate else fallback
    except Exception:
        print("LLM generate error:", file=sys.stderr)
        traceback.print_exc()
        return fallback
245
+
246
def llm_refine_from_error(previous_code: str, error_message: str, original_user_prompt: str) -> str:
    """Ask Gemini to repair code that crashed in Manim, using the real CLI log."""
    if not client:
        return previous_code or DEFAULT_SCENE
    fallback = previous_code or DEFAULT_SCENE
    try:
        # Only the tail of the log matters: the traceback lives at the end.
        trimmed = error_message[-4000:] if error_message else ""
        user_prompt = f"""Original user prompt:
{original_user_prompt}

The following Manim CE (0.19.x) code failed to render. Fix it.

Current code:
{previous_code}

Error / stack trace (tail):
{trimmed}

Requirements:
- Fix the bug while preserving the math logic and planned animations.
- Keep exactly one class AutoScene(Scene).
- Keep the CAPTURE POLICY and ensure # CAPTURE_POINT is at the final steady layout.
- Scan for nonexistent methods (e.g., `.to_center`) or invalid kwargs (e.g., `vertex=` on RightAngle) and replace with valid Manim CE 0.19 API.
- Prefer `.center()`/`.move_to(ORIGIN)`, and `.move_to()`, `.align_to()`, `.to_edge()`, `.next_to()` for layout.
- Return ONLY the corrected Python code (no backticks).
"""
        repaired = _clean_code(gemini_call(system=SYSTEM_PROMPT, contents=user_prompt))
        return repaired if "class AutoScene" in repaired else fallback
    except Exception:
        print("LLM refine error:", file=sys.stderr)
        traceback.print_exc()
        return fallback
280
+
281
def llm_visual_refine_from_image(original_user_prompt: str, previous_code: str, png_path: Optional[Path]) -> str:
    """
    Use the screenshot to request layout/legibility/placement fixes.
    Includes the original prompt and current code, and asks for minimal edits.

    Best-effort: any failure (no client, missing PNG, upload or LLM error,
    or a reply without `class AutoScene`) returns previous_code unchanged.
    """
    if not client or not png_path or not png_path.exists():
        return previous_code
    try:
        # Upload the steady-state frame so the model can see the actual layout.
        file_ref = _upload_image_to_gemini(png_path)
        if not file_ref:
            return previous_code

        visual_prompt = f"""You are refining a Manim CE (0.19.x) scene based on its steady-state screenshot.
Original user prompt:
{original_user_prompt}

Current Manim code:
{previous_code}

Tasks (optimize for readability and visual quality without changing the math meaning):
- Fix layout issues (overlaps, cramped margins, alignment, consistent scaling).
- Improve text legibility (minimum size ~32 px at 854x480, adequate contrast).
- Ensure all intended elements are visible at the capture point.
- Keep animation semantics as-is unless they're obviously broken.
- Keep exactly one class AutoScene(Scene).
- Preserve the CAPTURE POLICY and place `# CAPTURE_POINT` at the final steady layout with self.wait(0.75) and NO outro after that.
Return ONLY the revised Python code (no backticks).
"""

        # Multimodal call: image reference first, then the textual instructions.
        response_text = gemini_call(system=SYSTEM_PROMPT, contents=[file_ref, visual_prompt])
        code = _clean_code(response_text)
        if "class AutoScene" not in code:
            return previous_code
        return code
    except Exception:
        print("LLM visual refine error:", file=sys.stderr)
        traceback.print_exc()
        return previous_code
319
+
320
def _refine_until_render(code: str, last_err: str, user_prompt: str, max_attempts: int):
    """Repeatedly ask the LLM to fix `code` from `last_err` and re-render.

    Returns (code, mp4_bytes, png_path) on the first successful render;
    re-raises the final failure once `max_attempts` is exhausted.
    """
    attempts = 0
    while True:
        attempts += 1
        refined = llm_refine_from_error(
            previous_code=code, error_message=last_err, original_user_prompt=user_prompt
        )
        try:
            mp4_bytes, png_path = _run_manim(refined, run_id=f"iter_err_{attempts}")
            return refined, mp4_bytes, png_path
        except RenderError as err:
            # Prefer the fresh CLI log; keep the previous one if it was empty.
            last_err = err.log or last_err
            if attempts >= max_attempts:
                raise
        except Exception:
            last_err = traceback.format_exc()
            if attempts >= max_attempts:
                raise

def refine_loop(user_prompt: str, max_error_refines: int = 3, do_visual_refine: bool = True) -> bytes:
    """
    Generate → render; on error, refine up to N times from Manim traceback → re-render.
    If first render succeeds and do_visual_refine==True, run an image-based refinement
    using the saved steady-state PNG, then re-render. Fallback to the best successful MP4.

    The two formerly copy-pasted retry loops are unified in _refine_until_render,
    and the generic-exception path now re-raises when max_error_refines <= 0
    (previously it fell through and crashed on unbound mp4_bytes/png_path).
    """
    # 1) initial generation (capture-aware)
    code = llm_generate_manim_code(user_prompt)

    # 2) render attempt, with error-driven refinement on failure
    try:
        mp4_bytes, png_path = _run_manim(code, run_id="iter0")
    except RenderError as e:
        print("Render failed (iter0), attempting error-based refinement...", file=sys.stderr)
        if max_error_refines <= 0:
            raise
        code, mp4_bytes, png_path = _refine_until_render(
            code, e.log or "", user_prompt, max_error_refines
        )
    except Exception:
        print("Unexpected error path; refining from Python traceback...", file=sys.stderr)
        if max_error_refines <= 0:
            raise
        code, mp4_bytes, png_path = _refine_until_render(
            code, traceback.format_exc(), user_prompt, max_error_refines
        )

    # 3) optional visual refinement pass (best-effort)
    if do_visual_refine and png_path and png_path.exists():
        refined2 = llm_visual_refine_from_image(
            original_user_prompt=user_prompt, previous_code=code, png_path=png_path
        )
        if refined2.strip() != code.strip():
            try:
                mp4_bytes2, _ = _run_manim(refined2, run_id="iter2")
                return mp4_bytes2
            except Exception:
                print("Visual refine render failed; returning best known render.", file=sys.stderr)
                return mp4_bytes

    return mp4_bytes
381
+
382
# ---------------- API ----------------
class PromptIn(BaseModel):
    """Request body for /generate-code and /generate-and-render."""
    # Free-text description of the scene the user wants generated/rendered.
    prompt: str
385
+
386
@app.get("/")
def health():
    """Liveness probe: reports the configured model and whether a key is set."""
    return {
        "ok": True,
        "model": MODEL,
        "has_gemini": bool(API_KEY),
    }
389
+
390
@app.post("/generate-code")
def generate_code(inp: PromptIn):
    """Return ONLY the generated Manim Python code (no rendering)."""
    cleaned = (inp.prompt or "").strip()
    if not cleaned:
        raise HTTPException(400, "Missing prompt")
    return {"code": llm_generate_manim_code(cleaned)}
397
+
398
@app.post("/generate-and-render")
def generate_and_render(inp: PromptIn):
    """Generate a scene for the prompt, render it (with refinement), return MP4.

    Returns 400 for an empty prompt, 500 when refinement ultimately fails.
    """
    if not inp.prompt or not inp.prompt.strip():
        raise HTTPException(400, "Missing prompt")
    try:
        mp4 = refine_loop(inp.prompt.strip(), max_error_refines=3, do_visual_refine=True)
    except Exception as exc:
        # Previously the real failure was silently discarded; log it and
        # chain it so server logs / debug tooling can see the cause.
        print("generate-and-render failed:", file=sys.stderr)
        traceback.print_exc()
        raise HTTPException(500, "Failed to produce video after refinement") from exc
    return Response(
        content=mp4,
        media_type="video/mp4",
        headers={"Content-Disposition": 'inline; filename="result.mp4"'}
    )