faizr206 committed on
Commit
ffc29c7
·
1 Parent(s): a8c3255

move repo

Browse files
Files changed (2) hide show
  1. Dockerfile +15 -0
  2. app/main.py +410 -0
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM manimcommunity/manim:stable

WORKDIR /app

# App deps only (no LaTeX). Installed BEFORE copying the source so that
# code-only edits do not invalidate the cached pip layer on rebuilds.
RUN python -m pip install --upgrade pip && \
    python -m pip install --no-cache-dir \
    fastapi "uvicorn[standard]" pydantic python-dotenv google-genai

# Source last: changes here only rebuild this cheap layer.
COPY app /app

ENV PORT=7860
EXPOSE 7860

# Run uvicorn via python -m so PATH is never an issue
CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
app/main.py ADDED
@@ -0,0 +1,410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os, re, uuid, subprocess, sys, time, traceback, threading
from collections import deque
from pathlib import Path
from typing import Optional, Tuple

from fastapi import FastAPI, HTTPException, Response
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# Optional .env for local testing
from dotenv import load_dotenv
load_dotenv()

# -------- Gemini (same SDK style as your Flask app) --------
from google import genai
from google.genai import types

# Gemini configuration, all overridable via environment variables.
API_KEY = os.getenv("GEMINI_API_KEY", "")  # empty -> LLM helpers fall back to canned output
# Switch to 2.5 Flash as requested
MODEL = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
PORT = int(os.getenv("PORT", "7860"))  # NOTE(review): not read below; uvicorn gets --port from the CLI

# None when no API key is configured; every LLM helper checks this first.
client = genai.Client(api_key=API_KEY) if API_KEY else None

# -------- FastAPI app --------
app = FastAPI(title="Manim Render API (error + visual refine)")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # tighten in prod
    allow_methods=["*"],
    allow_headers=["*"],
)


# Per-request scratch directories (scene.py + media output) live under here.
RUNS = Path("runs"); RUNS.mkdir(parents=True, exist_ok=True)
37
+
38
# ---------------- simple 10 RPM rate limiter ----------------
class RateLimiter:
    """Thread-safe sliding-window rate limiter.

    Allows at most `max_per_minute` acquisitions within any rolling
    60-second window; `acquire()` blocks until a slot is available.
    """

    def __init__(self, max_per_minute: int):
        self.max = max_per_minute
        self.lock = threading.Lock()
        self.events = deque()  # timestamps (time.time()) of granted slots

    def acquire(self) -> None:
        """Block until a slot is free, then record this acquisition.

        Rewritten as an iterative retry loop: the previous version recursed
        into acquire() from within the held non-reentrant Lock (deadlock
        risk) and slept while holding the lock, stalling other threads.
        """
        while True:
            with self.lock:
                now = time.time()
                # drop events older than 60s
                while self.events and now - self.events[0] >= 60:
                    self.events.popleft()
                if len(self.events) < self.max:
                    self.events.append(now)
                    return
                # must wait until the oldest event falls out of the window
                wait_for = 60 - (now - self.events[0])
            # Sleep OUTSIDE the lock so other threads are not blocked,
            # then retry (another thread may have taken the slot meanwhile).
            if wait_for > 0:
                time.sleep(wait_for + 0.01)

limiter = RateLimiter(10)
62
+
63
def gemini_call(*, system: str, contents):
    """Single entry point for Gemini requests.

    Enforces the process-wide RPM budget before issuing the request and
    normalizes the response down to plain text.
    """
    if not client:
        raise RuntimeError("Gemini client is not configured")
    limiter.acquire()
    generation_config = types.GenerateContentConfig(system_instruction=system)
    response = client.models.generate_content(
        model=MODEL,
        config=generation_config,
        contents=contents,
    )
    return getattr(response, "text", str(response))
74
+
75
# ---------------- prompts ----------------
# System instruction sent with every gemini_call: constrains output to pure
# Python defining AutoScene(Scene), with the capture-point contract and the
# Manim CE 0.19 API pitfalls spelled out.
SYSTEM_PROMPT = """You are a Manim CE (0.19.x) code generator/refiner.
Return ONLY valid Python code (no backticks, no prose).
Define exactly one class: AutoScene(Scene).
Keep it short (preferably ≤ ~60 s) and quickly renderable.

Use: from manim import *
Allowed imports: manim, math, numpy.
Forbidden: os, subprocess, sys, requests, pathlib, socket, shutil, psutil, any file/network/OS access.

# CAPTURE POLICY (must follow exactly)
- Insert a comment line `# CAPTURE_POINT` at the final, steady layout of the scene.
- Right after `# CAPTURE_POINT`, call self.wait(0.75) and then END THE SCENE.
- DO NOT add any outro animations, fades, or camera moves after `# CAPTURE_POINT`.
- Ensure all intended elements are visible and legible at `# CAPTURE_POINT` (adequate margins, no overlaps, font ≥ 32 px at 854x480).

# Common Manim CE 0.19 API constraints (must follow)
- Do NOT use `vertex=` with RightAngle(...). Choose the corner by line ordering or set quadrant=(±1, ±1).
- Do NOT call `.to_center()` (not a valid method). Use `.center()` or `.move_to(ORIGIN)`.
- Prefer `.move_to()`, `.align_to()`, `.to_edge()`, `.scale()`, `.next_to()` for layout/placement.
"""

# Fallback scene used whenever Gemini is unconfigured or returns unusable code.
DEFAULT_SCENE = """from manim import *

class AutoScene(Scene):
    def construct(self):
        t = Text("Hello from Manim").scale(1)
        self.play(Write(t))
        # CAPTURE_POINT
        self.wait(0.75)
"""
106
+
107
# ---------- NEW: carry full CLI error back to the refiner ----------
class RenderError(Exception):
    """Raised when a Manim CLI render fails; `log` carries the captured output."""

    def __init__(self, log: str):
        # Fixed message for str(exc); the full CLI log travels on .log.
        super().__init__("Manim render failed")
        self.log = log if log else ""
112
+
113
+ # ---------------- helpers ----------------
114
+ def _clean_code(text: str) -> str:
115
+ """Strip common Markdown fences like ```python ... ``` or ``` ..."""
116
+ if not text:
117
+ return ""
118
+ text = re.sub(r"^```(?:\s*python)?\s*", "", text.strip(), flags=re.IGNORECASE)
119
+ text = re.sub(r"\s*```$", "", text)
120
+ return text.strip()
121
+
122
+ def _preflight_sanitize(code: str) -> str:
123
+ """
124
+ Auto-correct a few frequent Manim CE 0.19 mistakes to reduce trivial crashes.
125
+ - .to_center() -> .center()
126
+ - Remove vertex=... from RightAngle(...), then normalize commas.
127
+ """
128
+ c = code
129
+ # 1) replace invalid method
130
+ c = re.sub(r"\.to_center\(\)", ".center()", c)
131
+
132
+ # 2) remove vertex=... kwarg inside RightAngle(...)
133
+ # Case A: middle of arg list with trailing comma
134
+ c = re.sub(
135
+ r"(RightAngle\s*\([^)]*?),\s*vertex\s*=\s*[^,)\s]+(\s*,)",
136
+ r"\1\2",
137
+ c,
138
+ flags=re.DOTALL,
139
+ )
140
+ # Case B: last kwarg before ')'
141
+ c = re.sub(
142
+ r"(RightAngle\s*\([^)]*?),\s*vertex\s*=\s*[^,)\s]+(\s*\))",
143
+ r"\1\2",
144
+ c,
145
+ flags=re.DOTALL,
146
+ )
147
+ # Normalize doubled commas or commas before ')'
148
+ c = re.sub(r",\s*,", ", ", c)
149
+ c = re.sub(r",\s*\)", ")", c)
150
+ return c
151
+
152
def _run_manim(scene_code: str, run_id: Optional[str] = None) -> Tuple[bytes, Optional[Path]]:
    """Render MP4 (fast) and also save a steady-state PNG (last frame).

    Runs the `manim` CLI twice in a fresh scratch directory under RUNS:
    once for the low-quality (-ql) video and once with -s to capture the
    final frame (relies on the CAPTURE_POINT prompt policy).

    Returns:
        (mp4_bytes, png_path): the rendered video bytes and the path to the
        last-frame PNG, or None when the frame render failed/produced none.

    Raises:
        RenderError: carrying the combined stdout/stderr log when the video
        render exits non-zero, or when no .mp4 can be located afterwards.
    """
    run_id = run_id or str(uuid.uuid4())[:8]
    work = RUNS / run_id; work.mkdir(parents=True, exist_ok=True)
    media = work / "media"; media.mkdir(parents=True, exist_ok=True)
    scene_path = work / "scene.py"

    # Write scene code (after sanitizer)
    safe_code = _preflight_sanitize(scene_code)
    scene_path.write_text(safe_code, encoding="utf-8")

    env = os.environ.copy()
    env["PYTHONPATH"] = str(work)  # let the scene resolve imports from its run dir

    # 1) Render video
    cmd_video = [
        "manim", "-ql", "--disable_caching",
        "--media_dir", str(media),
        "-o", f"{run_id}.mp4",
        str(scene_path), "AutoScene",
    ]
    # stderr folded into stdout so RenderError carries one combined log.
    proc_v = subprocess.run(
        cmd_video,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        env=env,
    )
    if proc_v.returncode != 0:
        log = proc_v.stdout or ""
        print("Manim stdout/stderr:\n", log, file=sys.stderr)
        raise RenderError(log)

    # Locate output mp4: prefer the exact -o name, else the first .mp4 found.
    mp4 = None
    for p in media.rglob(f"{run_id}.mp4"):
        mp4 = p; break
    if not mp4:
        for p in media.rglob("*.mp4"):
            mp4 = p; break
    if not mp4:
        raise RenderError("Rendered video not found")

    # 2) Save last frame PNG (leverages our CAPTURE_POINT rule)
    png_path = None
    cmd_png = [
        "manim", "-ql", "--disable_caching", "-s",  # -s saves the last frame as an image
        "--media_dir", str(media),
        str(scene_path), "AutoScene",
    ]
    proc_p = subprocess.run(
        cmd_png,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        env=env,
    )
    # Best-effort: a failed frame render simply leaves png_path as None.
    if proc_p.returncode == 0:
        cand = None
        # Keeps the LAST png the walk yields; rglob order is OS-dependent —
        # NOTE(review): assumes a single PNG per run, confirm if that changes.
        for p in media.rglob("*.png"):
            cand = p
        png_path = cand

    return mp4.read_bytes(), png_path
216
+
217
def _upload_image_to_gemini(png_path: Path):
    """Push a PNG to the Gemini Files API and return its file reference.

    Returns None when the client is unconfigured or the path is absent.
    """
    usable = client and png_path and png_path.exists()
    if not usable:
        return None
    limiter.acquire()
    with open(png_path, "rb") as handle:
        # `file=` must be passed by keyword; the mime type is optional but helpful.
        return client.files.upload(
            file=handle,
            config={"mime_type": "image/png"},
        )
228
+
229
+
230
def llm_generate_manim_code(prompt: str, previous_code: Optional[str] = None) -> str:
    """First-pass scene generation (capture-aware).

    Falls back to previous_code or DEFAULT_SCENE when Gemini is unavailable,
    errors out, or returns code without an AutoScene class.
    """
    if not client:
        return DEFAULT_SCENE
    fallback = previous_code or DEFAULT_SCENE
    try:
        contents = f"Create AutoScene for: {prompt}\nRemember the CAPTURE POLICY and Common API constraints."
        raw = gemini_call(system=SYSTEM_PROMPT, contents=contents)
        candidate = _clean_code(raw)
        return candidate if "class AutoScene" in candidate else fallback
    except Exception:
        print("LLM generate error:", file=sys.stderr)
        traceback.print_exc()
        return fallback
245
+
246
def llm_refine_from_error(previous_code: str, error_message: str, original_user_prompt: str) -> str:
    """Ask Gemini to repair code that crashed in Manim, using the real CLI log."""
    if not client:
        return previous_code or DEFAULT_SCENE
    fallback = previous_code or DEFAULT_SCENE
    try:
        # Only the tail of the log matters: the traceback lives at the end.
        trimmed = error_message[-4000:] if error_message else ""
        user_prompt = f"""Original user prompt:
{original_user_prompt}

The following Manim CE (0.19.x) code failed to render. Fix it.

Current code:
{previous_code}

Error / stack trace (tail):
{trimmed}

Requirements:
- Fix the bug while preserving the math logic and planned animations.
- Keep exactly one class AutoScene(Scene).
- Keep the CAPTURE POLICY and ensure # CAPTURE_POINT is at the final steady layout.
- Scan for nonexistent methods (e.g., `.to_center`) or invalid kwargs (e.g., `vertex=` on RightAngle) and replace with valid Manim CE 0.19 API.
- Prefer `.center()`/`.move_to(ORIGIN)`, and `.move_to()`, `.align_to()`, `.to_edge()`, `.next_to()` for layout.
- Return ONLY the corrected Python code (no backticks).
"""
        repaired = _clean_code(gemini_call(system=SYSTEM_PROMPT, contents=user_prompt))
        return repaired if "class AutoScene" in repaired else fallback
    except Exception:
        print("LLM refine error:", file=sys.stderr)
        traceback.print_exc()
        return fallback
280
+
281
def llm_visual_refine_from_image(original_user_prompt: str, previous_code: str, png_path: Optional[Path]) -> str:
    """
    Use the screenshot to request layout/legibility/placement fixes.
    Includes the original prompt and current code, and asks for minimal edits.

    Best-effort: any failure (no client, missing PNG, upload or LLM error,
    or a reply without `class AutoScene`) returns previous_code unchanged.
    """
    if not client or not png_path or not png_path.exists():
        return previous_code
    try:
        # Upload the steady-state frame so the model can see the actual layout.
        file_ref = _upload_image_to_gemini(png_path)
        if not file_ref:
            return previous_code

        visual_prompt = f"""You are refining a Manim CE (0.19.x) scene based on its steady-state screenshot.
Original user prompt:
{original_user_prompt}

Current Manim code:
{previous_code}

Tasks (optimize for readability and visual quality without changing the math meaning):
- Fix layout issues (overlaps, cramped margins, alignment, consistent scaling).
- Improve text legibility (minimum size ~32 px at 854x480, adequate contrast).
- Ensure all intended elements are visible at the capture point.
- Keep animation semantics as-is unless they're obviously broken.
- Keep exactly one class AutoScene(Scene).
- Preserve the CAPTURE POLICY and place `# CAPTURE_POINT` at the final steady layout with self.wait(0.75) and NO outro after that.
Return ONLY the revised Python code (no backticks).
"""

        # Multimodal call: image reference first, then the textual instructions.
        response_text = gemini_call(system=SYSTEM_PROMPT, contents=[file_ref, visual_prompt])
        code = _clean_code(response_text)
        if "class AutoScene" not in code:
            return previous_code
        return code
    except Exception:
        print("LLM visual refine error:", file=sys.stderr)
        traceback.print_exc()
        return previous_code
319
+
320
def _refine_until_render(code: str, last_err: str, user_prompt: str, max_attempts: int):
    """Repeatedly ask the LLM to fix `code` from `last_err` and re-render.

    Returns (code, mp4_bytes, png_path) on the first successful render;
    re-raises the final failure once `max_attempts` is exhausted.
    """
    attempts = 0
    while True:
        attempts += 1
        refined = llm_refine_from_error(
            previous_code=code, error_message=last_err, original_user_prompt=user_prompt
        )
        try:
            mp4_bytes, png_path = _run_manim(refined, run_id=f"iter_err_{attempts}")
            return refined, mp4_bytes, png_path
        except RenderError as err:
            # Prefer the fresh CLI log; keep the previous one if it was empty.
            last_err = err.log or last_err
            if attempts >= max_attempts:
                raise
        except Exception:
            last_err = traceback.format_exc()
            if attempts >= max_attempts:
                raise

def refine_loop(user_prompt: str, max_error_refines: int = 3, do_visual_refine: bool = True) -> bytes:
    """
    Generate → render; on error, refine up to N times from Manim traceback → re-render.
    If first render succeeds and do_visual_refine==True, run an image-based refinement
    using the saved steady-state PNG, then re-render. Fallback to the best successful MP4.

    The two formerly copy-pasted retry loops are unified in _refine_until_render,
    and the generic-exception path now re-raises when max_error_refines <= 0
    (previously it fell through and crashed on unbound mp4_bytes/png_path).
    """
    # 1) initial generation (capture-aware)
    code = llm_generate_manim_code(user_prompt)

    # 2) render attempt, with error-driven refinement on failure
    try:
        mp4_bytes, png_path = _run_manim(code, run_id="iter0")
    except RenderError as e:
        print("Render failed (iter0), attempting error-based refinement...", file=sys.stderr)
        if max_error_refines <= 0:
            raise
        code, mp4_bytes, png_path = _refine_until_render(
            code, e.log or "", user_prompt, max_error_refines
        )
    except Exception:
        print("Unexpected error path; refining from Python traceback...", file=sys.stderr)
        if max_error_refines <= 0:
            raise
        code, mp4_bytes, png_path = _refine_until_render(
            code, traceback.format_exc(), user_prompt, max_error_refines
        )

    # 3) optional visual refinement pass (best-effort)
    if do_visual_refine and png_path and png_path.exists():
        refined2 = llm_visual_refine_from_image(
            original_user_prompt=user_prompt, previous_code=code, png_path=png_path
        )
        if refined2.strip() != code.strip():
            try:
                mp4_bytes2, _ = _run_manim(refined2, run_id="iter2")
                return mp4_bytes2
            except Exception:
                print("Visual refine render failed; returning best known render.", file=sys.stderr)
                return mp4_bytes

    return mp4_bytes
381
+
382
# ---------------- API ----------------
class PromptIn(BaseModel):
    """Request body for /generate-code and /generate-and-render."""
    # Free-text description of the scene the user wants generated/rendered.
    prompt: str
385
+
386
@app.get("/")
def health():
    """Liveness probe: reports the configured model and whether a key is set."""
    return {
        "ok": True,
        "model": MODEL,
        "has_gemini": bool(API_KEY),
    }
389
+
390
@app.post("/generate-code")
def generate_code(inp: PromptIn):
    """Return ONLY the generated Manim Python code (no rendering)."""
    cleaned = (inp.prompt or "").strip()
    if not cleaned:
        raise HTTPException(400, "Missing prompt")
    return {"code": llm_generate_manim_code(cleaned)}
397
+
398
@app.post("/generate-and-render")
def generate_and_render(inp: PromptIn):
    """Generate a scene for the prompt, render it (with refinement), return MP4.

    Returns 400 for an empty prompt, 500 when refinement ultimately fails.
    """
    if not inp.prompt or not inp.prompt.strip():
        raise HTTPException(400, "Missing prompt")
    try:
        mp4 = refine_loop(inp.prompt.strip(), max_error_refines=3, do_visual_refine=True)
    except Exception as exc:
        # Previously the real failure was silently discarded; log it and
        # chain it so server logs / debug tooling can see the cause.
        print("generate-and-render failed:", file=sys.stderr)
        traceback.print_exc()
        raise HTTPException(500, "Failed to produce video after refinement") from exc
    return Response(
        content=mp4,
        media_type="video/mp4",
        headers={"Content-Disposition": 'inline; filename="result.mp4"'}
    )