import os, sys, shutil, tempfile, zipfile, asyncio, subprocess from pathlib import Path from fastapi import FastAPI, UploadFile, File, Form, HTTPException from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse REPO_ROOT = Path(__file__).parent.resolve() CODES_DIR = REPO_ROOT / "codes" app = FastAPI(title="Blog2Code API", version="1.0.0") ALLOWED_ORIGINS = os.getenv("ALLOWED_ORIGINS", "*").split(",") app.add_middleware( CORSMiddleware, allow_origins=ALLOWED_ORIGINS, allow_methods=["*"], allow_headers=["*"], ) def _run(script: str, args: list, extra_env: dict) -> None: cmd = [sys.executable, str(CODES_DIR / script)] + args result = subprocess.run( cmd, cwd=str(REPO_ROOT), env={**os.environ, **extra_env}, capture_output=True, text=True, ) if result.returncode != 0: raise RuntimeError( f"{script} failed (exit {result.returncode}):\n" f"STDOUT: {result.stdout[-2000:]}\n" f"STDERR: {result.stderr[-2000:]}" ) @app.get("/health") def health(): return {"status": "ok"} @app.post("/generate") async def generate( url: str = Form(None), file: UploadFile = File(None), ): if not url and not file: raise HTTPException(400, "Provide either 'url' or 'file'.") tmp = Path(tempfile.mkdtemp()) data_dir = tmp / "data" output_dir = tmp / "output" data_dir.mkdir(parents=True) output_dir.mkdir(parents=True) try: if file: suffix = Path(file.filename).suffix or ".md" input_path = tmp / f"blog{suffix}" input_path.write_bytes(await file.read()) source_args = ["--input_path", str(input_path)] else: source_args = ["--url", url.strip()] provider = os.getenv("PROVIDER", "gemma") # Default model for NVIDIA/Llama — overridable via MODEL env var default_model = "meta/llama-3.3-70b-instruct" model = os.getenv("MODEL", default_model) extra_env = {"MODEL": model} blog_json = data_dir / "blog_data.json" def run_pipeline(): # Stage 0 – parse blog (no LLM, no --model needed) _run("0_blog_process.py", source_args + ["--output_json_path", str(blog_json)], extra_env) if not blog_json.exists(): candidates = list(data_dir.glob("*.json")) if not candidates: raise RuntimeError("Stage 0: no JSON output found.") blog_json_path = candidates[0] else: blog_json_path = blog_json # Stage 1 – planning _run("1_planning.py", [ "--blog_json_path", str(blog_json_path), "--output_dir", str(data_dir), "--provider", provider, "--content_type", "blog", "--model", model, ], extra_env) # Stage 1.1 – extract config (no LLM, no --model needed) _run("1_1_extract_config.py", [ "--output_dir", str(data_dir), ], extra_env) config_yaml = data_dir / "planning_config.yaml" if not config_yaml.exists(): raise RuntimeError("Stage 1.1: planning_config.yaml not found.") # Stage 2 – analysis _run("2_analyzing.py", [ "--pdf_json_path", str(blog_json_path), "--output_dir", str(data_dir), "--provider", provider, "--model", model, ], extra_env) # Stage 3 – code generation _run("3_coding.py", [ "--pdf_json_path", str(blog_json_path), "--output_dir", str(data_dir), "--output_repo_dir", str(output_dir), "--provider", provider, "--model", model, ], extra_env) await asyncio.get_event_loop().run_in_executor(None, run_pipeline) zip_path = tmp / "repo.zip" files = [f for f in output_dir.rglob("*") if f.is_file()] if not files: raise HTTPException(500, "Pipeline produced no output files.") with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: for f in files: zf.write(f, f.relative_to(output_dir)) return FileResponse( path=str(zip_path), media_type="application/zip", filename="generated-repo.zip", ) except HTTPException: shutil.rmtree(tmp, ignore_errors=True) raise except Exception as exc: shutil.rmtree(tmp, ignore_errors=True) raise HTTPException(500, str(exc)) from exc