Spaces:

srishtichugh
/

blog2code-api

Sleeping

File size: 4,872 Bytes

import os, sys, shutil, tempfile, zipfile, asyncio, subprocess
from pathlib import Path
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse

# Directory containing this file; used as the working directory for pipeline
# subprocesses. The stage scripts are looked up under CODES_DIR.
REPO_ROOT = Path(__file__).parent.resolve()
CODES_DIR = REPO_ROOT / "codes"

app = FastAPI(title="Blog2Code API", version="1.0.0")

# ALLOWED_ORIGINS is a comma-separated list of origins (default "*").
# Entries are stripped and empty ones dropped so values such as
# "https://a.com, https://b.com" or a trailing comma do not produce origins
# like " https://b.com" or "" that can never match a request's Origin header.
ALLOWED_ORIGINS = [
    origin.strip()
    for origin in os.getenv("ALLOWED_ORIGINS", "*").split(",")
    if origin.strip()
]
app.add_middleware(
    CORSMiddleware,
    allow_origins=ALLOWED_ORIGINS,
    allow_methods=["*"],
    allow_headers=["*"],
)

def _run(script: str, args: list, extra_env: dict) -> None:
    """Execute a pipeline script from CODES_DIR with the current interpreter.

    The script runs with REPO_ROOT as its working directory and inherits the
    current environment, overlaid with ``extra_env``. Its stdout and stderr
    are captured as text.

    Args:
        script: File name of the script inside CODES_DIR.
        args: Command-line arguments appended after the script path.
        extra_env: Environment variables that override the inherited ones.

    Raises:
        RuntimeError: If the script exits with a non-zero status. The message
            carries the exit code and the last 2000 characters of each
            captured stream.
    """
    script_path = CODES_DIR / script
    command = [sys.executable, str(script_path)] + args

    # Values in extra_env win over the inherited environment.
    child_env = dict(os.environ)
    child_env.update(extra_env)

    completed = subprocess.run(
        command,
        cwd=str(REPO_ROOT),
        env=child_env,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    if completed.returncode == 0:
        return

    # Only the tail of each stream is reported to keep the message bounded.
    stdout_tail = completed.stdout[-2000:]
    stderr_tail = completed.stderr[-2000:]
    raise RuntimeError(
        f"{script} failed (exit {completed.returncode}):\n"
        f"STDOUT: {stdout_tail}\n"
        f"STDERR: {stderr_tail}"
    )

@app.get("/health")
def health():
    """Liveness endpoint: always reports an ``ok`` status."""
    payload = {"status": "ok"}
    return payload

@app.post("/generate")
async def generate(
    url:  str        = Form(None),
    file: UploadFile = File(None),
):
    """Run the blog-to-code pipeline and return the generated repo as a zip.

    The request must supply either a ``url`` form field or an uploaded
    ``file``; when both are present the uploaded file is used. The pipeline
    stages (scripts under CODES_DIR) run sequentially in a worker thread
    inside a per-request temporary directory, which is removed after the
    response has been sent or as soon as an error occurs.

    The provider and model passed to the stages come from the ``PROVIDER``
    and ``MODEL`` environment variables.

    Args:
        url: Address of the blog post to process.
        file: Uploaded blog document; its extension is preserved
            (``.md`` when it has none).

    Returns:
        A ``FileResponse`` streaming ``generated-repo.zip``.

    Raises:
        HTTPException: 400 when neither ``url`` nor ``file`` is provided;
            500 when a stage fails or the pipeline yields no output files.
    """
    # Function-scope import so this handler does not depend on edits to the
    # module's import block.
    from fastapi import BackgroundTasks

    # A whitespace-only URL is treated the same as a missing one.
    url = (url or "").strip()
    if not url and not file:
        raise HTTPException(400, "Provide either 'url' or 'file'.")

    tmp = Path(tempfile.mkdtemp())

    try:
        # Created inside the try so a failure here still cleans up tmp.
        data_dir   = tmp / "data"
        output_dir = tmp / "output"
        data_dir.mkdir(parents=True)
        output_dir.mkdir(parents=True)

        if file:
            # filename may be absent on the upload; fall back to ".md".
            suffix     = Path(file.filename or "").suffix or ".md"
            input_path = tmp / f"blog{suffix}"
            input_path.write_bytes(await file.read())
            source_args = ["--input_path", str(input_path)]
        else:
            source_args = ["--url", url]

        provider = os.getenv("PROVIDER", "gemma")
        # Default model for NVIDIA/Llama — overridable via MODEL env var
        default_model = "meta/llama-3.3-70b-instruct"
        model = os.getenv("MODEL", default_model)
        extra_env = {"MODEL": model}

        blog_json = data_dir / "blog_data.json"

        def run_pipeline():
            # Stage 0 – parse blog (no LLM, no --model needed)
            _run("0_blog_process.py",
                 source_args + ["--output_json_path", str(blog_json)],
                 extra_env)

            if blog_json.exists():
                blog_json_path = blog_json
            else:
                # Fall back to any JSON the stage wrote; sorted so the
                # choice is deterministic when several files exist.
                candidates = sorted(data_dir.glob("*.json"))
                if not candidates:
                    raise RuntimeError("Stage 0: no JSON output found.")
                blog_json_path = candidates[0]

            # Stage 1 – planning
            _run("1_planning.py", [
                "--blog_json_path", str(blog_json_path),
                "--output_dir",     str(data_dir),
                "--provider",       provider,
                "--content_type",   "blog",
                "--model",          model,
            ], extra_env)

            # Stage 1.1 – extract config (no LLM, no --model needed)
            _run("1_1_extract_config.py", [
                "--output_dir", str(data_dir),
            ], extra_env)

            config_yaml = data_dir / "planning_config.yaml"
            if not config_yaml.exists():
                raise RuntimeError("Stage 1.1: planning_config.yaml not found.")

            # Stage 2 – analysis
            _run("2_analyzing.py", [
                "--pdf_json_path", str(blog_json_path),
                "--output_dir",    str(data_dir),
                "--provider",      provider,
                "--model",         model,
            ], extra_env)

            # Stage 3 – code generation
            _run("3_coding.py", [
                "--pdf_json_path",   str(blog_json_path),
                "--output_dir",      str(data_dir),
                "--output_repo_dir", str(output_dir),
                "--provider",        provider,
                "--model",           model,
            ], extra_env)

        # The stages are blocking subprocess calls, so run them in the
        # default executor to keep the event loop responsive.
        await asyncio.get_running_loop().run_in_executor(None, run_pipeline)

        zip_path = tmp / "repo.zip"
        files = [f for f in output_dir.rglob("*") if f.is_file()]
        if not files:
            raise HTTPException(500, "Pipeline produced no output files.")

        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
            for f in files:
                zf.write(f, f.relative_to(output_dir))

        # The zip must outlive this function because FileResponse streams it
        # afterwards, so the temp dir is removed by a background task that
        # runs once the response has been sent.
        cleanup = BackgroundTasks()
        cleanup.add_task(shutil.rmtree, tmp, ignore_errors=True)

        return FileResponse(
            path=str(zip_path),
            media_type="application/zip",
            filename="generated-repo.zip",
            background=cleanup,
        )

    except HTTPException:
        shutil.rmtree(tmp, ignore_errors=True)
        raise
    except Exception as exc:
        shutil.rmtree(tmp, ignore_errors=True)
        raise HTTPException(500, str(exc)) from exc