"""Recap MI300X premium-mode backend. Runs on the AMD Developer Cloud droplet.

Deploy:
    cd backend
    pip install -r requirements.txt
    # ROCm torch installed separately on the droplet image.
    cd ..  # back to the parent directory so the `backend` package is importable
    uvicorn backend.server:app --host 0.0.0.0 --port 8080

Then expose to the public Space via ngrok / cloudflared and set
RECAP_MI300X_URL in the Space's env to the public URL.
"""
|
|
from __future__ import annotations

import os
from contextlib import asynccontextmanager

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field

from backend import serve
|
|
# Eager loading is on by default; it is disabled by setting RECAP_EAGER_LOAD
# to any value other than the exact string "1".
EAGER_LOAD = os.environ.get("RECAP_EAGER_LOAD", "1") == "1"
|
|
|
|
def _try_eager_load() -> None:
    """Call ``serve._ensure_loaded()`` and report, but do not propagate, failures.

    Any exception is printed (flushed immediately) so that the server still
    starts when the eager load fails.
    """
    # NOTE(review): _ensure_loaded presumably loads the model weights —
    # confirm in backend.serve.
    try:
        serve._ensure_loaded()
    except Exception as exc:
        print(f"[server] eager load failed: {exc}", flush=True)


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan context for the FastAPI app.

    Before yielding control to the application, optionally performs the eager
    load when ``EAGER_LOAD`` is true. Nothing is done after the yield.
    """
    if EAGER_LOAD:
        _try_eager_load()
    yield
|
|
|
|
# ASGI application object; this is the `app` that the uvicorn command in the
# module docstring points at. Startup work is delegated to `lifespan`.
app = FastAPI(
    title="Recap Premium Backend",
    version="0.1.0",
    lifespan=lifespan,
)
|
|
|
|
class GenRequest(BaseModel):
    """Request body accepted by the ``/medgemma`` and ``/qwen`` endpoints.

    ``max_new_tokens`` must be strictly positive: a zero or negative value is
    rejected during request validation instead of being forwarded to the
    serve helpers, where it would only surface as a generic HTTP 500.
    """

    # System prompt; passed as the first argument to the serve helper.
    system: str
    # User prompt; passed as the second argument to the serve helper.
    user: str
    # Generation budget passed as the third argument; defaults to 384.
    max_new_tokens: int = Field(default=384, gt=0)
|
|
|
|
class GenResponse(BaseModel):
    """Response body returned by the ``/medgemma`` and ``/qwen`` endpoints."""

    # The string returned by the corresponding serve helper.
    text: str
|
|
|
|
@app.post("/medgemma", response_model=GenResponse)
def medgemma(req: GenRequest) -> GenResponse:
    """Handle ``POST /medgemma`` by delegating to ``serve.medgemma_extract``.

    Args:
        req: Parsed request body carrying the system prompt, the user prompt
            and the ``max_new_tokens`` value.

    Returns:
        A ``GenResponse`` wrapping the text returned by the serve helper.

    Raises:
        HTTPException: With status 500 and ``str(exc)`` as the detail when
            the serve helper raises any ``Exception``.
    """
    try:
        generated = serve.medgemma_extract(
            req.system,
            req.user,
            req.max_new_tokens,
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    else:
        return GenResponse(text=generated)
|
|
|
|
@app.post("/qwen", response_model=GenResponse)
def qwen(req: GenRequest) -> GenResponse:
    """Handle ``POST /qwen`` by delegating to ``serve.qwen_synthesize``.

    Args:
        req: Parsed request body carrying the system prompt, the user prompt
            and the ``max_new_tokens`` value.

    Returns:
        A ``GenResponse`` wrapping the text returned by the serve helper.

    Raises:
        HTTPException: With status 500 and ``str(exc)`` as the detail when
            the serve helper raises any ``Exception``.
    """
    try:
        generated = serve.qwen_synthesize(
            req.system,
            req.user,
            req.max_new_tokens,
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    else:
        return GenResponse(text=generated)
|
|
|
|
@app.get("/health")
def health() -> dict:
    """Handle ``GET /health`` with a status snapshot of the backend.

    Returns:
        A dict with ``ok`` (always ``True``), ``loaded`` (the ``"loaded"``
        entry of ``serve._state``, ``False`` when absent), ``memory`` (the
        result of ``serve.memory_stats()``) and ``models`` (the two model
        identifiers exposed by ``serve``).
    """
    is_loaded = serve._state.get("loaded", False)
    memory = serve.memory_stats()
    model_ids = {
        "medgemma_id": serve.MEDGEMMA_ID,
        "qwen_id": serve.QWEN_ID,
    }
    return {"ok": True, "loaded": is_loaded, "memory": memory, "models": model_ids}
|
|