""" Sakhi API — FastAPI backend for React frontend. Endpoints: POST /api/process-audio — Upload audio file → transcript + form + danger signs POST /api/process-text — Submit transcript text → form + danger signs GET /api/health — Health check GET /api/examples — List example transcripts Runs on port 8000. React frontend runs on port 3000. """ import os import json import time import tempfile os.environ["TORCH_COMPILE_DISABLE"] = "1" os.environ["TORCHDYNAMO_DISABLE"] = "1" from fastapi import FastAPI, UploadFile, File, Form, Request from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse from fastapi.staticfiles import StaticFiles from pydantic import BaseModel from typing import Optional # Import pipeline functions from app.py from app import ( transcribe_audio, extract_form, extract_danger_signs, extract_all, detect_visit_type, init_schemas, validate_form_output, postprocess_transcript, translate_to_english, warm_whisper, WHISPER_MODEL, ) app = FastAPI(title="Sakhi API", version="1.0.0") # CORS for React dev server app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"], ) # Startup: load schemas + pre-warm Whisper so the Space only reports ready # when the audio path is hot. Whisper load is wrapped in try/except — if the # eager load fails (no GPU, network blip), fall back to lazy loading on # first audio request instead of blocking the whole boot. @app.on_event("startup") def startup(): init_schemas() try: warm_whisper() except Exception as e: print(f"[startup] WARN: Whisper pre-warm failed ({e!r}); falling back to lazy load") # ── Models ── class PatientMetadata(BaseModel): """ASHA-entered patient identifier fields. All optional — pipeline still runs without them. When provided, override LLM-extracted name/age/sex in the form (see apply_metadata in app.py).""" patient_name: Optional[str] = None patient_age: Optional[int] = None age_unit: Optional[str] = None # "years" | "months" patient_sex: Optional[str] = None # "male" | "female" patient_mobile: Optional[str] = None asha_id: Optional[str] = None visit_date: Optional[str] = None # ISO date string class TextRequest(BaseModel): transcript: str visit_type: Optional[str] = "auto" metadata: Optional[PatientMetadata] = None class TranslateRequest(BaseModel): text: str class ExtractionResult(BaseModel): visit_type: str form: Optional[dict] = None danger: Optional[dict] = None metadata: Optional[dict] = None transcript: Optional[str] = None timing: dict = {} tool_calls: Optional[list] = None error: Optional[str] = None def _metadata_dict(meta): """Coerce a PatientMetadata or None into a dict (or None if empty).""" if meta is None: return None d = meta.dict() if hasattr(meta, "dict") else dict(meta) # Drop all-None entries so apply_metadata short-circuits cleanly return {k: v for k, v in d.items() if v is not None and v != ""} or None # ── Endpoints ── @app.get("/api/health") def health(): return { "status": "ok", "model": os.environ.get("OLLAMA_MODEL", "gemma4:e4b-it-q4_K_M"), "whisper": WHISPER_MODEL, } @app.get("/api/examples") def examples(): from app import EXAMPLE_TRANSCRIPTS return [ {"label": ex[0], "transcript": ex[1], "default": i == 1} for i, ex in enumerate(EXAMPLE_TRANSCRIPTS) ] # index 1 = "ANC Visit — Preeclampsia (DANGER)" — best for demo (has danger signs) @app.post("/api/translate") def translate(req: TranslateRequest): """Hindi / Hinglish → English. Uses the same Gemma model already in VRAM, so the cost is one extra ~3-5s LLM call. Reviewer-facing convenience; never invoked from the main extraction path.""" t0 = time.time() english = translate_to_english(req.text) return {"english": english, "time_s": round(time.time() - t0, 2)} _DEMO_AUDIO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "demo_audio") @app.get("/api/audio-examples") def audio_examples(): """Curated voice clips bundled into the image. Returns playable URLs relative to the Space origin, so the frontend can both