Spaces:

lablab-ai-amd-developer-hackathon
/

recap

Running

App Files Files Community

afif-ahmed committed on 2 days ago

Commit

ba54ea9

verified ·

1 Parent(s): 0a2effe

deploy: sync from fe7cce1

Browse files

Files changed (50) hide show

.gitignore +45 -0
Dockerfile +25 -0
README.md +125 -3
app.py +143 -0
backend/__init__.py +0 -0
backend/requirements.txt +13 -0
backend/serve.py +111 -0
backend/server.py +79 -0
pyproject.toml +13 -0
requirements.txt +12 -0
scripts/build_hf_readme.sh +15 -0
scripts/deploy_hf_space.py +75 -0
scripts/deploy_hf_space.sh +30 -0
scripts/generate_synthea_case.sh +49 -0
space/header.md +11 -0
src/recap/__init__.py +1 -0
src/recap/cases.py +93 -0
src/recap/config.py +23 -0
src/recap/demo_patient.py +107 -0
src/recap/inference/__init__.py +3 -0
src/recap/inference/gateway.py +63 -0
src/recap/inference/mi300x_client.py +48 -0
src/recap/inference/mock.py +22 -0
src/recap/ingestion/__init__.py +3 -0
src/recap/ingestion/fhir.py +225 -0
src/recap/ingestion/image.py +38 -0
src/recap/ingestion/pdf.py +22 -0
src/recap/models.py +47 -0
src/recap/prompts.py +25 -0
src/recap/reasoner.py +45 -0
src/recap/retrieval.py +53 -0
src/recap/timeline.py +17 -0
src/recap/ui/__init__.py +3 -0
src/recap/ui/timeline_view.py +77 -0
static/app.jsx +859 -0
static/index.html +40 -0
tests/__init__.py +0 -0
tests/fixtures/_make_tiny_pdf.py +19 -0
tests/fixtures/tiny_fhir.json +79 -0
tests/fixtures/tiny_lab.pdf +68 -0
tests/test_cases.py +129 -0
tests/test_inference_gateway.py +63 -0
tests/test_ingestion_fhir.py +75 -0
tests/test_ingestion_image.py +35 -0
tests/test_ingestion_pdf.py +19 -0
tests/test_mi300x_client.py +85 -0
tests/test_models.py +34 -0
tests/test_reasoner.py +63 -0
tests/test_retrieval.py +41 -0
tests/test_timeline.py +33 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,45 @@

+docs/
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+.venv/
+venv/
+env/
+.env
+.env.local
+# Jupyter
+.ipynb_checkpoints/
+# Hugging Face / model caches
+.cache/
+*.pt
+*.bin
+*.safetensors
+hf_cache/
+# HF Space deploy artifact — generated by scripts/build_hf_readme.sh
+space/README.md
+# Data — generated bundles and Synthea outputs are not tracked.
+# Only manifest.json files and curated images are kept in the repo.
+data/cases/*/fhir.json
+data/cases/*/synthea-output/
+data/cases/*/docs/
+data/raw/
+data/cache/
+*.dcm
+*.svs
+# OS
+.DS_Store
+Thumbs.db
+# IDE
+.vscode/
+.idea/
+*.swp

Dockerfile ADDED Viewed

	@@ -0,0 +1,25 @@

+FROM python:3.11-slim
+# System deps for pdf/image parsing.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      libxml2-dev libxslt1-dev \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# HF Spaces runs as a non-root user — make sure caches go somewhere writable.
+ENV HF_HOME=/app/.cache/huggingface \
+    TRANSFORMERS_CACHE=/app/.cache/huggingface \
+    PYTHONUNBUFFERED=1 \
+    PYTHONPATH=/app/src
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+# /app is created and populated as root, so the cache directory has to be
+# created and opened up explicitly; otherwise the non-root runtime user
+# cannot write model/tokenizer caches into it.
+RUN mkdir -p "${HF_HOME}" && chmod -R a+rwX /app/.cache
+# Default to mock backend on HF until MI300X tunnel is configured via env var.
+ENV RECAP_BACKEND=mock
+EXPOSE 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +1,132 @@
 ---
 title: Recap
-emoji: 👀
-colorFrom: purple
 colorTo: purple
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Recap
+emoji: 🩺
+colorFrom: blue
 colorTo: purple
 sdk: docker
+app_port: 7860
 pinned: false
+license: mit
+short_description: Recap reads the whole chart so you don't have to.
 ---
+---
+title: Recap
+emoji: 🩺
+colorFrom: blue
+colorTo: purple
+sdk: docker
+app_port: 7860
+pinned: false
+license: mit
+short_description: Recap reads the whole chart so you don't have to.
+---
+# Recap
+> *Reads the whole chart so you don't have to.*
+Drop in a patient's scattered medical records — lab PDFs, scans, photos, discharge summaries — and Recap gives you back two things:
+1. **A chronological timeline** of every event, color-coded by type
+2. **A chat box** where you can ask plain-language questions, with every answer cited to the exact source page or lab row
+No diagnosis. No treatment. Just *"read everything and answer questions about what's been read."*
+## The hackathon angle
+Recap is built for the [AMD x LabLab.ai Developer Hackathon](https://lablab.ai/ai-hackathons/amd-developer) (May 2026). The technical headline:
+> **The only GPU with enough memory to keep a patient's whole record co-resident with the reasoner.**
+The premium-mode backend runs **MedGemma-27B-MM** (medical multimodal specialist) and **Qwen-32B** (reasoning + multilingual orchestrator) **co-resident on a single AMD MI300X (192 GB HBM3)** along with cached imaging-foundation embeddings and a 128 K-token KV cache. Impossible on H100/A100 80 GB cards.
+The public Hugging Face Space runs a lite version (MedGemma-4B-MM on ZeroGPU H200) so anyone can try it.
+## Architecture
+```
+            ┌────────────── HF Space (Gradio) ──────────────┐
+            │  3 preloaded showcase patients                │
+            │  Plotly timeline + chat with citations        │
+            └────────────────┬─────────────────┬────────────┘
+                             │                 │
+                  ┌──────────┴──────┐ ┌────────┴───────────┐
+                  │ ZeroGPU (H200)  │ │ AMD MI300X (192GB) │
+                  │ MedGemma-4B-MM  │ │ MedGemma-27B-MM    │
+                  │ Always-on lite  │ │ + Qwen-32B reasoner│
+                  │                 │ │ + foundation cache │
+                  └─────────────────┘ └────────────────────┘
+```
+## Project structure
+```
+src/recap/
+├── config.py             # env-driven config
+├── models.py             # Event, Citation, Patient, Answer
+├── ingestion/
+│   ├── fhir.py           # Synthea bundles → events
+│   ├── pdf.py            # lab PDFs → page records
+│   └── image.py          # medical images → events
+├── timeline.py           # chronological event view (TBD)
+├── retrieval.py          # BM25 over events (TBD)
+├── inference/            # gateway routing zerogpu vs mi300x (TBD)
+├── reasoner.py           # two-stage MedGemma → Qwen (TBD)
+└── ui/                   # Gradio components (TBD)
+backend/                  # FastAPI on MI300X (TBD)
+data/cases/               # showcase patients (Synthea + curated images)
+scripts/                  # generators + smoke tests
+space/                    # HF Space deploy artifacts
+tests/                    # 13 passing unit tests
+```
+## Showcase cases
+Built from [Synthea](https://github.com/synthetichealth/synthea) (Apache 2.0 synthetic patient generator) paired with condition-matched public imaging:
+- **Sarah, 67** — kidney decline over 8 years (tests time-axis questions)
+- **Marcus, 54** — suspicious lump → cancer journey (tests multimodal grounding)
+- **Aisha, 29** — immigrant patient with foreign-language records (tests Qwen multilingual)
+## Running locally
+```bash
+uv venv .venv --python 3.11
+uv pip install --python .venv/bin/python -r requirements.txt
+.venv/bin/python -m pytest tests/ -v       # 13 passing
+.venv/bin/python app.py                    # local FastAPI + React UI at :7860
+```
+Environment variables (all prefixed `RECAP_*`):
+| Var | Default | Meaning |
+|---|---|---|
+| `RECAP_BACKEND` | `zerogpu` | One of `zerogpu`, `mi300x`, `mock` |
+| `RECAP_MI300X_URL` | — | Premium-mode backend URL (set when the MI300X box is up) |
+| `RECAP_MEDGEMMA_LITE` | `google/medgemma-1.5-4b-it` | Public-Space model |
+| `RECAP_MEDGEMMA_PREMIUM` | `google/medgemma-27b-it` | MI300X model |
+| `RECAP_QWEN` | `Qwen/Qwen3.6-27B` | Reasoner model — latest dense Qwen (Apr 2026), matched 27B class to MedGemma. Fallbacks: `Qwen/Qwen3-32B`, `Qwen/Qwen3-14B`, `Qwen/Qwen3.6-35B-A3B` |
+## Hugging Face Space deployment
+The HF Space requires YAML frontmatter at the top of its README, which GitHub renders as an ugly metadata table. To keep the GitHub README clean and the HF README correct, the frontmatter lives in `space/header.md` and the deploy script assembles a combined `space/README.md` before pushing to the HF Space remote:
+```bash
+./scripts/build_hf_readme.sh                # writes space/README.md
+# then push space/README.md to the HF Space repo
+```
+## Tech stack
+- **Models:** Google MedGemma 1.5 (4B-MM lite, 27B-MM premium), Alibaba **Qwen 3.6-27B** (latest, released 2026-04-22)
+- **Serving:** vLLM-on-ROCm on MI300X, HF Transformers + ZeroGPU `@spaces.GPU` on the Space
+- **Frontend:** Gradio 4.44, Plotly
+- **Data:** Synthea synthetic FHIR + public CC0 imaging, packaged as an HF Dataset
+## Disclaimer
+**Not for clinical use.** Demo only. All patients are synthetic — no real PHI is touched, stored, or processed. The model card for MedGemma explicitly forbids unmodified clinical deployment.
+## License
+MIT (this repo). Upstream models retain their respective licenses (MedGemma → Google's Health AI Developer Foundations terms; Qwen → Tongyi Qianwen License).

app.py ADDED Viewed

	@@ -0,0 +1,143 @@

+"""Recap — FastAPI app entry point. Serves the React UI + JSON inference API.
+GET  /                  → static index.html (React via CDN, Babel-compiled JSX in browser)
+GET  /static/*          → static assets (app.jsx, css)
+GET  /api/patients      → list of patients with full event timelines
+POST /api/answer        → run the inference gateway and return a cited answer
+GET  /api/health        → liveness + backend selection
+"""
+from pathlib import Path
+from fastapi import FastAPI
+from fastapi.responses import FileResponse, JSONResponse
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
+from recap.cases import load_case
+from recap.config import load as load_config
+from recap.demo_patient import build_demo_patient
+from recap.inference import answer as answer_question
+from recap.models import Patient
+# Environment-driven settings, read once at import time.
+CFG = load_config()
+# Directory containing this file; the static assets live in its "static" subfolder.
+ROOT = Path(__file__).parent
+STATIC_DIR = ROOT / "static"
+def _discover_cases() -> dict[str, Patient]:
+    """Load every case directory under ``CFG.cases_dir`` that has a manifest.
+    Patients are keyed by ``Patient.id`` — the id that ``/api/patients`` returns
+    and ``/api/answer`` looks up — so a case whose manifest id differs from its
+    directory name stays reachable. If two cases share an id, the one whose
+    directory sorts later wins. Falls back to the in-memory demo patient when
+    nothing could be loaded.
+    """
+    cases: dict[str, Patient] = {}
+    cases_dir = Path(CFG.cases_dir)
+    if cases_dir.is_dir():
+        for d in sorted(cases_dir.iterdir()):
+            if not (d / "manifest.json").is_file():
+                continue
+            try:
+                patient = load_case(CFG.cases_dir, d.name)
+            except Exception as e:  # noqa: BLE001 — keep one bad case from breaking the whole API
+                print(f"[recap] failed to load case {d.name}: {e}")
+                continue
+            cases[patient.id] = patient
+    if not cases:
+        demo = build_demo_patient()
+        cases[demo.id] = demo
+    return cases
+# Cases are discovered once, at import time, and held in memory for all requests.
+PATIENTS: dict[str, Patient] = _discover_cases()
+app = FastAPI(title="Recap", version="0.1.0")
+# Expose everything in STATIC_DIR (app.jsx, css) under /static.
+app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
+class AnswerRequest(BaseModel):
+    # Request body for POST /api/answer.
+    patient_id: str  # looked up in PATIENTS; an unknown id is answered with a 404
+    question: str  # free-text question handed to the inference gateway
+@app.get("/")
+def index() -> FileResponse:
+    """Serve the single-page UI shell (static/index.html)."""
+    index_html = STATIC_DIR / "index.html"
+    return FileResponse(index_html)
+@app.get("/api/patients")
+def list_patients() -> JSONResponse:
+    """Serialize all loaded patients in a shape the React app expects.
+    Patients without an ``mrn`` attribute get a synthetic 7-digit placeholder
+    derived from a CRC32 of their id. Unlike the built-in ``hash()``, which is
+    salted per process for strings, the placeholder is identical across
+    restarts and workers.
+    """
+    import zlib  # local import: only the placeholder MRN needs it
+    out = []
+    for p in PATIENTS.values():
+        # Deterministic stand-in used only when the patient carries no MRN.
+        placeholder_mrn = f"MRN-{zlib.crc32(p.id.encode('utf-8')) % 9999999:07d}"
+        out.append({
+            "id": p.id,
+            "display_name": p.display_name,
+            "age": p.age,
+            "gender": p.gender,
+            "mrn": getattr(p, "mrn", None) or placeholder_mrn,
+            "summary": _patient_summary(p),
+            "hook": _patient_hook(p),
+            "tags": _patient_tags(p),
+            "events": [_event_to_dict(e) for e in p.events],
+        })
+    return JSONResponse(out)
+@app.post("/api/answer")
+def answer(req: AnswerRequest) -> JSONResponse:
+    """Answer a question about one patient; returns the text plus its citations.
+    Responds with a 404 JSON error when ``patient_id`` is not a loaded patient.
+    """
+    patient = PATIENTS.get(req.patient_id)
+    if patient is None:
+        return JSONResponse({"error": f"unknown patient {req.patient_id}"}, status_code=404)
+    result = answer_question(req.question, patient.events)
+    payload = {"text": result.text, "citations": []}
+    for cite in result.citations:
+        payload["citations"].append(
+            {"source_id": cite.source_id, "page": cite.page, "snippet": cite.snippet}
+        )
+    return JSONResponse(payload)
+@app.get("/api/health")
+def health() -> JSONResponse:
+    """Report liveness, the configured backend, and which patients are loaded."""
+    patient_ids = list(PATIENTS)
+    status = {
+        "ok": True,
+        "backend": CFG.backend,
+        "patient_count": len(patient_ids),
+        "patient_ids": patient_ids,
+    }
+    return JSONResponse(status)
+# ─── Helpers ───────────────────────────────────────────────────────────
+def _event_to_dict(e) -> dict:
+    """Flatten one event into the JSON-friendly dict sent to the UI.
+    The date is reduced to a calendar day (ISO format); ``page``, ``snippet``
+    and ``flag`` come from the event's metadata and are ``None`` when absent.
+    """
+    payload = {
+        "id": e.id,
+        "date": e.date.date().isoformat(),
+        "category": e.category,
+        "title": e.title,
+        "source": e.source,
+        "body": e.body,
+    }
+    meta = e.metadata
+    for key in ("page", "snippet", "flag"):
+        payload[key] = meta.get(key)
+    return payload
+def _patient_summary(p: Patient) -> str:
+    """Return a one-sentence dossier summary: event count and first–last year span.
+    Real cases are meant to override this via manifest.summary later.
+    """
+    if p.events:
+        event_years = [e.date.year for e in p.events]
+        span = f"{min(event_years)}–{max(event_years)}"
+    else:
+        span = "no record"
+    return f"{len(p.events)} clinical events on file from {span}."
+def _patient_hook(p: Patient) -> str:
+    """Return the patient's hook line; currently always the empty string."""
+    hook = ""
+    return hook
+def _patient_tags(p: Patient) -> list[str]:
+    """Return up to three diagnosis titles, most recent first, for the tag chips."""
+    newest_first = sorted(p.events, key=lambda ev: ev.date, reverse=True)
+    titles = [ev.title for ev in newest_first if ev.category == "diagnosis"]
+    return titles[:3]
+# Direct-run entry point (`python app.py`): serve on all interfaces, port 7860.
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)

backend/__init__.py ADDED Viewed

File without changes

backend/requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+# Backend deps for the MI300X premium-mode FastAPI server.
+# torch is installed separately on the droplet via the ROCm-flavored wheel:
+#   pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/rocm6.2
+#
+# Everything below is ROCm-agnostic.
+fastapi==0.115.5
+uvicorn[standard]==0.32.0
+transformers==4.46.0
+accelerate==1.1.1
+pydantic==2.9.2
+sentencepiece==0.2.0
+protobuf==5.28.3

backend/serve.py ADDED Viewed

	@@ -0,0 +1,111 @@

+"""Loads MedGemma-27B-MM + Qwen-32B co-resident on a single AMD MI300X.
+Designed to run inside the FastAPI server on the droplet. Models are loaded
+lazily (first request triggers load) so the health endpoint is responsive
+even before the heavy weights touch GPU memory.
+"""
+from __future__ import annotations
+import os
+import time
+from threading import Lock
+from typing import Any
+import torch
+from transformers import (
+    AutoModelForCausalLM,
+    AutoModelForImageTextToText,
+    AutoProcessor,
+    AutoTokenizer,
+)
+# Model ids; each can be overridden through the environment.
+MEDGEMMA_ID = os.getenv("MEDGEMMA_ID", "google/medgemma-27b-it")
+QWEN_ID = os.getenv("QWEN_ID", "Qwen/Qwen3.6-27B")
+# Both models are loaded onto this one device in bfloat16.
+DEVICE = "cuda:0"
+DTYPE = torch.bfloat16
+# Holds the loaded models/processors plus the "loaded" flag; _lock guards the one-time load.
+_state: dict[str, Any] = {"loaded": False}
+_lock = Lock()
+def _ensure_loaded() -> None:
+    """Load MedGemma and Qwen onto DEVICE the first time this is called.
+    Later calls return immediately. The flag is checked once without the lock
+    (cheap common path) and again while holding it, so concurrent first
+    callers trigger only one load. Load times and peak allocated GPU memory
+    are printed, and the final peak is kept in ``_state``.
+    """
+    if _state["loaded"]:
+        return
+    with _lock:
+        if _state["loaded"]:  # another caller finished the load while we waited
+            return
+        medgemma_start = time.time()
+        print(f"[serve] loading MedGemma: {MEDGEMMA_ID}", flush=True)
+        _state["medgemma_proc"] = AutoProcessor.from_pretrained(MEDGEMMA_ID)
+        medgemma_model = AutoModelForImageTextToText.from_pretrained(
+            MEDGEMMA_ID, torch_dtype=DTYPE, device_map=DEVICE,
+        )
+        _state["medgemma"] = medgemma_model
+        torch.cuda.synchronize()
+        peak_after_mg = torch.cuda.max_memory_allocated() / 1e9
+        print(f"[serve] medgemma loaded in {time.time() - medgemma_start:.1f}s, peak {peak_after_mg:.1f} GB", flush=True)
+        qwen_start = time.time()
+        print(f"[serve] loading Qwen: {QWEN_ID}", flush=True)
+        _state["qwen_tok"] = AutoTokenizer.from_pretrained(QWEN_ID)
+        qwen_model = AutoModelForCausalLM.from_pretrained(
+            QWEN_ID, torch_dtype=DTYPE, device_map=DEVICE,
+        )
+        _state["qwen"] = qwen_model
+        torch.cuda.synchronize()
+        peak = torch.cuda.max_memory_allocated() / 1e9
+        print(f"[serve] qwen loaded in {time.time() - qwen_start:.1f}s, total peak {peak:.1f} GB", flush=True)
+        # Flip the flag only after both models are in place.
+        _state["loaded"] = True
+        _state["peak_after_load_gb"] = peak
+def memory_stats() -> dict[str, Any]:
+    """Return a snapshot of GPU memory usage for the health endpoint.
+    Without CUDA the result is just ``{"available": False}``. Otherwise it
+    holds the allocated/reserved/total memory of device 0 in GB (bytes / 1e9),
+    the peak recorded after model load (``None`` until the models have been
+    loaded) and the device name — hence the mixed value types.
+    """
+    if not torch.cuda.is_available():
+        return {"available": False}
+    return {
+        "available": True,
+        "allocated_gb": torch.cuda.memory_allocated() / 1e9,
+        "reserved_gb": torch.cuda.memory_reserved() / 1e9,
+        "total_gb": torch.cuda.get_device_properties(0).total_memory / 1e9,
+        "peak_after_load_gb": _state.get("peak_after_load_gb"),
+        "device_name": torch.cuda.get_device_name(0),
+    }
+def medgemma_extract(system: str, user: str, max_new_tokens: int = 384) -> str:
+    """First stage of the two-stage reasoner: read records, surface relevant findings.
+    Sends a text-only system + user chat to MedGemma, decodes greedily
+    (``do_sample=False``) and returns only the newly generated text, with the
+    prompt tokens sliced off.
+    """
+    _ensure_loaded()
+    processor = _state["medgemma_proc"]
+    model = _state["medgemma"]
+    chat = [
+        {"role": role, "content": [{"type": "text", "text": text}]}
+        for role, text in (("system", system), ("user", user))
+    ]
+    encoded = processor.apply_chat_template(
+        chat, add_generation_prompt=True, tokenize=True,
+        return_dict=True, return_tensors="pt",
+    ).to(DEVICE)
+    prompt_len = encoded["input_ids"].shape[-1]
+    generated = model.generate(
+        **encoded, max_new_tokens=max_new_tokens, do_sample=False,
+    )
+    # generate() returns prompt + completion; keep only the completion.
+    completion = generated[0][prompt_len:]
+    return processor.decode(completion, skip_special_tokens=True)
+def qwen_synthesize(system: str, user: str, max_new_tokens: int = 512) -> str:
+    """Second stage: synthesize MedGemma's findings into the final cited answer.
+    Renders the system + user chat with Qwen's chat template, decodes greedily
+    (``do_sample=False``) and returns only the newly generated text.
+    """
+    _ensure_loaded()
+    tokenizer = _state["qwen_tok"]
+    model = _state["qwen"]
+    chat = [
+        {"role": "system", "content": system},
+        {"role": "user",   "content": user},
+    ]
+    # Render to a prompt string first, then tokenize it as a single batch item.
+    prompt = tokenizer.apply_chat_template(
+        chat, add_generation_prompt=True, tokenize=False,
+    )
+    encoded = tokenizer(prompt, return_tensors="pt").to(DEVICE)
+    prompt_len = encoded["input_ids"].shape[-1]
+    generated = model.generate(
+        **encoded, max_new_tokens=max_new_tokens, do_sample=False,
+    )
+    completion = generated[0][prompt_len:]
+    return tokenizer.decode(completion, skip_special_tokens=True)

backend/server.py ADDED Viewed

	@@ -0,0 +1,79 @@

+"""Recap MI300X premium-mode backend. Runs on the AMD Developer Cloud droplet.
+Deploy (run from the repo root so that `backend` is importable as a package):
+    pip install -r backend/requirements.txt
+    # ROCm torch installed separately on the droplet image.
+    uvicorn backend.server:app --host 0.0.0.0 --port 8080
+Then expose to the public Space via ngrok / cloudflared and set
+RECAP_MI300X_URL in the Space's env to the public URL.
+"""
+from __future__ import annotations
+import os
+from contextlib import asynccontextmanager
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from backend import serve
+# Eager model loading is on unless RECAP_EAGER_LOAD is set to something other than "1".
+EAGER_LOAD = os.getenv("RECAP_EAGER_LOAD", "1") == "1"
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Optionally load the models during startup, before requests are served.
+    With EAGER_LOAD on, the models are loaded here so the first /medgemma
+    request is fast; set RECAP_EAGER_LOAD=0 for a fast boot while debugging.
+    A failed eager load is only logged, so the error surfaces on the first
+    request that needs the models rather than at boot.
+    """
+    if not EAGER_LOAD:
+        yield
+        return
+    try:
+        serve._ensure_loaded()
+    except Exception as e:  # noqa: BLE001 — defer the failure to first request
+        print(f"[server] eager load failed: {e}", flush=True)
+    yield
+# The lifespan hook above runs at startup, before the app begins serving.
+app = FastAPI(title="Recap Premium Backend", version="0.1.0", lifespan=lifespan)
+class GenRequest(BaseModel):
+    # Request body shared by the /medgemma and /qwen endpoints.
+    system: str  # system prompt
+    user: str  # user prompt
+    max_new_tokens: int = 384  # generation budget forwarded to the model call
+class GenResponse(BaseModel):
+    # Response body shared by the /medgemma and /qwen endpoints.
+    text: str  # text returned by the model call
+@app.post("/medgemma", response_model=GenResponse)
+def medgemma(req: GenRequest) -> GenResponse:
+    """Run the MedGemma extraction stage; any failure becomes an HTTP 500 carrying the error text."""
+    try:
+        generated = serve.medgemma_extract(req.system, req.user, req.max_new_tokens)
+    except Exception as e:  # noqa: BLE001
+        raise HTTPException(status_code=500, detail=str(e)) from e
+    else:
+        return GenResponse(text=generated)
+@app.post("/qwen", response_model=GenResponse)
+def qwen(req: GenRequest) -> GenResponse:
+    """Run the Qwen synthesis stage; any failure becomes an HTTP 500 carrying the error text."""
+    try:
+        generated = serve.qwen_synthesize(req.system, req.user, req.max_new_tokens)
+    except Exception as e:  # noqa: BLE001
+        raise HTTPException(status_code=500, detail=str(e)) from e
+    else:
+        return GenResponse(text=generated)
+@app.get("/health")
+def health() -> dict:
+    """Report whether the models are loaded, GPU memory stats, and the configured model ids."""
+    is_loaded = serve._state.get("loaded", False)
+    model_ids = {
+        "medgemma_id": serve.MEDGEMMA_ID,
+        "qwen_id": serve.QWEN_ID,
+    }
+    return {
+        "ok": True,
+        "loaded": is_loaded,
+        "memory": serve.memory_stats(),
+        "models": model_ids,
+    }

pyproject.toml ADDED Viewed

	@@ -0,0 +1,13 @@

+[project]
+name = "recap"
+version = "0.1.0"
+description = "Longitudinal patient-records copilot — recaps a patient's whole chart in seconds. Built for the AMD x LabLab.ai hackathon."
+requires-python = ">=3.11"
+license = "MIT"
+[tool.setuptools.packages.find]
+where = ["src"]
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+pythonpath = ["src"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+fastapi==0.115.5
+uvicorn[standard]==0.32.0
+transformers==4.46.0
+torch==2.5.1
+accelerate==1.1.1
+spaces==0.30.4
+pypdf==5.1.0
+Pillow==11.0.0
+pydantic==2.9.2
+fhir.resources==7.1.0
+httpx==0.27.2
+python-dateutil==2.9.0

scripts/build_hf_readme.sh ADDED Viewed

	@@ -0,0 +1,15 @@

+#!/usr/bin/env bash
+# Assemble the HF-Space-bound README by prepending the HF YAML frontmatter
+# to the GitHub README. Outputs to space/README.md.
+#
+# Any frontmatter block(s) already sitting at the top of README.md are dropped
+# first, so the output carries exactly one frontmatter block (the header) even
+# if README.md picked one up from an earlier deploy.
+#
+# Run before pushing to the HF Space remote.
+set -euo pipefail
+REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+HEADER="${REPO_ROOT}/space/header.md"
+BODY="${REPO_ROOT}/README.md"
+OUT="${REPO_ROOT}/space/README.md"
+{
+  cat "${HEADER}"
+  # Until the first real content line: skip `---`-delimited blocks and the
+  # blank lines around them. After that, pass everything through unchanged.
+  awk '
+    started            { print; next }
+    in_fm              { if ($0 == "---") in_fm = 0; next }
+    $0 == "---"        { in_fm = 1; next }
+    /^[[:space:]]*$/   { next }
+                       { started = 1; print }
+  ' "${BODY}"
+} > "${OUT}"
+echo "Wrote ${OUT} ($(wc -l < "${OUT}") lines)"

scripts/deploy_hf_space.py ADDED Viewed

	@@ -0,0 +1,75 @@

+"""Deploy current working tree to the HF Space via HfApi.upload_folder.
+Bypasses git push (and macOS keychain credential issues). Uses the locally
+configured HF token. Run after committing locally if you want git history
+on GitHub to match what's on the Space.
+"""
+from __future__ import annotations
+import shutil
+import subprocess
+import tempfile
+from pathlib import Path
+from huggingface_hub import HfApi
+# Target Space and the repo root (this file lives in <root>/scripts/).
+REPO_ID = "lablab-ai-amd-developer-hackathon/recap"
+ROOT = Path(__file__).resolve().parent.parent
+# Names skipped at any depth when staging the upload. This script copies the
+# working tree directly and never consults .gitignore, so local secrets
+# (.env files) and caches must be listed here or they would be published.
+IGNORE = {
+    ".git", ".venv", "venv", "env",
+    ".env", ".env.local",
+    "__pycache__", ".pytest_cache",
+    ".cache", "hf_cache", ".ipynb_checkpoints",
+    "docs", "node_modules",
+    ".vscode", ".idea",
+    ".DS_Store",
+}
+def _build_hf_readme(staging: Path) -> None:
+    """Write staging/README.md as space/header.md followed by the repo README.
+    Every YAML frontmatter block already at the top of README.md is dropped
+    first, so the Space README ends up with exactly one frontmatter block (the
+    header) even when README.md has accumulated one or more of its own. Files
+    are read and written as UTF-8 because the README contains emoji and
+    box-drawing characters.
+    """
+    header = (ROOT / "space" / "header.md").read_text(encoding="utf-8")
+    body = (ROOT / "README.md").read_text(encoding="utf-8")
+    while body.startswith("---\n"):
+        # Search from index 3 so an empty block ("---\n---\n") is matched too.
+        end = body.find("\n---\n", 3)
+        if end == -1:  # unterminated block: leave the remainder untouched
+            break
+        body = body[end + 5:].lstrip()
+    if not header.endswith("\n"):
+        header += "\n"  # keep the closing '---' on its own line
+    (staging / "README.md").write_text(header + body, encoding="utf-8")
+def _copy_to_staging(staging: Path) -> None:
+    """Copy the repo's top-level entries into ``staging``, leaving out IGNORE names.
+    Top-level entries are filtered by exact name; inside copied directories
+    the same names are excluded at every depth via ``shutil.ignore_patterns``.
+    """
+    skip_nested = shutil.ignore_patterns(*IGNORE)
+    wanted = (item for item in ROOT.iterdir() if item.name not in IGNORE)
+    for item in wanted:
+        target = staging / item.name
+        if item.is_dir():
+            shutil.copytree(item, target, ignore=skip_nested)
+        else:
+            shutil.copy2(item, target)
+def main() -> None:
+    """Stage the working tree, rebuild the Space README, and upload it as one commit."""
+    api = HfApi()
+    with tempfile.TemporaryDirectory() as tmp:
+        staging = Path(tmp)
+        _copy_to_staging(staging)
+        _build_hf_readme(staging)
+        # Best effort: if git exits non-zero the hash is empty and the
+        # commit message falls back to 'local'.
+        git_result = subprocess.run(
+            ["git", "rev-parse", "--short", "HEAD"],
+            cwd=ROOT, capture_output=True, text=True, check=False,
+        )
+        rev_short = git_result.stdout.strip()
+        commit_msg = f"deploy: sync from {rev_short or 'local'}"
+        entry_count = sum(1 for _ in staging.rglob("*"))
+        print(f"Uploading {entry_count} entries to {REPO_ID}…")
+        api.upload_folder(
+            folder_path=str(staging),
+            repo_id=REPO_ID,
+            repo_type="space",
+            commit_message=commit_msg,
+        )
+    print(f"Done. https://huggingface.co/spaces/{REPO_ID}")
+# Run the deploy only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()

scripts/deploy_hf_space.sh ADDED Viewed

	@@ -0,0 +1,30 @@

+#!/usr/bin/env bash
+set -euo pipefail
+# Deploy the current branch to the HF Space, swapping README.md for the
+# frontmatter-prefixed version. The source branch stays clean for GitHub.
+#
+# Usage:  ./scripts/deploy_hf_space.sh
+#         SPACE_BRANCH=other ./scripts/deploy_hf_space.sh   # non-default target branch
+REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+cd "${REPO_ROOT}"
+SPACE_REMOTE="space"
+SPACE_URL="https://huggingface.co/spaces/lablab-ai-amd-developer-hackathon/recap"
+# Hugging Face repos use `main` as their default branch, which is what the
+# Space builds from; a push to `master` would only create an unused branch.
+SPACE_BRANCH="${SPACE_BRANCH:-main}"
+git remote get-url "${SPACE_REMOTE}" >/dev/null 2>&1 || git remote add "${SPACE_REMOTE}" "${SPACE_URL}"
+./scripts/build_hf_readme.sh
+CURRENT_BRANCH="$(git symbolic-ref --short HEAD)"
+# Always go back to the starting branch and restore its README, even when the
+# commit or push below fails and `set -e` aborts the script.
+restore_branch() {
+  git checkout -q "${CURRENT_BRANCH}" && git checkout -q -- README.md
+}
+trap restore_branch EXIT
+git checkout -B hf-deploy
+cp space/README.md README.md
+git add README.md
+git commit -q -m "deploy: hf space readme with sdk frontmatter" || echo "no readme change"
+git push -f "${SPACE_REMOTE}" "hf-deploy:${SPACE_BRANCH}"
+echo
+echo "Pushed to ${SPACE_URL}"
+echo "Watch the build at: ${SPACE_URL}?logs=build"

scripts/generate_synthea_case.sh ADDED Viewed

	@@ -0,0 +1,49 @@

+#!/usr/bin/env bash
+# Generate a Synthea synthetic patient FHIR bundle for a showcase case.
+#
+# Usage:
+#   ./scripts/generate_synthea_case.sh <case_name> [seed] [age_range] [gender]
+#
+# Examples:
+#   ./scripts/generate_synthea_case.sh sarah 3923
+#   ./scripts/generate_synthea_case.sh marcus 7711 50-60 M
+#
+# Defaults: seed 3923, age range 60-75, gender F.
+# Requires: java 11+, internet access for the first run.
+set -euo pipefail
+CASE_NAME="${1:?Usage: $0 <case_name> [seed] [age_range] [gender]}"
+SEED="${2:-3923}"
+AGE_RANGE="${3:-60-75}"
+GENDER="${4:-F}"
+REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+OUT_DIR="${REPO_ROOT}/data/cases/${CASE_NAME}"
+WORK_DIR="${OUT_DIR}/synthea-output"
+JAR_URL="https://github.com/synthetichealth/synthea/releases/download/v3.3.0/synthea-with-dependencies.jar"
+mkdir -p "${WORK_DIR}"
+cd "${WORK_DIR}"
+if [ ! -f synthea-with-dependencies.jar ]; then
+  echo "Downloading Synthea jar..."
+  curl -fL -o synthea-with-dependencies.jar "${JAR_URL}"
+fi
+# Clear bundles left by earlier runs so the pick below can only see this run's patient.
+rm -rf output/fhir
+echo "Generating patient with seed ${SEED} (age ${AGE_RANGE}, gender ${GENDER})..."
+java -jar synthea-with-dependencies.jar \
+  -p 1 -s "${SEED}" \
+  --exporter.fhir.export=true \
+  --exporter.csv.export=false \
+  --generate.only_alive_patients=true \
+  --exporter.years_of_history=10 \
+  Massachusetts \
+  -a "${AGE_RANGE}" -g "${GENDER}"
+# Pick the first bundle that is not hospital/practitioner metadata. A glob loop
+# (instead of ls | grep | head) keeps `set -e -o pipefail` from aborting the
+# script before the error message below when nothing matches.
+PATIENT_BUNDLE=""
+for f in output/fhir/*.json; do
+  [ -e "${f}" ] || continue
+  case "${f##*/}" in
+    *hospitalInformation*|*practitionerInformation*) continue ;;
+  esac
+  PATIENT_BUNDLE="${f}"
+  break
+done
+if [ -z "${PATIENT_BUNDLE}" ]; then
+  echo "ERROR: no patient bundle produced. Check Synthea output above." >&2
+  exit 1
+fi
+cp "${PATIENT_BUNDLE}" "${OUT_DIR}/fhir.json"
+echo "Wrote ${OUT_DIR}/fhir.json"

space/header.md ADDED Viewed

	@@ -0,0 +1,11 @@

+---
+title: Recap
+emoji: 🩺
+colorFrom: blue
+colorTo: purple
+sdk: docker
+app_port: 7860
+pinned: false
+license: mit
+short_description: Recap reads the whole chart so you don't have to.
+---

src/recap/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ __version__ = "0.1.0"

src/recap/cases.py ADDED Viewed

	@@ -0,0 +1,93 @@

+"""Load a Patient from a case directory (manifest.json + bundles + docs + images).
+A case directory looks like:
+    data/cases/<case_id>/
+    ├── manifest.json        # required
+    ├── fhir.json            # optional Synthea bundle
+    ├── docs/                # optional PDF lab/discharge docs
+    │   └── lab_2022.pdf
+    └── images/              # optional scans/photos
+        └── fundus.png
+If a FHIR bundle is present, the patient's display name, age, and gender
+are pulled from it automatically — manifest can omit `display_name`.
+"""
+import json
+from datetime import datetime
+from pathlib import Path
+from recap.ingestion.fhir import load_bundle, load_demographics
+from recap.ingestion.image import load_image_event
+from recap.ingestion.pdf import load_pdf
+from recap.models import Event, Patient
+def _normalize_date(s: str) -> str:
+    """Return ``s`` as an ISO-8601 timestamp that always carries a UTC offset.
+    Date-only values become midnight UTC, a trailing ``Z`` is rewritten as
+    ``+00:00``, and a timestamp without any offset is assumed to be UTC.
+    Values that already carry an offset are returned unchanged. Guaranteeing
+    an offset keeps every parsed datetime timezone-aware, so events can be
+    sorted together without a naive/aware comparison error.
+    """
+    if "T" not in s:
+        return f"{s}T00:00:00+00:00"
+    if s.endswith("Z"):
+        return s[:-1] + "+00:00"
+    # Only the time part can hold an offset sign; the date part always has '-'.
+    time_part = s.split("T", 1)[1]
+    if "+" not in time_part and "-" not in time_part:
+        return s + "+00:00"
+    return s
+def _events_from_pdf(case_dir: Path, doc: dict) -> list[Event]:
+    """Turn one manifest ``docs`` entry into one Event per PDF page.
+    All pages share the document's date, category and title; the category
+    defaults to "note" and the title to the file's base name. The page number
+    is kept in each event's metadata.
+    """
+    rel_path = doc["file"]
+    source_name = Path(rel_path).name
+    pages = load_pdf(str(case_dir / rel_path), source_id=source_name)
+    doc_date = datetime.fromisoformat(_normalize_date(doc["date"]))
+    events: list[Event] = []
+    for page in pages:
+        events.append(
+            Event(
+                id=f"pdf-{source_name}-p{page.page_number}",
+                date=doc_date,
+                category=doc.get("category", "note"),
+                title=doc.get("title", source_name),
+                source=source_name,
+                body=page.text,
+                metadata={"page": page.page_number},
+            )
+        )
+    return events
+def load_case(cases_dir: str, case_id: str) -> Patient:
+    """Build a Patient from ``<cases_dir>/<case_id>/manifest.json`` and the files it lists.
+    Events are collected in this order: the FHIR bundle (if the manifest names
+    one), one event per page of each listed PDF, then one event per listed
+    image. Demographics given in the manifest win; the FHIR Patient resource
+    only fills in what the manifest leaves out, and the display name finally
+    falls back to the manifest id.
+    """
+    case_root = Path(cases_dir) / case_id
+    manifest = json.loads((case_root / "manifest.json").read_text())
+    # Start from the manifest's demographics; any of them may be missing.
+    display_name = manifest.get("display_name")
+    age: int | None = manifest.get("age")
+    gender: str | None = manifest.get("gender")
+    events: list[Event] = []
+    bundle_name = manifest.get("fhir_bundle")
+    if bundle_name:
+        bundle_path = str(case_root / bundle_name)
+        events.extend(load_bundle(bundle_path, source_id=bundle_name))
+        demo = load_demographics(bundle_path)
+        if demo is not None:
+            # Fill only the gaps — manifest values, if any, win.
+            if not display_name:
+                display_name = demo.display_name
+            if age is None:
+                age = demo.age
+            if not gender:
+                gender = demo.gender
+    for doc in manifest.get("docs", []):
+        events.extend(_events_from_pdf(case_root, doc))
+    for img in manifest.get("images", []):
+        image_file = img["file"]
+        image_event = load_image_event(
+            str(case_root / image_file),
+            category=img.get("category", "scan"),
+            title=img.get("title", image_file),
+            date_iso=img["date"],
+            source_id=Path(image_file).name,
+        )
+        events.append(image_event)
+    return Patient(
+        id=manifest["id"],
+        display_name=display_name or manifest["id"],  # final fallback: the manifest id
+        age=age,
+        gender=gender,
+        events=events,
+    )

src/recap/config.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import os
+from dataclasses import dataclass
+@dataclass(frozen=True)
+class Config:
+    # Immutable runtime settings; load() fills each field from a RECAP_* env var.
+    backend: str  # "zerogpu" | "mi300x" | "mock"
+    mi300x_url: str  # RECAP_MI300X_URL; empty string when unset
+    medgemma_lite_id: str  # RECAP_MEDGEMMA_LITE model id
+    medgemma_premium_id: str  # RECAP_MEDGEMMA_PREMIUM model id
+    qwen_id: str  # RECAP_QWEN model id
+    cases_dir: str  # RECAP_CASES_DIR; directory holding the case folders
+def load() -> Config:
+    """Build a Config from the RECAP_* environment variables, using built-in defaults for unset ones."""
+    env = os.environ.get
+    settings = {
+        "backend": env("RECAP_BACKEND", "zerogpu"),
+        "mi300x_url": env("RECAP_MI300X_URL", ""),
+        "medgemma_lite_id": env("RECAP_MEDGEMMA_LITE", "google/medgemma-1.5-4b-it"),
+        "medgemma_premium_id": env("RECAP_MEDGEMMA_PREMIUM", "google/medgemma-27b-it"),
+        "qwen_id": env("RECAP_QWEN", "Qwen/Qwen3.6-27B"),
+        "cases_dir": env("RECAP_CASES_DIR", "data/cases"),
+    }
+    return Config(**settings)

src/recap/demo_patient.py ADDED Viewed

	@@ -0,0 +1,107 @@

+"""Synthetic Sarah Johnson — CKD progression over 8 years, generated in memory.
+Used so the UI is functional before real Synthea data is curated. Once
+`data/cases/sarah/` exists with a manifest, the case loader takes over
+and this is no longer used.
+"""
+from datetime import datetime, timedelta, timezone
+from recap.models import Event, Patient
+def _ev(eid: str, date: datetime, category: str, title: str, source: str, body: str = "") -> Event:
+    """Shorthand Event constructor for the demo data; an empty body falls back to the title."""
+    text = body if body else title
+    return Event(id=eid, date=date, category=category, title=title, source=source, body=text)
+def build_demo_patient() -> Patient:
+    """Build a richly-populated synthetic patient for UI demo purposes.
+
+    All events are generated in memory with UTC timestamps, starting from a
+    type 2 diabetes diagnosis in January 2017 and ending with creatinine
+    labs in 2024. Returns a `Patient` with id "demo".
+    """
+    base = datetime(2017, 1, 15, tzinfo=timezone.utc)
+    events: list[Event] = []
+    # Initial diagnosis (2017): T2DM
+    events.append(_ev("dx-1", base, "diagnosis", "Type 2 diabetes mellitus",
+                      "fhir.json", "Diagnosis: Type 2 diabetes mellitus, newly identified"))
+    events.append(_ev("v-1", base, "visit", "Annual physical exam", "fhir.json"))
+    events.append(_ev("med-1", base + timedelta(days=2), "med", "Metformin 500mg BID",
+                      "fhir.json", "Prescribed: Metformin 500mg twice daily"))
+    # Year 1-3: stable, paired creatinine + HbA1c labs every 180 days
+    cr_values = [0.9, 0.95, 1.0, 1.0, 1.05, 1.1]  # creatinine slowly rising
+    a1c_values = [7.4, 7.2, 7.0, 7.1, 6.9, 7.3]
+    for i, (cr, a1c) in enumerate(zip(cr_values, a1c_values)):
+        d = base + timedelta(days=180 * (i + 1))
+        events.append(_ev(f"lab-cr-{i}", d, "lab", f"Creatinine: {cr} mg/dL",
+                          f"lab_{d.date()}.pdf",
+                          f"Creatinine value: {cr} mg/dL (Reference: 0.6-1.2)"))
+        events.append(_ev(f"lab-a1c-{i}", d, "lab", f"HbA1c: {a1c}%",
+                          f"lab_{d.date()}.pdf",
+                          f"HbA1c value: {a1c}% (Target: <7.0)"))
+    # Year 4 (2021): first abnormal Cr — kidney decline begins
+    decline_start = datetime(2021, 3, 14, tzinfo=timezone.utc)
+    events.append(_ev("lab-cr-abnormal", decline_start, "lab", "Creatinine: 1.4 mg/dL (high)",
+                      f"lab_{decline_start.date()}.pdf",
+                      "Creatinine value: 1.4 mg/dL (FIRST abnormal — reference 0.6-1.2)"))
+    events.append(_ev("lab-egfr-abnormal", decline_start, "lab", "eGFR: 52 mL/min/1.73m²",
+                      f"lab_{decline_start.date()}.pdf",
+                      "eGFR value: 52 (low — stage 3 CKD threshold)"))
+    events.append(_ev("rep-cmp-1", decline_start, "report",
+                      "Comprehensive metabolic panel",
+                      f"lab_{decline_start.date()}.pdf",
+                      "Mildly elevated creatinine consistent with stage 3 CKD."))
+    # Nephrology referral, 45 days after the first abnormal result
+    nephro = decline_start + timedelta(days=45)
+    events.append(_ev("v-nephro-1", nephro, "visit", "Nephrology consultation",
+                      "fhir.json", "Referred for evaluation of declining renal function."))
+    events.append(_ev("dx-ckd", nephro, "diagnosis", "Chronic kidney disease, stage 3",
+                      "fhir.json", "Diagnosis: CKD stage 3, likely diabetic nephropathy."))
+    events.append(_ev("med-ace", nephro + timedelta(days=2), "med", "Lisinopril 10mg daily",
+                      "fhir.json", "Prescribed: Lisinopril 10mg for renal protection."))
+    # Renal ultrasound: one procedure event plus one scan event for the image
+    us = nephro + timedelta(days=10)
+    events.append(_ev("proc-us", us, "procedure", "Renal ultrasound",
+                      "fhir.json", "Bilateral kidneys imaged."))
+    events.append(_ev("scan-us", us, "scan", "Renal ultrasound (bilateral)",
+                      "kidney_us_2021.png",
+                      "Imaging: bilateral renal cortices mildly thinned, no obstruction."))
+    # Year 5 (2022): continued decline — labs in March, July and November
+    cr_2022 = [1.5, 1.6, 1.55]
+    for i, cr in enumerate(cr_2022):
+        d = datetime(2022, 3 + i * 4, 1, tzinfo=timezone.utc)
+        events.append(_ev(f"lab-cr-22-{i}", d, "lab", f"Creatinine: {cr} mg/dL",
+                          f"lab_{d.date()}.pdf",
+                          f"Creatinine value: {cr} mg/dL (continued elevation)"))
+    # Diabetic retinopathy screening (2023)
+    eye = datetime(2023, 4, 1, tzinfo=timezone.utc)
+    events.append(_ev("v-ophth-1", eye, "visit", "Diabetic retinopathy screening", "fhir.json"))
+    events.append(_ev("scan-fundus", eye, "scan", "Right fundus photograph",
+                      "fundus_2023.png",
+                      "Mild non-proliferative diabetic retinopathy in right eye."))
+    events.append(_ev("dx-dr", eye, "diagnosis", "Mild non-proliferative diabetic retinopathy",
+                      "fhir.json", "Diagnosis: NPDR, mild — annual follow-up."))
+    # Recent (2024): quarterly creatinine labs (Mar, Jun, Sep, Dec) while on
+    # lisinopril. No body is passed, so `_ev` reuses the title as the body.
+    for i, cr in enumerate([1.6, 1.55, 1.6, 1.7]):
+        d = datetime(2024, 3 + i * 3, 1, tzinfo=timezone.utc)
+        events.append(_ev(f"lab-cr-24-{i}", d, "lab", f"Creatinine: {cr} mg/dL",
+                          f"lab_{d.date()}.pdf"))
+    return Patient(
+        id="demo",
+        display_name="Sarah Johnson, 67 (demo)",
+        age=67,
+        gender="female",
+        events=events,
+    )

src/recap/inference/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from recap.inference.gateway import answer
2	+
3	+ __all__ = ["answer"]

src/recap/inference/gateway.py ADDED Viewed

	@@ -0,0 +1,63 @@

+"""Gateway: question in, cited Answer out.
+This is the only place the rest of the codebase talks to. The UI calls
+`answer(...)`; the gateway handles retrieval, prompt assembly, backend
+routing, and citation parsing. Backends (mock/zerogpu/mi300x) are imported
+lazily so importing this module doesn't drag in torch on a CPU laptop.
+"""
+import re
+from recap.config import load
+from recap.models import Answer, Citation, Event
+from recap.prompts import PATIENT_QA_SYSTEM, build_user_prompt
+from recap.retrieval import retrieve
+_CITATION_RE = re.compile(r"\[src:([^\]#]+)(?:#p(\d+))?\]")
+def answer(question: str, events: list[Event], top_k: int = 6) -> Answer:
+    """Run the full question→retrieved→generated→cited pipeline."""
+    cfg = load()
+    retrieved = retrieve(question, events, top_k=top_k)
+    user_prompt = build_user_prompt(question, retrieved)
+    text = _generate(cfg.backend, PATIENT_QA_SYSTEM, user_prompt)
+    citations = _parse_citations(text, retrieved)
+    return Answer(text=text, citations=citations)
+def _generate(backend: str, system: str, user: str) -> str:
+    if backend == "mi300x":
+        from recap.inference.mi300x_client import generate_premium
+        return generate_premium(system=system, user=user)
+    if backend == "mock":
+        from recap.inference.mock import generate_mock
+        return generate_mock(system=system, user=user)
+    # default: zerogpu
+    from recap.inference.zerogpu import generate_lite
+    return generate_lite(system=system, user=user)
+def _parse_citations(text: str, retrieved: list[Event]) -> list[Citation]:
+    """Extract `[src:foo#p2]` markers and resolve each to a Citation.
+    Drops citations to sources that weren't in the retrieved set (defensive
+    against the model hallucinating a source name it never saw).
+    """
+    by_source: dict[str, Event] = {e.source: e for e in retrieved}
+    seen: set[tuple[str, int | None]] = set()
+    out: list[Citation] = []
+    for m in _CITATION_RE.finditer(text):
+        src = m.group(1)
+        page = int(m.group(2)) if m.group(2) else None
+        if src not in by_source:
+            continue
+        key = (src, page)
+        if key in seen:
+            continue
+        seen.add(key)
+        out.append(Citation(source_id=src, page=page, snippet=by_source[src].title))
+    return out

src/recap/inference/mi300x_client.py ADDED Viewed

	@@ -0,0 +1,48 @@

+from __future__ import annotations
+import time
+import httpx
+from recap.config import load
+from recap.reasoner import two_stage
+def _post(endpoint: str, system: str, user: str, *, timeout: float = 180.0) -> str:
+    cfg = load()
+    if not cfg.mi300x_url:
+        raise RuntimeError(
+            "RECAP_MI300X_URL is not set. Point it at the backend, e.g. "
+            "RECAP_MI300X_URL=https://abc-123.ngrok-free.app"
+        )
+    url = f"{cfg.mi300x_url.rstrip('/')}/{endpoint}"
+    payload = {"system": system, "user": user}
+    last_err: Exception | None = None
+    for attempt in range(3):
+        try:
+            r = httpx.post(url, json=payload, timeout=timeout)
+            r.raise_for_status()
+            return r.json()["text"]
+        except (httpx.HTTPStatusError, httpx.TransportError) as e:
+            last_err = e
+            if attempt < 2:
+                time.sleep(1.5 ** attempt)
+    raise RuntimeError(f"MI300X backend call failed after 3 attempts: {last_err}")
+def generate_premium(system: str, user: str) -> str:
+    if "Question:" in user:
+        block, question = user.rsplit("Question:", 1)
+        retrieved_block = block.strip()
+        question = question.strip()
+    else:
+        retrieved_block = user
+        question = "Summarize what's in these records."
+    return two_stage(
+        question,
+        retrieved_block,
+        extract_fn=lambda s, u: _post("medgemma", s, u),
+        synthesize_fn=lambda s, u: _post("qwen", s, u),
+    )

src/recap/inference/mock.py ADDED Viewed

	@@ -0,0 +1,22 @@

+"""Mock backend for CPU-only local dev. Returns canned text without loading a model."""
+import re
+def generate_mock(system: str, user: str) -> str:
+    """Pretend-answer that always cites the first source it sees in the user prompt."""
+    m = re.search(r"\[src:([^\]]+)\]", user)
+    src = m.group(1) if m else "unknown.pdf"
+    # Try to surface the first event date and title for a slightly more useful demo string.
+    date_match = re.search(r"(\d{4}-\d{2}-\d{2})", user)
+    snippet_match = re.search(r"\[src:[^\]]+\][^\n]*?— (.+?)(?:\n|$)", user)
+    date_str = date_match.group(1) if date_match else "an unknown date"
+    snippet = snippet_match.group(1).strip() if snippet_match else "a record"
+    return (
+        f"[mock answer] Based on the records, the earliest relevant signal "
+        f"appears on {date_str}: {snippet} [src:{src}]. "
+        f"Set RECAP_BACKEND=zerogpu or mi300x for real inference."
+    )

src/recap/ingestion/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from recap.ingestion.fhir import load_bundle as load_fhir_bundle
2	+
3	+ __all__ = ["load_fhir_bundle"]

src/recap/ingestion/fhir.py ADDED Viewed

	@@ -0,0 +1,225 @@

+"""Parse Synthea-style FHIR bundles into demographics + chronological Events.
+Handles these FHIR resource types:
+- Patient            → demographics (name, age, gender)
+- Observation        → "lab" events
+- Encounter          → "visit" events
+- MedicationRequest  → "med" events
+- Condition          → "diagnosis" events
+- Procedure          → "procedure" events
+- DiagnosticReport   → "report" events
+Other Synthea-emitted types (Claim, ExplanationOfBenefit, CarePlan, Goal,
+Immunization, AllergyIntolerance) are ignored for now — they're either
+financial (no clinical value to the demo) or low-signal compared to the
+above. We can add them if a showcase question needs them.
+"""
+import json
+import re
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from recap.models import Event
+_TRAILING_DIGITS_RE = re.compile(r"\d+$")
+@dataclass
+class Demographics:
+    """Patient identity fields extracted from a FHIR `Patient` resource."""
+    display_name: str  # "<given> <family>", with ", <age>" appended when age is known
+    age: int | None  # whole years computed from birthDate; None when unavailable
+    gender: str | None  # "male" | "female" | "other"
+def _parse_date(s: str) -> datetime:
+    if "T" not in s:
+        s = f"{s}T00:00:00+00:00"
+    if s.endswith("Z"):
+        s = s[:-1] + "+00:00"
+    return datetime.fromisoformat(s)
+def _strip_synthea_digits(s: str) -> str:
+    """Synthea suffixes names with digits (e.g. 'Sarah123 Smith45') so they look fake."""
+    return _TRAILING_DIGITS_RE.sub("", s)
+def _compute_age(birth_date: str, as_of: datetime | None = None) -> int | None:
+    try:
+        bd = _parse_date(birth_date)
+    except Exception:
+        return None
+    now = as_of or datetime.now(timezone.utc)
+    years = now.year - bd.year - ((now.month, now.day) < (bd.month, bd.day))
+    return max(years, 0)
+def _patient_to_demographics(r: dict) -> Demographics:
+    names = r.get("name") or []
+    family = ""
+    given = ""
+    if names:
+        family = _strip_synthea_digits(names[0].get("family", ""))
+        givens = names[0].get("given") or []
+        if givens:
+            given = _strip_synthea_digits(givens[0])
+    full_name = f"{given} {family}".strip() or "Patient"
+    age = _compute_age(r["birthDate"]) if r.get("birthDate") else None
+    display = f"{full_name}, {age}" if age is not None else full_name
+    return Demographics(display_name=display, age=age, gender=r.get("gender"))
+def _observation_to_event(r: dict, source_id: str) -> Event | None:
+    code = r.get("code", {}).get("text") or ""
+    value = r.get("valueQuantity", {})
+    v_str = ""
+    if value:
+        v_str = f"{value.get('value')} {value.get('unit', '')}".strip()
+    date_str = r.get("effectiveDateTime") or r.get("issued")
+    if not date_str:
+        return None
+    rid = r.get("id", "")
+    title = f"{code}: {v_str}".strip(": ") if v_str else code or "Observation"
+    return Event(
+        id=f"obs-{rid}",
+        date=_parse_date(date_str),
+        category="lab",
+        title=title,
+        source=source_id,
+        body=f"{code} value: {v_str}" if v_str else code,
+        metadata={"resource_id": rid},
+    )
+def _encounter_to_event(r: dict, source_id: str) -> Event | None:
+    reasons = r.get("reasonCode") or []
+    reason = reasons[0].get("text", "Encounter") if reasons else "Encounter"
+    start = r.get("period", {}).get("start")
+    if not start:
+        return None
+    rid = r.get("id", "")
+    return Event(
+        id=f"enc-{rid}",
+        date=_parse_date(start),
+        category="visit",
+        title=reason,
+        source=source_id,
+        body=reason,
+        metadata={"resource_id": rid},
+    )
+def _medication_to_event(r: dict, source_id: str) -> Event | None:
+    med = r.get("medicationCodeableConcept", {}).get("text", "Medication")
+    authored = r.get("authoredOn")
+    if not authored:
+        return None
+    rid = r.get("id", "")
+    return Event(
+        id=f"med-{rid}",
+        date=_parse_date(authored),
+        category="med",
+        title=med,
+        source=source_id,
+        body=f"Prescribed: {med}",
+        metadata={"resource_id": rid},
+    )
+def _condition_to_event(r: dict, source_id: str) -> Event | None:
+    name = r.get("code", {}).get("text", "Condition")
+    date_str = r.get("onsetDateTime") or r.get("recordedDate")
+    if not date_str:
+        return None
+    rid = r.get("id", "")
+    clinical = r.get("clinicalStatus", {}).get("coding", [{}])[0].get("code", "")
+    return Event(
+        id=f"cond-{rid}",
+        date=_parse_date(date_str),
+        category="diagnosis",
+        title=name,
+        source=source_id,
+        body=f"Diagnosis: {name}" + (f" (status: {clinical})" if clinical else ""),
+        metadata={"resource_id": rid, "clinical_status": clinical},
+    )
+def _procedure_to_event(r: dict, source_id: str) -> Event | None:
+    name = r.get("code", {}).get("text", "Procedure")
+    perf = r.get("performedDateTime") or r.get("performedPeriod", {}).get("start")
+    if not perf:
+        return None
+    rid = r.get("id", "")
+    return Event(
+        id=f"proc-{rid}",
+        date=_parse_date(perf),
+        category="procedure",
+        title=name,
+        source=source_id,
+        body=f"Procedure: {name}",
+        metadata={"resource_id": rid},
+    )
+def _diagnostic_report_to_event(r: dict, source_id: str) -> Event | None:
+    name = r.get("code", {}).get("text", "Report")
+    date_str = r.get("effectiveDateTime") or r.get("issued")
+    if not date_str:
+        return None
+    rid = r.get("id", "")
+    conclusion = r.get("conclusion", "")
+    return Event(
+        id=f"rep-{rid}",
+        date=_parse_date(date_str),
+        category="report",
+        title=name,
+        source=source_id,
+        body=f"{name}. {conclusion}".strip("."),
+        metadata={"resource_id": rid},
+    )
+_DISPATCH = {
+    "Observation": _observation_to_event,
+    "Encounter": _encounter_to_event,
+    "MedicationRequest": _medication_to_event,
+    "Condition": _condition_to_event,
+    "Procedure": _procedure_to_event,
+    "DiagnosticReport": _diagnostic_report_to_event,
+}
+def _iter_resources(bundle: dict):
+    for entry in bundle.get("entry", []):
+        r = entry.get("resource", {})
+        yield r.get("resourceType"), r
+def load_bundle(path: str, source_id: str) -> list[Event]:
+    """Parse a FHIR Bundle and return Events for known clinical resource types."""
+    with Path(path).open() as f:
+        bundle = json.load(f)
+    events: list[Event] = []
+    for rtype, r in _iter_resources(bundle):
+        handler = _DISPATCH.get(rtype)
+        if handler is None:
+            continue
+        ev = handler(r, source_id)
+        if ev is not None:
+            events.append(ev)
+    return events
+def load_demographics(path: str) -> Demographics | None:
+    """Extract Patient demographics from a FHIR Bundle. Returns None if no Patient resource."""
+    with Path(path).open() as f:
+        bundle = json.load(f)
+    for rtype, r in _iter_resources(bundle):
+        if rtype == "Patient":
+            return _patient_to_demographics(r)
+    return None

src/recap/ingestion/image.py ADDED Viewed

	@@ -0,0 +1,38 @@

+"""Wrap medical images as Events with caller-provided date and category.
+We do not auto-extract dates from EXIF — clinical workflow requires curation.
+The caller (case manifest, upload handler) provides the date explicitly.
+"""
+from datetime import datetime
+from pathlib import Path
+from recap.models import Event, EventCategory
+def _parse_date(s: str) -> datetime:
+    if "T" not in s:
+        s = f"{s}T00:00:00+00:00"
+    if s.endswith("Z"):
+        s = s[:-1] + "+00:00"
+    return datetime.fromisoformat(s)
+def load_image_event(
+    path: str,
+    *,
+    category: EventCategory,
+    title: str,
+    date_iso: str,
+    source_id: str | None = None,
+) -> Event:
+    src = source_id or Path(path).name
+    return Event(
+        id=f"img-{src}",
+        date=_parse_date(date_iso),
+        category=category,
+        title=title,
+        source=src,
+        body=f"Image: {title}",
+        metadata={"path": path},
+    )

src/recap/ingestion/pdf.py ADDED Viewed

	@@ -0,0 +1,22 @@

+"""Parse PDFs into per-page records with source metadata for citation grounding."""
+from dataclasses import dataclass
+from pathlib import Path
+from pypdf import PdfReader
+@dataclass
+class PdfPage:
+    """Extracted text of one PDF page, tagged with where it came from."""
+    source_id: str  # caller-supplied id, or the PDF's file name
+    page_number: int  # 1-indexed
+    text: str  # extracted page text; empty string when extraction yields nothing
+def load_pdf(path: str, source_id: str | None = None) -> list[PdfPage]:
+    src = source_id or Path(path).name
+    reader = PdfReader(path)
+    return [
+        PdfPage(source_id=src, page_number=i, text=page.extract_text() or "")
+        for i, page in enumerate(reader.pages, start=1)
+    ]

src/recap/models.py ADDED Viewed

	@@ -0,0 +1,47 @@

+from datetime import datetime
+from typing import Literal
+from pydantic import BaseModel, Field
+EventCategory = Literal[
+    "lab",
+    "visit",
+    "scan",
+    "med",
+    "note",
+    "photo",
+    "diagnosis",
+    "procedure",
+    "report",
+    "other",
+]
+class Citation(BaseModel):
+    """Pointer from an answer back to the record that supports it."""
+    source_id: str  # source name as written in a `[src:...]` marker
+    page: int | None = None  # page from a `#p<n>` suffix, when present
+    snippet: str | None = None  # short text shown with the citation
+    region: tuple[float, float, float, float] | None = None  # presumably a bounding box — TODO confirm
+class Event(BaseModel):
+    """A single dated entry in a patient's record."""
+    id: str
+    date: datetime
+    category: EventCategory  # one of the literals in `EventCategory`
+    title: str  # short label
+    source: str  # identifier of the originating file/record, used in `[src:...]` citations
+    body: str = ""  # longer text
+    metadata: dict = Field(default_factory=dict)  # free-form extras, e.g. `resource_id` or `path`
+class Patient(BaseModel):
+    """A patient and the events that make up their record."""
+    id: str
+    display_name: str  # human-readable label, e.g. "Sarah Johnson, 67 (demo)"
+    age: int | None = None
+    gender: str | None = None  # "male" | "female" | "other"
+    events: list[Event] = Field(default_factory=list)
+class Answer(BaseModel):
+    """Generated answer text plus the citations resolved from it."""
+    text: str  # answer text, including any inline `[src:...]` markers
+    citations: list[Citation] = Field(default_factory=list)

src/recap/prompts.py ADDED Viewed

	@@ -0,0 +1,25 @@

+"""System prompts and prompt builders. Centralized for easy tuning."""
+PATIENT_QA_SYSTEM = """You are a careful medical reading assistant. You have access to a patient's records (labs, visits, medications, scans). When asked a question:
+1. Cite the exact source for every claim using the format [src:<source_id>#p<page>] or [src:<source_id>] if no page.
+2. If the answer is not in the provided records, say so explicitly.
+3. Never speculate beyond what the records show.
+4. Never give medical advice or recommend treatment changes.
+Output format:
+- A direct answer in 2-4 sentences with inline citations.
+- Then a bullet list of the cited records you relied on.
+"""
+def build_user_prompt(question: str, retrieved_events: list) -> str:
+    """Render retrieved events into the user-turn prompt."""
+    lines = ["Patient records (most relevant first):", ""]
+    for e in retrieved_events:
+        lines.append(f"- [src:{e.source}] {e.date.date().isoformat()} — {e.title}")
+        if e.body and e.body != e.title:
+            lines.append(f"  {e.body}")
+    lines.append("")
+    lines.append(f"Question: {question}")
+    return "\n".join(lines)

src/recap/reasoner.py ADDED Viewed

	@@ -0,0 +1,45 @@

+"""Two-stage reasoner: MedGemma extracts evidence, Qwen writes the answer."""
+from __future__ import annotations
+from typing import Callable, Protocol
+EXTRACT_SYSTEM = (
+    "You are a medical evidence extractor. Given a patient's records and a "
+    "question, identify the most relevant data points and quote them verbatim. "
+    "Always include the source citation in [src:source_id] or "
+    "[src:source_id#p<page>] format. Do not synthesize, interpret, or speculate "
+    "— extract only."
+)
+SYNTHESIZE_SYSTEM = (
+    "You are a careful medical reading assistant. You will be given:\n"
+    "1. A user question\n"
+    "2. Evidence extracted from the patient's records, with citations\n\n"
+    "Synthesize a 2-4 sentence answer using only the evidence. Preserve every "
+    "[src:...] citation exactly as given. If the evidence is insufficient, say "
+    "so. Never give medical advice or recommend treatment changes."
+)
+class GenerateFn(Protocol):
+    """Structural type for a text generator: takes a system and a user prompt, returns text."""
+    def __call__(self, system: str, user: str) -> str: ...
+def two_stage(
+    question: str,
+    retrieved_block: str,
+    *,
+    extract_fn: GenerateFn,
+    synthesize_fn: GenerateFn,
+) -> str:
+    extract_user = (
+        f"Patient records:\n{retrieved_block}\n\n"
+        f"Question: {question}\n\n"
+        "Extract the most relevant data points with citations:"
+    )
+    evidence = extract_fn(EXTRACT_SYSTEM, extract_user).strip()
+    synth_user = f"Question: {question}\n\nEvidence:\n{evidence}\n\nAnswer:"
+    return synthesize_fn(SYNTHESIZE_SYSTEM, synth_user).strip()

src/recap/retrieval.py ADDED Viewed

	@@ -0,0 +1,53 @@

+"""BM25 retrieval over patient events. No external deps."""
+import re
+from collections import Counter
+from math import log
+from recap.models import Event
+def _tokenize(text: str) -> list[str]:
+    return re.findall(r"[A-Za-z0-9]+", text.lower())
+def retrieve(query: str, events: list[Event], top_k: int = 5) -> list[Event]:
+    """Rank events by BM25 over title+body.
+    On no-match, falls back to the first `top_k` events so the caller always
+    gets something to send to the LLM rather than an empty context.
+    """
+    if not events:
+        return []
+    query_tokens = _tokenize(query)
+    if not query_tokens:
+        return events[:top_k]
+    docs = [_tokenize(f"{e.title} {e.body}") for e in events]
+    avgdl = sum(len(d) for d in docs) / len(docs)
+    df: Counter = Counter()
+    for d in docs:
+        for tok in set(d):
+            df[tok] += 1
+    n = len(docs)
+    k1, b = 1.5, 0.75
+    scores: list[tuple[float, int]] = []
+    for i, d in enumerate(docs):
+        score = 0.0
+        tf = Counter(d)
+        for q in query_tokens:
+            if q not in tf:
+                continue
+            idf = log((n - df[q] + 0.5) / (df[q] + 0.5) + 1)
+            num = tf[q] * (k1 + 1)
+            den = tf[q] + k1 * (1 - b + b * len(d) / max(avgdl, 1))
+            score += idf * num / den
+        scores.append((score, i))
+    scores.sort(reverse=True)
+    ranked = [events[i] for s, i in scores[:top_k] if s > 0]
+    return ranked or events[:top_k]

src/recap/timeline.py ADDED Viewed

	@@ -0,0 +1,17 @@

+"""Build a chronological timeline view of a patient's events."""
+from dataclasses import dataclass
+from recap.models import Event
+@dataclass
+class Timeline:
+    """Chronological view of a patient's events."""
+    events: list[Event]  # sorted by date, ascending (as built by `build_timeline`)
+    years_covered: list[int]  # distinct event years, ascending
+def build_timeline(events: list[Event]) -> Timeline:
+    sorted_events = sorted(events, key=lambda e: e.date)
+    years = sorted({e.date.year for e in sorted_events})
+    return Timeline(events=sorted_events, years_covered=years)

src/recap/ui/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from recap.ui.timeline_view import build_timeline_figure
2	+
3	+ __all__ = ["build_timeline_figure"]

src/recap/ui/timeline_view.py ADDED Viewed

	@@ -0,0 +1,77 @@

+"""Render a Patient's events as an interactive Plotly timeline.
+X-axis is time, Y-axis groups events by category (lab, visit, scan, …).
+Hovering a marker shows the event title and source. Clicking is wired
+up later to scroll the chat to the relevant citation.
+"""
+import pandas as pd
+import plotly.express as px
+from recap.models import Patient
+# Stable category order — controls the y-axis lane positions.
+CATEGORY_ORDER = [
+    "diagnosis",
+    "visit",
+    "lab",
+    "report",
+    "scan",
+    "procedure",
+    "med",
+    "note",
+    "photo",
+    "other",
+]
+# Color per category — chosen for legibility on dark theme + colorblind-friendly.
+CATEGORY_COLORS = {
+    "diagnosis": "#e63946",
+    "visit": "#2a9d8f",
+    "lab": "#457b9d",
+    "report": "#264653",
+    "scan": "#f4a261",
+    "procedure": "#9b5de5",
+    "med": "#e76f51",
+    "note": "#6c757d",
+    "photo": "#bdb2ff",
+    "other": "#adb5bd",
+}
+def build_timeline_figure(patient: Patient):
+    """Return a Plotly Figure (or None if patient has no events)."""
+    if not patient.events:
+        return None
+    df = pd.DataFrame([
+        {
+            "date": e.date,
+            "category": e.category,
+            "title": e.title,
+            "source": e.source,
+            "year": e.date.year,
+        }
+        for e in patient.events
+    ])
+    fig = px.scatter(
+        df,
+        x="date",
+        y="category",
+        color="category",
+        category_orders={"category": CATEGORY_ORDER},
+        color_discrete_map=CATEGORY_COLORS,
+        hover_data={"title": True, "source": True, "category": False, "date": "|%Y-%m-%d"},
+        title=f"{patient.display_name} — {len(patient.events)} events",
+    )
+    fig.update_traces(marker=dict(size=11, opacity=0.85, line=dict(width=0.5, color="white")))
+    fig.update_layout(
+        height=340,
+        margin=dict(t=50, b=40, l=40, r=20),
+        showlegend=True,
+        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
+        xaxis_title=None,
+        yaxis_title=None,
+    )
+    return fig

static/app.jsx ADDED Viewed

	@@ -0,0 +1,859 @@

+// Recap — Bold Editorial UI
+// Ported from the design bundle (direction-editorial.jsx) and wired to the
+// real FastAPI backend at /api/patients and /api/answer. Single-instance
+// app (no canvas), full-window, light + dark.
+const { useState, useEffect, useMemo, useRef } = React;
+const PALETTE = {
+  light: {
+    bg: '#f4ede2', paper: '#fbf7ef',
+    ink: '#1a1410', inkSoft: '#3a2e25',
+    muted: '#6b5c4a', faint: '#a8967f',
+    rule: '#d6c8b4', ruleSoft: '#e8ddc9',
+    accent: '#b8412e', accentSoft: '#f3dcd0',
+    mark: '#d4af37',
+  },
+  dark: {
+    bg: '#1a1410', paper: '#221a14',
+    ink: '#f4ede2', inkSoft: '#d6c8b4',
+    muted: '#a8967f', faint: '#6b5c4a',
+    rule: '#3a2e25', ruleSoft: '#2a2017',
+    accent: '#e8755e', accentSoft: '#2a1814',
+    mark: '#e0c060',
+  },
+};
+const CAT = {
+  diagnosis: { label: 'Diagnosis',  hint: 'Clinical condition'         },
+  visit:     { label: 'Visit',      hint: 'Patient encounter'          },
+  lab:       { label: 'Lab',        hint: 'Laboratory result'          },
+  report:    { label: 'Report',     hint: 'Clinical report or summary' },
+  scan:      { label: 'Scan',       hint: 'Medical imaging'            },
+  procedure: { label: 'Procedure',  hint: 'Operation or intervention'  },
+  med:       { label: 'Medication', hint: 'Prescribed drug'            },
+  note:      { label: 'Note',       hint: 'Free-text clinical note'    },
+  photo:     { label: 'Photo',      hint: 'Patient-supplied image'     },
+  other:     { label: 'Other',      hint: 'Uncategorized event'        },
+};
+// Inline lucide-style icons (24x24 viewBox). stroke inherits from parent.
+// One glyph per event category, picked for instant recognition.
+const ICONS = {
+  // alert-octagon — signals clinical importance for any diagnosis
+  diagnosis: (
+    <g>
+      <path d="M7.86 2h8.28L22 7.86v8.28L16.14 22H7.86L2 16.14V7.86z" />
+      <path d="M12 8v4" />
+      <path d="M12 16h.01" />
+    </g>
+  ),
+  // stethoscope
+  visit: (
+    <g>
+      <path d="M11 2v2" />
+      <path d="M5 2v2" />
+      <path d="M5 3H4a2 2 0 0 0-2 2v4a6 6 0 0 0 12 0V5a2 2 0 0 0-2-2h-1" />
+      <path d="M8 15a6 6 0 0 0 12 0v-3" />
+      <circle cx="20" cy="10" r="2" />
+    </g>
+  ),
+  // flask-conical
+  lab: (
+    <g>
+      <path d="M10 2v6.5L3.5 19a1 1 0 0 0 .9 1.5h15.2a1 1 0 0 0 .9-1.5L14 8.5V2" />
+      <path d="M9 2h6" />
+      <path d="M6.4 14.5h11.2" />
+    </g>
+  ),
+  // file-text
+  report: (
+    <g>
+      <path d="M15 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V7Z" />
+      <path d="M14 2v6h6" />
+      <path d="M9 13h6" />
+      <path d="M9 17h4" />
+    </g>
+  ),
+  // image (frame + small sun + mountain) — universal "imaging" symbol
+  scan: (
+    <g>
+      <rect x="3" y="3" width="18" height="18" rx="2" ry="2" />
+      <circle cx="9" cy="9" r="2" />
+      <path d="m21 15-3.086-3.086a2 2 0 0 0-2.828 0L6 21" />
+    </g>
+  ),
+  // scissors
+  procedure: (
+    <g>
+      <circle cx="6" cy="6" r="3" />
+      <path d="M8.12 8.12 12 12" />
+      <path d="M20 4 8.12 15.88" />
+      <circle cx="6" cy="18" r="3" />
+      <path d="M14.8 14.8 20 20" />
+    </g>
+  ),
+  // pill
+  med: (
+    <g>
+      <path d="m10.5 20.5 10-10a4.95 4.95 0 1 0-7-7l-10 10a4.95 4.95 0 1 0 7 7Z" />
+      <path d="m8.5 8.5 7 7" />
+    </g>
+  ),
+  // pen-line
+  note: (
+    <g>
+      <path d="M12 20h9" />
+      <path d="M16.5 3.5a2.121 2.121 0 0 1 3 3L7 19l-4 1 1-4Z" />
+    </g>
+  ),
+  // camera
+  photo: (
+    <g>
+      <path d="M14.5 4h-5L7 7H4a2 2 0 0 0-2 2v9a2 2 0 0 0 2 2h16a2 2 0 0 0 2-2V9a2 2 0 0 0-2-2h-3l-2.5-3z" />
+      <circle cx="12" cy="13" r="3" />
+    </g>
+  ),
+  // dot fallback
+  other: (
+    <circle cx="12" cy="12" r="3" />
+  ),
+};
+function EventIcon({ category, size = 12 }) {
+  const paths = ICONS[category] || ICONS.other;
+  return (
+    <svg width={size} height={size} viewBox="0 0 24 24"
+         fill="none" stroke="currentColor" strokeWidth="2"
+         strokeLinecap="round" strokeLinejoin="round"
+         style={{ display: 'block' }}>
+      {paths}
+    </svg>
+  );
+}
+const SERIF = '"Source Serif 4", "GT Sectra", "Tiempos Headline", Charter, Georgia, serif';
+const SANS  = '"Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui, sans-serif';
+const MONO  = '"JetBrains Mono", "SF Mono", ui-monospace, monospace';
+function fmtDate(iso, opts = { y: true }) {
+  const d = new Date(iso + (iso.length === 10 ? 'T00:00:00Z' : ''));
+  if (opts.short) return d.toLocaleDateString('en-US', { month: 'short', day: 'numeric' });
+  return d.toLocaleDateString('en-US', {
+    year: opts.y ? 'numeric' : undefined,
+    month: 'short',
+    day: 'numeric',
+  });
+}
+// ─────────────────────────────────────────────────────────────────────
+// Suggested questions per patient, keyed by patient id. Used as starter
+// prompts only — actual answers come from /api/answer (real LLM via
+// inference gateway). ChatColumn falls back to the `demo` list when the
+// current patient id has no entry here.
+const SUGGESTED = {
+  sarah: [
+    'When did her kidney function start declining?',
+    'What medications was she on when CKD was diagnosed?',
+    'Summarize her trajectory in 3 sentences.',
+  ],
+  marcus: [
+    'How long from first symptom to diagnosis?',
+    'What was the response to R-CHOP?',
+    'Summarize this patient\'s journey.',
+  ],
+  aisha: [
+    'What records does she have in foreign languages?',
+    'Is her current anemia recurrent or new?',
+    'What is her current pregnancy status?',
+  ],
+  demo: [
+    'When did her kidney function start declining?',
+    'What was her first abnormal creatinine reading?',
+    'What medications was she on when CKD was diagnosed?',
+  ],
+};
+// ─────────────────────────────────────────────────────────────────────
+function App() {
+  const [patients, setPatients] = useState([]);
+  const [patientId, setPatientId] = useState(null);
+  const [dark, setDark] = useState(false);
+  const [loading, setLoading] = useState(true);
+  const [error, setError] = useState(null);
+  useEffect(() => {
+    fetch('/api/patients')
+      .then((r) => r.json())
+      .then((data) => {
+        setPatients(data);
+        if (data.length > 0) setPatientId(data[0].id);
+        setLoading(false);
+      })
+      .catch((e) => {
+        setError(String(e));
+        setLoading(false);
+      });
+  }, []);
+  const c = dark ? PALETTE.dark : PALETTE.light;
+  const patient = useMemo(
+    () => patients.find((p) => p.id === patientId),
+    [patients, patientId],
+  );
+  if (loading) {
+    return <Loading c={c} />;
+  }
+  if (error) {
+    return <ErrorView c={c} message={error} />;
+  }
+  if (!patient) {
+    return <ErrorView c={c} message="No patients available." />;
+  }
+  return (
+    <div style={{
+      position: 'absolute', inset: 0, display: 'flex', flexDirection: 'column',
+      background: c.bg, color: c.ink, fontFamily: SANS, fontSize: 13,
+    }}>
+      <Masthead c={c} dark={dark} patient={patient} allPatients={patients}
+                onPatientChange={setPatientId}
+                onDarkToggle={() => setDark(!dark)} />
+      <div style={{ flex: 1, display: 'flex', minHeight: 0 }}>
+        <Document c={c} patient={patient} />
+        <ChatColumn c={c} patient={patient} />
+      </div>
+    </div>
+  );
+}
+function Loading({ c }) {
+  return (
+    <div style={{
+      position: 'absolute', inset: 0, background: c.bg, color: c.muted,
+      display: 'grid', placeItems: 'center', fontFamily: SERIF,
+    }}>
+      <div style={{ textAlign: 'center' }}>
+        <div style={{ fontSize: 42, color: c.ink, letterSpacing: '-.03em' }}>
+          Recap<span style={{ color: c.accent }}>.</span>
+        </div>
+        <div style={{ fontStyle: 'italic', marginTop: 8 }}>loading the chart…</div>
+      </div>
+    </div>
+  );
+}
+function ErrorView({ c, message }) {
+  return (
+    <div style={{
+      position: 'absolute', inset: 0, background: c.bg, color: c.ink,
+      display: 'grid', placeItems: 'center', fontFamily: SERIF, padding: 24,
+    }}>
+      <div style={{ textAlign: 'center', maxWidth: 480 }}>
+        <div style={{ fontSize: 32, color: c.accent, letterSpacing: '-.02em' }}>
+          Something is off.
+        </div>
+        <div style={{ marginTop: 12, color: c.muted, fontStyle: 'italic' }}>{message}</div>
+      </div>
+    </div>
+  );
+}
+// ─────────────────────────────────────────────────────────────────────
+function Masthead({ c, dark, patient, allPatients, onPatientChange, onDarkToggle }) {
+  const [open, setOpen] = useState(false);
+  return (
+    <div style={{
+      padding: '14px 28px', borderBottom: `1px solid ${c.rule}`,
+      background: c.bg, display: 'flex', alignItems: 'center', gap: 18,
+    }}>
+      <div style={{
+        fontFamily: SERIF, fontSize: 26, fontWeight: 500,
+        letterSpacing: '-0.025em', lineHeight: 1, color: c.ink,
+      }}>
+        Recap<span style={{ color: c.accent }}>.</span>
+      </div>
+      <div style={{
+        paddingLeft: 16, borderLeft: `1px solid ${c.rule}`,
+        fontFamily: SERIF, fontStyle: 'italic', fontSize: 13.5, color: c.muted,
+        lineHeight: 1.3, maxWidth: 280,
+      }}>
+        Reads the whole chart so you don't have to.
+      </div>
+      <div style={{ flex: 1 }} />
+      <div style={{ position: 'relative' }}>
+        <button onClick={() => setOpen(!open)} style={{
+          display: 'flex', alignItems: 'center', gap: 10,
+          padding: '6px 12px', border: `1px solid ${c.rule}`, borderRadius: 2,
+          background: c.paper, color: c.ink, cursor: 'pointer',
+          fontFamily: SANS, fontSize: 12,
+        }}>
+          <span style={{ fontFamily: MONO, fontSize: 10, color: c.faint, letterSpacing: '0.06em' }}>
+            CASE №
+          </span>
+          <span style={{ fontFamily: SERIF, fontSize: 14, fontWeight: 500, color: c.ink }}>
+            {patient.display_name}
+          </span>
+          <span style={{ color: c.faint }}>▾</span>
+        </button>
+        {open && (
+          <div style={{
+            position: 'absolute', top: '110%', right: 0, width: 320, marginTop: 4,
+            background: c.paper, border: `1px solid ${c.rule}`, borderRadius: 2,
+            padding: 4, zIndex: 10,
+            boxShadow: dark ? '0 8px 32px rgba(0,0,0,.5)' : '0 8px 32px rgba(0,0,0,.12)',
+          }}>
+            {allPatients.map((p, i) => (
+              <button key={p.id}
+                      onClick={() => { onPatientChange(p.id); setOpen(false); }}
+                      style={{
+                        width: '100%', textAlign: 'left', padding: '10px 12px',
+                        borderRadius: 2, border: 'none', cursor: 'pointer',
+                        background: p.id === patient.id ? c.accentSoft : 'transparent',
+                        color: c.ink, fontFamily: 'inherit',
+                      }}>
+                <div style={{ display: 'flex', alignItems: 'baseline', gap: 8 }}>
+                  <span style={{
+                    fontFamily: MONO, fontSize: 10, color: c.faint, letterSpacing: '0.06em',
+                  }}>{String(i + 1).padStart(2, '0')}</span>
+                  <span style={{ fontFamily: SERIF, fontSize: 16, fontWeight: 500 }}>
+                    {p.display_name}
+                  </span>
+                  {p.age != null && (
+                    <span style={{ fontSize: 11, color: c.muted }}>· {p.age}y</span>
+                  )}
+                </div>
+                <div style={{
+                  fontSize: 11.5, color: c.muted, marginTop: 4, lineHeight: 1.45,
+                  fontStyle: 'italic', fontFamily: SERIF,
+                }}>
+                  {p.summary}
+                </div>
+              </button>
+            ))}
+          </div>
+        )}
+      </div>
+      <BackendBadge c={c} />
+      <button onClick={onDarkToggle} style={{
+        width: 28, height: 28, borderRadius: 2,
+        border: `1px solid ${c.rule}`, background: c.paper,
+        color: c.muted, cursor: 'pointer', fontSize: 14,
+      }}>{dark ? '☀' : '☾'}</button>
+    </div>
+  );
+}
+function BackendBadge({ c }) {
+  const [info, setInfo] = useState({ backend: '...' });
+  useEffect(() => {
+    fetch('/api/health').then((r) => r.json()).then(setInfo).catch(() => {});
+  }, []);
+  return (
+    <div style={{
+      display: 'flex', alignItems: 'center', gap: 6,
+      padding: '4px 10px', border: `1px solid ${c.rule}`, borderRadius: 2,
+      background: c.paper,
+      fontFamily: MONO, fontSize: 10, color: c.muted, letterSpacing: '0.04em',
+    }}>
+      <span style={{ width: 5, height: 5, borderRadius: '50%', background: c.accent }} />
+      AMD MI300X · 192 GB · {info.backend}
+    </div>
+  );
+}
+// ─────────────────────────────────────────────────────────────────────
+function Document({ c, patient }) {
+  const events = patient.events;
+  const groups = {};
+  events.forEach((e) => {
+    const y = e.date.slice(0, 4);
+    (groups[y] = groups[y] || []).push(e);
+  });
+  const years = Object.keys(groups).sort();
+  return (
+    <div style={{
+      flex: 1.4, minWidth: 0, overflowY: 'auto',
+      background: c.paper, borderRight: `1px solid ${c.rule}`,
+    }}>
+      <div style={{ padding: '32px 36px 24px', borderBottom: `1px solid ${c.rule}` }}>
+        <div style={{
+          fontFamily: MONO, fontSize: 10, color: c.faint, letterSpacing: '0.12em',
+          textTransform: 'uppercase', marginBottom: 12,
+        }}>
+          Patient Dossier · {events.length} events · {years.length} year{years.length === 1 ? '' : 's'} on record
+        </div>
+        <h1 style={{
+          fontFamily: SERIF, fontSize: 48, fontWeight: 500,
+          letterSpacing: '-0.03em', lineHeight: 1.05, color: c.ink,
+          margin: '0 0 12px',
+        }}>
+          {patient.display_name}<span style={{ color: c.accent }}>.</span>
+        </h1>
+        <div style={{
+          fontFamily: SERIF, fontStyle: 'italic', fontSize: 18, color: c.muted,
+          lineHeight: 1.45, maxWidth: 620, textWrap: 'pretty',
+        }}>
+          {patient.summary}
+        </div>
+        <div style={{ display: 'flex', gap: 24, marginTop: 22, alignItems: 'baseline' }}>
+          {patient.age != null && <Stat c={c} value={patient.age} label="years old" />}
+          {patient.gender && <Stat c={c} value={patient.gender} label="gender" />}
+          {patient.mrn && <Stat c={c} value={patient.mrn} label="MRN" mono />}
+          <Stat c={c} value={new Set(events.map((e) => e.source)).size} label="source docs" />
+        </div>
+        {patient.tags && patient.tags.length > 0 && (
+          <div style={{ display: 'flex', gap: 8, marginTop: 20, flexWrap: 'wrap' }}>
+            {patient.tags.map((t) => (
+              <span key={t} style={{
+                fontFamily: SANS, fontSize: 11, color: c.inkSoft,
+                padding: '3px 10px', border: `1px solid ${c.rule}`, borderRadius: 2,
+                background: c.bg,
+              }}>{t}</span>
+            ))}
+          </div>
+        )}
+      </div>
+      <div style={{ padding: '24px 36px 48px' }}>
+        {years.map((y, yi) => (
+          <YearSection key={y} c={c} year={y} events={groups[y]} first={yi === 0} />
+        ))}
+      </div>
+    </div>
+  );
+}
+function Stat({ c, value, label, mono }) {
+  return (
+    <div>
+      <div style={{
+        fontFamily: mono ? MONO : SERIF,
+        fontSize: mono ? 14 : 22, fontWeight: 500, color: c.ink, lineHeight: 1,
+      }}>{value}</div>
+      <div style={{
+        fontFamily: MONO, fontSize: 9.5, color: c.faint, letterSpacing: '0.1em',
+        textTransform: 'uppercase', marginTop: 5,
+      }}>{label}</div>
+    </div>
+  );
+}
+function YearSection({ c, year, events, first }) {
+  const [activeId, setActiveId] = useState(null);
+  return (
+    <div style={{ position: 'relative', marginBottom: 32 }}>
+      <div style={{
+        display: 'flex', alignItems: 'baseline', gap: 14,
+        marginBottom: 8, paddingBottom: 8,
+        borderBottom: `1px solid ${c.ruleSoft}`, marginLeft: 80,
+      }}>
+        <h2 style={{
+          fontFamily: SERIF, fontSize: 32, fontWeight: 500, letterSpacing: '-0.02em',
+          color: c.ink, margin: 0, lineHeight: 1,
+        }}>{year}</h2>
+        <div style={{
+          fontFamily: MONO, fontSize: 10, color: c.faint, letterSpacing: '0.1em',
+          textTransform: 'uppercase',
+        }}>
+          {events.length} {events.length === 1 ? 'event' : 'events'}
+        </div>
+      </div>
+      <div style={{
+        position: 'absolute', left: 100, top: 56, bottom: 0,
+        width: 1, background: c.rule,
+      }} />
+      {events.map((e, ei) => (
+        <DocEvent key={e.id} c={c} e={e} index={ei}
+                  active={activeId === e.id}
+                  onClick={() => setActiveId(activeId === e.id ? null : e.id)} />
+      ))}
+    </div>
+  );
+}
+// One timeline row: a date gutter, the category icon (with a hover tooltip)
+// and a button holding the event's category, flag, title and details.
+//   c       – active colour palette.
+//   e       – the event to render.
+//   index   – zero-based position within its year; shown as a 1-based,
+//             zero-padded ordinal under the date.
+//   active  – when true the row is expanded to show body, source, page and
+//             snippet, and the icon is drawn larger in the accent colour.
+//   onClick – invoked when the details button is clicked.
+function DocEvent({ c, e, index, active, onClick }) {
+  // Tooltip copy for the category; unknown categories use CAT.other.
+  const cat = CAT[e.category] || CAT.other;
+  const [iconHover, setIconHover] = useState(false);
+  // Vertical center of the title line is roughly 26px from the top of the
+  // content button (≈12px category label + 3px gap + half of 22px title line).
+  // Center the icon there so it visually anchors to the title, not the date.
+  const iconCenterY = 26;
+  const iconSize = active ? 30 : 26;
+  const iconPadTop = Math.max(iconCenterY - iconSize / 2, 0);
+  return (
+    <div style={{
+      display: 'flex', alignItems: 'flex-start',
+      padding: '12px 0', position: 'relative',
+    }}>
+      {/* Date gutter: short date plus the row's ordinal within the year */}
+      <div style={{
+        width: 76, flexShrink: 0, paddingRight: 8,
+        paddingTop: 16, textAlign: 'right',
+      }}>
+        <div style={{
+          fontFamily: SERIF, fontSize: 14, color: c.ink, fontWeight: 500,
+          letterSpacing: '-0.01em',
+        }}>
+          {fmtDate(e.date, { y: false, short: true })}
+        </div>
+        <div style={{
+          fontFamily: MONO, fontSize: 9.5, color: c.faint, letterSpacing: '0.06em',
+          marginTop: 2,
+        }}>
+          {String(index + 1).padStart(2, '0')}
+        </div>
+      </div>
+      {/* Icon column */}
+      <div style={{
+        width: 32, flexShrink: 0, display: 'flex', justifyContent: 'center',
+        paddingTop: iconPadTop, position: 'relative', zIndex: 1,
+      }}>
+        {/* Hover wrapper sits exactly on the icon — tooltip uses bottom:100% relative to it */}
+        <div style={{ position: 'relative', display: 'inline-block' }}
+             onMouseEnter={() => setIconHover(true)}
+             onMouseLeave={() => setIconHover(false)}>
+          {/* Diagnoses get a rounded square; every other category a circle */}
+          <div style={{
+            width: iconSize, height: iconSize,
+            borderRadius: e.category === 'diagnosis' ? 4 : '50%',
+            background: active ? c.accent : c.paper,
+            border: `1px solid ${active ? c.accent : c.rule}`,
+            display: 'grid', placeItems: 'center',
+            color: active ? c.paper : c.muted,
+            transition: 'all .15s',
+            boxShadow: iconHover && !active ? `0 0 0 4px ${c.accentSoft}` : 'none',
+          }}>
+            <EventIcon category={e.category} size={active ? 16 : 14} />
+          </div>
+          {iconHover && (
+            <div role="tooltip" style={{
+              position: 'absolute', bottom: '100%', left: '50%',
+              transform: 'translateX(-50%)', marginBottom: 8,
+              background: c.ink, color: c.bg,
+              padding: '6px 10px', borderRadius: 2,
+              fontFamily: MONO, fontSize: 10, letterSpacing: '0.06em',
+              whiteSpace: 'nowrap', pointerEvents: 'none', zIndex: 50,
+              boxShadow: '0 4px 14px rgba(0,0,0,.18)',
+              display: 'flex', alignItems: 'baseline', gap: 6,
+            }}>
+              <span style={{ fontWeight: 600, textTransform: 'uppercase' }}>
+                {cat.label}
+              </span>
+              <span style={{ opacity: 0.65 }}>· {cat.hint}</span>
+              {/* Tooltip tail */}
+              <span style={{
+                position: 'absolute', top: '100%', left: '50%',
+                transform: 'translateX(-50%)',
+                width: 0, height: 0,
+                borderLeft: '5px solid transparent',
+                borderRight: '5px solid transparent',
+                borderTop: `5px solid ${c.ink}`,
+              }} />
+            </div>
+          )}
+        </div>
+      </div>
+      {/* Details button: clicking it calls onClick */}
+      <button onClick={onClick} style={{
+        flex: 1, marginLeft: 18, textAlign: 'left',
+        background: active ? c.accentSoft : 'transparent',
+        padding: active ? '10px 14px' : '0',
+        marginTop: active ? -4 : 0, marginBottom: active ? -4 : 0,
+        border: 'none', cursor: 'pointer', color: c.ink, fontFamily: 'inherit',
+        borderRadius: 2,
+      }}>
+        <div style={{ display: 'flex', alignItems: 'baseline', gap: 8, flexWrap: 'wrap' }}>
+          <span style={{
+            fontFamily: MONO, fontSize: 9.5, color: c.faint,
+            textTransform: 'uppercase', letterSpacing: '0.1em',
+          }}>{e.category}</span>
+          {e.flag === 'critical' && (
+            <span style={{
+              fontFamily: MONO, fontSize: 9.5, color: c.accent,
+              textTransform: 'uppercase', letterSpacing: '0.1em', fontWeight: 600,
+            }}>· Critical</span>
+          )}
+          {(e.flag === 'high' || e.flag === 'low') && (
+            <span style={{
+              fontFamily: MONO, fontSize: 9.5, color: c.mark,
+              textTransform: 'uppercase', letterSpacing: '0.1em', fontWeight: 600,
+            }}>· {e.flag === 'high' ? 'High' : 'Low'}</span>
+          )}
+        </div>
+        <div style={{
+          fontFamily: SERIF, fontSize: 17, fontWeight: 500, color: c.ink,
+          letterSpacing: '-0.012em', lineHeight: 1.3, marginTop: 3, textWrap: 'balance',
+        }}>{e.title}</div>
+        {/* Body is shown when it differs from the title and the row is
+            either expanded or flagged critical */}
+        {e.body && e.body !== e.title && (active || e.flag === 'critical') && (
+          <div style={{
+            fontFamily: SERIF, fontSize: 14, color: c.inkSoft, lineHeight: 1.5,
+            marginTop: 6, fontStyle: 'italic', maxWidth: 540, textWrap: 'pretty',
+          }}>{e.body}</div>
+        )}
+        {/* Provenance footer, only while expanded */}
+        {active && (
+          <div style={{
+            marginTop: 10, paddingTop: 8, borderTop: `1px solid ${c.rule}`,
+            fontFamily: MONO, fontSize: 10.5, color: c.muted,
+            display: 'flex', alignItems: 'center', gap: 12, flexWrap: 'wrap',
+          }}>
+            <span>Source: <span style={{ color: c.ink }}>{e.source}</span></span>
+            {e.page && <span>Page {e.page}</span>}
+            {e.snippet && (
+              <span style={{
+                fontStyle: 'italic', fontFamily: SERIF, fontSize: 12.5, color: c.inkSoft,
+              }}>"{e.snippet}"</span>
+            )}
+          </div>
+        )}
+      </button>
+    </div>
+  );
+}
+// ─────────────────────────────────────────────────────────────────────
+function ChatColumn({ c, patient }) {
+  const [history, setHistory] = useState([]);
+  const [input, setInput] = useState('');
+  const [thinking, setThinking] = useState(false);
+  const scroller = useRef(null);
+  useEffect(() => {
+    setHistory([]);
+    setInput('');
+  }, [patient.id]);
+  useEffect(() => {
+    if (scroller.current) scroller.current.scrollTop = scroller.current.scrollHeight;
+  }, [history, thinking]);
+  const send = async (text) => {
+    const q = (text || input).trim();
+    if (!q) return;
+    setInput('');
+    setHistory((h) => [...h, { role: 'user', text: q }]);
+    setThinking(true);
+    try {
+      const r = await fetch('/api/answer', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ patient_id: patient.id, question: q }),
+      });
+      const data = await r.json();
+      if (data.error) {
+        setHistory((h) => [...h, { role: 'assistant', text: `Error: ${data.error}`, citations: [] }]);
+      } else {
+        setHistory((h) => [...h, {
+          role: 'assistant',
+          text: data.text,
+          citations: data.citations || [],
+        }]);
+      }
+    } catch (err) {
+      setHistory((h) => [...h, {
+        role: 'assistant',
+        text: `Network error: ${String(err)}`,
+        citations: [],
+      }]);
+    } finally {
+      setThinking(false);
+    }
+  };
+  const examples = SUGGESTED[patient.id] || SUGGESTED.demo || [];
+  return (
+    <div style={{
+      flex: 1, minWidth: 0, display: 'flex', flexDirection: 'column', background: c.bg,
+    }}>
+      <div style={{ padding: '20px 24px 14px', borderBottom: `1px solid ${c.rule}` }}>
+        <div style={{
+          fontFamily: MONO, fontSize: 10, color: c.faint, letterSpacing: '0.12em',
+          textTransform: 'uppercase', marginBottom: 6,
+        }}>
+          The Reading Room
+        </div>
+        <div style={{
+          fontFamily: SERIF, fontSize: 22, fontWeight: 500,
+          letterSpacing: '-0.02em', color: c.ink, lineHeight: 1.15,
+        }}>
+          Ask a question.<br />
+          <span style={{ fontStyle: 'italic', color: c.muted }}>Get a cited answer.</span>
+        </div>
+      </div>
+      <div ref={scroller} style={{ flex: 1, overflowY: 'auto', padding: '18px 24px' }}>
+        {history.length === 0 && (
+          <div>
+            <div style={{
+              fontFamily: MONO, fontSize: 10, color: c.faint, letterSpacing: '0.1em',
+              textTransform: 'uppercase', marginBottom: 12,
+            }}>Suggested</div>
+            {examples.map((ex, i) => (
+              <button key={ex} onClick={() => send(ex)} style={{
+                display: 'flex', gap: 12, alignItems: 'flex-start',
+                width: '100%', textAlign: 'left',
+                padding: '14px 0',
+                borderTop: i === 0 ? `1px solid ${c.rule}` : 'none',
+                borderBottom: `1px solid ${c.rule}`,
+                background: 'transparent', border: 'none', cursor: 'pointer',
+                borderRadius: 0, color: c.ink, fontFamily: 'inherit',
+              }}>
+                <span style={{
+                  fontFamily: MONO, fontSize: 10, color: c.faint, letterSpacing: '0.1em',
+                  width: 22, paddingTop: 4, flexShrink: 0,
+                }}>0{i + 1}</span>
+                <span style={{
+                  flex: 1, fontFamily: SERIF, fontSize: 16, lineHeight: 1.4,
+                  color: c.ink, fontWeight: 500, letterSpacing: '-0.01em',
+                }}>{ex}</span>
+                <span style={{ color: c.accent, fontSize: 16, marginTop: 1 }}>→</span>
+              </button>
+            ))}
+          </div>
+        )}
+        {history.map((m, i) => (
+          <div key={i} style={{ marginBottom: 22 }}>
+            {m.role === 'user' ? (
+              <div>
+                <div style={{
+                  fontFamily: MONO, fontSize: 9.5, color: c.faint, letterSpacing: '0.1em',
+                  textTransform: 'uppercase', marginBottom: 6,
+                }}>You asked</div>
+                <div style={{
+                  fontFamily: SERIF, fontSize: 19, lineHeight: 1.35, color: c.ink,
+                  fontWeight: 500, letterSpacing: '-0.015em',
+                }}>"{m.text}"</div>
+              </div>
+            ) : (
+              <AssistantMessage c={c} m={m} patient={patient} />
+            )}
+          </div>
+        ))}
+        {thinking && (
+          <div style={{
+            fontFamily: MONO, fontSize: 11, color: c.muted, letterSpacing: '0.04em',
+            padding: '8px 0',
+          }}>
+            <span style={{ animation: 'edit-blink 1s infinite' }}>▌</span>
+            {' '}reading {patient.events.length} events…
+          </div>
+        )}
+      </div>
+      <div style={{ padding: '14px 24px 18px', borderTop: `1px solid ${c.rule}` }}>
+        <div style={{
+          display: 'flex', alignItems: 'center', gap: 10,
+          border: `1px solid ${c.rule}`, borderRadius: 2,
+          background: c.paper, padding: '10px 14px',
+        }}>
+          <span style={{
+            fontFamily: SERIF, color: c.accent, fontSize: 18, fontStyle: 'italic',
+          }}>?</span>
+          <input value={input} onChange={(e) => setInput(e.target.value)}
+                 onKeyDown={(e) => e.key === 'Enter' && send()}
+                 placeholder="Ask anything about this chart…"
+                 style={{
+                   flex: 1, border: 'none', background: 'transparent',
+                   color: c.ink, fontSize: 14, outline: 'none',
+                   fontFamily: SERIF, padding: '2px 0',
+                 }} />
+          <button onClick={() => send()} style={{
+            padding: '6px 14px', borderRadius: 2,
+            background: c.accent, border: 'none', color: c.paper,
+            cursor: 'pointer', fontSize: 12, fontWeight: 500,
+            fontFamily: SANS, letterSpacing: '0.02em',
+          }}>Ask →</button>
+        </div>
+      </div>
+    </div>
+  );
+}
+// Render markdown-ish bold + inline citation markers like [src:foo.pdf#p2].
+function AssistantMessage({ c, m, patient }) {
+  // Replace [src:foo.pdf#p2] with superscript clickable cite numbers.
+  const citationsByKey = {};
+  let counter = 0;
+  const text = (m.text || '').replace(/\[src:([^\]#]+)(?:#p(\d+))?\]/g, (_match, src, page) => {
+    const key = `${src}|${page || ''}`;
+    if (!(key in citationsByKey)) {
+      counter += 1;
+      citationsByKey[key] = { n: counter, src, page: page ? parseInt(page, 10) : null };
+    }
+    return `‹CITE:${citationsByKey[key].n}›`;
+  });
+  // Now split on the placeholders + bold markdown.
+  const segments = text.split(/(‹CITE:\d+›|\*\*[^*]+\*\*)/g);
+  return (
+    <div>
+      <div style={{
+        fontFamily: MONO, fontSize: 9.5, color: c.faint, letterSpacing: '0.1em',
+        textTransform: 'uppercase', marginBottom: 8,
+      }}>The chart says</div>
+      <div style={{
+        fontFamily: SERIF, fontSize: 16.5, lineHeight: 1.55, color: c.ink,
+        letterSpacing: '-0.005em', textWrap: 'pretty',
+      }}>
+        {segments.map((seg, i) => {
+          if (seg.startsWith('‹CITE:')) {
+            const n = parseInt(seg.slice(6, -1), 10);
+            return (
+              <sup key={i} style={{
+                color: c.accent, fontFamily: SERIF, fontStyle: 'italic',
+                fontWeight: 700, fontSize: 11, padding: '0 2px',
+              }}>{n}</sup>
+            );
+          }
+          if (seg.startsWith('**') && seg.endsWith('**')) {
+            return <strong key={i} style={{ fontWeight: 600 }}>{seg.slice(2, -2)}</strong>;
+          }
+          return <React.Fragment key={i}>{seg}</React.Fragment>;
+        })}
+      </div>
+      {(m.citations && m.citations.length > 0) && (
+        <div style={{ marginTop: 16, paddingTop: 12, borderTop: `1px solid ${c.rule}` }}>
+          <div style={{
+            fontFamily: MONO, fontSize: 9.5, color: c.faint, letterSpacing: '0.1em',
+            textTransform: 'uppercase', marginBottom: 8,
+          }}>Drawn from</div>
+          {m.citations.map((cit, i) => (
+            <div key={i} style={{
+              display: 'flex', alignItems: 'baseline', gap: 12, padding: '6px 0',
+            }}>
+              <span style={{
+                fontFamily: SERIF, fontStyle: 'italic', color: c.accent,
+                fontSize: 14, width: 18, flexShrink: 0,
+              }}>{i + 1}.</span>
+              <span style={{ flex: 1, fontSize: 12.5, lineHeight: 1.45 }}>
+                {cit.snippet && (
+                  <span style={{ fontFamily: SERIF, color: c.ink, fontWeight: 500 }}>
+                    {cit.snippet}
+                  </span>
+                )}
+                {cit.snippet && <span style={{ color: c.muted }}> · </span>}
+                <span style={{ fontFamily: MONO, fontSize: 10.5, color: c.muted }}>
+                  {cit.source_id}{cit.page ? ` p.${cit.page}` : ''}
+                </span>
+              </span>
+            </div>
+          ))}
+        </div>
+      )}
+    </div>
+  );
+}
+// Blinking cursor keyframes, referenced as `edit-blink` by the "reading …"
+// indicator in ChatColumn. Injected once: the id check skips the insert when
+// a style element with this id already exists.
+if (!document.getElementById('edit-keyframes')) {
+  const s = document.createElement('style');
+  s.id = 'edit-keyframes';
+  s.textContent = `@keyframes edit-blink { 0%, 100% { opacity: 1; } 50% { opacity: 0; } }`;
+  document.head.appendChild(s);
+}
+// Mount the app into the #root element.
+ReactDOM.createRoot(document.getElementById('root')).render(<App />);

static/index.html ADDED Viewed

	@@ -0,0 +1,40 @@

+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <title>Recap — reads the whole chart</title>
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <meta name="description" content="Recap reads a patient's whole chart so you don't have to. Powered by MedGemma + Qwen on AMD MI300X." />
+  <!-- Web fonts: Source Serif 4, Inter and JetBrains Mono from Google Fonts. -->
+  <link rel="preconnect" href="https://fonts.googleapis.com" />
+  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
+  <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Source+Serif+4:ital,opsz,wght@0,8..60,400;0,8..60,500;0,8..60,600;1,8..60,400;1,8..60,500&family=Inter:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap" />
+  <style>
+    /* Page fills the viewport; the background here is what shows before the app paints. */
+    html, body, #root {
+      margin: 0; padding: 0; height: 100%; min-height: 100%;
+      background: #f4ede2;
+      font-family: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui, sans-serif;
+      -webkit-font-smoothing: antialiased;
+      -moz-osx-font-smoothing: grayscale;
+    }
+    /* Thin, translucent scrollbars (WebKit-prefixed selectors only). */
+    *::-webkit-scrollbar { width: 8px; height: 8px; }
+    *::-webkit-scrollbar-thumb { background: rgba(127,127,127,.28); border-radius: 4px; }
+    *::-webkit-scrollbar-thumb:hover { background: rgba(127,127,127,.45); }
+    *::-webkit-scrollbar-track { background: transparent; }
+  </style>
+  <!-- React, ReactDOM and Babel standalone are loaded from unpkg at pinned versions. -->
+  <script src="https://unpkg.com/react@18.3.1/umd/react.production.min.js" crossorigin="anonymous"></script>
+  <script src="https://unpkg.com/react-dom@18.3.1/umd/react-dom.production.min.js" crossorigin="anonymous"></script>
+  <script src="https://unpkg.com/@babel/standalone@7.29.0/babel.min.js" crossorigin="anonymous"></script>
+</head>
+<body>
+  <!-- Static splash inside #root, present in the markup before any script runs. -->
+  <div id="root">
+    <div style="position:absolute;inset:0;display:grid;place-items:center;font-family:'Source Serif 4',Georgia,serif;color:#3a2e25;">
+      <div style="text-align:center;">
+        <div style="font-size:42px;letter-spacing:-.03em;">Recap<span style="color:#b8412e;">.</span></div>
+        <div style="font-style:italic;color:#6b5c4a;margin-top:8px;">loading the chart…</div>
+      </div>
+    </div>
+  </div>
+  <!-- type="text/babel" hands app.jsx to Babel standalone with the env and react presets. -->
+  <script type="text/babel" data-presets="env,react" src="/static/app.jsx"></script>
+</body>
+</html>

tests/__init__.py ADDED Viewed

File without changes

tests/fixtures/_make_tiny_pdf.py ADDED Viewed

	@@ -0,0 +1,19 @@

+"""One-shot fixture generator. Produces tests/fixtures/tiny_lab.pdf.
+Run: python tests/fixtures/_make_tiny_pdf.py
+We commit the resulting PDF so tests can run without reportlab in CI.
+"""
+from reportlab.pdfgen import canvas
+OUT = "tests/fixtures/tiny_lab.pdf"
+c = canvas.Canvas(OUT)
+c.drawString(72, 750, "LABORATORY REPORT")
+c.drawString(72, 720, "Patient: Jane Doe   Date: 2022-03-14")
+c.drawString(72, 690, "Creatinine: 1.4 mg/dL  (Reference: 0.6-1.2)")
+c.drawString(72, 660, "eGFR: 52 mL/min/1.73m^2")
+c.showPage()
+c.save()
+print(f"Wrote {OUT}")

tests/fixtures/tiny_fhir.json ADDED Viewed

	@@ -0,0 +1,79 @@

+{
+  "resourceType": "Bundle",
+  "type": "transaction",
+  "entry": [
+    {
+      "resource": {
+        "resourceType": "Patient",
+        "id": "p1",
+        "name": [{"family": "Doe123", "given": ["Jane45"]}],
+        "birthDate": "1957-04-12",
+        "gender": "female"
+      }
+    },
+    {
+      "resource": {
+        "resourceType": "Observation",
+        "id": "o1",
+        "status": "final",
+        "code": {"text": "Creatinine"},
+        "effectiveDateTime": "2022-03-14T10:00:00Z",
+        "valueQuantity": {"value": 1.4, "unit": "mg/dL"},
+        "subject": {"reference": "Patient/p1"}
+      }
+    },
+    {
+      "resource": {
+        "resourceType": "Encounter",
+        "id": "e1",
+        "status": "finished",
+        "class": {"code": "AMB", "display": "Ambulatory"},
+        "period": {"start": "2022-03-14T09:30:00Z", "end": "2022-03-14T10:15:00Z"},
+        "reasonCode": [{"text": "Nephrology consult"}],
+        "subject": {"reference": "Patient/p1"}
+      }
+    },
+    {
+      "resource": {
+        "resourceType": "MedicationRequest",
+        "id": "m1",
+        "status": "active",
+        "intent": "order",
+        "medicationCodeableConcept": {"text": "Lisinopril 10 mg"},
+        "authoredOn": "2022-03-14",
+        "subject": {"reference": "Patient/p1"}
+      }
+    },
+    {
+      "resource": {
+        "resourceType": "Condition",
+        "id": "c1",
+        "clinicalStatus": {"coding": [{"code": "active"}]},
+        "code": {"text": "Chronic kidney disease, stage 3"},
+        "onsetDateTime": "2022-04-01T00:00:00Z",
+        "subject": {"reference": "Patient/p1"}
+      }
+    },
+    {
+      "resource": {
+        "resourceType": "Procedure",
+        "id": "pr1",
+        "status": "completed",
+        "code": {"text": "Renal ultrasound"},
+        "performedDateTime": "2022-04-15T11:00:00Z",
+        "subject": {"reference": "Patient/p1"}
+      }
+    },
+    {
+      "resource": {
+        "resourceType": "DiagnosticReport",
+        "id": "dr1",
+        "status": "final",
+        "code": {"text": "Comprehensive metabolic panel"},
+        "effectiveDateTime": "2022-03-14T10:30:00Z",
+        "conclusion": "Mildly elevated creatinine consistent with stage 3 CKD.",
+        "subject": {"reference": "Patient/p1"}
+      }
+    }
+  ]
+}

tests/fixtures/tiny_lab.pdf ADDED Viewed

	@@ -0,0 +1,68 @@

+%PDF-1.3
+%���� ReportLab Generated PDF document (opensource)
+1 0 obj
+<<
+/F1 2 0 R
+>>
+endobj
+2 0 obj
+<<
+/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
+>>
+endobj
+3 0 obj
+<<
+/Contents 7 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 6 0 R /Resources <<
+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
+>> /Rotate 0 /Trans <<
+>>
+  /Type /Page
+>>
+endobj
+4 0 obj
+<<
+/PageMode /UseNone /Pages 6 0 R /Type /Catalog
+>>
+endobj
+5 0 obj
+<<
+/Author (anonymous) /CreationDate (D:20260504232000+05'00') /Creator (anonymous) /Keywords () /ModDate (D:20260504232000+05'00') /Producer (ReportLab PDF Library - \(opensource\))
+  /Subject (unspecified) /Title (untitled) /Trapped /False
+>>
+endobj
+6 0 obj
+<<
+/Count 1 /Kids [ 3 0 R ] /Type /Pages
+>>
+endobj
+7 0 obj
+<<
+/Filter [ /ASCII85Decode /FlateDecode ] /Length 232
+>>
+stream
+Garo:4U]+l&4Ckp`KVht\Qr]sl/F']+ED9CL_+YQ&afeDs2W#Z===8U%/V)50Hp)&(C)Jpad,1#BgHI'67Qe^'RKVgk)=\*+:dG3>h6?Jg)aZ]LYBlREed><&3LMZVXN%/"nmpWX<.dWh=Om$%<H&l&Z't'fj^&ESf0H"o)YU?dG9t$"e<S5>CENdK\jXM6nt;\s)$Fse(rRjQnr!4\BZH9k;.>E0+O`n9f>Fn~>endstream
+endobj
+xref
+0 8
+0000000000 65535 f
+0000000061 00000 n
+0000000092 00000 n
+0000000199 00000 n
+0000000402 00000 n
+0000000470 00000 n
+0000000731 00000 n
+0000000790 00000 n
+trailer
+<<
+/ID
+[<de94e39f5d88808ade2bdab9cb3e3993><de94e39f5d88808ade2bdab9cb3e3993>]
+% ReportLab generated PDF document -- digest (opensource)
+/Info 5 0 R
+/Root 4 0 R
+/Size 8
+>>
+startxref
+1112
+%%EOF

tests/test_cases.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import json
+import shutil
+from recap.cases import load_case
+def test_load_case_with_only_fhir(tmp_path):
+    """A case whose manifest references only a FHIR bundle loads with events,
+    and the manifest's ``display_name`` is the one exposed on the patient."""
+    case = tmp_path / "tiny"
+    case.mkdir()
+    (case / "manifest.json").write_text(json.dumps({
+        "id": "tiny",
+        "display_name": "Tiny Test",
+        "fhir_bundle": "fhir.json",
+        "docs": [],
+        "images": [],
+        "demo_questions": [],
+    }))
+    shutil.copy("tests/fixtures/tiny_fhir.json", case / "fhir.json")
+    p = load_case(str(tmp_path), "tiny")
+    assert p.id == "tiny"
+    assert p.display_name == "Tiny Test"  # manifest override wins
+    assert len(p.events) > 0
+def test_load_case_pulls_display_name_from_fhir_when_manifest_omits_it(tmp_path):
+    """Minimal manifest — name, age, gender all come from the FHIR Patient resource."""
+    case = tmp_path / "auto"
+    case.mkdir()
+    # The manifest deliberately omits display_name, docs and images.
+    (case / "manifest.json").write_text(json.dumps({
+        "id": "auto",
+        "fhir_bundle": "fhir.json",
+        "demo_questions": [],
+    }))
+    shutil.copy("tests/fixtures/tiny_fhir.json", case / "fhir.json")
+    p = load_case(str(tmp_path), "auto")
+    # The fixture stores the name as given "Jane45" / family "Doe123"; the
+    # expected display name has the trailing digits removed.
+    assert p.display_name.startswith("Jane Doe")  # from FHIR Patient.name
+    # Fixture birthDate is 1957-04-12, so the exact age depends on the run date.
+    assert p.age is not None and p.age >= 60
+    assert p.gender == "female"
+def test_load_case_with_pdf_docs(tmp_path):
+    """A PDF listed under ``docs`` yields exactly one event whose source is the
+    file's basename, whose category comes from the manifest entry, and whose
+    body contains the word "Creatinine"."""
+    case = tmp_path / "tiny"
+    case.mkdir()
+    (case / "docs").mkdir()
+    shutil.copy("tests/fixtures/tiny_lab.pdf", case / "docs" / "lab.pdf")
+    # No FHIR bundle here: the PDF is the only source of events.
+    (case / "manifest.json").write_text(json.dumps({
+        "id": "tiny",
+        "display_name": "Tiny",
+        "fhir_bundle": None,
+        "docs": [{
+            "file": "docs/lab.pdf",
+            "date": "2022-03-14",
+            "category": "lab",
+            "title": "Renal panel",
+        }],
+        "images": [],
+        "demo_questions": [],
+    }))
+    p = load_case(str(tmp_path), "tiny")
+    pdf_events = [e for e in p.events if e.source == "lab.pdf"]
+    assert len(pdf_events) == 1
+    assert pdf_events[0].category == "lab"
+    assert "Creatinine" in pdf_events[0].body
+def test_load_case_with_images(tmp_path):
+    """An image listed under ``images`` is the case's only event; it carries
+    the manifest category and uses the file's basename as its source."""
+    case = tmp_path / "tiny"
+    case.mkdir()
+    (case / "images").mkdir()
+    from PIL import Image
+    # A 10x10 blank PNG is generated on the fly instead of shipping a fixture.
+    Image.new("RGB", (10, 10), "white").save(case / "images" / "fundus.png")
+    (case / "manifest.json").write_text(json.dumps({
+        "id": "tiny",
+        "display_name": "Tiny",
+        "fhir_bundle": None,
+        "docs": [],
+        "images": [{
+            "file": "images/fundus.png",
+            "date": "2023-04-01",
+            "category": "scan",
+            "title": "Fundus photo",
+        }],
+        "demo_questions": [],
+    }))
+    p = load_case(str(tmp_path), "tiny")
+    assert len(p.events) == 1
+    assert p.events[0].category == "scan"
+    assert p.events[0].source == "fundus.png"
+def test_load_case_events_chronologically_orderable(tmp_path):
+    """Multi-source case (FHIR + PDF + image) yields a sortable timeline."""
+    case = tmp_path / "tiny"
+    case.mkdir()
+    (case / "docs").mkdir()
+    (case / "images").mkdir()
+    shutil.copy("tests/fixtures/tiny_fhir.json", case / "fhir.json")
+    shutil.copy("tests/fixtures/tiny_lab.pdf", case / "docs" / "lab.pdf")
+    from PIL import Image
+    Image.new("RGB", (10, 10), "white").save(case / "images" / "fundus.png")
+    (case / "manifest.json").write_text(json.dumps({
+        "id": "tiny",
+        "display_name": "Tiny",
+        "fhir_bundle": "fhir.json",
+        "docs": [{
+            "file": "docs/lab.pdf",
+            "date": "2022-03-14",
+            "category": "lab",
+            "title": "Lab",
+        }],
+        "images": [{
+            "file": "images/fundus.png",
+            "date": "2023-04-01",
+            "category": "scan",
+            "title": "Fundus",
+        }],
+        "demo_questions": [],
+    }))
+    p = load_case(str(tmp_path), "tiny")
+    sorted_dates = sorted(e.date for e in p.events)
+    assert sorted_dates == [e.date for e in sorted(p.events, key=lambda e: e.date)]

tests/test_inference_gateway.py ADDED Viewed

	@@ -0,0 +1,63 @@

+"""Tests for the inference gateway. These exercise citation parsing and
+backend routing without loading any model — the mock backend is enough.
+"""
+import os
+from datetime import datetime
+import pytest
+import recap.inference.gateway as gw
+from recap.models import Event
+def _ev(src, date_iso="2022-03-14"):
+    return Event(
+        id=src,
+        date=datetime.fromisoformat(date_iso),
+        category="lab",
+        title=f"Record from {src}",
+        source=src,
+    )
+def test_parses_citations_from_model_output():
+    """Both ``[src:file#pN]`` and ``[src:file]`` markers are parsed; the page is
+    the integer after ``#p`` when present and ``None`` otherwise."""
+    text = (
+        "Creatinine first crossed normal in March 2022 [src:lab_2022.pdf#p1]. "
+        "eGFR was 52 [src:lab_2022.pdf]."
+    )
+    cites = gw._parse_citations(text, [_ev("lab_2022.pdf")])
+    # Same file, but one citation has a page and the other does not, so both are kept.
+    assert len(cites) == 2
+    assert cites[0].page == 1
+    assert cites[1].page is None
+def test_dedupes_repeated_citations():
+    """The same marker appearing twice in the text produces a single citation."""
+    text = "[src:a.pdf] said X [src:a.pdf]"
+    cites = gw._parse_citations(text, [_ev("a.pdf")])
+    assert len(cites) == 1
+def test_drops_citations_to_unknown_sources():
+    """A marker naming a source that is not among the supplied events is discarded."""
+    text = "[src:hallucinated.pdf]"
+    # Empty event list: no source can match.
+    cites = gw._parse_citations(text, [])
+    assert cites == []
+def test_dedupe_treats_different_pages_as_different_citations():
+    """Markers for the same file but different pages are kept as separate citations."""
+    text = "[src:a.pdf#p1] earlier [src:a.pdf#p2] later"
+    cites = gw._parse_citations(text, [_ev("a.pdf")])
+    assert len(cites) == 2
+    assert {c.page for c in cites} == {1, 2}
+def test_answer_end_to_end_with_mock_backend(monkeypatch):
+    """Full pipeline: question -> retrieve -> mock -> cited answer."""
+    # NOTE(review): presumably RECAP_BACKEND=mock is what routes gw.answer to
+    # the mock backend; the "[mock answer]" assertion below relies on it.
+    monkeypatch.setenv("RECAP_BACKEND", "mock")
+    events = [
+        _ev("lab_2022.pdf", "2022-03-14"),
+        _ev("visit_2023.pdf", "2023-01-01"),
+    ]
+    a = gw.answer("when did the lab change", events)
+    assert "[mock answer]" in a.text
+    # At least one citation must point at one of the supplied events.
+    assert any(c.source_id in {"lab_2022.pdf", "visit_2023.pdf"} for c in a.citations)

tests/test_ingestion_fhir.py ADDED Viewed

	@@ -0,0 +1,75 @@

+from datetime import datetime, timezone
+from recap.ingestion.fhir import load_bundle, load_demographics
+FIXTURE = "tests/fixtures/tiny_fhir.json"
+def test_loads_observation_as_lab_event():
+    """The fixture's single Observation becomes one "lab" event dated by its
+    ``effectiveDateTime``, with the analyte name in the title."""
+    events = load_bundle(FIXTURE, source_id="tiny_fhir.json")
+    labs = [e for e in events if e.category == "lab"]
+    assert len(labs) == 1
+    assert "Creatinine" in labs[0].title
+    # The numeric value may be rendered in either the title or the body.
+    assert "1.4" in labs[0].title or "1.4" in labs[0].body
+    # Fixture timestamp is "2022-03-14T10:00:00Z"; expected value is timezone-aware UTC.
+    assert labs[0].date == datetime.fromisoformat("2022-03-14T10:00:00+00:00")
+def test_loads_encounter_as_visit_event():
+    """The fixture's Encounter becomes one "visit" event whose title includes
+    "Nephrology" (the fixture's reasonCode text is "Nephrology consult")."""
+    events = load_bundle(FIXTURE, source_id="tiny_fhir.json")
+    visits = [e for e in events if e.category == "visit"]
+    assert len(visits) == 1
+    assert "Nephrology" in visits[0].title
+def test_loads_medication_as_med_event():
+    """The fixture's MedicationRequest becomes one "med" event titled with the drug name."""
+    events = load_bundle(FIXTURE, source_id="tiny_fhir.json")
+    meds = [e for e in events if e.category == "med"]
+    assert len(meds) == 1
+    assert "Lisinopril" in meds[0].title
+def test_loads_condition_as_diagnosis_event():
+    """The fixture's Condition becomes one "diagnosis" event and exposes its
+    clinical status code under ``metadata["clinical_status"]``."""
+    events = load_bundle(FIXTURE, source_id="tiny_fhir.json")
+    dx = [e for e in events if e.category == "diagnosis"]
+    assert len(dx) == 1
+    assert "Chronic kidney disease" in dx[0].title
+    assert dx[0].metadata["clinical_status"] == "active"
+def test_loads_procedure_as_procedure_event():
+    """The fixture's Procedure becomes one "procedure" event titled with the procedure name."""
+    events = load_bundle(FIXTURE, source_id="tiny_fhir.json")
+    procs = [e for e in events if e.category == "procedure"]
+    assert len(procs) == 1
+    assert "Renal ultrasound" in procs[0].title
+def test_loads_diagnostic_report_as_report_event():
+    """The fixture's DiagnosticReport becomes one "report" event; its title
+    names the panel and its body contains the report's conclusion text."""
+    events = load_bundle(FIXTURE, source_id="tiny_fhir.json")
+    reports = [e for e in events if e.category == "report"]
+    assert len(reports) == 1
+    # Case-insensitive: the fixture capitalises "Comprehensive metabolic panel".
+    assert "metabolic panel" in reports[0].title.lower()
+    assert "stage 3 CKD" in reports[0].body
+def test_events_are_chronologically_orderable():
+    events = load_bundle(FIXTURE, source_id="tiny_fhir.json")
+    sorted_events = sorted(events, key=lambda e: e.date)
+    assert [e.id for e in sorted_events] == [e.id for e in sorted(events, key=lambda e: e.date)]
+def test_load_demographics_extracts_name_age_gender():
+    """Demographics are read from the bundle's Patient resource: a cleaned
+    display name, the gender, and a computed age."""
+    demo = load_demographics(FIXTURE)
+    assert demo is not None
+    # Trailing digits ("Jane45 Doe123") stripped for display
+    assert demo.display_name.startswith("Jane Doe")
+    assert demo.gender == "female"
+    # Born 1957 → age depends on current date but should be > 60
+    assert demo.age is not None and demo.age >= 60
+def test_load_demographics_returns_none_if_no_patient_resource(tmp_path):
+    """A bundle with no entries (hence no Patient resource) yields ``None``."""
+    import json
+    p = tmp_path / "no_patient.json"
+    p.write_text(json.dumps({"resourceType": "Bundle", "entry": []}))
+    assert load_demographics(str(p)) is None

tests/test_ingestion_image.py ADDED Viewed

	@@ -0,0 +1,35 @@

+from datetime import datetime
+from PIL import Image
+from recap.ingestion.image import load_image_event
+def test_loads_image_with_provided_date_and_category(tmp_path):
+    """The caller-supplied category, title, date and source id are reflected on
+    the event returned for an image file."""
+    p = tmp_path / "fundus.png"
+    Image.new("RGB", (100, 100), "white").save(p)
+    e = load_image_event(
+        str(p),
+        category="scan",
+        title="Right fundus",
+        date_iso="2023-04-01",
+        source_id="fundus_2023.png",
+    )
+    assert e.category == "scan"
+    # A date-only input is expected to become midnight UTC (timezone-aware).
+    assert e.date == datetime.fromisoformat("2023-04-01T00:00:00+00:00")
+    assert e.source == "fundus_2023.png"
+    assert "fundus" in e.title.lower()
+def test_default_source_id_is_filename(tmp_path):
+    """When ``source_id`` is not passed, the event's source is the image's file name."""
+    p = tmp_path / "ct_chest.png"
+    Image.new("RGB", (10, 10), "black").save(p)
+    e = load_image_event(str(p), category="scan", title="CT chest", date_iso="2024-01-15")
+    assert e.source == "ct_chest.png"
+def test_image_path_preserved_in_metadata(tmp_path):
+    """The path string passed in is stored unchanged under ``metadata["path"]``."""
+    p = tmp_path / "wound.jpg"
+    Image.new("RGB", (10, 10), "red").save(p)
+    e = load_image_event(str(p), category="photo", title="Wound day 7", date_iso="2024-06-20")
+    assert e.metadata["path"] == str(p)

tests/test_ingestion_pdf.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from recap.ingestion.pdf import load_pdf
+def test_extracts_pages_with_text_and_metadata():
+    """The one-page fixture PDF yields a single page numbered 1 whose text
+    contains the lab name and value."""
+    pages = load_pdf("tests/fixtures/tiny_lab.pdf")
+    assert len(pages) == 1
+    # Page numbers are 1-based.
+    assert pages[0].page_number == 1
+    assert "Creatinine" in pages[0].text
+    assert "1.4 mg/dL" in pages[0].text
+def test_pages_have_source_id():
+    """An explicit ``source_id`` is carried onto each returned page."""
+    pages = load_pdf("tests/fixtures/tiny_lab.pdf", source_id="lab_2022-03-14.pdf")
+    assert pages[0].source_id == "lab_2022-03-14.pdf"
+def test_default_source_id_is_filename():
+    """Without an explicit ``source_id``, pages use the PDF's file name."""
+    pages = load_pdf("tests/fixtures/tiny_lab.pdf")
+    assert pages[0].source_id == "tiny_lab.pdf"

tests/test_mi300x_client.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import httpx
+import pytest
+import recap.inference.mi300x_client as client
+class _FakeResp:
+    def __init__(self, status_code=200, json_data=None):
+        self.status_code = status_code
+        self._json = json_data or {}
+    def raise_for_status(self):
+        if self.status_code >= 400:
+            req = httpx.Request("POST", "http://x")
+            raise httpx.HTTPStatusError("err", request=req, response=httpx.Response(self.status_code))
+    def json(self):
+        return self._json
+def test_raises_when_url_unset(monkeypatch):
+    """``_post`` raises ``RuntimeError`` naming ``RECAP_MI300X_URL`` when that
+    environment variable is not set."""
+    # raising=False: do not fail if the variable is already absent.
+    monkeypatch.delenv("RECAP_MI300X_URL", raising=False)
+    with pytest.raises(RuntimeError, match="RECAP_MI300X_URL"):
+        client._post("medgemma", "sys", "user")
+def test_posts_to_correct_url(monkeypatch):
+    """``_post`` sends the system/user prompts as JSON to ``<base URL>/<model>``
+    and returns the ``text`` field of the JSON response."""
+    monkeypatch.setenv("RECAP_MI300X_URL", "https://example.test")
+    seen = {}
+    # Records what the client sends instead of performing a real HTTP request.
+    def fake_post(url, json, timeout):
+        seen["url"] = url
+        seen["json"] = json
+        return _FakeResp(200, {"text": "hello"})
+    monkeypatch.setattr(client.httpx, "post", fake_post)
+    out = client._post("qwen", "sys-prompt", "user-prompt")
+    assert out == "hello"
+    assert seen["url"] == "https://example.test/qwen"
+    assert seen["json"] == {"system": "sys-prompt", "user": "user-prompt"}
+def test_retries_on_transport_errors(monkeypatch):
+    """Two ``httpx.ConnectError`` failures followed by a success still return
+    the response text, after exactly three calls."""
+    monkeypatch.setenv("RECAP_MI300X_URL", "https://example.test")
+    # Replace time.sleep with a no-op so the test does not actually wait.
+    monkeypatch.setattr(client.time, "sleep", lambda *_: None)
+    calls = {"n": 0}
+    # Fails on the first two calls, succeeds on the third.
+    def flaky_post(url, json, timeout):
+        calls["n"] += 1
+        if calls["n"] < 3:
+            raise httpx.ConnectError("boom")
+        return _FakeResp(200, {"text": "ok"})
+    monkeypatch.setattr(client.httpx, "post", flaky_post)
+    out = client._post("medgemma", "s", "u")
+    assert out == "ok"
+    assert calls["n"] == 3
+def test_gives_up_after_three_attempts(monkeypatch):
+    """When every call raises ``httpx.ConnectError``, ``_post`` stops after
+    three calls and raises ``RuntimeError`` ("failed after 3 attempts")."""
+    monkeypatch.setenv("RECAP_MI300X_URL", "https://example.test")
+    # Replace time.sleep with a no-op so the test does not actually wait.
+    monkeypatch.setattr(client.time, "sleep", lambda *_: None)
+    calls = {"n": 0}
+    def always_fail(url, json, timeout):
+        calls["n"] += 1
+        raise httpx.ConnectError("down")
+    monkeypatch.setattr(client.httpx, "post", always_fail)
+    with pytest.raises(RuntimeError, match="failed after 3 attempts"):
+        client._post("medgemma", "s", "u")
+    assert calls["n"] == 3
+def test_strips_trailing_slash_from_url(monkeypatch):
+    """A base URL ending in ``/`` does not produce a double slash in the request URL."""
+    monkeypatch.setenv("RECAP_MI300X_URL", "https://example.test/")
+    seen = {}
+    def fake_post(url, json, timeout):
+        seen["url"] = url
+        return _FakeResp(200, {"text": ""})
+    monkeypatch.setattr(client.httpx, "post", fake_post)
+    client._post("qwen", "s", "u")
+    assert seen["url"] == "https://example.test/qwen"

tests/test_models.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from datetime import datetime
+from recap.models import Citation, Event, Patient
+def test_citation_roundtrip():
+    """``Citation.model_dump()`` returns a mapping that preserves ``source_id`` and ``page``."""
+    c = Citation(source_id="lab_2022-03-14.pdf", page=2, snippet="Cr 1.4 mg/dL")
+    d = c.model_dump()
+    assert d["source_id"] == "lab_2022-03-14.pdf"
+    assert d["page"] == 2
+def test_event_orderable_by_date():
+    """Events sort chronologically when keyed on their ``date`` attribute."""
+    a = Event(
+        id="a",
+        date=datetime(2022, 3, 14),
+        category="lab",
+        title="Cr 1.4",
+        source="lab_2022-03-14.pdf",
+    )
+    b = Event(
+        id="b",
+        date=datetime(2023, 1, 1),
+        category="visit",
+        title="Nephrology",
+        source="visit_2023-01-01.pdf",
+    )
+    # Input is deliberately in reverse chronological order.
+    assert sorted([b, a], key=lambda e: e.date) == [a, b]
+def test_patient_holds_events():
+    """A Patient can be constructed with an empty event list and exposes its id."""
+    p = Patient(id="sarah", display_name="Sarah, 67", events=[])
+    assert p.id == "sarah"
+    assert len(p.events) == 0

tests/test_reasoner.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from recap.reasoner import EXTRACT_SYSTEM, SYNTHESIZE_SYSTEM, two_stage
+def test_two_stage_pipes_extract_into_synthesize():
+    """``two_stage`` calls the extract function with the records and question,
+    feeds its output to the synthesize function together with the question,
+    and returns the synthesize function's result."""
+    captured: dict = {}
+    # Both fakes record the prompts they receive so they can be inspected below.
+    def fake_extract(system, user):
+        captured["extract_system"] = system
+        captured["extract_user"] = user
+        return "Cr 1.4 mg/dL on 2022-03-14 [src:lab.pdf]"
+    def fake_synth(system, user):
+        captured["synth_system"] = system
+        captured["synth_user"] = user
+        return "Creatinine first crossed normal in March 2022 [src:lab.pdf]."
+    out = two_stage(
+        "When did kidney function decline?",
+        "Patient records block",
+        extract_fn=fake_extract,
+        synthesize_fn=fake_synth,
+    )
+    assert out == "Creatinine first crossed normal in March 2022 [src:lab.pdf]."
+    # Extract sees the records + question
+    assert "Patient records block" in captured["extract_user"]
+    assert "When did kidney function decline?" in captured["extract_user"]
+    assert captured["extract_system"] == EXTRACT_SYSTEM
+    # Synthesize sees the extracted evidence
+    assert "Cr 1.4 mg/dL on 2022-03-14 [src:lab.pdf]" in captured["synth_user"]
+    assert "When did kidney function decline?" in captured["synth_user"]
+    assert captured["synth_system"] == SYNTHESIZE_SYSTEM
+def test_citations_survive_the_pipeline():
+    """The whole point of two-stage is that MedGemma's [src:...] markers
+    flow through Qwen's synthesis intact, so the gateway can parse them."""
+    def fake_extract(s, u):
+        return "Cr 1.4 [src:lab_2022.pdf#p1] eGFR 52 [src:lab_2022.pdf#p1]"
+    def fake_synth(s, u):
+        return "She crossed the CKD threshold [src:lab_2022.pdf#p1]."
+    out = two_stage("when?", "block", extract_fn=fake_extract, synthesize_fn=fake_synth)
+    # two_stage must return the synthesized text without removing the marker.
+    assert "[src:lab_2022.pdf#p1]" in out
+def test_evidence_string_is_passed_verbatim_to_synth():
+    """Evidence reaches the synthesize prompt with surrounding whitespace removed.
+
+    NOTE(review): despite "verbatim" in the test name, the assertions below
+    require the extract output to be stripped: the padded form must be absent
+    from the synthesize prompt and the bare text must be present.
+    """
+    seen = []
+    def fake_extract(s, u):
+        return "  evidence text  \n\n"
+    def fake_synth(s, u):
+        seen.append(u)
+        return "answer"
+    two_stage("q", "b", extract_fn=fake_extract, synthesize_fn=fake_synth)
+    assert "  evidence text  " not in seen[0]
+    assert "evidence text" in seen[0]

tests/test_retrieval.py ADDED Viewed

	@@ -0,0 +1,41 @@

+from datetime import datetime
+from recap.models import Event
+from recap.retrieval import retrieve
+def _ev(eid, body, date_iso="2022-01-01"):
+    return Event(
+        id=eid,
+        date=datetime.fromisoformat(date_iso),
+        category="lab",
+        title=body,
+        source="x",
+        body=body,
+    )
+def test_retrieves_relevant_events_for_question():
+    """The event sharing the question's terms ("creatinine", "abnormal") is ranked first."""
+    events = [
+        _ev("a", "Creatinine 1.4 mg/dL — first abnormal reading"),
+        _ev("b", "Influenza vaccination administered"),
+        _ev("c", "Hemoglobin A1c 8.2%"),
+    ]
+    hits = retrieve("when did creatinine become abnormal", events, top_k=2)
+    # Only the top hit is pinned; the second result is left unspecified.
+    assert hits[0].id == "a"
+def test_retrieve_returns_at_most_top_k():
+    """With 20 matching events and ``top_k=5``, exactly five hits are returned."""
+    events = [_ev(str(i), f"event {i}") for i in range(20)]
+    hits = retrieve("event", events, top_k=5)
+    assert len(hits) == 5
+def test_retrieve_handles_empty_event_list():
+    """Retrieving from an empty event list returns an empty list."""
+    assert retrieve("anything", [], top_k=5) == []
+def test_retrieve_falls_back_to_first_k_when_no_match():
+    """A question matching no event still returns ``top_k`` results.
+
+    NOTE(review): only the result count is asserted here; which events come
+    back (the "first k" of the name) is not checked.
+    """
+    events = [_ev("a", "alpha"), _ev("b", "beta"), _ev("c", "gamma")]
+    hits = retrieve("zzzzz", events, top_k=2)
+    assert len(hits) == 2  # falls back to first k rather than empty

tests/test_timeline.py ADDED Viewed

	@@ -0,0 +1,33 @@

+from datetime import datetime
+from recap.models import Event
+from recap.timeline import build_timeline
+def _ev(date_iso, cat="lab", title="t"):
+    return Event(
+        id=date_iso,
+        date=datetime.fromisoformat(date_iso),
+        category=cat,
+        title=title,
+        source="x",
+    )
+def test_timeline_sorts_chronologically():
+    """Events passed out of order come back from ``build_timeline`` in ascending date order."""
+    events = [_ev("2023-01-01"), _ev("2020-05-15"), _ev("2022-12-31")]
+    tl = build_timeline(events)
+    dates = [e.date for e in tl.events]
+    assert dates == sorted(dates)
+def test_timeline_groups_by_year():
+    """``years_covered`` lists each calendar year once, even when a year has several events."""
+    events = [_ev("2020-01-01"), _ev("2020-06-01"), _ev("2021-01-01")]
+    tl = build_timeline(events)
+    # sorted(): the test does not depend on the order of years_covered.
+    assert sorted(tl.years_covered) == [2020, 2021]
+def test_empty_timeline_handles_zero_events():
+    """An empty input produces a timeline with no events and no covered years."""
+    tl = build_timeline([])
+    assert tl.events == []
+    assert tl.years_covered == []