TheJackBright committed (verified) · Commit e21fe7d · 1 Parent(s): 21c7db9

Deploy PolyGuard OpenEnv Space (polyguard-rl @ 52ecdc8)

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .env.example +8 -0
  2. .gitattributes +1 -0
  3. Dockerfile +3 -3
  4. Dockerfile.space +2 -3
  5. README.md +40 -4
  6. app/api/routes.py +12 -0
  7. app/api/schemas.py +8 -0
  8. app/tools/__init__.py +1 -0
  9. app/tools/medication_alternatives.py +463 -0
  10. app/ui/frontend/src/App.tsx +14 -0
  11. app/ui/frontend/src/components/AlternativeMedicineSearch.tsx +215 -0
  12. app/ui/frontend/src/styles/theme.css +102 -0
  13. docker/space/README.md +28 -10
  14. docs/DEMO_RECORDING_SCRIPT.md +493 -0
  15. docs/deployment.md +1 -1
  16. docs/final_submission_audit.md +1 -1
  17. docs/idea_document_traceability.md +57 -0
  18. docs/participant_guide_traceability.md +1 -1
  19. docs/ui.md +4 -0
  20. scripts/deploy_space_api.py +25 -3
  21. scripts/install_hf_active_bundle.py +18 -7
  22. submission_bundle/README.md +13 -0
  23. submission_bundle/grpo_training_cycle/docs_results/README.md +22 -0
  24. submission_bundle/grpo_training_cycle/docs_results/acceptance_gate.json +11 -0
  25. submission_bundle/grpo_training_cycle/docs_results/avg_process_fidelity.png +0 -0
  26. submission_bundle/grpo_training_cycle/docs_results/avg_reward.png +0 -0
  27. submission_bundle/grpo_training_cycle/docs_results/baselines.json +119 -0
  28. submission_bundle/grpo_training_cycle/docs_results/benchmark_report.json +52 -0
  29. submission_bundle/grpo_training_cycle/docs_results/benchmark_report.txt +52 -0
  30. submission_bundle/grpo_training_cycle/docs_results/dose_train.json +6 -0
  31. submission_bundle/grpo_training_cycle/docs_results/dosing_grpo.json +28 -0
  32. submission_bundle/grpo_training_cycle/docs_results/frontier_ready.json +8 -0
  33. submission_bundle/grpo_training_cycle/docs_results/graph_train.json +5 -0
  34. submission_bundle/grpo_training_cycle/docs_results/grpo_ablation_report.json +149 -0
  35. submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run.json +42 -0
  36. submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_auto.json +39 -0
  37. submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_fallback_check.json +39 -0
  38. submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_smoke.json +39 -0
  39. submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_strict_check.json +39 -0
  40. submission_bundle/grpo_training_cycle/docs_results/hf_space_verification.json +29 -0
  41. submission_bundle/grpo_training_cycle/docs_results/hf_training_status.json +123 -0
  42. submission_bundle/grpo_training_cycle/docs_results/improvement_report.json +19 -0
  43. submission_bundle/grpo_training_cycle/docs_results/improvement_report_benchmark.json +19 -0
  44. submission_bundle/grpo_training_cycle/docs_results/inference_benchmark.json +22 -0
  45. submission_bundle/grpo_training_cycle/docs_results/legality_rate.png +0 -0
  46. submission_bundle/grpo_training_cycle/docs_results/planner_grpo.json +28 -0
  47. submission_bundle/grpo_training_cycle/docs_results/plot_index.json +9 -0
  48. submission_bundle/grpo_training_cycle/docs_results/policy_stack_avg_reward.png +0 -0
  49. submission_bundle/grpo_training_cycle/docs_results/postsave_inference.json +43 -0
  50. submission_bundle/grpo_training_cycle/docs_results/risk_train.json +6 -0
.env.example CHANGED
@@ -20,3 +20,11 @@ POLYGUARD_FRONTIER_MODEL=Qwen/Qwen2.5-7B-Instruct
  POLYGUARD_ALLOW_WEB_FETCH=false
  POLYGUARD_REWARD_MIN=0.001
  POLYGUARD_REWARD_MAX=0.999
+
+ # --- Medication alternatives tool (FDA openFDA + optional external CDS) ---
+ # Optional: higher openFDA rate limits — request a key at https://open.fda.gov/apis/authentication/
+ # POLYGUARD_OPENFDA_API_KEY=
+ # Optional: POST { "drug_names": ["..."] } to your service; Bearer token if required (Tally/Vellum/custom).
+ # Never commit real tokens; set in Space secrets or local .env only.
+ # POLYGUARD_MED_TOOL_URL=
+ # POLYGUARD_MED_TOOL_TOKEN=
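For reference, the external hook behind `POLYGUARD_MED_TOOL_URL` only has to honor a small contract: accept `POST {"drug_names": [...]}` (optionally with a Bearer token) and return a JSON object whose `suggestions` list holds strings or dicts with at least `display_name`, which is what `_external_suggestions` in `app/tools/medication_alternatives.py` parses. A minimal illustrative stub (a hypothetical service, not part of this commit) could look like:

```python
# hypothetical_cds_stub.py — illustrative only; any service matching this request/response shape works.
from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel

app = FastAPI()


class ToolRequest(BaseModel):
    drug_names: list[str]


@app.post("/alternatives")
def alternatives(payload: ToolRequest, authorization: str | None = Header(default=None)) -> dict:
    # If POLYGUARD_MED_TOOL_TOKEN is set, PolyGuard sends "Authorization: Bearer <token>".
    if authorization is not None and not authorization.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="expected bearer token")
    # Plain strings or dicts with "display_name" are both accepted by the caller.
    return {"suggestions": [{"display_name": f"alternative-for-{name}"} for name in payload.drug_names]}
```

Point `POLYGUARD_MED_TOOL_URL` at the deployed route and keep `POLYGUARD_MED_TOOL_TOKEN` in Space secrets or a local `.env` only, as the comments above say.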
.gitattributes CHANGED
@@ -41,3 +41,4 @@ docs/results/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png filter
  docs/results/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
  docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
  docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
+ submission_bundle/qwen_completed_runs/charts/generated/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -1,6 +1,6 @@
- # Hugging Face Space: single-port edge (nginx) + OpenEnv (8100) + API (8200) + static UI.
- # Build from repository root: docker build -f Dockerfile.space -t polyguard-space .
- # Cheap tier: use Space "CPU basic"; first boot downloads ~1.1GB model bundle.
+ # Hugging Face Space: nginx on PORT (7860) + OpenEnv (8100) + API (8200) + Vite-built UI.
+ # Build: docker build -t polyguard-space .
+ # HF Spaces use this file by default when "Dockerfile path" is unset; keep this as the demo image.

  FROM node:20-bookworm-slim AS frontend
  WORKDIR /build
Dockerfile.space CHANGED
@@ -1,6 +1,5 @@
- # Hugging Face Space: single-port edge (nginx) + OpenEnv (8100) + API (8200) + static UI.
- # Build from repository root: docker build -f Dockerfile.space -t polyguard-space .
- # Cheap tier: use Space "CPU basic"; first boot downloads ~1.1GB model bundle.
+ # Same image as ./Dockerfile; use this path in HF Space settings if "Dockerfile path"
+ # must be explicit (e.g. Dockerfile.space). Keep in sync with Dockerfile.

  FROM node:20-bookworm-slim AS frontend
  WORKDIR /build
README.md CHANGED
@@ -1,12 +1,48 @@
  ---
  title: PolyGuard OpenEnv
- emoji: 🛡️
  colorFrom: blue
- colorTo: purple
+ colorTo: green
  sdk: docker
  app_port: 7860
  pinned: false
- license: mit
  ---

- Full-stack **PolyGuard** workbench: OpenEnv (WebSocket), FastAPI, and React UI behind nginx on `PORT`. Uses **CPU basic**; first cold start downloads the public [usable model bundle](https://huggingface.co/TheJackBright/polyguard-openenv-training-full-artifacts/tree/main/usable_model_bundles/local-qwen-0-5b-active-smoke) (~1.1 GB). See `docker/space/README.md` for details.
+ # PolyGuard (OpenEnv implementation package)
+
+ Run all CLI commands from this directory (`cd polyguard-rl`). The repository root [`README.md`](../README.md) carries the same submission narrative with paths adjusted for viewers landing on the GitHub repo home page.
+
+ ## Submission Links
+
+ - GitHub Repo URL: [https://github.com/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK](https://github.com/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK)
+ - HF Space URL: [https://huggingface.co/spaces/TheJackBright/polyguard-openenv](https://huggingface.co/spaces/TheJackBright/polyguard-openenv)
+ - Colab Notebook URL: [https://colab.research.google.com/github/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK/blob/master/polyguard-rl/PolyGuard_SFT_GRPO_One_Run_Runner.ipynb](https://colab.research.google.com/github/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK/blob/master/polyguard-rl/PolyGuard_SFT_GRPO_One_Run_Runner.ipynb) (see also `notebooks/09_training_loop.ipynb` for a modular training walkthrough)
+ - YouTube Video URL: not used for this submission; see Hugging Face Blog URL below.
+ - Hugging Face Blog URL: [https://huggingface.co/blog/TheJackBright/polyguard-openenv](https://huggingface.co/blog/TheJackBright/polyguard-openenv) *(publish `docs/hf_blog_draft.md` or replace with a live story URL)*
+
+ ## Problem Statement
+
+ Polypharmacy decisions are long-horizon, partially observable, and safety-critical. PolyGuard is a research environment where an LLM agent selects constrained clinical actions, receives verifier-backed reward, and improves via SFT + GRPO—not generic open-ended chat fine-tuning.
+
+ ## Environment
+
+ `PolyGuardEnv` exposes OpenEnv-style HTTP/WebSocket endpoints (`/reset`, `/step`, `/state`, `/metadata`, `/schema`, `/mcp`, `/health`, `/ws`). Sub-environments include DDI, bandit mining, regimen risk, precision dosing, longitudinal deprescribing, web-search missing data, alternative suggestion, and new-drug decomposition. See `openenv.yaml`, `app/env/env_core.py`, `app/env/fastapi_app.py`, and `docs/environment_design.md`.
+
+ ## Agent Capabilities
+
+ Medication reconciliation, evidence retrieval, graph safety, dosing guardrails, candidate generation, supervisor routing, planner/critic stack, explanations, and contextual bandit ranking for ablations (`app/agents/`, `docs/agents.md`).
+
+ ## Tasks
+
+ DDI risk reduction, safe adds/substitutions, regimen optimization, taper/deprescribing sequences, precision dosing, missing-data recovery, and new-drug decomposition (`data/scenarios/`, `app/env/catalog.py`).
+
+ ## Reward Model / Evaluation Logic
+
+ Thirteen verifier-backed reward components roll up into four primary channels (`safety_legality`, `clinical_improvement`, `dosing_quality`, `process_integrity`), clamped to `[0.001, 0.999]`, with anti-cheat and timeout logic (`app/env/reward_router.py`, `app/env/anti_cheat.py`, `docs/reward_design.md`).
+
+ ## Training And Post-Training Strategy
+
+ Build corpora (`scripts/bootstrap_data.py`, `scripts/build_training_corpus.py`), SFT with TRL (`scripts/train_sft_trl.py`), GRPO with environment reward (`scripts/train_grpo_trl.py`), merge adapters (`scripts/merge_adapters_safe.py`), validate inference (`scripts/test_inference_postsave.py`), evaluate and plot (`scripts/evaluate_*.py`, `docs/results/`). Optional HF GPU training: `scripts/deploy_training_space.py`. Full commands: repository root [`README.md`](../README.md) or `docs/training.md`.
+
+ ## Documentation index
+
+ - [Architecture](docs/architecture.md) · [Environment](docs/environment_design.md) · [Rewards](docs/reward_design.md) · [Training](docs/training.md) · [Evaluation](docs/evaluation.md) · [Deployment](docs/deployment.md) · [Datasets](docs/datasets.md) · [Participant guide traceability](docs/participant_guide_traceability.md) · [Idea doc vs implementation](docs/idea_document_traceability.md) · [**Space UI demo script**](docs/DEMO_RECORDING_SCRIPT.md)
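Reviewer note: the Environment section above names the OpenEnv-style endpoints but not their payloads. As a rough orientation only (the reset body `{difficulty, sub_environment}` and the `candidate_action_set` / `candidate_id` field names are taken from the demo script added later in this commit; the authoritative schemas are in `app/env/fastapi_app.py` and `app/api/schemas.py`), a reset-and-inspect round trip against a locally running Space image might look like:

```python
# Sketch only — endpoint shapes beyond /env/reset with {difficulty, sub_environment}
# are assumptions to verify against the repo before relying on them.
import requests

BASE = "http://localhost:7860/api"  # nginx routes /api/* to the product API on 8200

# Reset an episode the same way the UI does in Advanced mode.
payload = {"difficulty": "easy", "sub_environment": "DDI"}
reset = requests.post(f"{BASE}/env/reset", json=payload, timeout=60).json()

# The reset response may wrap the observation; handle both shapes defensively.
obs = reset.get("observation", reset)
for cand in obs.get("candidate_action_set", []):
    # "candidate_id" is shown in the UI's Candidate Actions panel; other keys may vary.
    print(cand.get("candidate_id"), cand)
```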
app/api/routes.py CHANGED
@@ -5,9 +5,11 @@ from __future__ import annotations
  from fastapi import APIRouter, Depends, HTTPException

  from app.api.dependencies import get_service
+ from app.tools.medication_alternatives import build_alternatives_response
  from app.api.schemas import (
      BatchInferRequest,
      EvidenceQueryRequest,
+     MedicationAlternativesRequest,
      OrchestrateRequest,
      ResetRequest,
      StepCandidateRequest,
@@ -137,3 +139,13 @@ def cases_search(q: str, service: APIService = Depends(get_service)) -> list[dict]:
  @router.post("/evidence/query")
  def evidence_query(payload: EvidenceQueryRequest, service: APIService = Depends(get_service)) -> list[dict]:
      return service.evidence_query(query=payload.query, top_k=payload.top_k)
+
+
+ @router.post("/tools/medication_alternatives")
+ def medication_alternatives(payload: MedicationAlternativesRequest) -> dict:
+     """OpenFDA class neighbors + optional external POST (env: POLYGUARD_MED_TOOL_URL / TOKEN)."""
+     return build_alternatives_response(
+         query_drug=payload.query_drug,
+         regimen_drugs=payload.regimen_drugs,
+         max_suggestions=payload.max_suggestions,
+     )
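The new route takes the `MedicationAlternativesRequest` fields defined in the next file (`query_drug`, `regimen_drugs`, `max_suggestions`) and returns the dict built by `build_alternatives_response`. A quick smoke call against a running instance (the `/api` prefix assumes the nginx routing described in `docker/space/README.md`) might look like:

```python
# Illustrative request only; response keys follow build_alternatives_response
# (focus_drug, therapeutic_class, suggestions, source, disclaimer, errors).
import requests

resp = requests.post(
    "http://localhost:7860/api/tools/medication_alternatives",
    json={
        "query_drug": "diazepam",
        "regimen_drugs": ["diazepam", "ibuprofen"],
        "max_suggestions": 5,
    },
    timeout=60,
)
resp.raise_for_status()
data = resp.json()
print(data["therapeutic_class"], [s["display_name"] for s in data["suggestions"]])
```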
app/api/schemas.py CHANGED
@@ -55,3 +55,11 @@ class BatchInferRequest(StrictSchema):
  class EvidenceQueryRequest(StrictSchema):
      query: str
      top_k: int = 5
+
+
+ class MedicationAlternativesRequest(StrictSchema):
+     """FDA / external tool: suggest other labeled drugs in a similar pharmacologic class."""
+
+     query_drug: Optional[str] = None
+     regimen_drugs: list[str] = Field(default_factory=list)
+     max_suggestions: int = Field(default=10, ge=1, le=25)
app/tools/__init__.py ADDED
@@ -0,0 +1 @@
+ """Optional product tools (FDA search, external CDS hooks)."""
app/tools/medication_alternatives.py ADDED
@@ -0,0 +1,463 @@
1
+ """OpenFDA-backed medication class search + optional external HTTP tool.
2
+
3
+ Secrets (OpenFDA key, Tally/Vellum/custom bearer tokens) must come from env only.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import os
10
+ import re
11
+ from typing import Any
12
+ from urllib.parse import quote
13
+
14
+ import requests
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ OPENFDA_LABEL = "https://api.fda.gov/drug/label.json"
19
+ _DEFAULT_DISCLAIMER = (
20
+ "Research aid only — not medical advice. FDA labels may be incomplete; verify in approved prescribing information."
21
+ )
22
+
23
+
24
+ def _openfda_key_suffix() -> str:
25
+ key = os.getenv("POLYGUARD_OPENFDA_API_KEY", "").strip()
26
+ if not key:
27
+ return ""
28
+ return f"&api_key={quote(key, safe='')}"
29
+
30
+
31
+ def _fda_get(search: str, limit: int) -> dict[str, Any] | None:
32
+ """GET openFDA label.json; returns parsed JSON or None on failure."""
33
+ q = quote(search, safe="")
34
+ url = f"{OPENFDA_LABEL}?search={q}&limit={int(limit)}{_openfda_key_suffix()}"
35
+ try:
36
+ resp = requests.get(url, timeout=14)
37
+ if resp.status_code != 200:
38
+ logger.warning("openfda_http_%s: %s", resp.status_code, resp.text[:200])
39
+ return None
40
+ return resp.json()
41
+ except Exception as exc: # noqa: BLE001
42
+ logger.warning("openfda_request_failed: %s", exc)
43
+ return None
44
+
45
+
46
+ def _first_openfda(payload: dict[str, Any] | None) -> dict[str, Any]:
47
+ if not payload or "results" not in payload:
48
+ return {}
49
+ results = payload.get("results")
50
+ if not isinstance(results, list) or not results:
51
+ return {}
52
+ first = results[0]
53
+ return first if isinstance(first, dict) else {}
54
+
55
+
56
+ def _openfda_block(label: dict[str, Any]) -> dict[str, Any]:
57
+ block = label.get("openfda")
58
+ return block if isinstance(block, dict) else {}
59
+
60
+
61
+ def _listify(value: Any) -> list[str]:
62
+ if value is None:
63
+ return []
64
+ if isinstance(value, str):
65
+ return [value]
66
+ if isinstance(value, list):
67
+ return [str(x).strip() for x in value if str(x).strip()]
68
+ return [str(value).strip()]
69
+
70
+
71
+ def _snippet(text: Any, max_len: int = 380) -> str | None:
72
+ if not text:
73
+ return None
74
+ if isinstance(text, list):
75
+ text = " ".join(str(x) for x in text[:6])
76
+ s = re.sub(r"\s+", " ", str(text)).strip()
77
+ if len(s) <= max_len:
78
+ return s
79
+ return s[: max_len - 1] + "…"
80
+
81
+
82
+ def _label_link(set_id: str | None) -> str | None:
83
+ if not set_id:
84
+ return None
85
+ return f"https://dailymed.nlm.nih.gov/dailymed/drugInfo.cfm?setid={set_id}"
86
+
87
+
88
+ # Keywords from free text / simulator tokens → openFDA pharm_class_epc strings (exact or prefix).
89
+ _KEYWORD_EPCS: tuple[tuple[str, tuple[str, ...]], ...] = (
90
+ ("benzodiazepine", ("Benzodiazepine", "Benzodiazepine Sedative")),
91
+ ("benzo", ("Benzodiazepine",)),
92
+ ("nsaid", ("Nonsteroidal Anti-inflammatory Drug",)),
93
+ ("opioid", ("Opioid Agonist", "Full Opioid Agonists")),
94
+ ("statin", ("HMG-CoA Reductase Inhibitor",)),
95
+ ("beta blocker", ("beta-Adrenergic Blocker",)),
96
+ ("betablocker", ("beta-Adrenergic Blocker",)),
97
+ ("ace inhibitor", ("Angiotensin-converting Enzyme Inhibitor",)),
98
+ ("arb", ("Angiotensin II Receptor Blocker",)),
99
+ ("ppi", ("Proton Pump Inhibitor",)),
100
+ ("ssri", ("Selective Serotonin Reuptake Inhibitor",)),
101
+ # Anticoagulant / antiplatelet (simulator warfarin_like → warfarin)
102
+ ("warfarin", ("Vitamin K Antagonist",)),
103
+ ("heparin", ("Thrombin Inhibitor", "Factor Xa Inhibitor")),
104
+ )
105
+
106
+
107
+ def _normalize_simulator_query(q: str) -> str:
108
+ """Strip simulator suffixes and underscores so benzodiazepine_like → benzodiazepine."""
109
+ raw = q.strip().lower()[:120]
110
+ if not raw:
111
+ return ""
112
+ for suf in ("_like", "_analog", "_analogue", "_class", "_group", "_category"):
113
+ if raw.endswith(suf):
114
+ raw = raw[: -len(suf)].strip("_").strip()
115
+ return raw.replace("_", " ").strip()
116
+
117
+
118
+ def _class_search_variants(focus: str) -> list[str]:
119
+ """Ordered strings to try as openFDA pharm_class_epc (exact quoted) or wildcard body."""
120
+ raw = _normalize_simulator_query(focus)
121
+ if not raw:
122
+ return []
123
+ out: list[str] = []
124
+ seen: set[str] = set()
125
+
126
+ def add(s: str) -> None:
127
+ t = s.strip()
128
+ if len(t) < 3:
129
+ return
130
+ k = t.lower()
131
+ if k in seen:
132
+ return
133
+ seen.add(k)
134
+ out.append(t)
135
+
136
+ compact = raw.replace(" ", "")
137
+ # Prefer canonical FDA class strings before raw lowercase (better labels + display).
138
+ for kw, epcs in _KEYWORD_EPCS:
139
+ if kw in compact or kw in raw:
140
+ for e in epcs:
141
+ add(e)
142
+ add(raw)
143
+ first = raw.split()[0]
144
+ if first != raw:
145
+ add(first)
146
+ if raw and " " not in raw and raw.isalpha():
147
+ add(raw[0].upper() + raw[1:])
148
+ return out
149
+
150
+
151
+ def _resolve_focus_drug(query_drug: str | None, regimen_drugs: list[str]) -> str:
152
+ """Prefer explicit query_drug from client; do not silently use regimen[0] when multiple rows exist."""
153
+ q = (query_drug or "").strip()
154
+ if q:
155
+ return q
156
+ if len(regimen_drugs) == 1:
157
+ t = str(regimen_drugs[0]).strip()
158
+ return t
159
+ # Multiple regimen drugs but no focus: caller should send query_drug (frontend bug otherwise).
160
+ return ""
161
+
162
+
163
+ def _escape_fda_term(term: str) -> str:
164
+ """Remove characters that break openFDA quoted search."""
165
+ return re.sub(r'["\\]', " ", term).strip()[:100]
166
+
167
+
168
+ def _search_label_for_name(name: str) -> dict[str, Any]:
169
+ """Search brand, generic, or active substance on SPL labels."""
170
+ n = _escape_fda_term(name.strip()[:80])
171
+ if not n:
172
+ return {}
173
+ data_g = _fda_get(f'openfda.generic_name:"{n}"', limit=3)
174
+ if data_g and data_g.get("results"):
175
+ return _first_openfda(data_g)
176
+ data_b = _fda_get(f'openfda.brand_name:"{n}"', limit=3)
177
+ if data_b and data_b.get("results"):
178
+ return _first_openfda(data_b)
179
+ # Active ingredient / substance (helps real drug stems)
180
+ data_s = _fda_get(f'openfda.substance_name:"{n}"', limit=3)
181
+ if data_s and data_s.get("results"):
182
+ return _first_openfda(data_s)
183
+ data_a = _fda_get(f'openfda.active_ingredient:"{n}"', limit=3)
184
+ return _first_openfda(data_a) if data_a else {}
185
+
186
+
187
+ def _suggestions_by_class_probe(
188
+ field: str,
189
+ class_value: str,
190
+ exclude: set[str],
191
+ max_suggestions: int,
192
+ ) -> list[dict[str, Any]]:
193
+ rows = _suggestions_for_class(field, class_value, exclude, max_suggestions)
194
+ if rows:
195
+ return rows
196
+ # Wildcard: openFDA supports *suffix / prefix* on some fields
197
+ body = _escape_fda_term(class_value).lower()
198
+ if len(body) >= 4:
199
+ wild = _fda_get(f"openfda.{field}:*{body}*", limit=min(40, max(10, max_suggestions * 4)))
200
+ if wild and wild.get("results"):
201
+ # Reusing _suggestions_for_class here would require synthesizing a narrowed class string, which is awkward; parse the rows manually.
202
+ out: list[dict[str, Any]] = []
203
+ seen: set[str] = set()
204
+ for row in wild.get("results", []):
205
+ if not isinstance(row, dict):
206
+ continue
207
+ of = _openfda_block(row)
208
+ brands = _listify(of.get("brand_name"))
209
+ generics = _listify(of.get("generic_name"))
210
+ display = (brands[0] if brands else None) or (generics[0] if generics else None)
211
+ if not display:
212
+ continue
213
+ key = display.lower()
214
+ if key in seen or key in exclude:
215
+ continue
216
+ seen.add(key)
217
+ ar = row.get("adverse_reactions")
218
+ ar_text = ar[0] if isinstance(ar, list) and ar else ar
219
+ set_id = None
220
+ if isinstance(of.get("spl_set_id"), list) and of["spl_set_id"]:
221
+ set_id = str(of["spl_set_id"][0])
222
+ elif of.get("spl_set_id"):
223
+ set_id = str(of["spl_set_id"])
224
+ out.append(
225
+ {
226
+ "display_name": display,
227
+ "generic_names": generics[:4],
228
+ "brand_names": brands[:4],
229
+ "routes": _listify(of.get("route"))[:4],
230
+ "adverse_reactions_snippet": _snippet(ar_text),
231
+ "label_link": _label_link(set_id),
232
+ "source_detail": f"openfda.{field}.wildcard",
233
+ },
234
+ )
235
+ if len(out) >= max_suggestions:
236
+ break
237
+ return out
238
+ return []
239
+
240
+
241
+ def _pick_pharm_class(openfda_block: dict[str, Any]) -> tuple[str | None, str | None]:
242
+ for key in ("pharm_class_epc", "pharm_class_cs", "pharm_class_moa"):
243
+ for item in _listify(openfda_block.get(key)):
244
+ if len(item) > 3:
245
+ return key, item
246
+ return None, None
247
+
248
+
249
+ def _suggestions_for_class(
250
+ field: str,
251
+ pharm_class: str,
252
+ exclude: set[str],
253
+ max_suggestions: int,
254
+ ) -> list[dict[str, Any]]:
255
+ """List other drugs sharing FDA pharmacologic class on label."""
256
+ pc = pharm_class.strip()[:120]
257
+ if not pc or not field:
258
+ return []
259
+ search = f'openfda.{field}:"{pc}"'
260
+ data = _fda_get(search, limit=min(50, max(10, max_suggestions * 4)))
261
+ if not data or not data.get("results"):
262
+ return []
263
+
264
+ out: list[dict[str, Any]] = []
265
+ seen: set[str] = set()
266
+ for row in data.get("results", []):
267
+ if not isinstance(row, dict):
268
+ continue
269
+ of = _openfda_block(row)
270
+ brands = _listify(of.get("brand_name"))
271
+ generics = _listify(of.get("generic_name"))
272
+ display = (brands[0] if brands else None) or (generics[0] if generics else None)
273
+ if not display:
274
+ continue
275
+ key = display.lower()
276
+ if key in seen:
277
+ continue
278
+ if key in exclude:
279
+ continue
280
+ seen.add(key)
281
+ ar = row.get("adverse_reactions")
282
+ if isinstance(ar, list) and ar:
283
+ ar_text = ar[0]
284
+ else:
285
+ ar_text = ar
286
+ set_id = None
287
+ if isinstance(of.get("spl_set_id"), list) and of["spl_set_id"]:
288
+ set_id = str(of["spl_set_id"][0])
289
+ elif of.get("spl_set_id"):
290
+ set_id = str(of["spl_set_id"])
291
+ out.append(
292
+ {
293
+ "display_name": display,
294
+ "generic_names": generics[:4],
295
+ "brand_names": brands[:4],
296
+ "routes": _listify(of.get("route"))[:4],
297
+ "adverse_reactions_snippet": _snippet(ar_text),
298
+ "label_link": _label_link(set_id),
299
+ "source_detail": f"openfda.{field}",
300
+ }
301
+ )
302
+ if len(out) >= max_suggestions:
303
+ break
304
+ return out
305
+
306
+
307
+ def _external_suggestions(drug_names: list[str]) -> list[dict[str, Any]] | None:
308
+ url = os.getenv("POLYGUARD_MED_TOOL_URL", "").strip()
309
+ if not url:
310
+ return None
311
+ headers: dict[str, str] = {"Content-Type": "application/json"}
312
+ token = os.getenv("POLYGUARD_MED_TOOL_TOKEN", "").strip()
313
+ if token:
314
+ headers["Authorization"] = f"Bearer {token}"
315
+ try:
316
+ resp = requests.post(
317
+ url,
318
+ json={"drug_names": drug_names},
319
+ headers=headers,
320
+ timeout=18,
321
+ )
322
+ if resp.status_code >= 400:
323
+ logger.warning("med_tool_http_%s", resp.status_code)
324
+ return []
325
+ payload = resp.json()
326
+ except Exception as exc: # noqa: BLE001
327
+ logger.warning("med_tool_request_failed: %s", exc)
328
+ return []
329
+ if not isinstance(payload, dict):
330
+ return []
331
+ raw = payload.get("suggestions")
332
+ if not isinstance(raw, list):
333
+ return []
334
+ cleaned: list[dict[str, Any]] = []
335
+ for item in raw:
336
+ if isinstance(item, dict) and item.get("display_name"):
337
+ row = dict(item)
338
+ row["source_detail"] = str(row.get("source_detail") or "external_tool")
339
+ cleaned.append(row)
340
+ elif isinstance(item, str) and item.strip():
341
+ cleaned.append(
342
+ {
343
+ "display_name": item.strip(),
344
+ "generic_names": [],
345
+ "brand_names": [],
346
+ "routes": [],
347
+ "adverse_reactions_snippet": None,
348
+ "label_link": None,
349
+ "source_detail": "external_tool",
350
+ }
351
+ )
352
+ return cleaned
353
+
354
+
355
+ def build_alternatives_response(
356
+ query_drug: str | None,
357
+ regimen_drugs: list[str],
358
+ max_suggestions: int,
359
+ ) -> dict[str, Any]:
360
+ errors: list[str] = []
361
+ regimen_clean = [str(x).strip() for x in regimen_drugs if str(x).strip()][:40]
362
+ focus = _resolve_focus_drug(query_drug, regimen_clean)
363
+ exclude = {x.lower() for x in regimen_clean}
364
+ if focus:
365
+ exclude.add(focus.lower())
366
+
367
+ external_rows: list[dict[str, Any]] = []
368
+ ext = _external_suggestions([focus] if focus else regimen_clean[:5])
369
+ if ext is not None:
370
+ external_rows = ext
371
+
372
+ if not focus and not regimen_clean:
373
+ return {
374
+ "focus_drug": "",
375
+ "therapeutic_class": None,
376
+ "suggestions": external_rows,
377
+ "source": "external" if external_rows else "none",
378
+ "disclaimer": _DEFAULT_DISCLAIMER,
379
+ "errors": ["Enter a drug name or load drugs from the current episode."],
380
+ }
381
+
382
+ if not focus and regimen_clean:
383
+ return {
384
+ "focus_drug": "",
385
+ "therapeutic_class": None,
386
+ "therapeutic_class_field": None,
387
+ "suggestions": external_rows,
388
+ "source": "external" if external_rows else "none",
389
+ "disclaimer": _DEFAULT_DISCLAIMER,
390
+ "errors": [
391
+ "Several medications are on this regimen; pick a focus row in the UI (or pass query_drug). "
392
+ "The server does not guess the first medication anymore.",
393
+ ],
394
+ }
395
+
396
+ # SPL name/substance search: normalize simulator tokens first (benzodiazepine_like → benzodiazepine).
397
+ lookup = _normalize_simulator_query(focus) or focus.strip()
398
+ label = _search_label_for_name(lookup)
399
+ ofb = _openfda_block(label)
400
+ pharm_field, pharm = _pick_pharm_class(ofb)
401
+
402
+ openfda_rows: list[dict[str, Any]] = []
403
+ if pharm and pharm_field:
404
+ openfda_rows = _suggestions_for_class(pharm_field, pharm, exclude, max_suggestions)
405
+ if not openfda_rows:
406
+ # Simulator tokens (e.g. benzodiazepine_like) or class keywords: try FDA class directly.
407
+ for cand in _class_search_variants(focus):
408
+ rows = _suggestions_by_class_probe("pharm_class_epc", cand, exclude, max_suggestions)
409
+ if rows:
410
+ pharm_field, pharm = "pharm_class_epc", cand
411
+ openfda_rows = rows
412
+ break
413
+ if not openfda_rows:
414
+ for cand in _class_search_variants(focus):
415
+ rows = _suggestions_by_class_probe("pharm_class_cs", cand, exclude, max_suggestions)
416
+ if rows:
417
+ pharm_field, pharm = "pharm_class_cs", cand
418
+ openfda_rows = rows
419
+ break
420
+
421
+ if not openfda_rows:
422
+ if not (pharm and pharm_field):
423
+ errors.append(
424
+ "Could not match this text to an FDA SPL (generic/brand/substance) or pharmacologic class. "
425
+ "Try a generic name (e.g. diazepam), a class keyword (e.g. benzodiazepine), or load from episode.",
426
+ )
427
+ elif not external_rows:
428
+ errors.append(
429
+ "No labeled products returned for this query (try another spelling or a broader class keyword).",
430
+ )
431
+
432
+ merged: list[dict[str, Any]] = []
433
+ seen_keys: set[str] = set()
434
+ for row in external_rows + openfda_rows:
435
+ display = str(row.get("display_name", "")).strip()
436
+ if not display:
437
+ continue
438
+ generics = [str(g).lower() for g in (row.get("generic_names") or []) if g]
439
+ dedupe_key = generics[0] if generics else display.lower()
440
+ if dedupe_key in seen_keys:
441
+ continue
442
+ seen_keys.add(dedupe_key)
443
+ merged.append(row)
444
+ if len(merged) >= max_suggestions:
445
+ break
446
+
447
+ source = "openfda"
448
+ if external_rows and openfda_rows:
449
+ source = "mixed"
450
+ elif external_rows and not openfda_rows:
451
+ source = "external"
452
+ elif not external_rows and not openfda_rows:
453
+ source = "none"
454
+
455
+ return {
456
+ "focus_drug": focus,
457
+ "therapeutic_class": pharm,
458
+ "therapeutic_class_field": pharm_field,
459
+ "suggestions": merged,
460
+ "source": source,
461
+ "disclaimer": _DEFAULT_DISCLAIMER,
462
+ "errors": errors,
463
+ }
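Because `build_alternatives_response` does live openFDA lookups (plus the optional external POST), it can be exercised without the API layer at all. A minimal local check, assuming network access to `api.fda.gov` and, optionally, `POLYGUARD_OPENFDA_API_KEY` exported:

```python
# Direct module smoke check — no FastAPI server needed; makes real HTTP calls to openFDA.
from app.tools.medication_alternatives import build_alternatives_response

out = build_alternatives_response(
    query_drug="benzodiazepine_like",  # simulator token; normalized to "benzodiazepine"
    regimen_drugs=["benzodiazepine_like", "ibuprofen"],
    max_suggestions=5,
)
print(out["source"], out["therapeutic_class"])
for row in out["suggestions"]:
    print("-", row["display_name"], row.get("label_link"))
```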
app/ui/frontend/src/App.tsx CHANGED
@@ -20,6 +20,7 @@ import type {
    StepResponse,
    TaskPreset,
  } from "./lib/types";
+ import AlternativeMedicineSearch from "./components/AlternativeMedicineSearch";
  import MetaverseBackdrop from "./components/MetaverseBackdrop";

  type WorkbenchMode = "agent" | "env";
@@ -887,6 +888,18 @@ export default function App() {
    const activeInfo = mode === "agent" ? agentInfo : envInfo;
    const activeTerminationReason = shortValue(activeInfo?.termination_reason);
    const terminationReason = activeTerminationReason !== "-" ? activeTerminationReason : null;
+   const regimenForAltTool = useMemo(() => {
+     const meds = activeObservation?.medication_table ?? [];
+     const names: string[] = [];
+     for (const row of meds) {
+       const v = row.drug ?? row.drug_id ?? row.name;
+       if (typeof v === "string" && v.trim()) {
+         names.push(v.trim());
+       }
+     }
+     return names;
+   }, [activeObservation]);
+
    const heroStats: Array<[string, string]> = [
      ["Runtime", mode === "agent" ? "Agent Workbench" : "Env Explorer"],
      ["Scenario", taskLabel(taskId, catalog.task_presets)],
@@ -1164,6 +1177,7 @@
        <DetailPanel title="Explanation" data={mode === "agent" ? explanation : null} />
        <DetailPanel title="Evidence" data={mode === "agent" ? (isRecord(evidence) || Array.isArray(evidence) ? evidence : null) : null} />
        <EventLog events={events} error={error} />
+       <AlternativeMedicineSearch regimenDrugNames={regimenForAltTool} />
      </main>
      <QTips
        open={tipsOpen}
app/ui/frontend/src/components/AlternativeMedicineSearch.tsx ADDED
@@ -0,0 +1,215 @@
1
+ import { useCallback, useEffect, useState } from "react";
2
+ import { API_BASE } from "../lib/constants";
3
+
4
+ type Suggestion = {
5
+ display_name: string;
6
+ generic_names?: string[];
7
+ brand_names?: string[];
8
+ routes?: string[];
9
+ adverse_reactions_snippet?: string | null;
10
+ label_link?: string | null;
11
+ source_detail?: string;
12
+ };
13
+
14
+ type AlternativesResponse = {
15
+ focus_drug: string;
16
+ therapeutic_class: string | null;
17
+ therapeutic_class_field?: string | null;
18
+ suggestions: Suggestion[];
19
+ source: string;
20
+ disclaimer: string;
21
+ errors: string[];
22
+ };
23
+
24
+ type Props = {
25
+ regimenDrugNames: string[];
26
+ };
27
+
28
+ export default function AlternativeMedicineSearch({ regimenDrugNames }: Props) {
29
+ const [query, setQuery] = useState("");
30
+ /** Which row in the current episode regimen drives the FDA "focus" drug (not always the first). */
31
+ const [regimenFocusIndex, setRegimenFocusIndex] = useState(0);
32
+ /** False after user types in the box; true when select/load sets query so regimen row wins on submit. */
33
+ const [focusFromRegimenSelect, setFocusFromRegimenSelect] = useState(true);
34
+ const [loading, setLoading] = useState(false);
35
+ const [error, setError] = useState<string | null>(null);
36
+ const [result, setResult] = useState<AlternativesResponse | null>(null);
37
+
38
+ useEffect(() => {
39
+ if (regimenDrugNames.length === 0) {
40
+ setRegimenFocusIndex(0);
41
+ return;
42
+ }
43
+ setRegimenFocusIndex((prev) => (prev >= regimenDrugNames.length ? 0 : prev));
44
+ }, [regimenDrugNames]);
45
+
46
+ const runSearch = useCallback(
47
+ async (queryDrug: string | undefined, regimen: string[]) => {
48
+ setLoading(true);
49
+ setError(null);
50
+ try {
51
+ const res = await fetch(`${API_BASE}/tools/medication_alternatives`, {
52
+ method: "POST",
53
+ headers: { "Content-Type": "application/json" },
54
+ body: JSON.stringify({
55
+ query_drug: queryDrug?.trim() || null,
56
+ regimen_drugs: regimen,
57
+ max_suggestions: 7,
58
+ }),
59
+ });
60
+ if (!res.ok) {
61
+ const t = await res.text();
62
+ throw new Error(t.slice(0, 200) || `HTTP ${res.status}`);
63
+ }
64
+ setResult((await res.json()) as AlternativesResponse);
65
+ } catch (e) {
66
+ setResult(null);
67
+ setError(e instanceof Error ? e.message : "Request failed");
68
+ } finally {
69
+ setLoading(false);
70
+ }
71
+ },
72
+ [],
73
+ );
74
+
75
+ const safeRegimenIndex =
76
+ regimenDrugNames.length > 0
77
+ ? Math.min(Math.max(regimenFocusIndex, 0), regimenDrugNames.length - 1)
78
+ : 0;
79
+
80
+ /** Never send null focus when a regimen exists — avoids API defaulting to regimen[0] (always benzo if first). */
81
+ const resolvedFocusDrug = (): string | undefined => {
82
+ const typed = query.trim();
83
+ const fromList = regimenDrugNames[safeRegimenIndex]?.trim() ?? "";
84
+ if (focusFromRegimenSelect && regimenDrugNames.length > 0) {
85
+ return fromList || typed || undefined;
86
+ }
87
+ return typed || fromList || undefined;
88
+ };
89
+
90
+ const onSubmit = () => {
91
+ void runSearch(resolvedFocusDrug(), regimenDrugNames);
92
+ };
93
+
94
+ const onLoadRegimen = () => {
95
+ const names = regimenDrugNames.length ? regimenDrugNames : [];
96
+ if (!names.length) {
97
+ setError("Reset an episode first so the regimen list is available.");
98
+ return;
99
+ }
100
+ const idx = Math.min(Math.max(regimenFocusIndex, 0), names.length - 1);
101
+ const focus = names[idx] ?? "";
102
+ setRegimenFocusIndex(idx);
103
+ setQuery(focus);
104
+ setFocusFromRegimenSelect(true);
105
+ void runSearch(focus, names);
106
+ };
107
+
108
+ const onRegimenSelectChange = (index: number) => {
109
+ setRegimenFocusIndex(index);
110
+ const name = regimenDrugNames[index]?.trim() ?? "";
111
+ setQuery(name);
112
+ setFocusFromRegimenSelect(true);
113
+ };
114
+
115
+ return (
116
+ <section
117
+ className="panel-surface alt-med-tool panel-wide"
118
+ aria-label="FDA alternatives: openFDA class neighbors"
119
+ >
120
+ <div className="panel-heading">
121
+ <h2>FDA alternatives</h2>
122
+ <span>Tool</span>
123
+ </div>
124
+ {regimenDrugNames.length > 0 ? (
125
+ <label className="field alt-med-tool-regimen-select">
126
+ <span>Focus drug from current regimen</span>
127
+ <select
128
+ aria-label="Select regimen drug for alternatives search"
129
+ value={safeRegimenIndex}
130
+ onChange={(e) => onRegimenSelectChange(Number(e.target.value))}
131
+ >
132
+ {regimenDrugNames.map((name, i) => (
133
+ <option key={`${name}-${i}`} value={i}>
134
+ {name}
135
+ </option>
136
+ ))}
137
+ </select>
138
+ </label>
139
+ ) : null}
140
+ <div className="alt-med-tool-row">
141
+ <label className="field alt-med-tool-field">
142
+ <span>Drug name (focus)</span>
143
+ <input
144
+ value={query}
145
+ onChange={(e) => {
146
+ setQuery(e.target.value);
147
+ setFocusFromRegimenSelect(false);
148
+ }}
149
+ placeholder="e.g. diazepam, ibuprofen, benzodiazepine (or simulator token like benzodiazepine_like)"
150
+ autoComplete="off"
151
+ />
152
+ </label>
153
+ <div className="alt-med-tool-actions">
154
+ <button type="button" className="secondary" onClick={onLoadRegimen} disabled={loading}>
155
+ Load from episode
156
+ </button>
157
+ <button type="button" onClick={onSubmit} disabled={loading}>
158
+ {loading ? "Searching…" : "Suggest alternatives"}
159
+ </button>
160
+ </div>
161
+ </div>
162
+ <p className="muted small-print alt-med-tool-hint">
163
+ Pick a regimen row, then search. Up to 7 results — scroll the list below.
164
+ </p>
165
+ {error && <div className="error-banner">{error}</div>}
166
+ {result && (
167
+ <div className="alt-med-tool-results">
168
+ {result.errors?.length ? (
169
+ <ul className="alt-med-tool-errors">
170
+ {result.errors.map((msg) => (
171
+ <li key={msg}>{msg}</li>
172
+ ))}
173
+ </ul>
174
+ ) : null}
175
+ <p className="muted">
176
+ <strong>Focus:</strong> {result.focus_drug || "—"} · <strong>Class:</strong>{" "}
177
+ {result.therapeutic_class ?? "—"}{" "}
178
+ {result.therapeutic_class_field ? <span>({result.therapeutic_class_field})</span> : null} ·{" "}
179
+ <strong>Source:</strong> {result.source}
180
+ </p>
181
+ <div className="alt-med-suggestions-scroll" role="region" aria-label="FDA alternative suggestions, scrollable">
182
+ <ul className="alt-med-suggestion-list">
183
+ {result.suggestions?.length ? (
184
+ result.suggestions.map((s, idx) => (
185
+ <li key={`${s.display_name}-${idx}`} className="alt-med-suggestion">
186
+ <div>
187
+ <strong>{s.display_name}</strong>
188
+ <span className="muted"> · {s.source_detail ?? "openfda"}</span>
189
+ </div>
190
+ {s.routes?.length ? (
191
+ <div className="muted small-print">Route: {s.routes.join(", ")}</div>
192
+ ) : null}
193
+ {s.generic_names?.length ? (
194
+ <div className="muted small-print">Generic: {s.generic_names.join(", ")}</div>
195
+ ) : null}
196
+ {s.adverse_reactions_snippet ? (
197
+ <div className="alt-med-ar">ADR label excerpt: {s.adverse_reactions_snippet}</div>
198
+ ) : null}
199
+ {s.label_link ? (
200
+ <a className="alt-med-link" href={s.label_link} target="_blank" rel="noreferrer">
201
+ DailyMed / label
202
+ </a>
203
+ ) : null}
204
+ </li>
205
+ ))
206
+ ) : (
207
+ <li className="muted">No suggestions yet — try another spelling or load from episode.</li>
208
+ )}
209
+ </ul>
210
+ </div>
211
+ </div>
212
+ )}
213
+ </section>
214
+ );
215
+ }
app/ui/frontend/src/styles/theme.css CHANGED
@@ -1138,6 +1138,108 @@ td {
1138
  }
1139
  }
1140
 
1141
+ .small-print {
1142
+ font-size: 0.78rem;
1143
+ line-height: 1.35;
1144
+ }
1145
+
1146
+ .alt-med-tool {
1147
+ margin-top: 10px;
1148
+ border: 1px dashed rgba(155, 124, 255, 0.35);
1149
+ background: rgba(8, 11, 27, 0.55);
1150
+ }
1151
+
1152
+ .alt-med-tool .panel-heading h2 {
1153
+ font-size: 1.05rem;
1154
+ }
1155
+
1156
+ .alt-med-tool-regimen-select {
1157
+ margin: 0 0 10px;
1158
+ max-width: min(520px, 100%);
1159
+ }
1160
+
1161
+ .alt-med-tool-regimen-select select {
1162
+ width: 100%;
1163
+ }
1164
+
1165
+ .alt-med-tool-hint {
1166
+ margin: 8px 0 0;
1167
+ max-width: 960px;
1168
+ }
1169
+
1170
+ .alt-med-tool-row {
1171
+ display: flex;
1172
+ flex-wrap: wrap;
1173
+ gap: 12px;
1174
+ align-items: flex-end;
1175
+ }
1176
+
1177
+ .alt-med-tool-field {
1178
+ flex: 1 1 220px;
1179
+ margin: 0;
1180
+ }
1181
+
1182
+ .alt-med-tool-actions {
1183
+ display: flex;
1184
+ flex-wrap: wrap;
1185
+ gap: 8px;
1186
+ }
1187
+
1188
+ .alt-med-tool-results {
1189
+ margin-top: 12px;
1190
+ }
1191
+
1192
+ .alt-med-tool-errors {
1193
+ color: var(--warning);
1194
+ font-size: 0.85rem;
1195
+ }
1196
+
1197
+ .alt-med-suggestions-scroll {
1198
+ margin-top: 8px;
1199
+ max-height: 17.5rem;
1200
+ overflow-y: auto;
1201
+ overflow-x: hidden;
1202
+ padding-right: 4px;
1203
+ border-radius: 12px;
1204
+ border: 1px solid var(--line-soft);
1205
+ background: rgba(5, 8, 20, 0.35);
1206
+ }
1207
+
1208
+ .alt-med-suggestion-list {
1209
+ list-style: none;
1210
+ margin: 0;
1211
+ padding: 8px;
1212
+ display: flex;
1213
+ flex-direction: column;
1214
+ gap: 6px;
1215
+ }
1216
+
1217
+ .alt-med-suggestion {
1218
+ padding: 8px 10px;
1219
+ border-radius: 10px;
1220
+ border: 1px solid var(--line-soft);
1221
+ background: rgba(13, 16, 35, 0.45);
1222
+ flex-shrink: 0;
1223
+ }
1224
+
1225
+ .alt-med-ar {
1226
+ margin-top: 4px;
1227
+ font-size: 0.76rem;
1228
+ color: var(--muted);
1229
+ line-height: 1.35;
1230
+ display: -webkit-box;
1231
+ -webkit-box-orient: vertical;
1232
+ -webkit-line-clamp: 2;
1233
+ overflow: hidden;
1234
+ }
1235
+
1236
+ .alt-med-link {
1237
+ display: inline-block;
1238
+ margin-top: 6px;
1239
+ font-size: 0.82rem;
1240
+ color: var(--accent-2);
1241
+ }
1242
+
1243
  ::-webkit-scrollbar {
1244
  width: 7px;
1245
  height: 7px;
docker/space/README.md CHANGED
@@ -12,28 +12,46 @@ Never commit or paste Hugging Face tokens into chat or the repo. If a token was

  ```bash
  cd polyguard-rl
- docker build -f Dockerfile.space -t polyguard-space .
+ docker build -t polyguard-space .
  ```

- 3. Push the Space repo (HF expects `Dockerfile` at root). Either:
-
- - **Option A:** In the Space repo on Hub, set **Build Dockerfile path** to `Dockerfile.space` if the UI allows, **or** copy/rename: `cp Dockerfile.space Dockerfile` in the branch you push.
-
- - **Option B:** Make this `polyguard-rl` folder the Space git root and add a symlink or duplicate `Dockerfile` pointing to the same content as `Dockerfile.space`.
+ 3. Push the Space repo. The root **`Dockerfile`** is the full demo (Vite UI + nginx + API + OpenEnv). Hugging Face uses it automatically when **Dockerfile path** is empty. If your Space was created earlier with a different Dockerfile, trigger **Factory reboot** after pushing so the new image builds.

  4. Commit and push to the Space repository. HF builds the image on their builders (you do not need to `docker push` to Docker Hub for standard Spaces).

+ ## FDA panel / latest UI missing on the live Space
+
+ Pushing code to GitHub alone does **not** refresh `huggingface.co/spaces/...` unless that Space is connected to the same repo **and** rebuilds from the branch that has your UI (for example `fda` vs `main`). This repo’s usual demo path is **upload via Hub API**:
+
+ ```bash
+ cd polyguard-rl
+ export HF_TOKEN="hf_..." # write token; never commit it
+ uv run python scripts/deploy_space_api.py --repo-id TheJackBright/polyguard-openenv
+ ```
+
+ Wait for **Build** in the Space logs to finish, then use **Factory reboot** or a hard browser refresh if the page still looks old. **Dockerfile path** should be empty (default `Dockerfile`) or `Dockerfile` / `Dockerfile.space`. If the Space uses the **full monorepo** as its Git root, set Dockerfile path to the repo-root `Dockerfile` or to `polyguard-rl/Dockerfile`.
+
  ## Runtime

  - **Port:** Space sets `PORT` (default `7860`). Nginx listens on `PORT` and routes `/api/*` → API, `/ws` → OpenEnv WebSocket, `/` → built React app.
- - **First boot:** If `checkpoints/active/grpo_adapter` is missing, `entrypoint.sh` runs `scripts/install_hf_active_bundle.py` (downloads the public bundle; slow on first start).
+ - **First boot:** If `checkpoints/active/grpo_adapter` is missing, `entrypoint.sh` runs `scripts/install_hf_active_bundle.py`. That pulls `TheJackBright/polyguard-openenv-training-full-artifacts` (slow, ~1.1 GB).
  - **CORS:** Set via `POLYGUARD_ALLOW_HF_SPACE_CORS=true` (default in the Space Dockerfile).

- ## Optional secrets
+ ## If logs show `401` / `RepositoryNotFoundError` on startup
+
+ The artifact **model repo is private, gated, or requires a license click**, so anonymous downloads are blocked. The UI can still “work” using the **heuristic ranker** and public base models, but **your trained bundle is not installed**.
+
+ **Fix (pick one):**
+
+ 1. **Space secret (recommended):** Space → **Settings** → **Secrets** → add **`HF_TOKEN`** = a [read token](https://huggingface.co/settings/tokens) that can access `polyguard-openenv-training-full-artifacts`. Restart the Space.
+ 2. **Hub settings:** Make that model repo **public**, or ensure **gated** access allows the token you use in (1).
+ 3. **Ignore:** Leave as-is if ranker-only behavior is enough for the demo.
+
+ ## Secrets

- | Name | Use |
- |-----------|-----|
- | `HF_TOKEN` | Private model or artifact repo; `huggingface_hub` picks it up automatically when set in the Space environment. |
+ | Name | Use |
+ |------------|-----|
+ | `HF_TOKEN` | **Required** if the artifact repo is not anonymously readable; `huggingface_hub` reads it automatically. |

  ## Local smoke (same as Space)
docs/DEMO_RECORDING_SCRIPT.md ADDED
@@ -0,0 +1,493 @@
1
+ # PolyGuard Space UI — demo recording script (shot-by-shot)
2
+
3
+ Use this document while screen-recording the Hugging Face Space (or local Docker). Target length: **8–14 minutes** for a full pass, or **3–5 minutes** for a highlights reel.
4
+
5
+ ---
6
+
7
+ ## Before you hit record
8
+
9
+ 1. **Open the Space** in a clean browser profile or incognito (fewer extensions → fewer glitches).
10
+ 2. **Set resolution**: 1920×1080 or 1440×900; browser zoom **100%**.
11
+ 3. **Fullscreen** the Space iframe or use HF “Open in new tab” so the URL bar shows the Space domain.
12
+ 4. **Wait for cold start**: first load may download the model bundle (several minutes). The **Event Log** and **Model Truth** panel will tell you if the policy failed to load (heuristic fallback is still usable for env steps).
13
+ 5. **Optional**: hide mouse cursor in OBS if you prefer; otherwise move slowly and pause **2 seconds** on each panel after major clicks.
14
+
15
+ **Primary Space (product):** `https://huggingface.co/spaces/TheJackBright/polyguard-openenv`
16
+ Runtime: nginx fronts the **product API** (default `8200`) and **OpenEnv service** (`8100`); see `docker/space/entrypoint.sh`.
17
+
18
+ ---
19
+
20
+ ## Where the model lives (Qwen and artifacts)
21
+
22
+ This matters for what you say on camera.
23
+
24
+ | Location | What it is |
25
+ | --- | --- |
26
+ | **On the Space container** | Working directory `/app` (see `entrypoint.sh`: `cd /app`). |
27
+ | **Downloaded bundle** | If `checkpoints/active/grpo_adapter/adapter_config.json` is missing at boot, `scripts/install_hf_active_bundle.py` pulls the **HF usable model bundle** into `checkpoints/active/`. |
28
+ | **Typical layout after install** | `checkpoints/active/active_model_manifest.json` — which artifact is active (often **GRPO adapter** on top of base). |
29
+ | **Weights** | `checkpoints/active/grpo_adapter/` (LoRA/PEFT), optionally `checkpoints/active/merged/` (full merged weights), `checkpoints/active/sft_adapter/`. |
30
+ | **Base model name** | Usually **`Qwen/Qwen2.5-0.5B-Instruct`** as the Transformers base for adapters (set via env e.g. `POLYGUARD_HF_MODEL`). |
31
+
32
+ **What the UI proves:** the **Model Truth** panel calls **`GET /policy/model_status`** (product API). It shows `model_id` / `base_model`, `run_id`, `preferred_artifact` / `loaded_source`, and availability flags. Say on camera: *“This is live from the API, not hard-coded in the frontend.”*
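To prove the same point from a terminal while recording, you can hit that endpoint directly. A sketch only: the `/api` prefix and the field names below assume the nginx routing and the Model Truth table later in this script; verify against `app/api/routes.py` before quoting values on camera.

```python
# Quick sanity check of the endpoint the Model Truth panel reads.
import requests

# Use the local Docker run here; substitute the Space's direct URL when recording against the Hub.
status = requests.get("http://localhost:7860/api/policy/model_status", timeout=60).json()
print(status.get("model_id") or status.get("base_model"), status.get("run_id"), status.get("loaded_source"))
```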
33
+
34
+ ---
35
+
36
+ ## UI map (what appears on screen)
37
+
38
+ | Region | Purpose |
39
+ | --- | --- |
40
+ | **Hero** (“PolyGuard neural safety cockpit”) | Marketing copy + quick stats. |
41
+ | **Top bar** | **Agent Workbench** vs **Env Explorer**, **Task** dropdown, **Reset Episode**, **Q Tips**. |
42
+ | **Status chips** | “Live” / model line; in Env mode one chip reads **ws env** (WebSocket to OpenEnv). |
43
+ | **Model Truth** | Qwen / artifact / run / availability. |
44
+ | **Advanced strip** | Only if Task = **Advanced** — pick raw `difficulty` + `sub_environment`. |
45
+ | **Episode Overview** | Mode, task, difficulty, environment, step budget, last reward, patient id, **Patient Summary**, **Risk Delta**. |
46
+ | **Candidate Actions** | Legal moves: `candidate_id`, action type, target/replacement, estimated safety delta (or **Blocked**). |
47
+ | **Action Console** | Confidence, rationale, **Submit** vs **Run Agent** (Agent mode only for Run Agent). |
48
+ | **Reward Channels** | Bars for total + primary + component scores (see below). |
49
+ | **Current Medications** | Cards from observation. |
50
+ | **Action History / Warnings** | Step trace and env warnings. |
51
+ | **Decision / Explanation / Evidence** | **Agent mode only** (filled after API steps that return those fields). |
52
+ | **Event Log** | Human-readable trace of resets, steps, rewards, errors. |
53
+
54
+ ---
55
+
56
+ ## Feature encyclopedia — every panel, branch, and agent
57
+
58
+ Use this section as a **script appendix** or **judge handout**. It mirrors the React workbench in `app/ui/frontend/src/App.tsx`, the API in `app/api/`, and the orchestrator in `app/agents/orchestrator.py`.
59
+
60
+ ### A. How the Space is wired (under the hood)
61
+
62
+ | Piece | Role |
63
+ | --- | --- |
64
+ | **Browser → nginx** | HF Space exposes one origin; nginx routes paths. |
65
+ | **Product API** | Vite uses `API_BASE` (default **`/api`**). FastAPI serves catalog, reset, step_candidate, orchestrate, model_status, reward_breakdown, etc. |
66
+ | **OpenEnv HTTP/WS** | `ENV_BASE` defaults to **same origin** on Spaces (not localhost). Web UI opens **`ws(s)://<origin>/ws`** for Env Explorer. |
67
+ | **Two Python processes** | `entrypoint.sh` starts **uvicorn** for `app.env.fastapi_app` (env, port **8100**) and **uvicorn** for `app.api` (product API, port **8200**). Agent mode reset/step still use the **API’s** in-process `PolyGuardEnv`; Env mode uses the **separate** env service over WebSocket. |
68
+ | **Important** | Agent and Env UIs maintain **separate React state** (`agentObservation` vs `envObservation`). Toggling mode **clears the Event Log** and clears the inactive branch’s episode state so you always know which backend path you are exercising. |
69
+
70
+ ### B. Hero (“PolyGuard neural safety cockpit”)
71
+
72
+ | Stat | Source | What to say on camera |
73
+ | --- | --- | --- |
74
+ | **Runtime** | `mode === "agent"` → “Agent Workbench”; else “Env Explorer”. | “This is which transport I am using right now.” |
75
+ | **Scenario** | Human label for current `taskId` from catalog presets or Advanced. | “Which curriculum preset is bound to difficulty + sub-environment.” |
76
+ | **Candidates** | `candidate_action_set.length` from the **active** observation. | “How many legal moves the env is offering after the last reset/step.” |
77
+ | **Reward** | Last scalar reward for the active branch (`null` → shown as `-`). | “Verifier scalar after the last step in this mode only.” |
78
+
79
+ ### C. Top bar — every control
80
+
81
+ | Control | Behavior |
82
+ | --- | --- |
83
+ | **Agent Workbench** | Sets `mode` to `agent`. Clears env state, event log, error; clears agent panels if switching from env (see `handleModeChange`). |
84
+ | **Env Explorer** | Sets `mode` to `env`. Clears agent-specific observation/reward/decision/evidence. |
85
+ | **Task** `<select>` | Options: each **task preset** from `GET /env/catalog` (`task_presets`), plus **Advanced**. Changing a preset updates internal `difficulty` + `sub_environment` to match the preset. |
86
+ | **Reset Episode** | **Agent:** `POST /env/reset` with body from preset (`{ task_id }`) or `{ difficulty, sub_environment }`. Refreshes **Model Truth** first. Clears reward breakdown, decision, explanation, evidence, sets default candidate. **Env:** WebSocket `reset` with `{ difficulty, sub_environment }` only (no `task_id` in WS path—preset is flattened to those two fields). **Always** clears `events` at start of reset handler, then appends one “Reset … in agent/env” line. |
87
+ | **Q Tips** | Opens modal walkthrough; highlights DOM nodes with `[data-guide="…"]`. **Skip** stores `polyguard.qtips.v2.seen` in localStorage so first visit auto-opens tips. |
88
+ | **Status chips** | First chip: **Live** if observation loaded and not done, else **Complete** / **Ready**. Second chip: in Agent mode, derived from **`modelSignal()`** (Qwen verified or not); in Env mode shows **`ws env`**. |
89
+
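To make the **Reset Episode** row concrete, here is a minimal TypeScript sketch of the Agent-mode reset call. It assumes only what the table states (path `/api/env/reset`, preset body `{ task_id }` or Advanced body `{ difficulty, sub_environment }`); the response typing, helper names, and error handling are illustrative, not the exact `App.tsx` code.

```ts
// Minimal sketch of the Agent-mode reset call; not the real App.tsx implementation.
const API_BASE = "/api"; // default per the wiring table above

type ResetBody =
  | { task_id: string } // preset path, e.g. "easy_screening"
  | { difficulty: "easy" | "medium" | "hard"; sub_environment: string }; // Advanced path

async function resetEpisode(body: ResetBody): Promise<unknown> {
  const res = await fetch(`${API_BASE}/env/reset`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
  });
  if (!res.ok) throw new Error(`reset failed: ${res.status}`);
  return res.json(); // new observation: candidate_action_set, patient_summary, step budget, ...
}

// Usage: preset vs Advanced
void resetEpisode({ task_id: "easy_screening" });
void resetEpisode({ difficulty: "hard", sub_environment: "PRECISION_DOSING" });
```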
90
+ ### D. Model Truth panel — field by field
91
+
92
+ Data from **`GET /policy/model_status`** (`PolicyProviderRouter` / `active_model_status`).
93
+
94
+ | Field in UI | Typical meaning |
95
+ | --- | --- |
96
+ | **Heading label** | “Qwen 0.5B active” only when Space config matches a strict check (enabled + active + availability + model id regex for **Qwen2.5-0.5B-Instruct**); else “Qwen not verified” or Ollama-specific text if Ollama wins locally. |
97
+ | **Detail paragraph** | Human sentence: model name, artifact, `run_id`, optional **load_error**. |
98
+ | **Model** | `model_id` or `base_model` — HF id of the loaded or configured base. |
99
+ | **Run** | `run_id` from manifest / sweep activation (which training bundle). |
100
+ | **Artifact** | `loaded_source` or `preferred_artifact` — e.g. **`grpo_adapter`**, **`merged`**, **`sft_adapter`**. |
101
+ | **Availability** | Key/value pairs from `availability` dict (which load stages succeeded). |
102
+
103
+ **Ollama branch (local dev):** If `status.ollama.enabled && available`, the UI labels **Ollama Qwen active** and mentions `POLYGUARD_PROVIDER_PREFERENCE` order. Spaces Dockerfile sets **`POLYGUARD_ENABLE_OLLAMA=false`** by default.
104
+
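A hedged sketch of a `modelSignal()`-style check against `GET /policy/model_status` follows. Field names mirror the table above; the flat JSON layout, the exact regex, and the "any availability stage succeeded" rule are assumptions rather than copies of `provider_runtime.py`.

```ts
// Hedged sketch of the strict "Qwen verified" check described above (assumed field layout).
interface ModelStatus {
  enabled?: boolean;
  active?: boolean;
  model_id?: string;
  base_model?: string;
  run_id?: string;
  loaded_source?: string; // e.g. "grpo_adapter", "merged", "sft_adapter"
  availability?: Record<string, boolean>;
  load_error?: string;
  ollama?: { enabled?: boolean; available?: boolean };
}

function qwenVerified(status: ModelStatus): boolean {
  const id = status.model_id ?? status.base_model ?? "";
  // Assumption: at least one availability stage must report success.
  const availabilityOk = Object.values(status.availability ?? {}).some(Boolean);
  return Boolean(status.enabled && status.active && availabilityOk && /Qwen2\.5-0\.5B-Instruct/i.test(id));
}

async function fetchModelSignal(): Promise<string> {
  const status: ModelStatus = await (await fetch("/api/policy/model_status")).json();
  if (status.ollama?.enabled && status.ollama.available) return "Ollama Qwen active"; // local-dev branch
  return qwenVerified(status) ? "Qwen 0.5B active" : "Qwen not verified";
}
```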
105
+ ### E. Advanced strip (Task = Advanced)
106
+
107
+ Only rendered when `taskId === "advanced"`. Two selects:
108
+
109
+ 1. **Difficulty:** `easy` \| `medium` \| `hard` — passed to reset as `difficulty`.
110
+ 2. **Environment:** every string in `catalog.sub_environments` (DDI, BANDIT_MINING, REGIMEN_RISK, PRECISION_DOSING, LONGITUDINAL_DEPRESCRIBING, WEB_SEARCH_MISSING_DATA, ALTERNATIVE_SUGGESTION, NEW_DRUG_DECOMPOSITION).
111
+
112
+ **What each sub-environment stresses (one line each):**
113
+
114
+ | Sub-environment | What the episode emphasizes |
115
+ | --- | --- |
116
+ | **DDI** | Drug–drug interaction exposure and pair risk. |
117
+ | **BANDIT_MINING** | Policy / bandit exploration style scenario (see preset “Bandit Mining”). |
118
+ | **REGIMEN_RISK** | Overall regimen burden and safety tradeoffs. |
119
+ | **PRECISION_DOSING** | Dose buckets, organ-sensitive flags in observation. |
120
+ | **LONGITUDINAL_DEPRESCRIBING** | Multi-step taper / stop sequences over time. |
121
+ | **WEB_SEARCH_MISSING_DATA** | Rewards process fidelity for evidence-fetch actions. |
122
+ | **ALTERNATIVE_SUGGESTION** | Substitution / alternative action types rewarded more. |
123
+ | **NEW_DRUG_DECOMPOSITION** | Hard track: decompose novel drug string into components. |
124
+
125
+ ### F. Episode Overview — every KPI and subsection
126
+
127
+ **KPI grid (always eight rows):**
128
+
129
+ | KPI | Source |
130
+ | --- | --- |
131
+ | **Mode** | Literal “Agent Workbench” or “Env Explorer”. |
132
+ | **Task** | Preset label or “Advanced”. |
133
+ | **Difficulty** | `observation.deterministic_contract.difficulty` or `-`. |
134
+ | **Environment** | `deterministic_contract.sub_environment` or `observation.sub_environment`. |
135
+ | **Step Budget** | `observation.step_budget_remaining`. |
136
+ | **Last Reward** | Active branch’s last reward (after reset, Agent clears to `-` until first step). |
137
+ | **Patient** | `patient_summary.patient_id` or `patient_summary.id`. |
138
+ | **Status** | Complete if `done`, else Live if observation exists, else Ready. |
139
+
140
+ **Patient Summary `<dl>`:** First **8** keys of `observation.patient_summary` (keys humanized: underscores → spaces, title case). Typical keys include demographics, allergies, high-level clinical flags—whatever the backend puts on `PolyGuardObservation`.
141
+
142
+ **Risk Delta `<dl>`:** First **8** entries of `observation.burden_score_summary` — burden-related scalars the env uses for reward deltas.
143
+
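The "first 8 keys, humanized" rendering used by both Patient Summary and Risk Delta can be summarized in a few lines. The humanize rule (underscores to spaces, title case) and the 8-entry cap come from the text above; the helper names and example values are illustrative, not the real `App.tsx` identifiers.

```ts
// Sketch of the "first 8 keys, humanized" rendering for Patient Summary / Risk Delta.
function humanizeKey(key: string): string {
  return key
    .split("_")
    .map((part) => (part ? part[0].toUpperCase() + part.slice(1) : part))
    .join(" ");
}

function firstEightEntries(summary: Record<string, unknown>): Array<[string, unknown]> {
  return Object.entries(summary)
    .slice(0, 8) // only the first eight keys get rendered
    .map(([k, v]): [string, unknown] => [humanizeKey(k), v]);
}

// Hypothetical input: { patient_id: "p_001", egfr_band: "45-59" }
// → [["Patient Id", "p_001"], ["Egfr Band", "45-59"]]
```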
144
+ ### G. Candidate Actions list — each column
145
+
146
+ Each row is one **`CandidateAction`** from `candidate_action_set`.
147
+
148
+ | Column / concept | Meaning |
149
+ | --- | --- |
150
+ | **`candidate_id`** | Stable id (e.g. `cand_…`) — must match when submitting. |
151
+ | **Action label** | Humanized `action_type` (STOP_DRUG, SUBSTITUTE_WITHIN_CLASS, …). |
152
+ | **Third column** | `target_drug` **or** `replacement_drug` **or** `mode` — whichever is most informative. |
153
+ | **Right column** | `estimated_safety_delta` formatted to 3 decimals, or **Blocked** if `legality_precheck === false`. |
154
+ | **Disabled rows** | You cannot select illegal candidates; click does nothing. |
155
+ | **Default selection** | **Agent:** first candidate in list. **Env:** first **legal** candidate that is not `KEEP_REGIMEN` and not `REQUEST_*`, else first legal non–KEEP_REGIMEN, else first in list (`defaultCandidateForMode`). |
156
+
157
+ **Hidden fields you can mention if showing JSON elsewhere:** `dose_bucket`, `taper_days`, `monitoring_plan`, `evidence_query`, `new_drug_name`, `candidate_components`, `uncertainty_score`, `rationale_tags`, `required_monitoring`, `burden_delta`, `disease_stability_estimate`.
158
+
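Returning to the **Default selection** row above, the `defaultCandidateForMode` rule can be written as a small pure function. This is a minimal sketch: the interface below is a reduced stand-in for the real candidate type in `lib/types.ts`, keeping only the fields the rule needs.

```ts
// Sketch of the default-selection rule from the table above (defaultCandidateForMode).
interface Candidate {
  candidate_id: string;
  action_type: string; // e.g. "KEEP_REGIMEN", "STOP_DRUG", "REQUEST_REVIEW"
  legality_precheck?: boolean;
}

function defaultCandidate(mode: "agent" | "env", candidates: Candidate[]): Candidate | undefined {
  if (mode === "agent") return candidates[0]; // Agent: first candidate in the list

  const legal = candidates.filter((c) => c.legality_precheck !== false);
  return (
    // Env: first legal candidate that is neither KEEP_REGIMEN nor a REQUEST_* action ...
    legal.find((c) => c.action_type !== "KEEP_REGIMEN" && !c.action_type.startsWith("REQUEST_")) ??
    // ... else first legal non-KEEP_REGIMEN ...
    legal.find((c) => c.action_type !== "KEEP_REGIMEN") ??
    // ... else fall back to the first candidate in the list.
    candidates[0]
  );
}
```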
159
+ ### H. Action Console — every input and button
160
+
161
+ | UI element | Effect |
162
+ | --- | --- |
163
+ | **Type / Mode / Target / Replacement / Dose / Uncertainty** | Read-only snapshot of the **currently selected** candidate. |
164
+ | **Confidence** | Number input **0.001–0.999** step 0.001; sent as `confidence` on **Submit Candidate** (Agent) or embedded in WS payload (Env). |
165
+ | **Rationale** | Free text → `rationale_brief` / rationale on the action. |
166
+ | **Submit Candidate** (Agent) | Calls `POST /env/step_candidate` with `{ candidate_id, confidence, rationale_brief }`. API finds matching legal action and calls `env.step`. |
167
+ | **Submit Env Step** (Env) | Same confidence/rationale + full action payload built by `buildActionPayload` → WS `step`. |
168
+ | **Run Agent** | **Only when** `mode === "agent"` **and** observation exists **and** not `done`. Calls `POST /agents/orchestrate` with empty JSON body. **Disabled** in Env mode. |
169
+ | **Done notice** | If `done`, shows which mode completed and `termination_reason` from `info` if present. Primary button becomes **Reset Episode** (shortcut). |
170
+
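For reference, here is a minimal sketch of the **Submit Candidate** call described above. The body fields come straight from the table (`candidate_id`, `confidence`, `rationale_brief`); the function name and response typing are illustrative only.

```ts
// Sketch of the Agent-mode Submit Candidate request (POST /env/step_candidate).
async function submitCandidate(candidateId: string, confidence: number, rationale: string) {
  const res = await fetch("/api/env/step_candidate", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      candidate_id: candidateId,
      confidence, // the UI constrains this to 0.001–0.999
      rationale_brief: rationale,
    }),
  });
  if (!res.ok) throw new Error(`step_candidate failed: ${res.status}`);
  // Standard step payload: observation, reward, done, info (info.reward_breakdown when present).
  return res.json();
}
```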
171
+ ### I. Reward Channels — every bar (exact keys)
172
+
173
+ The UI renders **exactly these keys** in order (`REWARD_KEYS` in `App.tsx` — **14** rows):
174
+
175
+ | # | Key | Role |
176
+ | --- | --- | --- |
177
+ | 1 | `total_reward` | Weighted aggregate of component scores (`aggregate_rewards` in `reward_scaling.py`). |
178
+ | 2 | `primary_safety_legality` | Roll-up: legality, candidate alignment, anti-cheat, uncertainty calibration (`reward_router.compute_primary_reward_channels`). |
179
+ | 3 | `primary_clinical_improvement` | Roll-up: safety delta, burden improvement, disease stability. |
180
+ | 4 | `primary_dosing_quality` | Roll-up: dosing quality + abstention quality. |
181
+ | 5 | `primary_process_integrity` | Roll-up: format compliance, efficiency, process fidelity, explanation grounding. |
182
+ | 6 | `legality_score` | Action legal per safety verifier. |
183
+ | 7 | `safety_delta_score` | Movement on severe DDI / risk proxy vs pre-step state. |
184
+ | 8 | `burden_improvement_score` | Medication burden before vs after. |
185
+ | 9 | `disease_stability_score` | Stability heuristic vs disruptive action types. |
186
+ | 10 | `dosing_quality_score` | Dose-mode and bucket appropriateness. |
187
+ | 11 | `process_fidelity_score` | Follows intended workflow for sub-environment (e.g. fetch evidence when required). |
188
+ | 12 | `explanation_grounding_score` | Rationale present / grounded. |
189
+ | 13 | `anti_cheat_score` | Collapses when anti-cheat triggers. |
190
+ | 14 | `uncertainty_calibration_score` | Confidence vs uncertainty alignment. |
191
+
192
+ **Note:** `total_reward` is row 1; rows 2–5 are **primary** channels; rows 6–14 are **exposed component** scores. Other components (`format_compliance_score`, `efficiency_score`, `candidate_alignment_score`, `abstention_quality_score`) still exist **in the backend** `RewardBreakdown` and feed primaries + total, but this UI **does not** give them their own bar rows.
193
+
194
+ Bars show **`-`** when the value is missing (no step yet or breakdown not returned). Bar width = value × 100% with value clamped to `[0.001, 0.999]`.
195
+
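A tiny sketch of how each bar is rendered, per the note above: missing values print `-`, present values are clamped to `[0.001, 0.999]` and drawn as a percentage width. The function names are illustrative.

```ts
// Sketch of per-bar rendering: format the label, clamp the width.
function formatRewardValue(value: number | null | undefined): string {
  return value == null ? "-" : value.toFixed(3); // 3 decimals, "-" when no step / no breakdown yet
}

function barWidthPercent(value: number | null | undefined): number {
  if (value == null) return 0; // no bar when there is no value yet
  const clamped = Math.min(0.999, Math.max(0.001, value)); // clamp to the stated bounds
  return clamped * 100; // CSS width in percent
}
```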
196
+ **Breakdown source:** In Agent mode, the UI prefers `info.reward_breakdown` after a step and may also call **`GET /env/reward_breakdown`**. In Env mode, it uses `info.reward_breakdown` from the WebSocket step packet; if empty, the UI clears the reward panel.
197
+
198
+ ### J. Current Medications cards
199
+
200
+ Built from `observation.medication_table[]`. Each card:
201
+
202
+ - **Title:** `drug` / `drug_id` / `name`.
203
+ - **High-risk ribbon:** if `high_risk` or `is_high_risk_elderly` or Beers / warning flags.
204
+ - **Body:** `indication` or `class_name` or `atc_class`.
205
+ - **Meta row:** dose bucket or mg dose; taper vs `monitoring` or `route`.
206
+
207
+ ### K. Action History vs Warnings
208
+
209
+ | Panel | Source |
210
+ | --- | --- |
211
+ | **Action History** | `observation.action_history` — each item shows step index and `action_type` / `candidate_id` / reward snippet. |
212
+ | **Warnings** | `observation.warning_summary` — list of human-readable env warnings (DDIs, constraints, etc.). |
213
+
214
+ ### L. Decision / Explanation / Evidence (Agent only)
215
+
216
+ Rendered as JSON `<pre>` blocks:
217
+
218
+ | Title | When populated | Content origin |
219
+ | --- | --- | --- |
220
+ | **Decision** | Agent mode only. | **`final_action`** on the packet. For **`step_candidate`**, the API returns the standard **step** payload — **typically no `final_action` field**, so this panel may stay **empty after manual submit**. For **`orchestrate`**, **`final_action`** is the **`PolyGuardAction`** after critic (what actually hit `env.step`). |
221
+ | **Explanation** | Agent mode only. | **`explanation`** — output of **`ExplainerAgent`** after the step (`orchestrate` returns it). Usually **empty** after raw `step_candidate` unless API adds it. |
222
+ | **Evidence** | Agent mode only. | **`evidence`** key on packet. **`orchestrate`** returns **`evidence_out`** from **`EvidenceAgent.run(state)`** (retrieval / web-fallback bundle). **`step_candidate`** does not attach orchestrator evidence — panel often **empty** on manual clicks. |
223
+
224
+ **Demo takeaway:** Tell viewers: *“To populate Decision / Explanation / Evidence in the UI, use **Run Agent** (orchestrate). Manual **Submit Candidate** updates the env and rewards but does not replay the full multi-agent JSON into those three panels.”*
225
+
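To show where those three panels get their data, here is a hedged sketch of the **Run Agent** request and the packet fields the UI reads. The field names (`final_action`, `explanation`, `evidence`) match the table above; the TypeScript shape and error handling are assumptions for illustration.

```ts
// Sketch of the orchestrate call behind "Run Agent" and the fields the panels read.
interface OrchestratePacket {
  final_action?: unknown; // PolyGuardAction chosen after the critic → Decision panel
  explanation?: unknown;  // ExplainerAgent output → Explanation panel
  evidence?: unknown;     // EvidenceAgent bundle → Evidence panel
  observation?: unknown;
  reward?: number;
  done?: boolean;
  info?: Record<string, unknown>;
}

async function runAgent(): Promise<OrchestratePacket> {
  const res = await fetch("/api/agents/orchestrate", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({}), // empty JSON body, as noted in the Action Console table
  });
  if (!res.ok) throw new Error(`orchestrate failed: ${res.status}`);
  return res.json();
}
```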
226
+ ### M. Event Log vs Q Tips
227
+
228
+ | Feature | Behavior |
229
+ | --- | --- |
230
+ | **Event Log** | Prepends timestamped strings: resets, each step’s reward line, errors. **Capped** at 24 lines. Cleared when you click **Reset Episode** (handler starts with `setEvents([])` then appends) — *not* the same as mode switch clearing. |
231
+ | **Q Tips** | 10-step overlay; does not mutate env. |
232
+
233
+ ### N. Orchestrator — every agent in order (`Run Agent`)
234
+
235
+ When **`POST /agents/orchestrate`** runs, `Orchestrator.run_step` executes:
236
+
237
+ | Step | Agent class | What it does (operator language) |
238
+ | --- | --- | --- |
239
+ | 1 | **`MedRecAgent`** | Summarizes current medication list / reconciliation view for downstream modules. Output key: `medrec`. |
240
+ | 2 | **`EvidenceAgent`** | Retrieves **local evidence** (and optional web fallback) for missing or thin context. Shown in UI **`evidence`** when orchestrating. |
241
+ | 3 | **`GraphSafetyAgent`** | Graph-style **DDI / duplicate therapy** style signals. Output: `graph`. |
242
+ | 4 | **`DosingAgent`** | Flags **dose-sensitive** windows and dosing opportunities. Feeds **`dosing_active`** into supervisor. |
243
+ | 5 | **`CandidateAgent`** | Wraps env **candidate builder** — produces the legal `CandidateAction` list. |
244
+ | 6 | **`SupervisorAgent`** | Chooses planner **mode**: regimen vs dose vs **REVIEW** (conservative routing). |
245
+ | 7 | **Contextual bandit** | **`ContextualBanditPolicy`** (LinUCB or Thompson sampling via `POLYGUARD_BANDIT_ALGO`) proposes **top-k** (`POLYGUARD_BANDIT_TOP_K`) candidates for the planner to consider. |
246
+ | 8 | **`PlannerAgent`** | Calls **`PolicyProviderRouter.select_candidate`** — this is where **Transformers + Qwen + PEFT** (or Ollama, or **safety ranker fallback**) picks a **`candidate_id`** and rationale. |
247
+ | 9 | **`CriticAgent`** | Safety veto / repair. May replace proposed action with a safer **`final_action`**. |
248
+ | 10 | **Replan / debate** (optional) | If `coordination_mode` is `replan_on_veto` or `lightweight_debate` and critic rejects, planner may rerun on **review** candidates; `debate_rounds` increments. |
249
+ | 11 | **`PolyGuardEnv.step`** | Commits **`final_action`**, returns `observation`, `reward`, `done`, `info`. |
250
+ | 12 | **Bandit `update`** | If the chosen candidate was in the bandit pool, **updates bandit statistics with the reward** (learning signal for next orchestrate). |
251
+ | 13 | **`ExplainerAgent`** | Builds **`explanation`** object for audit / UI. |
252
+
253
+ **Environment variables (mention for power users):**
254
+
255
+ | Variable | Effect |
256
+ | --- | --- |
257
+ | **`POLYGUARD_POLICY_STACK`** | `llm+bandit` (default): planner sees **bandit-shortlisted** candidates. `llm-only`: all supervisor-filtered candidates. `bandit-only`: **no LLM** — first bandit pick with fixed rationale. |
258
+ | **`POLYGUARD_BANDIT_*`** | Algorithm, alpha, epsilon, seed, top-k. |
259
+ | **`POLYGUARD_PROVIDER_PREFERENCE`** | e.g. `transformers` vs `ollama` order. |
260
+ | **`POLYGUARD_ENABLE_ACTIVE_MODEL`** | Must be true on Space for bundle path; **`POLYGUARD_HF_MODEL`** sets base id for adapters. |
261
+
262
+ ### O. Qwen and fallbacks (planner path)
263
+
264
+ `PolicyProviderRouter` (`app/models/policy/provider_runtime.py`):
265
+
266
+ 1. Builds a **JSON instruction** listing candidates and asks for `candidate_id=…; rationale=…`.
267
+ 2. Tries providers in **`POLYGUARD_PROVIDER_PREFERENCE`** (default **Transformers** on Space).
268
+ 3. Parses model text for a legal `candidate_id`; on failure uses **`safety_ranker`** deterministic ordering.
269
+
270
+ **So:** Even without Qwen load, **Run Agent** still completes using **ranker / bandit** — mention that if Model Truth is red.
271
+
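Step 3 above (parse or fall back) is easy to picture with a small sketch. The regex, helper names, and the shape of the safety-ranker fallback are assumptions; the real logic lives in `provider_runtime.py` and the safety ranker.

```ts
// Hedged sketch of "parse the model reply, else fall back to deterministic ordering".
function pickCandidate(
  modelText: string,
  legalIds: string[],
  safetyRankerOrder: string[], // deterministic fallback ordering (assumed shape)
): { candidateId: string; rationale: string } {
  const match = modelText.match(/candidate_id\s*=\s*([\w-]+)\s*;\s*rationale\s*=\s*(.*)/i);
  if (match && match[1] && legalIds.includes(match[1])) {
    return { candidateId: match[1], rationale: (match[2] ?? "").trim() };
  }
  // Fallback: first legal candidate in the safety-ranker order, else first legal id.
  const fallback = safetyRankerOrder.find((id) => legalIds.includes(id)) ?? legalIds[0];
  return { candidateId: fallback ?? "", rationale: "fallback: safety ranker ordering" };
}
```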
272
+ ### P. Full observation contract (API / types)
273
+
274
+ The TypeScript type `EnvObservation` (`lib/types.ts`) lists fields the backend **may** send. The main workbench **highlights** patient summary, medication table, candidates, burden summary, action history, warnings, step budget, and sub-environment. **Not all fields get their own panel** — if you open browser DevTools → Network → `reset` / `step` response, you can narrate extras:
275
+
276
+ | Field | Typical use |
277
+ | --- | --- |
278
+ | `comorbidity_summary` | Comorbidity list for the patient. |
279
+ | `organ_function_summary` | eGFR / hepatic flags for dosing scenarios. |
280
+ | `labs_vitals_summary` | Labs relevant to risk scoring. |
281
+ | `graph_safety_summary` | Aggregated graph / DDI context. |
282
+ | `precision_dosing_flags` | Tags when sub-environment is dosing-heavy. |
283
+ | `unresolved_conflicts` | Specialist conflict strings. |
284
+ | `abstention_indicators` | When the env suggests review / abstain. |
285
+ | `deterministic_contract` | Difficulty + sub-environment + scenario id contract for reproducibility. |
286
+
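For narration, a reduced sketch of the observation shape helps. Field names below come from this document; the types are loose stand-ins, and the real definitions live in `lib/types.ts`.

```ts
// Reduced, hedged stand-in for EnvObservation (not the real lib/types.ts definition).
interface EnvObservationSketch {
  patient_summary?: Record<string, unknown>;
  medication_table?: Array<Record<string, unknown>>;
  candidate_action_set?: Array<Record<string, unknown>>;
  burden_score_summary?: Record<string, unknown>;
  action_history?: Array<Record<string, unknown>>;
  warning_summary?: string[];
  step_budget_remaining?: number;
  sub_environment?: string;
  deterministic_contract?: { difficulty?: string; sub_environment?: string; scenario_id?: string };
  // Extras you can narrate from DevTools → Network (no dedicated panel):
  comorbidity_summary?: unknown;
  organ_function_summary?: unknown;
  labs_vitals_summary?: unknown;
  graph_safety_summary?: unknown;
  precision_dosing_flags?: unknown;
  unresolved_conflicts?: unknown;
  abstention_indicators?: unknown;
}
```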
287
+ ### Q. Q Tips — copy for each slide (matches `GUIDE_STEPS`)
288
+
289
+ | # | Title | Body (read aloud or paraphrase) |
290
+ | --- | --- | --- |
291
+ | 1 | Start here | PolyGuard is an interactive OpenEnv workbench; top bar picks runtime, scenario, reset. |
292
+ | 2 | Choose the runtime | Agent Workbench = REST API + reward breakdown + Qwen path; Env Explorer = WebSocket to OpenEnv. |
293
+ | 3 | Pick a scenario | Presets load real patient/regimen state from backend. |
294
+ | 4 | Check the model truth | `/policy/model_status`; Qwen only “verified” when API says adapters live. |
295
+ | 5 | Read the episode state | Task, patient, step budget, reward, risk delta from latest env response. |
296
+ | 6 | Review legal actions | Candidate rows = legal moves; inspect safety delta and mode. |
297
+ | 7 | Submit or ask the agent | Submit Candidate vs Run Agent; check model panel before claiming LLM. |
298
+ | 8 | Inspect reward channels | Real scorer output per channel; empty = no step yet. |
299
+ | 9 | Track regimen changes | Medication cards + history + warnings = not canned. |
300
+ | 10 | Follow the run | Event log shows resets, steps, rewards, errors plainly. |
301
+
302
+ ---
303
+
304
+ ## Agent Workbench vs Env Explorer (say this exactly on camera)
305
+
306
+ | | **Agent Workbench** | **Env Explorer** |
307
+ | --- | --- | --- |
308
+ | **Reset** | `POST /env/reset` with task preset (e.g. `{ "task_id": "easy_screening" }`) via product API. | WebSocket `reset` message to OpenEnv **`/ws`** with `{ difficulty, sub_environment }`. |
309
+ | **Submit** | `POST /env/step_candidate` — product API resolves `candidate_id` + your confidence + rationale into a full action and steps the **same** in-process `PolyGuardEnv`. | WebSocket `step` — payload built from selected candidate; talks **directly** to OpenEnv service. |
310
+ | **Run Agent** | **`POST /agents/orchestrate`** — runs the full **orchestrator** (med rec, evidence, graph, dosing, candidates, supervisor, bandit, **planner/LLM**, critic, env step, explainer). | Button **disabled** — there is no orchestrator path over raw WS-only mode in this UI. |
311
+ | **Decision / Explanation / Evidence panels** | **Populated** after orchestrate or after steps that echo `final_action` / `explanation` / `evidence` (orchestrate returns rich `evidence` from `EvidenceAgent` pipeline). | **Always empty** in the UI by design — those panels are `null` in Env mode (`App.tsx` only passes agent-mode state to DetailPanels). |
312
+ | **Reward breakdown** | From step `info.reward_breakdown` or fallback `GET /env/reward_breakdown`. | From WS step packet `info.reward_breakdown` when present. |
313
+ | **Switching mode** | Clears the **Event Log** and resets the other mode’s transient state — mention that so viewers don’t think it’s a bug. | Same. |
314
+
315
+ **One-liner for judges:** *“Agent Workbench is the full product API plus optional LLM-orchestrated policy; Env Explorer is the raw OpenEnv WebSocket contract for the same underlying environment.”*
316
+
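For completeness, a hedged sketch of the Env Explorer transport: one WebSocket to `<origin>/ws`, a `reset` with `{ difficulty, sub_environment }`, then `step` messages built from the selected candidate. The reset and step fields come from the comparison table above; the exact message envelope (the `type` field and payload keys) is an assumption, not the real Web UI wire format.

```ts
// Hedged sketch of the Env Explorer WebSocket path (message envelope is assumed).
function openEnvSocket(): WebSocket {
  const scheme = window.location.protocol === "https:" ? "wss" : "ws";
  return new WebSocket(`${scheme}://${window.location.host}/ws`); // same-origin /ws per the wiring table
}

function sendReset(ws: WebSocket, difficulty: string, subEnvironment: string): void {
  ws.send(JSON.stringify({ type: "reset", difficulty, sub_environment: subEnvironment }));
}

function sendStep(ws: WebSocket, actionPayload: Record<string, unknown>): void {
  // actionPayload stands in for what buildActionPayload assembles from candidate + confidence + rationale.
  ws.send(JSON.stringify({ type: "step", action: actionPayload }));
}
```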
317
+ ---
318
+
319
+ ## Reward channels — what they mean and how they’re computed (talk track)
320
+
321
+ Rewards are **verifier-backed**, **bounded** to roughly **`[0.001, 0.999]`** (3 decimal places in UI).
322
+
323
+ ### Four primary channels (high level)
324
+
325
+ These are **averages of component groups** (`app/env/reward_router.py` — `compute_primary_reward_channels`):
326
+
327
+ 1. **`primary_safety_legality`** — legality, candidate id alignment, anti-cheat, uncertainty calibration.
328
+ 2. **`primary_clinical_improvement`** — safety delta vs severe pairs, burden improvement, disease stability.
329
+ 3. **`primary_dosing_quality`** — dosing quality + abstention (e.g. appropriate review requests under uncertainty).
330
+ 4. **`primary_process_integrity`** — format compliance, efficiency (step budget), process fidelity, explanation grounding.
331
+
332
+ ### Components (examples — `compute_reward_breakdown`)
333
+
334
+ The environment builds scores such as:
335
+
336
+ - **`legality_score`**: high if the action is legal per safety report.
337
+ - **`safety_delta_score` / `burden_improvement_score`**: from **before/after** burden and severe DDI pair counts (`_delta_to_reward`).
338
+ - **`anti_cheat_score`**: collapses if anti-cheat flags the trajectory.
339
+ - **`uncertainty_calibration_score`**: penalizes overconfidence vs modeled uncertainty.
340
+ - **Sub-environment tweaks**: e.g. `WEB_SEARCH_MISSING_DATA` boosts process fidelity when using `FETCH_EXTERNAL_EVIDENCE`; `NEW_DRUG_DECOMPOSITION` rewards decomposition actions with components.
341
+
342
+ Then components are **scaled/clamped**, **primary channels** recomputed, and **`total_reward`** = weighted aggregate (`aggregate_rewards`).
343
+
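A minimal sketch of that aggregation step, assuming the four-channel groupings listed above: primary channels as plain averages of their component groups, and `total_reward` as a weighted aggregate of component scores. The weights here are placeholders, not the values in `configs/rewards.yaml`, and the function names are illustrative.

```ts
// Illustrative aggregation sketch (placeholder weights, groupings from the list above).
type Components = Record<string, number>;

const CHANNEL_GROUPS: Record<string, string[]> = {
  primary_safety_legality: ["legality_score", "candidate_alignment_score", "anti_cheat_score", "uncertainty_calibration_score"],
  primary_clinical_improvement: ["safety_delta_score", "burden_improvement_score", "disease_stability_score"],
  primary_dosing_quality: ["dosing_quality_score", "abstention_quality_score"],
  primary_process_integrity: ["format_compliance_score", "efficiency_score", "process_fidelity_score", "explanation_grounding_score"],
};

function primaryChannels(components: Components): Components {
  const out: Components = {};
  for (const [channel, keys] of Object.entries(CHANNEL_GROUPS)) {
    const values = keys.map((k) => components[k] ?? 0);
    out[channel] = values.reduce((a, b) => a + b, 0) / values.length; // plain average per group
  }
  return out;
}

function totalReward(components: Components, weights: Components): number {
  const keys = Object.keys(components);
  const raw = keys.reduce(
    (sum, k) => sum + (components[k] ?? 0) * (weights[k] ?? 1 / keys.length), // placeholder equal weights
    0,
  );
  return Math.min(0.999, Math.max(0.001, raw)); // bounded like the [0.001, 0.999] range the UI shows
}
```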
344
+ **Demo line:** *“Bars update only after a real step — empty fields mean we haven’t stepped yet, not fake filler.”*
345
+
346
+ ---
347
+
348
+ ## Built-in **Q Tips** (on-screen tour)
349
+
350
+ Click **Q Tips** in the top bar. The app cycles **10 slides** (`App.tsx` → `GUIDE_STEPS`):
351
+
352
+ 1. Start here — top bar, scenarios, reset.
353
+ 2. Choose the runtime — Agent vs Env.
354
+ 3. Pick a scenario — presets load real patient/regimen state.
355
+ 4. Check the model truth — `/policy/model_status`.
356
+ 5. Read episode state — overview + patient summary.
357
+ 6. Review legal actions — candidates.
358
+ 7. Submit or ask the agent — Submit vs Run Agent.
359
+ 8. Inspect reward channels.
360
+ 9. Medications + history/warnings.
361
+ 10. Event log — errors and connectivity.
362
+
363
+ **Recording tip:** Record **Q Tips** once in full voiceover (“I’ll use the in-app tour…”) then dismiss and do the live walkthrough below.
364
+
365
+ ---
366
+
367
+ ## Shot-by-shot recording script
368
+
369
+ ### Scene 0 — Intro (30–45 s)
370
+
371
+ **Action:** Scroll slightly so hero + top bar are visible.
372
+ **Say:** *“This is PolyGuard on Hugging Face Spaces: an OpenEnv workbench for polypharmacy safety. The backend runs a real `PolyGuardEnv` with verifiable rewards; the UI can drive it through the product API or raw OpenEnv WebSockets.”*
373
+
374
+ ---
375
+
376
+ ### Scene 1 — Model Truth (45–60 s)
377
+
378
+ **Action:** Stay on **Agent Workbench**. Click nothing yet; point at **Model Truth**.
379
+ **Say:** *“Model Truth is live from `/policy/model_status`. Here we see the base model—typically Qwen 2.5 0.5B Instruct—which artifact is loaded—often the GRPO adapter—and the run id. On Spaces, weights are under `/app/checkpoints/active` after the bundle installer runs.”*
380
+
381
+ **If panel shows unavailable:** *“Cold start or CPU load can delay the bundle; the environment still works for manual candidate submission; Run Agent may fall back to non-LLM routing depending on config.”*
382
+
383
+ ---
384
+
385
+ ### Scene 2 — Easy task, manual submit (Agent) (90–120 s)
386
+
387
+ **Action:** Task → **Easy Screening** (DDI, easy). **Reset Episode.**
388
+ **Say:** *“Easy Screening fixes difficulty easy and sub-environment DDI—drug–drug interaction screening.”*
389
+
390
+ **Action:** Pan **Episode Overview** — read **Patient Summary** and **Risk Delta** aloud briefly.
391
+ **Say:** *“This patient block and risk delta come straight from the observation object.”*
392
+
393
+ **Action:** **Candidate Actions** — click 2–3 rows; show **Blocked** vs legal. Select a **legal** row.
394
+ **Say:** *“Candidates are legal moves from the env; illegal rows are disabled.”*
395
+
396
+ **Action:** **Action Console** — tweak **Confidence** and **Rationale** slightly. Click **Submit Candidate**.
397
+ **Say:** *“Submit Candidate hits `/env/step_candidate` with my chosen legal action, confidence, and rationale.”*
398
+
399
+ **Action:** After response, pause on **Reward Channels** and **Last Reward** in overview.
400
+ **Say:** *“These bars are the verifier breakdown; total reward is the scalar GRPO-style signal we train on.”*
401
+
402
+ **Action:** **Action History** — show one new line. **Event Log** — show the new reward line.
403
+ **Say:** *“History and event log give an audit trail—not a canned animation.”*
404
+
405
+ ---
406
+
407
+ ### Scene 3 — Run Agent (orchestrator + LLM path) (90–120 s)
408
+
409
+ **Prerequisite:** Prefer recording when Model Truth shows **enabled** and **active** with Qwen artifacts.
410
+
411
+ **Action:** **Reset Episode** again (same or different task). Click **Run Agent**. Wait for completion.
412
+ **Say:** *“Run Agent calls `/agents/orchestrate`. That runs med reconciliation, evidence retrieval, graph safety, dosing hints, candidate generation, supervisor mode, a contextual bandit shortlist, then the planner—here that’s where the loaded Qwen policy can choose among candidates—the critic veto, environment step, and explainer.”*
413
+
414
+ **Action:** Scroll to **Decision**, **Explanation**, **Evidence** JSON panels.
415
+ **Say:** *“These three panels are only populated in Agent Workbench mode. Env Explorer deliberately hides them because the raw WebSocket client doesn’t run the full orchestrator response bundle.”*
416
+
417
+ **Action:** Point at **Evidence** — mention structured retriever output vs empty object if task didn’t fetch.
418
+ **Say:** *“Evidence is whatever the evidence agent produced for this state—grounding for clinician trust.”*
419
+
420
+ ---
421
+
422
+ ### Scene 4 — Env Explorer contrast (60–90 s)
423
+
424
+ **Action:** Click **Env Explorer**. **Reset Episode** (same task: Easy Screening).
425
+ **Say:** *“Now the UI resets over WebSocket `reset` to the OpenEnv service on port 8100—same scenarios, different transport.”*
426
+
427
+ **Action:** Select a candidate, **Submit Env Step**.
428
+ **Say:** *“Submit Env Step sends a WebSocket `step` with the action payload—no `/agents/orchestrate`.”*
429
+
430
+ **Action:** Scroll to **Decision / Explanation / Evidence** — show they stay **empty** or “No data.”
431
+ **Say:** *“This is intentional: I’m proving the low-level env API, not the full agent stack.”*
432
+
433
+ **Action:** **Event Log** — note new lines tagged from env step.
434
+
435
+ ---
436
+
437
+ ### Scene 5 — Task variety (2–3 minutes, optional montage)
438
+
439
+ For each preset, do **Reset** + **one** legal **Submit** (Agent mode is enough):
440
+
441
+ | Task | Difficulty | Sub-environment | What to say |
442
+ | --- | --- | --- | --- |
443
+ | **Easy Screening** | easy | DDI | “Fast DDI-focused episode.” |
444
+ | **Budgeted Screening** | medium | REGIMEN_RISK | “More steps, regimen-risk tradeoffs.” |
445
+ | **Complex Tradeoff** | hard | REGIMEN_RISK | “Harder patient draw, tighter budgets.” |
446
+ | **Bandit Mining** | hard | BANDIT_MINING | “Bandit-style policy mining scenario.” |
447
+
448
+ **Action:** Switch Task to **Advanced**. Set e.g. **hard** + **PRECISION_DOSING**. Reset.
449
+ **Say:** *“Advanced exposes every sub-environment enum the backend supports—precision dosing, deprescribing, web-search missing data, alternatives, new-drug decomposition.”*
450
+
451
+ ---
452
+
453
+ ### Scene 6 — Medications + warnings (45 s)
454
+
455
+ **Action:** After any step with regimen change, show **Current Medications** cards (high-risk styling).
456
+ **Say:** *“Cards mirror `medication_table` from the observation; warnings list is explicit env output.”*
457
+
458
+ ---
459
+
460
+ ### Scene 7 — Closing (30 s)
461
+
462
+ **Say:** *“That’s the full loop: HF Space hosts OpenEnv + API, Qwen adapters live under checkpoints/active, Agent Workbench demonstrates orchestrated LLM decisions with evidence and explanations, and Env Explorer proves the same environment over raw WebSockets for OpenEnv compatibility.”*
463
+
464
+ ---
465
+
466
+ ## OBS / QuickTime checklist
467
+
468
+ - [ ] Capture **system audio** if you add voiceover in post; or record mic in OBS.
469
+ - [ ] **1920×1080**, 30 fps (or 60 if you want smooth cursor).
470
+ - [ ] **2 s pause** after each button click before scrolling away.
471
+ - [ ] If Space sleeps, **mouse jiggle** or refresh before recording.
472
+ - [ ] Export **MP4 H.264** for YouTube / HF dataset card.
473
+
474
+ ---
475
+
476
+ ## Quick troubleshooting on camera (if something breaks)
477
+
478
+ | Symptom | What to say / do |
479
+ | --- | --- |
480
+ | WebSocket errors in Event Log | “Env service reconnect—refresh page; WS URL is derived from the Space origin.” |
481
+ | Run Agent fails | “Check Model Truth—model may still be downloading or Ollama disabled on Space.” |
482
+ | Reward bars all dash | “No step yet—reset and submit once.” |
483
+ | Candidates empty | “Reset episode—env didn’t initialize.” |
484
+
485
+ ---
486
+
487
+ ## Related docs
488
+
489
+ - [UI overview](ui.md)
490
+ - [Deployment](deployment.md)
491
+ - [Environment design](environment_design.md)
492
+ - [Reward design](reward_design.md)
493
+ - [Architecture](architecture.md)
docs/deployment.md CHANGED
@@ -30,7 +30,7 @@ uv run python -c "from huggingface_hub import HfApi; print(HfApi().space_info('$
30
  openenv validate --url "https://thejackbright-polyguard-openenv.hf.space"
31
  ```
32
 
33
- `scripts/deploy_space_api.py` is the preferred deployment path for this repo because it uploads a valid Docker Space README frontmatter bundle through `huggingface_hub.HfApi`. `scripts/deploy_space.sh` remains available, but the current OpenEnv CLI path may fail with invalid generated `colorFrom`/`colorTo` metadata.
34
 
35
  Useful `scripts/deploy_space.sh` flags:
36
 
 
30
  openenv validate --url "https://thejackbright-polyguard-openenv.hf.space"
31
  ```
32
 
33
+ `scripts/deploy_space_api.py` is the preferred deployment path for this repo because it uploads a valid Docker Space README frontmatter bundle through `huggingface_hub.HfApi`. `scripts/deploy_space.sh` remains available, but the current OpenEnv CLI path may fail with invalid generated `colorFrom`/`colorTo` metadata. Pushing to GitHub alone does not change the Hub Space unless that Space is configured to rebuild from that repo and branch; run the deploy script (with `HF_TOKEN`) after UI or API changes so the Docker image rebuilds. See `docker/space/README.md` for Dockerfile path, monorepo layout, and cache/reboot notes.
34
 
35
  Useful `scripts/deploy_space.sh` flags:
36
 
docs/final_submission_audit.md CHANGED
@@ -23,7 +23,7 @@ The only known judge-facing blocker is external storytelling: the README blog UR
23
  | Product/demo | Implemented | FastAPI product API, React/Vite workbench, policy lab, training monitor, replay, dosing, and safety views. |
24
  | Results and plots | Implemented | Tracked `docs/results/*.json` and PNG plots, including SFT baseline sweep evidence and top-level environment-backed GRPO evidence. |
25
  | HF Space deployment | Implemented | Public Space is running on CPU basic, Space metadata is available, and tracked `docs/results/hf_space_verification.json` reports OpenEnv validation passed. |
26
- | Colab notebook | Implemented | README links `notebooks/09_training_loop.ipynb` through Colab. |
27
  | Story artifact | Pending external publication | `docs/hf_blog_draft.md` exists, but the README blog URL returns 404 until published. |
28
  | Full public per-model GRPO sweep | Not claimed | Current public/tracked evidence is a 3-model SFT-baseline sweep plus a top-level GRPO run. Private training artifact repos require auth and must be mirrored before being used as public evidence. |
29
 
 
23
  | Product/demo | Implemented | FastAPI product API, React/Vite workbench, policy lab, training monitor, replay, dosing, and safety views. |
24
  | Results and plots | Implemented | Tracked `docs/results/*.json` and PNG plots, including SFT baseline sweep evidence and top-level environment-backed GRPO evidence. |
25
  | HF Space deployment | Implemented | Public Space is running on CPU basic, Space metadata is available, and tracked `docs/results/hf_space_verification.json` reports OpenEnv validation passed. |
26
+ | Colab notebook | Implemented | README Colab URL targets `PolyGuard_SFT_GRPO_One_Run_Runner.ipynb`; `notebooks/09_training_loop.ipynb` is the modular alternative. |
27
  | Story artifact | Pending external publication | `docs/hf_blog_draft.md` exists, but the README blog URL returns 404 until published. |
28
  | Full public per-model GRPO sweep | Not claimed | Current public/tracked evidence is a 3-model SFT-baseline sweep plus a top-level GRPO run. Private training artifact repos require auth and must be mirrored before being used as public evidence. |
29
 
docs/idea_document_traceability.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Idea document and participant guide — implementation map
2
+
3
+ This ties your polypharmacy / OpenEnv design notes and typical hackathon submission requirements to files in this repository.
4
+
5
+ ## Submission narrative (required bullets)
6
+
7
+ | Requirement | Status | Where |
8
+ | --- | --- | --- |
9
+ | Problem statement | Documented + implemented | Root [`README.md`](../../README.md), `polyguard-rl/README.md`, `docs/safety.md` |
10
+ | Environment (agent operates here) | Implemented | `PolyGuardEnv`, `app/env/env_core.py`, `app/env/fastapi_app.py`, `openenv.yaml`, `server/app.py` |
11
+ | Agent capabilities | Implemented | `app/agents/`, `docs/agents.md` |
12
+ | Tasks | Implemented | Scenario JSONL under `data/scenarios/`, presets in `app/env/catalog.py` |
13
+ | Reward / evaluation logic | Implemented | `app/env/reward_router.py`, `app/env/verifier.py`, `configs/rewards.yaml`, `docs/reward_design.md`, `docs/evaluation.md` |
14
+ | Post-training / self-improvement | Implemented | `scripts/train_sft_trl.py`, `scripts/train_grpo_trl.py`, `app/training/grpo_trl.py`, `docs/training.md` |
15
+
16
+ ## Your “Plan” sections vs codebase
17
+
18
+ | Plan item | Status | Notes |
19
+ | --- | --- | --- |
20
+ | OpenEnv `reset` / `step` / `state`, timeouts, safety | Done | `env_core.py`, `fastapi_app.py`, max steps per sub-env, `anti_cheat.py` |
21
+ | Local + remote execution | Done | Local FastAPI + `docker-compose.yml`, HF Space via `scripts/deploy_space_api.py`, `Dockerfile.space`, `docker/space/` |
22
+ | Specific envs: DDI, bandit mining, regimen risk | Done | `SubEnvironment` enum, transitions in `app/env/transition.py` |
23
+ | Precision dosing, deprescribing, web search, alternatives, new drug (hard) | Done | Matching enum values + scenario tracks; “new drug” is `NEW_DRUG_DECOMPOSITION` |
24
+ | Multiple reward functions + anti-hacking | Done | 13 components → 4 channels; anti-cheat and tests in `tests/` |
25
+ | TRL + Unsloth, metrics, generations | Done | TRL scripts + reports; Unsloth optional (`--use-unsloth`); `app/training/metrics.py` |
26
+ | Post-training + inference | Done | merge + `test_inference_postsave.py`, active manifest / API path |
27
+ | Product / Space demo, UI | Done | FastAPI `app/api/`, React `app/ui/frontend/`, Space deployment scripts |
28
+ | Benchmarks + plots + sample generations | Done | `scripts/evaluate_*.py`, `docs/results/`, `scripts/generate_submission_evidence.py` |
29
+ | Deploy: OpenEnv, container, HF Space | Done | See `docs/deployment.md` |
30
+ | Easy / medium / hard | Done | `scenarios_easy.jsonl`, `scenarios_medium.jsonl`, `scenarios_hard.jsonl` |
31
+
32
+ ## Themes (world modeling, multi-agent, self-improvement)
33
+
34
+ | Theme | Status | Notes |
35
+ | --- | --- | --- |
36
+ | World modeling / professional tasks | Primary fit | Stateful regimen, verifiers, tool-like actions |
37
+ | Multi-agent | Partial | Supervisor/orchestrator and policy stack (`app/agents/orchestrator.py`, `supervisor_agent.py`); not a separate multi-player env |
38
+ | Self-improving systems | Via GRPO | Environment-backed RLVR-style training, not online self-play |
39
+
40
+ ## “What to submit” checklist
41
+
42
+ | Deliverable | Status |
43
+ | --- | --- |
44
+ | GitHub repo + URLs in README | Root + `polyguard-rl/README.md` |
45
+ | HF Space URL | In README |
46
+ | Points from doc | `docs/participant_guide_traceability.md`, this file |
47
+ | Colab | `PolyGuard_SFT_GRPO_One_Run_Runner.ipynb`, `notebooks/09_training_loop.ipynb` |
48
+ | Video or blog | README links blog; **publish** draft in `docs/hf_blog_draft.md` or swap URL |
49
+
50
+ ## Future ideas from your notes (not claimed as done)
51
+
52
+ - Medicine images / barcodes: listed under Future Work in README.
53
+ - Web search agents: sub-env `WEB_SEARCH_MISSING_DATA` exists; “full web agent product” is beyond current scope.
54
+
55
+ ## Fresh clone reminder
56
+
57
+ Generated data and many `outputs/` reports are produced by scripts (see `scripts/bootstrap_data.py` and `REQUIRED_ARTIFACTS` in `scripts/acceptance_gate.py`). Run the bootstrap/build pipeline before expecting strict `POLYGUARD_ENFORCE_SUBMISSION_LINKS=true` acceptance to pass on an empty workspace.
docs/participant_guide_traceability.md CHANGED
@@ -18,7 +18,7 @@ This audit maps the hackathon guide, FAQ, and judging criteria to concrete PolyG
18
  | Export adapters safely and test inference | `scripts/merge_adapters_safe.py` and `scripts/test_inference_postsave.py` |
19
  | Show results with plots and reports | `docs/results/*.json`, tracked reward/process/legal/success/sweep plot PNGs, a 3-model SFT-baseline sweep, and a top-level environment-backed GRPO run |
20
  | Host the environment on Hugging Face Spaces | `scripts/deploy_space_api.py`, `scripts/deploy_space.sh`, Docker runtime, `docs/results/hf_space_verification.json`, and live Space health/metadata checks |
21
- | Include a Colab training notebook | `notebooks/09_training_loop.ipynb` |
22
  | Link story material from README | README links the selected Hugging Face blog/story URL; publish it before final hand-in if the external URL is still 404 |
23
 
24
  ## Current Evidence Status
 
18
  | Export adapters safely and test inference | `scripts/merge_adapters_safe.py` and `scripts/test_inference_postsave.py` |
19
  | Show results with plots and reports | `docs/results/*.json`, tracked reward/process/legal/success/sweep plot PNGs, a 3-model SFT-baseline sweep, and a top-level environment-backed GRPO run |
20
  | Host the environment on Hugging Face Spaces | `scripts/deploy_space_api.py`, `scripts/deploy_space.sh`, Docker runtime, `docs/results/hf_space_verification.json`, and live Space health/metadata checks |
21
+ | Include a Colab training notebook | [`PolyGuard_SFT_GRPO_One_Run_Runner.ipynb`](../PolyGuard_SFT_GRPO_One_Run_Runner.ipynb) (README Colab link) and [`notebooks/09_training_loop.ipynb`](../notebooks/09_training_loop.ipynb) (modular walkthrough) |
22
  | Link story material from README | README links the selected Hugging Face blog/story URL; publish it before final hand-in if the external URL is still 404 |
23
 
24
  ## Current Evidence Status
docs/ui.md CHANGED
@@ -1,3 +1,7 @@
1
  # UI
2
 
3
  React/Vite workbench provides operator-oriented pages for patient workbench, replay, policy lab, dosing, safety, and training monitoring.
 
 
 
 
 
1
  # UI
2
 
3
  React/Vite workbench provides operator-oriented pages for patient workbench, replay, policy lab, dosing, safety, and training monitoring.
4
+
5
+ For a **panel-by-panel and agent-by-agent** reference (Model Truth, Agent vs Env, Qwen, orchestrator stack, every reward bar, Q Tips copy), see [DEMO_RECORDING_SCRIPT.md](DEMO_RECORDING_SCRIPT.md) § *Feature encyclopedia*.
6
+
7
+ The **FDA alternatives** panel (last block in the Home workbench, full width) calls `POST /tools/medication_alternatives` (up to 7 rows, scrollable list). Optional environment variables: `POLYGUARD_OPENFDA_API_KEY`, plus `POLYGUARD_MED_TOOL_URL` and `POLYGUARD_MED_TOOL_TOKEN` (server-side only).
scripts/deploy_space_api.py CHANGED
@@ -8,8 +8,9 @@ still shipping the same OpenEnv/FastAPI runtime.
8
  from __future__ import annotations
9
 
10
  import argparse
11
- from pathlib import Path
12
  import shutil
 
 
13
 
14
  from huggingface_hub import HfApi
15
 
@@ -17,6 +18,18 @@ from huggingface_hub import HfApi
17
  ROOT = Path(__file__).resolve().parents[1]
18
 
19
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def parse_args() -> argparse.Namespace:
21
  parser = argparse.ArgumentParser(description="Deploy PolyGuard OpenEnv Space with valid HF metadata.")
22
  parser.add_argument("--repo-id", default="TheJackBright/polyguard-openenv")
@@ -64,7 +77,8 @@ def build_bundle(bundle_dir: Path) -> None:
64
  "colorFrom: blue",
65
  "colorTo: green",
66
  "sdk: docker",
67
- "app_port: 8100",
 
68
  "pinned: false",
69
  "---",
70
  "",
@@ -83,6 +97,9 @@ def main() -> None:
83
  print(f"bundle_dir={bundle_dir}")
84
  return
85
 
 
 
 
86
  api = HfApi()
87
  api.create_repo(
88
  repo_id=args.repo_id,
@@ -95,7 +112,7 @@ def main() -> None:
95
  repo_id=args.repo_id,
96
  repo_type="space",
97
  folder_path=str(bundle_dir),
98
- commit_message="Deploy PolyGuard OpenEnv Space",
99
  ignore_patterns=[
100
  ".git/*",
101
  ".venv/*",
@@ -109,6 +126,11 @@ def main() -> None:
109
  print(f"space_url=https://huggingface.co/spaces/{args.repo_id}")
110
  print(f"runtime_url=https://{args.repo_id.replace('/', '-').lower()}.hf.space")
111
  print(f"bundle_dir={bundle_dir}")
 
 
 
 
 
112
 
113
 
114
  if __name__ == "__main__":
 
8
  from __future__ import annotations
9
 
10
  import argparse
 
11
  import shutil
12
+ import subprocess
13
+ from pathlib import Path
14
 
15
  from huggingface_hub import HfApi
16
 
 
18
  ROOT = Path(__file__).resolve().parents[1]
19
 
20
 
21
+ def _git_revision() -> str:
22
+ try:
23
+ return subprocess.check_output(
24
+ ["git", "rev-parse", "--short", "HEAD"],
25
+ cwd=str(ROOT),
26
+ stderr=subprocess.DEVNULL,
27
+ text=True,
28
+ ).strip()
29
+ except (subprocess.CalledProcessError, FileNotFoundError, OSError):
30
+ return "unknown"
31
+
32
+
33
  def parse_args() -> argparse.Namespace:
34
  parser = argparse.ArgumentParser(description="Deploy PolyGuard OpenEnv Space with valid HF metadata.")
35
  parser.add_argument("--repo-id", default="TheJackBright/polyguard-openenv")
 
77
  "colorFrom: blue",
78
  "colorTo: green",
79
  "sdk: docker",
80
+ # Must match nginx / EXPOSE in Dockerfile (Space UI + /api proxy); 8100/8200 are loopback-only.
81
+ "app_port: 7860",
82
  "pinned: false",
83
  "---",
84
  "",
 
97
  print(f"bundle_dir={bundle_dir}")
98
  return
99
 
100
+ rev = _git_revision()
101
+ commit_message = f"Deploy PolyGuard OpenEnv Space (polyguard-rl @ {rev})"
102
+
103
  api = HfApi()
104
  api.create_repo(
105
  repo_id=args.repo_id,
 
112
  repo_id=args.repo_id,
113
  repo_type="space",
114
  folder_path=str(bundle_dir),
115
+ commit_message=commit_message,
116
  ignore_patterns=[
117
  ".git/*",
118
  ".venv/*",
 
126
  print(f"space_url=https://huggingface.co/spaces/{args.repo_id}")
127
  print(f"runtime_url=https://{args.repo_id.replace('/', '-').lower()}.hf.space")
128
  print(f"bundle_dir={bundle_dir}")
129
+ print(f"deployed_src_revision={rev}")
130
+ print(
131
+ "If the live UI still looks old: open the Space → Settings → Factory reboot, "
132
+ "or hard-refresh the browser (Vite hashes usually bust cache after rebuild)."
133
+ )
134
 
135
 
136
  if __name__ == "__main__":
scripts/install_hf_active_bundle.py CHANGED
@@ -77,13 +77,24 @@ def main() -> None:
77
  allow = f"{args.bundle_path.strip('/')}/**"
78
 
79
  print(f"Downloading snapshot of {args.repo_id}@{args.revision} (pattern {allow}) …", flush=True)
80
- snapshot_download(
81
- repo_id=args.repo_id,
82
- repo_type="model",
83
- revision=args.revision,
84
- local_dir=str(snap_root),
85
- allow_patterns=[allow],
86
- )
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  bundle_root = snap_root / args.bundle_path
89
  ckpt_src = bundle_root / "checkpoints"
 
77
  allow = f"{args.bundle_path.strip('/')}/**"
78
 
79
  print(f"Downloading snapshot of {args.repo_id}@{args.revision} (pattern {allow}) …", flush=True)
80
+ try:
81
+ snapshot_download(
82
+ repo_id=args.repo_id,
83
+ repo_type="model",
84
+ revision=args.revision,
85
+ local_dir=str(snap_root),
86
+ allow_patterns=[allow],
87
+ )
88
+ except Exception as exc:
89
+ err = f"{type(exc).__name__}: {exc}"
90
+ hint = (
91
+ "\n[install_hf_active_bundle] Hub returned an error (401/404 often means the artifact repo is private or gated).\n"
92
+ " • Hugging Face Space: Space Settings → Secrets → add HF_TOKEN (read access to that model repo).\n"
93
+ " • Or change the repo to public / accept the license on the model card while logged in.\n"
94
+ " • Without a successful download, POLYGUARD falls back to heuristics / ranker (no local GRPO weights).\n"
95
+ )
96
+ print(f"{hint} • Raw error: {err}\n", flush=True)
97
+ raise SystemExit(1) from exc
98
 
99
  bundle_root = snap_root / args.bundle_path
100
  ckpt_src = bundle_root / "checkpoints"
submission_bundle/README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PolyGuard Submission Bundle
2
+
3
+ This folder contains push-friendly training evidence from the Hugging Face runs.
4
+
5
+ Included:
6
+ - `grpo_training_cycle/`: GRPO run plots, reports, and docs result JSONs.
7
+ - `sft_baseline/`: SFT baseline sweep plots, reports, and docs result JSONs.
8
+ - `local_results/`: Current local result mirrors from `outputs/plots`, `outputs/reports`, and `docs/results`.
9
+ - `notebooks/`: Project notebooks, including `09_training_loop.ipynb`.
10
+
11
+ Not included:
12
+ - Model weights, adapters, checkpoints, optimizer states, or tokenizer/model binary payloads.
13
+ - These were intentionally removed after download cancellation so the repository can be pushed without large model artifacts.
submission_bundle/grpo_training_cycle/docs_results/README.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Result Artifacts
2
+
3
+ These tracked files mirror the latest local smoke/evaluation artifacts so the README can show stable evidence even though `outputs/` and `checkpoints/` are intentionally git-ignored.
4
+
5
+ Current status:
6
+
7
+ - OpenEnv structure/runtime validation passes locally.
8
+ - Test suite passes locally.
9
+ - Frontend production build passes locally.
10
+ - SFT and GRPO artifacts in this folder are non-fallback TRL Transformers evidence from a tiny local compliance run.
11
+ - `postsave_inference.json` loads the merged artifact rather than the fallback policy.
12
+ - `improvement_report.json` shows positive average-reward improvement against the no-change baseline.
13
+ - `hf_space_verification.json` records a live Hugging Face Space validation pass.
14
+
15
+ For a stronger final pitch, replace these artifacts after a larger Colab/HF GPU run:
16
+
17
+ - `sft_trl_run.json`
18
+ - `grpo_trl_run.json`
19
+ - `postsave_inference.json`
20
+ - `improvement_report.json`
21
+ - all plot PNGs
22
+ - `hf_space_verification.json`
submission_bundle/grpo_training_cycle/docs_results/acceptance_gate.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "missing_files": [],
3
+ "missing_artifacts": [],
4
+ "missing_readme_markers": [],
5
+ "missing_readme_links": [],
6
+ "strict_submission_links": true,
7
+ "missing_submission_env": [],
8
+ "strict_submission_failures": [],
9
+ "submission_ready": true,
10
+ "status": "ok"
11
+ }
submission_bundle/grpo_training_cycle/docs_results/avg_process_fidelity.png ADDED
submission_bundle/grpo_training_cycle/docs_results/avg_reward.png ADDED
submission_bundle/grpo_training_cycle/docs_results/baselines.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "no_change": {
3
+ "mode": "REGIMEN_OPT",
4
+ "action_type": "KEEP_REGIMEN",
5
+ "target_drug": null,
6
+ "replacement_drug": null,
7
+ "dose_bucket": "NA",
8
+ "taper_days": null,
9
+ "monitoring_plan": null,
10
+ "evidence_query": null,
11
+ "new_drug_name": null,
12
+ "candidate_components": [],
13
+ "candidate_id": "cand_01",
14
+ "confidence": 0.8,
15
+ "rationale_brief": "Baseline no-change policy."
16
+ },
17
+ "rules_only": {
18
+ "mode": "REGIMEN_OPT",
19
+ "action_type": "SUBSTITUTE_WITHIN_CLASS",
20
+ "target_drug": "opioid_like",
21
+ "replacement_drug": "non_opioid_analgesic",
22
+ "dose_bucket": "NA",
23
+ "taper_days": null,
24
+ "monitoring_plan": null,
25
+ "evidence_query": null,
26
+ "new_drug_name": null,
27
+ "candidate_components": [],
28
+ "candidate_id": "cand_04",
29
+ "confidence": 0.75,
30
+ "rationale_brief": "Rules-only selected top legal candidate."
31
+ },
32
+ "greedy": {
33
+ "mode": "REGIMEN_OPT",
34
+ "action_type": "SUBSTITUTE_WITHIN_CLASS",
35
+ "target_drug": "opioid_like",
36
+ "replacement_drug": "non_opioid_analgesic",
37
+ "dose_bucket": "NA",
38
+ "taper_days": null,
39
+ "monitoring_plan": null,
40
+ "evidence_query": null,
41
+ "new_drug_name": null,
42
+ "candidate_components": [],
43
+ "candidate_id": "cand_04",
44
+ "confidence": 0.72,
45
+ "rationale_brief": "Greedy safety/burden improvement baseline."
46
+ },
47
+ "contextual_bandit": {
48
+ "mode": "REGIMEN_OPT",
49
+ "action_type": "SUBSTITUTE_WITHIN_CLASS",
50
+ "target_drug": "opioid_like",
51
+ "replacement_drug": "non_opioid_analgesic",
52
+ "dose_bucket": "NA",
53
+ "taper_days": null,
54
+ "monitoring_plan": null,
55
+ "evidence_query": null,
56
+ "new_drug_name": null,
57
+ "candidate_components": [],
58
+ "candidate_id": "cand_04",
59
+ "confidence": 0.68,
60
+ "rationale_brief": "Contextual bandit selected candidate."
61
+ },
62
+ "contextual_bandit_topk": [
63
+ {
64
+ "candidate_id": "cand_09",
65
+ "score": 1.1532307878304324,
66
+ "exploration_bonus": 1.1532307878304324,
67
+ "algorithm": "linucb"
68
+ },
69
+ {
70
+ "candidate_id": "cand_10",
71
+ "score": 1.1489735636645433,
72
+ "exploration_bonus": 1.1489735636645433,
73
+ "algorithm": "linucb"
74
+ },
75
+ {
76
+ "candidate_id": "cand_08",
77
+ "score": 1.1447401451857973,
78
+ "exploration_bonus": 1.1447401451857973,
79
+ "algorithm": "linucb"
80
+ }
81
+ ],
82
+ "beam_search": {
83
+ "mode": "REGIMEN_OPT",
84
+ "action_type": "SUBSTITUTE_WITHIN_CLASS",
85
+ "target_drug": "opioid_like",
86
+ "replacement_drug": "non_opioid_analgesic",
87
+ "dose_bucket": "NA",
88
+ "taper_days": null,
89
+ "monitoring_plan": null,
90
+ "evidence_query": null,
91
+ "new_drug_name": null,
92
+ "candidate_components": [],
93
+ "candidate_id": "cand_04",
94
+ "confidence": 0.74,
95
+ "rationale_brief": "Beam-search(3) top candidate."
96
+ },
97
+ "baseline_policy": "no_change_candidate",
98
+ "episodes": 8,
99
+ "avg_reward": 0.747,
100
+ "legality_rate": 1.0,
101
+ "success_rate": 0.0,
102
+ "policy_stack_ablations": {
103
+ "bandit-only": {
104
+ "avg_reward": 0.7616666666666667,
105
+ "legality_rate": 1.0,
106
+ "steps": 3.0
107
+ },
108
+ "llm-only": {
109
+ "avg_reward": 0.7753333333333333,
110
+ "legality_rate": 1.0,
111
+ "steps": 3.0
112
+ },
113
+ "llm+bandit": {
114
+ "avg_reward": 0.7753333333333333,
115
+ "legality_rate": 1.0,
116
+ "steps": 3.0
117
+ }
118
+ }
119
+ }
submission_bundle/grpo_training_cycle/docs_results/benchmark_report.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "offline_policy_eval": {
3
+ "avg_reward": 0.772833,
4
+ "legal_rate": 1.0,
5
+ "success_rate": 0.0
6
+ },
7
+ "safety_eval": {
8
+ "severe_violation_rate": 0.0,
9
+ "illegal_step_rate": 0.0
10
+ },
11
+ "dosing_eval": {
12
+ "target_attainment": 0.75,
13
+ "toxicity_avoidance": 1.0
14
+ },
15
+ "robustness_eval": {
16
+ "missing_labs_safety_rate": 0.666667,
17
+ "noisy_dose_info_safety_rate": 1.0,
18
+ "conflicting_meds_safety_rate": 1.0,
19
+ "alias_noise_safety_rate": 1.0,
20
+ "hidden_duplicate_detection_rate": 1.0,
21
+ "wrong_candidate_id_resilience": 1.0,
22
+ "stale_evidence_safety_rate": 1.0,
23
+ "delayed_ade_manifestation_safety_rate": 1.0
24
+ },
25
+ "calibration_eval": {
26
+ "ece_proxy": 0.08625
27
+ },
28
+ "abstention_eval": {
29
+ "appropriate_abstention_rate": 0.0
30
+ },
31
+ "process_eval": {
32
+ "process_fidelity": 0.92,
33
+ "avg_invalid_actions": 0.333333
34
+ },
35
+ "subgroup_eval": {
36
+ "renal_compromise": {
37
+ "avg_reward": 0.774,
38
+ "legal_rate": 1.0
39
+ },
40
+ "hepatic_compromise": {
41
+ "avg_reward": 0.779333,
42
+ "legal_rate": 1.0
43
+ },
44
+ "frail": {
45
+ "avg_reward": 0.781667,
46
+ "legal_rate": 1.0
47
+ }
48
+ },
49
+ "explainability_eval": {
50
+ "grounding_rate": 0.8
51
+ }
52
+ }
submission_bundle/grpo_training_cycle/docs_results/benchmark_report.txt ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "offline_policy_eval": {
3
+ "avg_reward": 0.772833,
4
+ "legal_rate": 1.0,
5
+ "success_rate": 0.0
6
+ },
7
+ "safety_eval": {
8
+ "severe_violation_rate": 0.0,
9
+ "illegal_step_rate": 0.0
10
+ },
11
+ "dosing_eval": {
12
+ "target_attainment": 0.75,
13
+ "toxicity_avoidance": 1.0
14
+ },
15
+ "robustness_eval": {
16
+ "missing_labs_safety_rate": 0.666667,
17
+ "noisy_dose_info_safety_rate": 1.0,
18
+ "conflicting_meds_safety_rate": 1.0,
19
+ "alias_noise_safety_rate": 1.0,
20
+ "hidden_duplicate_detection_rate": 1.0,
21
+ "wrong_candidate_id_resilience": 1.0,
22
+ "stale_evidence_safety_rate": 1.0,
23
+ "delayed_ade_manifestation_safety_rate": 1.0
24
+ },
25
+ "calibration_eval": {
26
+ "ece_proxy": 0.08625
27
+ },
28
+ "abstention_eval": {
29
+ "appropriate_abstention_rate": 0.0
30
+ },
31
+ "process_eval": {
32
+ "process_fidelity": 0.92,
33
+ "avg_invalid_actions": 0.333333
34
+ },
35
+ "subgroup_eval": {
36
+ "renal_compromise": {
37
+ "avg_reward": 0.774,
38
+ "legal_rate": 1.0
39
+ },
40
+ "hepatic_compromise": {
41
+ "avg_reward": 0.779333,
42
+ "legal_rate": 1.0
43
+ },
44
+ "frail": {
45
+ "avg_reward": 0.781667,
46
+ "legal_rate": 1.0
47
+ }
48
+ },
49
+ "explainability_eval": {
50
+ "grounding_rate": 0.8
51
+ }
52
+ }
submission_bundle/grpo_training_cycle/docs_results/dose_train.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_size": 120.0,
3
+ "status": "trained",
4
+ "train_mae": 0.0025,
5
+ "model_path": "outputs/models/dose_model.pkl"
6
+ }
submission_bundle/grpo_training_cycle/docs_results/dosing_grpo.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "avg_reward": 0.7785555555555557,
3
+ "legality_rate": 1.0,
4
+ "severe_violation_rate": 0.0,
5
+ "abstention_rate": 0.0,
6
+ "avg_episode_length": 2.0,
7
+ "success_rate": 0.0,
8
+ "avg_burden_delta": 0.0,
9
+ "avg_safety_delta": 0.5,
10
+ "avg_dosing_quality": 0.75,
11
+ "avg_process_fidelity": 0.9200000000000002,
12
+ "exploit_detection_count": 3.0,
13
+ "reward_columns": {
14
+ "format_compliance_score": 0.999,
15
+ "candidate_alignment_score": 0.999,
16
+ "legality_score": 0.999,
17
+ "safety_delta_score": 0.5,
18
+ "burden_improvement_score": 0.5,
19
+ "disease_stability_score": 0.9000000000000001,
20
+ "dosing_quality_score": 0.75,
21
+ "abstention_quality_score": 0.56,
22
+ "efficiency_score": 0.77,
23
+ "process_fidelity_score": 0.9200000000000002,
24
+ "explanation_grounding_score": 0.7999999999999999,
25
+ "anti_cheat_score": 0.6663333333333333,
26
+ "uncertainty_calibration_score": 0.87
27
+ }
28
+ }
submission_bundle/grpo_training_cycle/docs_results/frontier_ready.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "frontier_models": [
3
+ "qwen2.5:7b-instruct",
4
+ "qwen2.5:14b-instruct"
5
+ ],
6
+ "deployment_mode": "hf_or_vllm_ready",
7
+ "notes": "Baseline complete; ready for larger model sweep."
8
+ }
submission_bundle/grpo_training_cycle/docs_results/graph_train.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 180,
3
+ "status": "trained",
4
+ "model_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/models/graph_model.pkl"
5
+ }
submission_bundle/grpo_training_cycle/docs_results/grpo_ablation_report.json ADDED
@@ -0,0 +1,149 @@
+ {
+   "status": "ok",
+   "ablations": {
+     "bandit_only": {
+       "avg_reward": 0.779625,
+       "legality_rate": 1.0,
+       "severe_violation_rate": 0.0,
+       "abstention_rate": 0.0,
+       "avg_episode_length": 2.8125,
+       "success_rate": 0.0,
+       "avg_burden_delta": 0.0,
+       "avg_safety_delta": 0.483125,
+       "avg_dosing_quality": 0.75,
+       "avg_process_fidelity": 0.9056250000000008,
+       "exploit_detection_count": 2.0,
+       "timeout_rate": 0.0,
+       "failure_visible_rate": 0.0625,
+       "avg_invalid_actions": 0.0625,
+       "reward_columns": {
+         "format_compliance_score": 0.9989999999999996,
+         "candidate_alignment_score": 0.9989999999999996,
+         "legality_score": 0.9989999999999996,
+         "safety_delta_score": 0.483125,
+         "burden_improvement_score": 0.5,
+         "disease_stability_score": 0.8999999999999995,
+         "dosing_quality_score": 0.75,
+         "abstention_quality_score": 0.5600000000000002,
+         "efficiency_score": 0.5855625,
+         "process_fidelity_score": 0.9056250000000008,
+         "explanation_grounding_score": 0.8000000000000004,
+         "anti_cheat_score": 0.9366249999999997,
+         "uncertainty_calibration_score": 0.8531250000000004
+       },
+       "primary_reward_channels": {
+         "safety_legality": 0.9469062499999998,
+         "clinical_improvement": 0.6273749999999997,
+         "dosing_quality": 0.6550000000000001,
+         "process_integrity": 0.8225937500000001
+       },
+       "policy_stack": "bandit-only",
+       "failure_mining": {
+         "total_rows": 32,
+         "failure_rows": 2,
+         "top_failure_reasons": [
+           {
+             "reason": "repeated_action_loop",
+             "count": 2
+           }
+         ]
+       }
+     },
+     "llm_only": {
+       "avg_reward": 0.7723913043478261,
+       "legality_rate": 1.0,
+       "severe_violation_rate": 0.0,
+       "abstention_rate": 0.0,
+       "avg_episode_length": 1.9565217391304348,
+       "success_rate": 0.0,
+       "avg_burden_delta": 0.0,
+       "avg_safety_delta": 0.4882608695652174,
+       "avg_dosing_quality": 0.75,
+       "avg_process_fidelity": 0.9000000000000005,
+       "exploit_detection_count": 7.0,
+       "timeout_rate": 0.0,
+       "failure_visible_rate": 0.30434782608695654,
+       "avg_invalid_actions": 0.30434782608695654,
+       "reward_columns": {
+         "format_compliance_score": 0.9989999999999999,
+         "candidate_alignment_score": 0.9989999999999999,
+         "legality_score": 0.9989999999999999,
+         "safety_delta_score": 0.4882608695652174,
+         "burden_improvement_score": 0.5,
+         "disease_stability_score": 0.8999999999999998,
+         "dosing_quality_score": 0.75,
+         "abstention_quality_score": 0.5600000000000004,
+         "efficiency_score": 0.7027826086956522,
+         "process_fidelity_score": 0.9000000000000005,
+         "explanation_grounding_score": 0.8000000000000003,
+         "anti_cheat_score": 0.6952608695652175,
+         "uncertainty_calibration_score": 0.8482608695652176
+       },
+       "primary_reward_channels": {
+         "safety_legality": 0.8853478260869562,
+         "clinical_improvement": 0.6290869565217388,
+         "dosing_quality": 0.6549999999999998,
+         "process_integrity": 0.8504782608695656
+       },
+       "policy_stack": "llm-only",
+       "failure_mining": {
+         "total_rows": 23,
+         "failure_rows": 7,
+         "top_failure_reasons": [
+           {
+             "reason": "repeated_action_loop",
+             "count": 7
+           }
+         ]
+       }
+     },
+     "llm_bandit": {
+       "avg_reward": 0.7647391304347826,
+       "legality_rate": 1.0,
+       "severe_violation_rate": 0.0,
+       "abstention_rate": 0.0,
+       "avg_episode_length": 1.9565217391304348,
+       "success_rate": 0.0,
+       "avg_burden_delta": 0.0,
+       "avg_safety_delta": 0.48982608695652174,
+       "avg_dosing_quality": 0.717391304347826,
+       "avg_process_fidelity": 0.9000000000000005,
+       "exploit_detection_count": 7.0,
+       "timeout_rate": 0.0,
+       "failure_visible_rate": 0.30434782608695654,
+       "avg_invalid_actions": 0.30434782608695654,
+       "reward_columns": {
+         "format_compliance_score": 0.9989999999999999,
+         "candidate_alignment_score": 0.9989999999999999,
+         "legality_score": 0.9989999999999999,
+         "safety_delta_score": 0.48982608695652174,
+         "burden_improvement_score": 0.5043478260869565,
+         "disease_stability_score": 0.8582608695652173,
+         "dosing_quality_score": 0.717391304347826,
+         "abstention_quality_score": 0.5600000000000004,
+         "efficiency_score": 0.7027826086956522,
+         "process_fidelity_score": 0.9000000000000005,
+         "explanation_grounding_score": 0.8000000000000003,
+         "anti_cheat_score": 0.6952608695652175,
+         "uncertainty_calibration_score": 0.8126086956521739
+       },
+       "primary_reward_channels": {
+         "safety_legality": 0.8765217391304347,
+         "clinical_improvement": 0.6171739130434781,
+         "dosing_quality": 0.6386956521739129,
+         "process_integrity": 0.8504782608695656
+       },
+       "policy_stack": "llm+bandit",
+       "failure_mining": {
+         "total_rows": 23,
+         "failure_rows": 7,
+         "top_failure_reasons": [
+           {
+             "reason": "repeated_action_loop",
+             "count": 7
+           }
+         ]
+       }
+     }
+   }
+ }
submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run.json ADDED
@@ -0,0 +1,42 @@
+ {
+   "status": "ok",
+   "backend": "trl_transformers",
+   "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+   "records": 2000,
+   "prompts_path": "/app/data/processed/training_corpus_grpo_prompts.jsonl",
+   "reward_summary": {
+     "count": 4000,
+     "avg_reward": 0.782178,
+     "avg_reward_components": {
+       "format_compliance_score": 0.999,
+       "candidate_alignment_score": 0.999,
+       "legality_score": 0.985277,
+       "safety_delta_score": 0.496104,
+       "burden_improvement_score": 0.494346,
+       "disease_stability_score": 0.8912,
+       "dosing_quality_score": 0.511938,
+       "abstention_quality_score": 0.56,
+       "efficiency_score": 0.84942,
+       "process_fidelity_score": 0.905268,
+       "explanation_grounding_score": 0.800248,
+       "anti_cheat_score": 0.48004,
+       "uncertainty_calibration_score": 0.730195
+     },
+     "avg_primary_reward_channels": {
+       "safety_legality": 0.798661,
+       "clinical_improvement": 0.62689,
+       "dosing_quality": 0.535969,
+       "process_integrity": 0.888448
+     }
+   },
+   "reward_log": "/app/checkpoints/grpo_reward_components.jsonl",
+   "train_metrics": {
+     "train_runtime": 6960.8084,
+     "train_samples_per_second": 0.287,
+     "train_steps_per_second": 0.287,
+     "total_flos": 0.0,
+     "train_loss": 2.3633859725151752e-06
+   },
+   "artifact_path": "/app/checkpoints/grpo_adapter",
+   "unsloth_available": false
+ }
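
A quick way to sanity-check a run report like `grpo_trl_run.json` is to load it and rank the averaged reward components, which makes weak channels (here, `anti_cheat_score` at roughly 0.48) easy to spot. This is a minimal illustrative sketch, not part of the pipeline; the file path and the "three weakest components" cut-off are assumptions.

```python
import json
from pathlib import Path

# Assumed location once the submission bundle is unpacked locally.
REPORT = Path("submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run.json")

report = json.loads(REPORT.read_text())
summary = report["reward_summary"]

print(f"backend={report['backend']} model={report['model_id']}")
print(f"avg_reward={summary['avg_reward']:.3f} over {summary['count']} scored completions")

# Rank reward components ascending to surface the weakest channels.
components = sorted(summary["avg_reward_components"].items(), key=lambda kv: kv[1])
for name, value in components[:3]:
    print(f"  weakest: {name} = {value:.3f}")
```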
submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_auto.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "status": "fallback",
+   "backend": "env_reward_fallback",
+   "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
+   "records": 2,
+   "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl",
+   "reward_summary": {
+     "count": 2,
+     "avg_reward": 0.798,
+     "avg_reward_components": {
+       "format_compliance_score": 0.999,
+       "candidate_alignment_score": 0.999,
+       "legality_score": 0.999,
+       "safety_delta_score": 0.671,
+       "burden_improvement_score": 0.525,
+       "disease_stability_score": 0.74,
+       "dosing_quality_score": 0.5,
+       "abstention_quality_score": 0.56,
+       "efficiency_score": 0.857,
+       "process_fidelity_score": 0.92,
+       "explanation_grounding_score": 0.8,
+       "anti_cheat_score": 0.5,
+       "uncertainty_calibration_score": 0.74
+     },
+     "avg_primary_reward_channels": {
+       "safety_legality": 0.8095,
+       "clinical_improvement": 0.645,
+       "dosing_quality": 0.53,
+       "process_integrity": 0.894
+     }
+   },
+   "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl",
+   "train_metrics": {
+     "steps_executed": 2.0
+   },
+   "artifact_path": "",
+   "unsloth_available": false,
+   "trl_runtime_error": "We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.\nCheck your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'."
+ }
submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_fallback_check.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "status": "fallback",
+   "backend": "env_reward_fallback",
+   "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
+   "records": 1,
+   "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl",
+   "reward_summary": {
+     "count": 1,
+     "avg_reward": 0.764,
+     "avg_reward_components": {
+       "format_compliance_score": 0.999,
+       "candidate_alignment_score": 0.999,
+       "legality_score": 0.999,
+       "safety_delta_score": 0.5,
+       "burden_improvement_score": 0.5,
+       "disease_stability_score": 0.9,
+       "dosing_quality_score": 0.5,
+       "abstention_quality_score": 0.56,
+       "efficiency_score": 0.857,
+       "process_fidelity_score": 0.92,
+       "explanation_grounding_score": 0.8,
+       "anti_cheat_score": 0.001,
+       "uncertainty_calibration_score": 0.7
+     },
+     "avg_primary_reward_channels": {
+       "safety_legality": 0.675,
+       "clinical_improvement": 0.633,
+       "dosing_quality": 0.53,
+       "process_integrity": 0.894
+     }
+   },
+   "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl",
+   "train_metrics": {
+     "steps_executed": 1.0
+   },
+   "artifact_path": "",
+   "unsloth_available": false,
+   "trl_runtime_error": "forced_fallback"
+ }
submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_smoke.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "status": "fallback",
+   "backend": "env_reward_fallback",
+   "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
+   "records": 1,
+   "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl",
+   "reward_summary": {
+     "count": 1,
+     "avg_reward": 0.764,
+     "avg_reward_components": {
+       "format_compliance_score": 0.999,
+       "candidate_alignment_score": 0.999,
+       "legality_score": 0.999,
+       "safety_delta_score": 0.5,
+       "burden_improvement_score": 0.5,
+       "disease_stability_score": 0.9,
+       "dosing_quality_score": 0.5,
+       "abstention_quality_score": 0.56,
+       "efficiency_score": 0.857,
+       "process_fidelity_score": 0.92,
+       "explanation_grounding_score": 0.8,
+       "anti_cheat_score": 0.001,
+       "uncertainty_calibration_score": 0.7
+     },
+     "avg_primary_reward_channels": {
+       "safety_legality": 0.675,
+       "clinical_improvement": 0.633,
+       "dosing_quality": 0.53,
+       "process_integrity": 0.894
+     }
+   },
+   "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl",
+   "train_metrics": {
+     "steps_executed": 1.0
+   },
+   "artifact_path": "",
+   "unsloth_available": false,
+   "trl_runtime_error": "forced_fallback"
+ }
submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_strict_check.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "status": "fallback",
+   "backend": "env_reward_fallback",
+   "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
+   "records": 1,
+   "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl",
+   "reward_summary": {
+     "count": 1,
+     "avg_reward": 0.764,
+     "avg_reward_components": {
+       "format_compliance_score": 0.999,
+       "candidate_alignment_score": 0.999,
+       "legality_score": 0.999,
+       "safety_delta_score": 0.5,
+       "burden_improvement_score": 0.5,
+       "disease_stability_score": 0.9,
+       "dosing_quality_score": 0.5,
+       "abstention_quality_score": 0.56,
+       "efficiency_score": 0.857,
+       "process_fidelity_score": 0.92,
+       "explanation_grounding_score": 0.8,
+       "anti_cheat_score": 0.001,
+       "uncertainty_calibration_score": 0.7
+     },
+     "avg_primary_reward_channels": {
+       "safety_legality": 0.675,
+       "clinical_improvement": 0.633,
+       "dosing_quality": 0.53,
+       "process_integrity": 0.894
+     }
+   },
+   "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl",
+   "train_metrics": {
+     "steps_executed": 1.0
+   },
+   "artifact_path": "",
+   "unsloth_available": false,
+   "trl_runtime_error": "forced_fallback"
+ }
submission_bundle/grpo_training_cycle/docs_results/hf_space_verification.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "passed": true,
+   "status": "running",
+   "checked_on": "2026-04-26",
+   "repo_id": "TheJackBright/polyguard-openenv",
+   "space_url": "https://huggingface.co/spaces/TheJackBright/polyguard-openenv",
+   "runtime_url": "https://thejackbright-polyguard-openenv.hf.space",
+   "space_sha": "877add7878fbdf2011ed3d5d378cdca5fe7bac4b",
+   "space_private": false,
+   "runtime": {
+     "stage": "RUNNING",
+     "hardware": {
+       "current": "cpu-basic",
+       "requested": "cpu-basic"
+     },
+     "replicas": {
+       "current": 1,
+       "requested": 1
+     },
+     "domain": "thejackbright-polyguard-openenv.hf.space"
+   },
+   "openenv_validation": {
+     "command": "uv run openenv validate --url https://thejackbright-polyguard-openenv.hf.space",
+     "passed": true,
+     "passed_count": 6,
+     "total_count": 6,
+     "failed_criteria": []
+   }
+ }
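
The verification artifact above can be reproduced by checking the Space runtime and re-running the recorded `openenv validate` command. A minimal sketch follows; it assumes the installed `huggingface_hub` version exposes `HfApi.get_space_runtime`, and simply shells out to the same CLI invocation stored in the artifact.

```python
import subprocess
from huggingface_hub import HfApi

REPO_ID = "TheJackBright/polyguard-openenv"
RUNTIME_URL = "https://thejackbright-polyguard-openenv.hf.space"

# Confirm the Space is up; expected stage is RUNNING on cpu-basic hardware.
runtime = HfApi().get_space_runtime(REPO_ID)
print("stage:", runtime.stage)

# Re-run the validation command recorded in hf_space_verification.json.
subprocess.run(
    ["uv", "run", "openenv", "validate", "--url", RUNTIME_URL],
    check=True,
)
```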
submission_bundle/grpo_training_cycle/docs_results/hf_training_status.json ADDED
@@ -0,0 +1,123 @@
+ {
+   "status": "running",
+   "started_at": 1777161126.3536248,
+   "finished_at": null,
+   "commands": [
+     {
+       "args": [
+         "python",
+         "scripts/bootstrap_data.py"
+       ],
+       "returncode": 0,
+       "elapsed_seconds": 0.821
+     },
+     {
+       "args": [
+         "python",
+         "scripts/build_training_corpus.py",
+         "--profile",
+         "massive",
+         "--with-local",
+         "--with-synthetic",
+         "--with-hf"
+       ],
+       "returncode": 0,
+       "elapsed_seconds": 4.367
+     },
+     {
+       "args": [
+         "python",
+         "scripts/train_sft_trl.py",
+         "--model-id",
+         "Qwen/Qwen2.5-0.5B-Instruct",
+         "--dataset-path",
+         "data/processed/training_corpus_sft.json",
+         "--epochs",
+         "1",
+         "--max-steps",
+         "20",
+         "--batch-size",
+         "2",
+         "--max-seq-len",
+         "512",
+         "--use-unsloth"
+       ],
+       "returncode": 0,
+       "elapsed_seconds": 24.564
+     },
+     {
+       "args": [
+         "reuse_artifact",
+         "grpo_adapter",
+         "/app/checkpoints/grpo_adapter"
+       ],
+       "returncode": 0,
+       "elapsed_seconds": 0.0
+     },
+     {
+       "args": [
+         "python",
+         "scripts/merge_adapters_safe.py",
+         "--adapter-dir",
+         "checkpoints/sft_adapter",
+         "--output-dir",
+         "checkpoints/merged"
+       ],
+       "returncode": 0,
+       "elapsed_seconds": 9.014
+     },
+     {
+       "args": [
+         "python",
+         "scripts/test_inference_postsave.py",
+         "--samples",
+         "3",
+         "--base-model",
+         "Qwen/Qwen2.5-0.5B-Instruct"
+       ],
+       "returncode": 0,
+       "elapsed_seconds": 14.811
+     },
+     {
+       "args": [
+         "python",
+         "scripts/evaluate_policy_ablations.py",
+         "--episodes",
+         "8"
+       ],
+       "returncode": 0,
+       "elapsed_seconds": 4.458
+     },
+     {
+       "args": [
+         "python",
+         "scripts/evaluate_baselines.py"
+       ],
+       "returncode": 0,
+       "elapsed_seconds": 4.603
+     },
+     {
+       "args": [
+         "python",
+         "scripts/evaluate_all.py"
+       ],
+       "returncode": 0,
+       "elapsed_seconds": 4.271
+     },
+     {
+       "args": [
+         "python",
+         "scripts/evaluate_compare_runs.py",
+         "--baseline",
+         "outputs/reports/baselines.json",
+         "--candidate",
+         "outputs/reports/benchmark_report.json",
+         "--output",
+         "outputs/reports/improvement_report.json"
+       ],
+       "returncode": 0,
+       "elapsed_seconds": 0.037
+     }
+   ],
+   "artifact_repo_id": "TheJackBright/polyguard-openenv-training-artifacts"
+ }
submission_bundle/grpo_training_cycle/docs_results/improvement_report.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "status": "ok",
+   "baseline": "outputs/reports/baselines.json",
+   "candidate": "outputs/reports/benchmark_report.json",
+   "deltas": {
+     "avg_reward": 0.025833,
+     "legality_rate": 0.0,
+     "success_rate": 0.0,
+     "avg_process_fidelity": 0.92,
+     "timeout_rate": 0.0,
+     "failure_visible_rate": 0.0
+   },
+   "gate": {
+     "avg_reward_up": true,
+     "legality_up": true,
+     "success_up": true
+   },
+   "improved": true
+ }
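
The `gate` and `improved` fields in this report, together with the benchmark variant below, suggest an all-checks-must-pass rule over the deltas. The sketch below re-derives that verdict from a report file; the exact thresholds (strictly positive `avg_reward` delta, non-negative legality and success deltas) are inferred from these two artifacts, and the authoritative logic lives in scripts/evaluate_compare_runs.py.

```python
import json

def improvement_gate(report_path: str) -> bool:
    """Recompute the improvement verdict from an improvement report's deltas.

    Thresholds here are an assumption inferred from the bundled reports; the
    pipeline's own gate is defined in scripts/evaluate_compare_runs.py.
    """
    deltas = json.load(open(report_path))["deltas"]
    gate = {
        "avg_reward_up": deltas["avg_reward"] > 0,
        "legality_up": deltas["legality_rate"] >= 0,
        "success_up": deltas["success_rate"] >= 0,
    }
    return all(gate.values())
```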
submission_bundle/grpo_training_cycle/docs_results/improvement_report_benchmark.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "status": "ok",
+   "baseline": "outputs/reports/baselines.json",
+   "candidate": "outputs/reports/benchmark_report.json",
+   "deltas": {
+     "avg_reward": -0.0025,
+     "legality_rate": 0.0,
+     "success_rate": 0.0,
+     "avg_process_fidelity": 0.92,
+     "timeout_rate": 0.0,
+     "failure_visible_rate": 0.0
+   },
+   "gate": {
+     "avg_reward_up": false,
+     "legality_up": true,
+     "success_up": true
+   },
+   "improved": false
+ }
submission_bundle/grpo_training_cycle/docs_results/inference_benchmark.json ADDED
@@ -0,0 +1,22 @@
+ {
+   "status": "ok",
+   "runs": [
+     {
+       "run": 0,
+       "provider": "transformers",
+       "candidate_id": "cand_04",
+       "latency_ms": 2313.731,
+       "rationale": "Transformers fallback selected cand_04 via local ranker."
+     },
+     {
+       "run": 1,
+       "provider": "transformers",
+       "candidate_id": "cand_02",
+       "latency_ms": 0.012,
+       "rationale": "Transformers fallback selected cand_02 via local ranker."
+     }
+   ],
+   "avg_latency_ms": 1156.872,
+   "provider_requested": "transformers",
+   "model": "Qwen/Qwen2.5-0.5B-Instruct"
+ }
submission_bundle/grpo_training_cycle/docs_results/legality_rate.png ADDED
submission_bundle/grpo_training_cycle/docs_results/planner_grpo.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "avg_reward": 0.77625,
+   "legality_rate": 1.0,
+   "severe_violation_rate": 0.0,
+   "abstention_rate": 0.0,
+   "avg_episode_length": 2.0,
+   "success_rate": 0.0,
+   "avg_burden_delta": 0.0,
+   "avg_safety_delta": 0.5,
+   "avg_dosing_quality": 0.75,
+   "avg_process_fidelity": 0.92,
+   "exploit_detection_count": 4.0,
+   "reward_columns": {
+     "format_compliance_score": 0.9990000000000001,
+     "candidate_alignment_score": 0.9990000000000001,
+     "legality_score": 0.9990000000000001,
+     "safety_delta_score": 0.5,
+     "burden_improvement_score": 0.5,
+     "disease_stability_score": 0.9000000000000002,
+     "dosing_quality_score": 0.75,
+     "abstention_quality_score": 0.5600000000000002,
+     "efficiency_score": 0.73,
+     "process_fidelity_score": 0.92,
+     "explanation_grounding_score": 0.7999999999999999,
+     "anti_cheat_score": 0.6663333333333333,
+     "uncertainty_calibration_score": 0.8699999999999998
+   }
+ }
submission_bundle/grpo_training_cycle/docs_results/plot_index.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "plots": [
+     "/app/outputs/plots/avg_reward.png",
+     "/app/outputs/plots/legality_rate.png",
+     "/app/outputs/plots/success_rate.png",
+     "/app/outputs/plots/avg_process_fidelity.png",
+     "/app/outputs/plots/policy_stack_avg_reward.png"
+   ]
+ }
submission_bundle/grpo_training_cycle/docs_results/policy_stack_avg_reward.png ADDED
submission_bundle/grpo_training_cycle/docs_results/postsave_inference.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "status": "ok",
+   "model_source": "merged",
+   "model_load_error": "",
+   "samples": 3,
+   "valid_rate": 1.0,
+   "avg_env_reward": 0.712333,
+   "results": [
+     {
+       "idx": 0,
+       "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
+       "completion": "}\n\nTo choose a candidate ID, I will follow these steps:\n\n1. **Review the list of candidates**: The provided list includes six candidate IDs: `cand_01`, `cand_02`, `cand_03`, `cand_04`, `cand_05`, `cand_08`, `cand_09`, and",
+       "candidate_id": "cand_01",
+       "selected_candidate": "cand_01",
+       "env_reward": 0.717,
+       "done": true,
+       "valid": true,
+       "termination_reason": "exploit_detection"
+     },
+     {
+       "idx": 1,
+       "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
+       "completion": "}\n\nSure, I can help you choose a candidate ID based on the information provided. However, I need to know which of the given candidates is your patient ID so that I can provide a suitable rationale for choosing that specific candidate.\n\nPlease let me know which candidate ID you are referring to, and I will proceed with selecting the appropriate candidate ID and providing the rationale accordingly. If you don't have a",
+       "candidate_id": null,
+       "selected_candidate": "cand_01",
+       "env_reward": 0.703,
+       "done": true,
+       "valid": true,
+       "termination_reason": "exploit_detection"
+     },
+     {
+       "idx": 2,
+       "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
+       "completion": "}\n\nTo choose a candidate ID for the patient with the identifier `patient_8002`, I will follow these steps:\n\n1. **Identify the Patient Identifier**: The patient's identifier is `patient_8002`.\n\n2. **List of Candidate IDs**: The list of candidate IDs provided is:\n - cand_07\n - cand_01\n -",
+       "candidate_id": "cand_07",
+       "selected_candidate": "cand_01",
+       "env_reward": 0.717,
+       "done": true,
+       "valid": true,
+       "termination_reason": "exploit_detection"
+     }
+   ]
+ }
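
The post-save prompts request answers in the form "candidate_id=<cand_xx>; rationale=<text>", yet the sampled completions above rarely comply, so some fallback extraction is clearly needed. The sketch below is an illustrative parser (the regex and loose fallback are assumptions, not the project's actual extraction code).

```python
import re
from typing import Optional

CANDIDATE_RE = re.compile(r"candidate_id\s*=\s*(cand_\d+)")

def extract_candidate(completion: str) -> Optional[str]:
    """Pull a cand_xx identifier out of a model completion.

    First try the requested 'candidate_id=<cand_xx>' format, then fall back
    to the first bare cand_xx token anywhere in the text.
    """
    match = CANDIDATE_RE.search(completion)
    if match:
        return match.group(1)
    loose = re.search(r"cand_\d+", completion)
    return loose.group(0) if loose else None
```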
submission_bundle/grpo_training_cycle/docs_results/risk_train.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "dataset_size": 180.0,
+   "status": "trained",
+   "train_mae": 0.0033,
+   "model_path": "outputs/models/tabular_risk.pkl"
+ }