Deploy PolyGuard OpenEnv Space (polyguard-rl @ 52ecdc8)
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .env.example +8 -0
- .gitattributes +1 -0
- Dockerfile +3 -3
- Dockerfile.space +2 -3
- README.md +40 -4
- app/api/routes.py +12 -0
- app/api/schemas.py +8 -0
- app/tools/__init__.py +1 -0
- app/tools/medication_alternatives.py +463 -0
- app/ui/frontend/src/App.tsx +14 -0
- app/ui/frontend/src/components/AlternativeMedicineSearch.tsx +215 -0
- app/ui/frontend/src/styles/theme.css +102 -0
- docker/space/README.md +28 -10
- docs/DEMO_RECORDING_SCRIPT.md +493 -0
- docs/deployment.md +1 -1
- docs/final_submission_audit.md +1 -1
- docs/idea_document_traceability.md +57 -0
- docs/participant_guide_traceability.md +1 -1
- docs/ui.md +4 -0
- scripts/deploy_space_api.py +25 -3
- scripts/install_hf_active_bundle.py +18 -7
- submission_bundle/README.md +13 -0
- submission_bundle/grpo_training_cycle/docs_results/README.md +22 -0
- submission_bundle/grpo_training_cycle/docs_results/acceptance_gate.json +11 -0
- submission_bundle/grpo_training_cycle/docs_results/avg_process_fidelity.png +0 -0
- submission_bundle/grpo_training_cycle/docs_results/avg_reward.png +0 -0
- submission_bundle/grpo_training_cycle/docs_results/baselines.json +119 -0
- submission_bundle/grpo_training_cycle/docs_results/benchmark_report.json +52 -0
- submission_bundle/grpo_training_cycle/docs_results/benchmark_report.txt +52 -0
- submission_bundle/grpo_training_cycle/docs_results/dose_train.json +6 -0
- submission_bundle/grpo_training_cycle/docs_results/dosing_grpo.json +28 -0
- submission_bundle/grpo_training_cycle/docs_results/frontier_ready.json +8 -0
- submission_bundle/grpo_training_cycle/docs_results/graph_train.json +5 -0
- submission_bundle/grpo_training_cycle/docs_results/grpo_ablation_report.json +149 -0
- submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run.json +42 -0
- submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_auto.json +39 -0
- submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_fallback_check.json +39 -0
- submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_smoke.json +39 -0
- submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_strict_check.json +39 -0
- submission_bundle/grpo_training_cycle/docs_results/hf_space_verification.json +29 -0
- submission_bundle/grpo_training_cycle/docs_results/hf_training_status.json +123 -0
- submission_bundle/grpo_training_cycle/docs_results/improvement_report.json +19 -0
- submission_bundle/grpo_training_cycle/docs_results/improvement_report_benchmark.json +19 -0
- submission_bundle/grpo_training_cycle/docs_results/inference_benchmark.json +22 -0
- submission_bundle/grpo_training_cycle/docs_results/legality_rate.png +0 -0
- submission_bundle/grpo_training_cycle/docs_results/planner_grpo.json +28 -0
- submission_bundle/grpo_training_cycle/docs_results/plot_index.json +9 -0
- submission_bundle/grpo_training_cycle/docs_results/policy_stack_avg_reward.png +0 -0
- submission_bundle/grpo_training_cycle/docs_results/postsave_inference.json +43 -0
- submission_bundle/grpo_training_cycle/docs_results/risk_train.json +6 -0
.env.example
CHANGED
|
@@ -20,3 +20,11 @@ POLYGUARD_FRONTIER_MODEL=Qwen/Qwen2.5-7B-Instruct
|
|
| 20 |
POLYGUARD_ALLOW_WEB_FETCH=false
|
| 21 |
POLYGUARD_REWARD_MIN=0.001
|
| 22 |
POLYGUARD_REWARD_MAX=0.999
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
POLYGUARD_ALLOW_WEB_FETCH=false
|
| 21 |
POLYGUARD_REWARD_MIN=0.001
|
| 22 |
POLYGUARD_REWARD_MAX=0.999
|
| 23 |
+
|
| 24 |
+
# --- Medication alternatives tool (FDA openFDA + optional external CDS) ---
|
| 25 |
+
# Optional: higher openFDA rate limits — request a key at https://open.fda.gov/apis/authentication/
|
| 26 |
+
# POLYGUARD_OPENFDA_API_KEY=
|
| 27 |
+
# Optional: POST { "drug_names": ["..."] } to your service; Bearer token if required (Tally/Vellum/custom).
|
| 28 |
+
# Never commit real tokens; set in Space secrets or local .env only.
|
| 29 |
+
# POLYGUARD_MED_TOOL_URL=
|
| 30 |
+
# POLYGUARD_MED_TOOL_TOKEN=
|
.gitattributes
CHANGED
|
@@ -41,3 +41,4 @@ docs/results/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png filter
|
|
| 41 |
docs/results/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
|
| 42 |
docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
|
| 43 |
docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 41 |
docs/results/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
|
| 42 |
docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
|
| 43 |
docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
submission_bundle/qwen_completed_runs/charts/generated/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
# Hugging Face Space:
|
| 2 |
-
# Build
|
| 3 |
-
#
|
| 4 |
|
| 5 |
FROM node:20-bookworm-slim AS frontend
|
| 6 |
WORKDIR /build
|
|
|
|
| 1 |
+
# Hugging Face Space: nginx on PORT (7860) + OpenEnv (8100) + API (8200) + Vite-built UI.
|
| 2 |
+
# Build: docker build -t polyguard-space .
|
| 3 |
+
# HF Spaces use this file by default when "Dockerfile path" is unset — keep this as the demo image.
|
| 4 |
|
| 5 |
FROM node:20-bookworm-slim AS frontend
|
| 6 |
WORKDIR /build
|
Dockerfile.space
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
-
#
|
| 2 |
-
#
|
| 3 |
-
# Cheap tier: use Space "CPU basic"; first boot downloads ~1.1GB model bundle.
|
| 4 |
|
| 5 |
FROM node:20-bookworm-slim AS frontend
|
| 6 |
WORKDIR /build
|
|
|
|
| 1 |
+
# Same image as ./Dockerfile — use this path in HF Space settings if "Dockerfile path"
|
| 2 |
+
# must be explicit (e.g. Dockerfile.space). Keep in sync with Dockerfile.
|
|
|
|
| 3 |
|
| 4 |
FROM node:20-bookworm-slim AS frontend
|
| 5 |
WORKDIR /build
|
README.md
CHANGED
|
@@ -1,12 +1,48 @@
|
|
| 1 |
---
|
| 2 |
title: PolyGuard OpenEnv
|
| 3 |
-
emoji: 🛡️
|
| 4 |
colorFrom: blue
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
app_port: 7860
|
| 8 |
pinned: false
|
| 9 |
-
license: mit
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: PolyGuard OpenEnv
|
|
|
|
| 3 |
colorFrom: blue
|
| 4 |
+
colorTo: green
|
| 5 |
sdk: docker
|
| 6 |
app_port: 7860
|
| 7 |
pinned: false
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
+
# PolyGuard (OpenEnv implementation package)
|
| 11 |
+
|
| 12 |
+
Run all CLI commands from this directory (`cd polyguard-rl`). The repository root [`README.md`](../README.md) carries the same submission narrative with paths adjusted for viewers landing on the GitHub repo home page.
|
| 13 |
+
|
| 14 |
+
## Submission Links
|
| 15 |
+
|
| 16 |
+
- GitHub Repo URL: [https://github.com/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK](https://github.com/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK)
|
| 17 |
+
- HF Space URL: [https://huggingface.co/spaces/TheJackBright/polyguard-openenv](https://huggingface.co/spaces/TheJackBright/polyguard-openenv)
|
| 18 |
+
- Colab Notebook URL: [https://colab.research.google.com/github/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK/blob/master/polyguard-rl/PolyGuard_SFT_GRPO_One_Run_Runner.ipynb](https://colab.research.google.com/github/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK/blob/master/polyguard-rl/PolyGuard_SFT_GRPO_One_Run_Runner.ipynb) (see also `notebooks/09_training_loop.ipynb` for a modular training walkthrough)
|
| 19 |
+
- YouTube Video URL: not used for this submission; see Hugging Face Blog URL below.
|
| 20 |
+
- Hugging Face Blog URL: [https://huggingface.co/blog/TheJackBright/polyguard-openenv](https://huggingface.co/blog/TheJackBright/polyguard-openenv) *(publish `docs/hf_blog_draft.md` or replace with a live story URL)*
|
| 21 |
+
|
| 22 |
+
## Problem Statement
|
| 23 |
+
|
| 24 |
+
Polypharmacy decisions are long-horizon, partially observable, and safety-critical. PolyGuard is a research environment where an LLM agent selects constrained clinical actions, receives verifier-backed reward, and improves via SFT + GRPO—not generic open-ended chat fine-tuning.
|
| 25 |
+
|
| 26 |
+
## Environment
|
| 27 |
+
|
| 28 |
+
`PolyGuardEnv` exposes OpenEnv-style HTTP/WebSocket endpoints (`/reset`, `/step`, `/state`, `/metadata`, `/schema`, `/mcp`, `/health`, `/ws`). Sub-environments include DDI, bandit mining, regimen risk, precision dosing, longitudinal deprescribing, web-search missing data, alternative suggestion, and new-drug decomposition. See `openenv.yaml`, `app/env/env_core.py`, `app/env/fastapi_app.py`, and `docs/environment_design.md`.
|
| 29 |
+
|
| 30 |
+
## Agent Capabilities
|
| 31 |
+
|
| 32 |
+
Medication reconciliation, evidence retrieval, graph safety, dosing guardrails, candidate generation, supervisor routing, planner/critic stack, explanations, and contextual bandit ranking for ablations (`app/agents/`, `docs/agents.md`).
|
| 33 |
+
|
| 34 |
+
## Tasks
|
| 35 |
+
|
| 36 |
+
DDI risk reduction, safe adds/substitutions, regimen optimization, taper/deprescribing sequences, precision dosing, missing-data recovery, and new-drug decomposition (`data/scenarios/`, `app/env/catalog.py`).
|
| 37 |
+
|
| 38 |
+
## Reward Model / Evaluation Logic
|
| 39 |
+
|
| 40 |
+
Thirteen verifier-backed reward components roll up into four primary channels (`safety_legality`, `clinical_improvement`, `dosing_quality`, `process_integrity`), clamped to `[0.001, 0.999]`, with anti-cheat and timeout logic (`app/env/reward_router.py`, `app/env/anti_cheat.py`, `docs/reward_design.md`).
|
| 41 |
+
|
| 42 |
+
## Training And Post-Training Strategy
|
| 43 |
+
|
| 44 |
+
Build corpora (`scripts/bootstrap_data.py`, `scripts/build_training_corpus.py`), SFT with TRL (`scripts/train_sft_trl.py`), GRPO with environment reward (`scripts/train_grpo_trl.py`), merge adapters (`scripts/merge_adapters_safe.py`), validate inference (`scripts/test_inference_postsave.py`), evaluate and plot (`scripts/evaluate_*.py`, `docs/results/`). Optional HF GPU training: `scripts/deploy_training_space.py`. Full commands: repository root [`README.md`](../README.md) or `docs/training.md`.
|
| 45 |
+
|
| 46 |
+
## Documentation index
|
| 47 |
+
|
| 48 |
+
- [Architecture](docs/architecture.md) · [Environment](docs/environment_design.md) · [Rewards](docs/reward_design.md) · [Training](docs/training.md) · [Evaluation](docs/evaluation.md) · [Deployment](docs/deployment.md) · [Datasets](docs/datasets.md) · [Participant guide traceability](docs/participant_guide_traceability.md) · [Idea doc vs implementation](docs/idea_document_traceability.md) · [**Space UI demo script**](docs/DEMO_RECORDING_SCRIPT.md)
|
app/api/routes.py
CHANGED
|
@@ -5,9 +5,11 @@ from __future__ import annotations
|
|
| 5 |
from fastapi import APIRouter, Depends, HTTPException
|
| 6 |
|
| 7 |
from app.api.dependencies import get_service
|
|
|
|
| 8 |
from app.api.schemas import (
|
| 9 |
BatchInferRequest,
|
| 10 |
EvidenceQueryRequest,
|
|
|
|
| 11 |
OrchestrateRequest,
|
| 12 |
ResetRequest,
|
| 13 |
StepCandidateRequest,
|
|
@@ -137,3 +139,13 @@ def cases_search(q: str, service: APIService = Depends(get_service)) -> list[dic
|
|
| 137 |
@router.post("/evidence/query")
|
| 138 |
def evidence_query(payload: EvidenceQueryRequest, service: APIService = Depends(get_service)) -> list[dict]:
|
| 139 |
return service.evidence_query(query=payload.query, top_k=payload.top_k)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
from fastapi import APIRouter, Depends, HTTPException
|
| 6 |
|
| 7 |
from app.api.dependencies import get_service
|
| 8 |
+
from app.tools.medication_alternatives import build_alternatives_response
|
| 9 |
from app.api.schemas import (
|
| 10 |
BatchInferRequest,
|
| 11 |
EvidenceQueryRequest,
|
| 12 |
+
MedicationAlternativesRequest,
|
| 13 |
OrchestrateRequest,
|
| 14 |
ResetRequest,
|
| 15 |
StepCandidateRequest,
|
|
|
|
| 139 |
@router.post("/evidence/query")
|
| 140 |
def evidence_query(payload: EvidenceQueryRequest, service: APIService = Depends(get_service)) -> list[dict]:
|
| 141 |
return service.evidence_query(query=payload.query, top_k=payload.top_k)
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
@router.post("/tools/medication_alternatives")
|
| 145 |
+
def medication_alternatives(payload: MedicationAlternativesRequest) -> dict:
|
| 146 |
+
"""OpenFDA class neighbors + optional external POST (env: POLYGUARD_MED_TOOL_URL / TOKEN)."""
|
| 147 |
+
return build_alternatives_response(
|
| 148 |
+
query_drug=payload.query_drug,
|
| 149 |
+
regimen_drugs=payload.regimen_drugs,
|
| 150 |
+
max_suggestions=payload.max_suggestions,
|
| 151 |
+
)
|
app/api/schemas.py
CHANGED
|
@@ -55,3 +55,11 @@ class BatchInferRequest(StrictSchema):
|
|
| 55 |
class EvidenceQueryRequest(StrictSchema):
|
| 56 |
query: str
|
| 57 |
top_k: int = 5
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
class EvidenceQueryRequest(StrictSchema):
|
| 56 |
query: str
|
| 57 |
top_k: int = 5
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class MedicationAlternativesRequest(StrictSchema):
|
| 61 |
+
"""FDA / external tool: suggest other labeled drugs in a similar pharmacologic class."""
|
| 62 |
+
|
| 63 |
+
query_drug: Optional[str] = None
|
| 64 |
+
regimen_drugs: list[str] = Field(default_factory=list)
|
| 65 |
+
max_suggestions: int = Field(default=10, ge=1, le=25)
|
app/tools/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Optional product tools (FDA search, external CDS hooks)."""
|
app/tools/medication_alternatives.py
ADDED
|
@@ -0,0 +1,463 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""OpenFDA-backed medication class search + optional external HTTP tool.
|
| 2 |
+
|
| 3 |
+
Secrets (OpenFDA key, Tally/Vellum/custom bearer tokens) must come from env only.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import logging
|
| 9 |
+
import os
|
| 10 |
+
import re
|
| 11 |
+
from typing import Any
|
| 12 |
+
from urllib.parse import quote
|
| 13 |
+
|
| 14 |
+
import requests
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
OPENFDA_LABEL = "https://api.fda.gov/drug/label.json"
|
| 19 |
+
_DEFAULT_DISCLAIMER = (
|
| 20 |
+
"Research aid only — not medical advice. FDA labels may be incomplete; verify in approved prescribing information."
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def _openfda_key_suffix() -> str:
|
| 25 |
+
key = os.getenv("POLYGUARD_OPENFDA_API_KEY", "").strip()
|
| 26 |
+
if not key:
|
| 27 |
+
return ""
|
| 28 |
+
return f"&api_key={quote(key, safe='')}"
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _fda_get(search: str, limit: int) -> dict[str, Any] | None:
|
| 32 |
+
"""GET openFDA label.json; returns parsed JSON or None on failure."""
|
| 33 |
+
q = quote(search, safe="")
|
| 34 |
+
url = f"{OPENFDA_LABEL}?search={q}&limit={int(limit)}{_openfda_key_suffix()}"
|
| 35 |
+
try:
|
| 36 |
+
resp = requests.get(url, timeout=14)
|
| 37 |
+
if resp.status_code != 200:
|
| 38 |
+
logger.warning("openfda_http_%s: %s", resp.status_code, resp.text[:200])
|
| 39 |
+
return None
|
| 40 |
+
return resp.json()
|
| 41 |
+
except Exception as exc: # noqa: BLE001
|
| 42 |
+
logger.warning("openfda_request_failed: %s", exc)
|
| 43 |
+
return None
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _first_openfda(payload: dict[str, Any] | None) -> dict[str, Any]:
|
| 47 |
+
if not payload or "results" not in payload:
|
| 48 |
+
return {}
|
| 49 |
+
results = payload.get("results")
|
| 50 |
+
if not isinstance(results, list) or not results:
|
| 51 |
+
return {}
|
| 52 |
+
first = results[0]
|
| 53 |
+
return first if isinstance(first, dict) else {}
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def _openfda_block(label: dict[str, Any]) -> dict[str, Any]:
|
| 57 |
+
block = label.get("openfda")
|
| 58 |
+
return block if isinstance(block, dict) else {}
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def _listify(value: Any) -> list[str]:
|
| 62 |
+
if value is None:
|
| 63 |
+
return []
|
| 64 |
+
if isinstance(value, str):
|
| 65 |
+
return [value]
|
| 66 |
+
if isinstance(value, list):
|
| 67 |
+
return [str(x).strip() for x in value if str(x).strip()]
|
| 68 |
+
return [str(value).strip()]
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def _snippet(text: Any, max_len: int = 380) -> str | None:
|
| 72 |
+
if not text:
|
| 73 |
+
return None
|
| 74 |
+
if isinstance(text, list):
|
| 75 |
+
text = " ".join(str(x) for x in text[:6])
|
| 76 |
+
s = re.sub(r"\s+", " ", str(text)).strip()
|
| 77 |
+
if len(s) <= max_len:
|
| 78 |
+
return s
|
| 79 |
+
return s[: max_len - 1] + "…"
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def _label_link(set_id: str | None) -> str | None:
|
| 83 |
+
if not set_id:
|
| 84 |
+
return None
|
| 85 |
+
return f"https://dailymed.nlm.nih.gov/dailymed/drugInfo.cfm?setid={set_id}"
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
# Keywords from free text / simulator tokens → openFDA pharm_class_epc strings (exact or prefix).
|
| 89 |
+
_KEYWORD_EPCS: tuple[tuple[str, tuple[str, ...]], ...] = (
|
| 90 |
+
("benzodiazepine", ("Benzodiazepine", "Benzodiazepine Sedative")),
|
| 91 |
+
("benzo", ("Benzodiazepine",)),
|
| 92 |
+
("nsaid", ("Nonsteroidal Anti-inflammatory Drug",)),
|
| 93 |
+
("opioid", ("Opioid Agonist", "Full Opioid Agonists")),
|
| 94 |
+
("statin", ("HMG-CoA Reductase Inhibitor",)),
|
| 95 |
+
("beta blocker", ("beta-Adrenergic Blocker",)),
|
| 96 |
+
("betablocker", ("beta-Adrenergic Blocker",)),
|
| 97 |
+
("ace inhibitor", ("Angiotensin-converting Enzyme Inhibitor",)),
|
| 98 |
+
("arb", ("Angiotensin II Receptor Blocker",)),
|
| 99 |
+
("ppi", ("Proton Pump Inhibitor",)),
|
| 100 |
+
("ssri", ("Selective Serotonin Reuptake Inhibitor",)),
|
| 101 |
+
# Anticoagulant / antiplatelet (simulator warfarin_like → warfarin)
|
| 102 |
+
("warfarin", ("Vitamin K Antagonist",)),
|
| 103 |
+
("heparin", ("Thrombin Inhibitor", "Factor Xa Inhibitor")),
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def _normalize_simulator_query(q: str) -> str:
|
| 108 |
+
"""Strip simulator suffixes and underscores so benzodiazepine_like → benzodiazepine."""
|
| 109 |
+
raw = q.strip().lower()[:120]
|
| 110 |
+
if not raw:
|
| 111 |
+
return ""
|
| 112 |
+
for suf in ("_like", "_analog", "_analogue", "_class", "_group", "_category"):
|
| 113 |
+
if raw.endswith(suf):
|
| 114 |
+
raw = raw[: -len(suf)].strip("_").strip()
|
| 115 |
+
return raw.replace("_", " ").strip()
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def _class_search_variants(focus: str) -> list[str]:
|
| 119 |
+
"""Ordered strings to try as openFDA pharm_class_epc (exact quoted) or wildcard body."""
|
| 120 |
+
raw = _normalize_simulator_query(focus)
|
| 121 |
+
if not raw:
|
| 122 |
+
return []
|
| 123 |
+
out: list[str] = []
|
| 124 |
+
seen: set[str] = set()
|
| 125 |
+
|
| 126 |
+
def add(s: str) -> None:
|
| 127 |
+
t = s.strip()
|
| 128 |
+
if len(t) < 3:
|
| 129 |
+
return
|
| 130 |
+
k = t.lower()
|
| 131 |
+
if k in seen:
|
| 132 |
+
return
|
| 133 |
+
seen.add(k)
|
| 134 |
+
out.append(t)
|
| 135 |
+
|
| 136 |
+
compact = raw.replace(" ", "")
|
| 137 |
+
# Prefer canonical FDA class strings before raw lowercase (better labels + display).
|
| 138 |
+
for kw, epcs in _KEYWORD_EPCS:
|
| 139 |
+
if kw in compact or kw in raw:
|
| 140 |
+
for e in epcs:
|
| 141 |
+
add(e)
|
| 142 |
+
add(raw)
|
| 143 |
+
first = raw.split()[0]
|
| 144 |
+
if first != raw:
|
| 145 |
+
add(first)
|
| 146 |
+
if raw and " " not in raw and raw.isalpha():
|
| 147 |
+
add(raw[0].upper() + raw[1:])
|
| 148 |
+
return out
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def _resolve_focus_drug(query_drug: str | None, regimen_drugs: list[str]) -> str:
|
| 152 |
+
"""Prefer explicit query_drug from client; do not silently use regimen[0] when multiple rows exist."""
|
| 153 |
+
q = (query_drug or "").strip()
|
| 154 |
+
if q:
|
| 155 |
+
return q
|
| 156 |
+
if len(regimen_drugs) == 1:
|
| 157 |
+
t = str(regimen_drugs[0]).strip()
|
| 158 |
+
return t
|
| 159 |
+
# Multiple regimen drugs but no focus: caller should send query_drug (frontend bug otherwise).
|
| 160 |
+
return ""
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def _escape_fda_term(term: str) -> str:
|
| 164 |
+
"""Remove characters that break openFDA quoted search."""
|
| 165 |
+
return re.sub(r'["\\]', " ", term).strip()[:100]
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def _search_label_for_name(name: str) -> dict[str, Any]:
|
| 169 |
+
"""Search brand, generic, or active substance on SPL labels."""
|
| 170 |
+
n = _escape_fda_term(name.strip()[:80])
|
| 171 |
+
if not n:
|
| 172 |
+
return {}
|
| 173 |
+
data_g = _fda_get(f'openfda.generic_name:"{n}"', limit=3)
|
| 174 |
+
if data_g and data_g.get("results"):
|
| 175 |
+
return _first_openfda(data_g)
|
| 176 |
+
data_b = _fda_get(f'openfda.brand_name:"{n}"', limit=3)
|
| 177 |
+
if data_b and data_b.get("results"):
|
| 178 |
+
return _first_openfda(data_b)
|
| 179 |
+
# Active ingredient / substance (helps real drug stems)
|
| 180 |
+
data_s = _fda_get(f'openfda.substance_name:"{n}"', limit=3)
|
| 181 |
+
if data_s and data_s.get("results"):
|
| 182 |
+
return _first_openfda(data_s)
|
| 183 |
+
data_a = _fda_get(f'openfda.active_ingredient:"{n}"', limit=3)
|
| 184 |
+
return _first_openfda(data_a) if data_a else {}
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def _suggestions_by_class_probe(
|
| 188 |
+
field: str,
|
| 189 |
+
class_value: str,
|
| 190 |
+
exclude: set[str],
|
| 191 |
+
max_suggestions: int,
|
| 192 |
+
) -> list[dict[str, Any]]:
|
| 193 |
+
rows = _suggestions_for_class(field, class_value, exclude, max_suggestions)
|
| 194 |
+
if rows:
|
| 195 |
+
return rows
|
| 196 |
+
# Wildcard: openFDA supports *suffix / prefix* on some fields
|
| 197 |
+
body = _escape_fda_term(class_value).lower()
|
| 198 |
+
if len(body) >= 4:
|
| 199 |
+
wild = _fda_get(f"openfda.{field}:*{body}*", limit=min(40, max(10, max_suggestions * 4)))
|
| 200 |
+
if wild and wild.get("results"):
|
| 201 |
+
# Reuse list builder by synthesizing a narrowed class is awkward; parse manually
|
| 202 |
+
out: list[dict[str, Any]] = []
|
| 203 |
+
seen: set[str] = set()
|
| 204 |
+
for row in wild.get("results", []):
|
| 205 |
+
if not isinstance(row, dict):
|
| 206 |
+
continue
|
| 207 |
+
of = _openfda_block(row)
|
| 208 |
+
brands = _listify(of.get("brand_name"))
|
| 209 |
+
generics = _listify(of.get("generic_name"))
|
| 210 |
+
display = (brands[0] if brands else None) or (generics[0] if generics else None)
|
| 211 |
+
if not display:
|
| 212 |
+
continue
|
| 213 |
+
key = display.lower()
|
| 214 |
+
if key in seen or key in exclude:
|
| 215 |
+
continue
|
| 216 |
+
seen.add(key)
|
| 217 |
+
ar = row.get("adverse_reactions")
|
| 218 |
+
ar_text = ar[0] if isinstance(ar, list) and ar else ar
|
| 219 |
+
set_id = None
|
| 220 |
+
if isinstance(of.get("spl_set_id"), list) and of["spl_set_id"]:
|
| 221 |
+
set_id = str(of["spl_set_id"][0])
|
| 222 |
+
elif of.get("spl_set_id"):
|
| 223 |
+
set_id = str(of["spl_set_id"])
|
| 224 |
+
out.append(
|
| 225 |
+
{
|
| 226 |
+
"display_name": display,
|
| 227 |
+
"generic_names": generics[:4],
|
| 228 |
+
"brand_names": brands[:4],
|
| 229 |
+
"routes": _listify(of.get("route"))[:4],
|
| 230 |
+
"adverse_reactions_snippet": _snippet(ar_text),
|
| 231 |
+
"label_link": _label_link(set_id),
|
| 232 |
+
"source_detail": f"openfda.{field}.wildcard",
|
| 233 |
+
},
|
| 234 |
+
)
|
| 235 |
+
if len(out) >= max_suggestions:
|
| 236 |
+
break
|
| 237 |
+
return out
|
| 238 |
+
return []
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
def _pick_pharm_class(openfda_block: dict[str, Any]) -> tuple[str | None, str | None]:
|
| 242 |
+
for key in ("pharm_class_epc", "pharm_class_cs", "pharm_class_moa"):
|
| 243 |
+
for item in _listify(openfda_block.get(key)):
|
| 244 |
+
if len(item) > 3:
|
| 245 |
+
return key, item
|
| 246 |
+
return None, None
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
def _suggestions_for_class(
|
| 250 |
+
field: str,
|
| 251 |
+
pharm_class: str,
|
| 252 |
+
exclude: set[str],
|
| 253 |
+
max_suggestions: int,
|
| 254 |
+
) -> list[dict[str, Any]]:
|
| 255 |
+
"""List other drugs sharing FDA pharmacologic class on label."""
|
| 256 |
+
pc = pharm_class.strip()[:120]
|
| 257 |
+
if not pc or not field:
|
| 258 |
+
return []
|
| 259 |
+
search = f'openfda.{field}:"{pc}"'
|
| 260 |
+
data = _fda_get(search, limit=min(50, max(10, max_suggestions * 4)))
|
| 261 |
+
if not data or not data.get("results"):
|
| 262 |
+
return []
|
| 263 |
+
|
| 264 |
+
out: list[dict[str, Any]] = []
|
| 265 |
+
seen: set[str] = set()
|
| 266 |
+
for row in data.get("results", []):
|
| 267 |
+
if not isinstance(row, dict):
|
| 268 |
+
continue
|
| 269 |
+
of = _openfda_block(row)
|
| 270 |
+
brands = _listify(of.get("brand_name"))
|
| 271 |
+
generics = _listify(of.get("generic_name"))
|
| 272 |
+
display = (brands[0] if brands else None) or (generics[0] if generics else None)
|
| 273 |
+
if not display:
|
| 274 |
+
continue
|
| 275 |
+
key = display.lower()
|
| 276 |
+
if key in seen:
|
| 277 |
+
continue
|
| 278 |
+
if key in exclude:
|
| 279 |
+
continue
|
| 280 |
+
seen.add(key)
|
| 281 |
+
ar = row.get("adverse_reactions")
|
| 282 |
+
if isinstance(ar, list) and ar:
|
| 283 |
+
ar_text = ar[0]
|
| 284 |
+
else:
|
| 285 |
+
ar_text = ar
|
| 286 |
+
set_id = None
|
| 287 |
+
if isinstance(of.get("spl_set_id"), list) and of["spl_set_id"]:
|
| 288 |
+
set_id = str(of["spl_set_id"][0])
|
| 289 |
+
elif of.get("spl_set_id"):
|
| 290 |
+
set_id = str(of["spl_set_id"])
|
| 291 |
+
out.append(
|
| 292 |
+
{
|
| 293 |
+
"display_name": display,
|
| 294 |
+
"generic_names": generics[:4],
|
| 295 |
+
"brand_names": brands[:4],
|
| 296 |
+
"routes": _listify(of.get("route"))[:4],
|
| 297 |
+
"adverse_reactions_snippet": _snippet(ar_text),
|
| 298 |
+
"label_link": _label_link(set_id),
|
| 299 |
+
"source_detail": f"openfda.{field}",
|
| 300 |
+
}
|
| 301 |
+
)
|
| 302 |
+
if len(out) >= max_suggestions:
|
| 303 |
+
break
|
| 304 |
+
return out
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
def _external_suggestions(drug_names: list[str]) -> list[dict[str, Any]] | None:
|
| 308 |
+
url = os.getenv("POLYGUARD_MED_TOOL_URL", "").strip()
|
| 309 |
+
if not url:
|
| 310 |
+
return None
|
| 311 |
+
headers: dict[str, str] = {"Content-Type": "application/json"}
|
| 312 |
+
token = os.getenv("POLYGUARD_MED_TOOL_TOKEN", "").strip()
|
| 313 |
+
if token:
|
| 314 |
+
headers["Authorization"] = f"Bearer {token}"
|
| 315 |
+
try:
|
| 316 |
+
resp = requests.post(
|
| 317 |
+
url,
|
| 318 |
+
json={"drug_names": drug_names},
|
| 319 |
+
headers=headers,
|
| 320 |
+
timeout=18,
|
| 321 |
+
)
|
| 322 |
+
if resp.status_code >= 400:
|
| 323 |
+
logger.warning("med_tool_http_%s", resp.status_code)
|
| 324 |
+
return []
|
| 325 |
+
payload = resp.json()
|
| 326 |
+
except Exception as exc: # noqa: BLE001
|
| 327 |
+
logger.warning("med_tool_request_failed: %s", exc)
|
| 328 |
+
return []
|
| 329 |
+
if not isinstance(payload, dict):
|
| 330 |
+
return []
|
| 331 |
+
raw = payload.get("suggestions")
|
| 332 |
+
if not isinstance(raw, list):
|
| 333 |
+
return []
|
| 334 |
+
cleaned: list[dict[str, Any]] = []
|
| 335 |
+
for item in raw:
|
| 336 |
+
if isinstance(item, dict) and item.get("display_name"):
|
| 337 |
+
row = dict(item)
|
| 338 |
+
row["source_detail"] = str(row.get("source_detail") or "external_tool")
|
| 339 |
+
cleaned.append(row)
|
| 340 |
+
elif isinstance(item, str) and item.strip():
|
| 341 |
+
cleaned.append(
|
| 342 |
+
{
|
| 343 |
+
"display_name": item.strip(),
|
| 344 |
+
"generic_names": [],
|
| 345 |
+
"brand_names": [],
|
| 346 |
+
"routes": [],
|
| 347 |
+
"adverse_reactions_snippet": None,
|
| 348 |
+
"label_link": None,
|
| 349 |
+
"source_detail": "external_tool",
|
| 350 |
+
}
|
| 351 |
+
)
|
| 352 |
+
return cleaned
|
| 353 |
+
|
| 354 |
+
|
| 355 |
+
def build_alternatives_response(
|
| 356 |
+
query_drug: str | None,
|
| 357 |
+
regimen_drugs: list[str],
|
| 358 |
+
max_suggestions: int,
|
| 359 |
+
) -> dict[str, Any]:
|
| 360 |
+
errors: list[str] = []
|
| 361 |
+
regimen_clean = [str(x).strip() for x in regimen_drugs if str(x).strip()][:40]
|
| 362 |
+
focus = _resolve_focus_drug(query_drug, regimen_clean)
|
| 363 |
+
exclude = {x.lower() for x in regimen_clean}
|
| 364 |
+
if focus:
|
| 365 |
+
exclude.add(focus.lower())
|
| 366 |
+
|
| 367 |
+
external_rows: list[dict[str, Any]] = []
|
| 368 |
+
ext = _external_suggestions([focus] if focus else regimen_clean[:5])
|
| 369 |
+
if ext is not None:
|
| 370 |
+
external_rows = ext
|
| 371 |
+
|
| 372 |
+
if not focus and not regimen_clean:
|
| 373 |
+
return {
|
| 374 |
+
"focus_drug": "",
|
| 375 |
+
"therapeutic_class": None,
|
| 376 |
+
"suggestions": external_rows,
|
| 377 |
+
"source": "external" if external_rows else "none",
|
| 378 |
+
"disclaimer": _DEFAULT_DISCLAIMER,
|
| 379 |
+
"errors": ["Enter a drug name or load drugs from the current episode."],
|
| 380 |
+
}
|
| 381 |
+
|
| 382 |
+
if not focus and regimen_clean:
|
| 383 |
+
return {
|
| 384 |
+
"focus_drug": "",
|
| 385 |
+
"therapeutic_class": None,
|
| 386 |
+
"therapeutic_class_field": None,
|
| 387 |
+
"suggestions": external_rows,
|
| 388 |
+
"source": "external" if external_rows else "none",
|
| 389 |
+
"disclaimer": _DEFAULT_DISCLAIMER,
|
| 390 |
+
"errors": [
|
| 391 |
+
"Several medications are on this regimen; pick a focus row in the UI (or pass query_drug). "
|
| 392 |
+
"The server does not guess the first medication anymore.",
|
| 393 |
+
],
|
| 394 |
+
}
|
| 395 |
+
|
| 396 |
+
# SPL name/substance search: normalize simulator tokens first (benzodiazepine_like → benzodiazepine).
|
| 397 |
+
lookup = _normalize_simulator_query(focus) or focus.strip()
|
| 398 |
+
label = _search_label_for_name(lookup)
|
| 399 |
+
ofb = _openfda_block(label)
|
| 400 |
+
pharm_field, pharm = _pick_pharm_class(ofb)
|
| 401 |
+
|
| 402 |
+
openfda_rows: list[dict[str, Any]] = []
|
| 403 |
+
if pharm and pharm_field:
|
| 404 |
+
openfda_rows = _suggestions_for_class(pharm_field, pharm, exclude, max_suggestions)
|
| 405 |
+
if not openfda_rows:
|
| 406 |
+
# Simulator tokens (e.g. benzodiazepine_like) or class keywords: try FDA class directly.
|
| 407 |
+
for cand in _class_search_variants(focus):
|
| 408 |
+
rows = _suggestions_by_class_probe("pharm_class_epc", cand, exclude, max_suggestions)
|
| 409 |
+
if rows:
|
| 410 |
+
pharm_field, pharm = "pharm_class_epc", cand
|
| 411 |
+
openfda_rows = rows
|
| 412 |
+
break
|
| 413 |
+
if not openfda_rows:
|
| 414 |
+
for cand in _class_search_variants(focus):
|
| 415 |
+
rows = _suggestions_by_class_probe("pharm_class_cs", cand, exclude, max_suggestions)
|
| 416 |
+
if rows:
|
| 417 |
+
pharm_field, pharm = "pharm_class_cs", cand
|
| 418 |
+
openfda_rows = rows
|
| 419 |
+
break
|
| 420 |
+
|
| 421 |
+
if not openfda_rows:
|
| 422 |
+
if not (pharm and pharm_field):
|
| 423 |
+
errors.append(
|
| 424 |
+
"Could not match this text to an FDA SPL (generic/brand/substance) or pharmacologic class. "
|
| 425 |
+
"Try a generic name (e.g. diazepam), a class keyword (e.g. benzodiazepine), or load from episode.",
|
| 426 |
+
)
|
| 427 |
+
elif not external_rows:
|
| 428 |
+
errors.append(
|
| 429 |
+
"No labeled products returned for this query (try another spelling or a broader class keyword).",
|
| 430 |
+
)
|
| 431 |
+
|
| 432 |
+
merged: list[dict[str, Any]] = []
|
| 433 |
+
seen_keys: set[str] = set()
|
| 434 |
+
for row in external_rows + openfda_rows:
|
| 435 |
+
display = str(row.get("display_name", "")).strip()
|
| 436 |
+
if not display:
|
| 437 |
+
continue
|
| 438 |
+
generics = [str(g).lower() for g in (row.get("generic_names") or []) if g]
|
| 439 |
+
dedupe_key = generics[0] if generics else display.lower()
|
| 440 |
+
if dedupe_key in seen_keys:
|
| 441 |
+
continue
|
| 442 |
+
seen_keys.add(dedupe_key)
|
| 443 |
+
merged.append(row)
|
| 444 |
+
if len(merged) >= max_suggestions:
|
| 445 |
+
break
|
| 446 |
+
|
| 447 |
+
source = "openfda"
|
| 448 |
+
if external_rows and openfda_rows:
|
| 449 |
+
source = "mixed"
|
| 450 |
+
elif external_rows and not openfda_rows:
|
| 451 |
+
source = "external"
|
| 452 |
+
elif not external_rows and not openfda_rows:
|
| 453 |
+
source = "none"
|
| 454 |
+
|
| 455 |
+
return {
|
| 456 |
+
"focus_drug": focus,
|
| 457 |
+
"therapeutic_class": pharm,
|
| 458 |
+
"therapeutic_class_field": pharm_field,
|
| 459 |
+
"suggestions": merged,
|
| 460 |
+
"source": source,
|
| 461 |
+
"disclaimer": _DEFAULT_DISCLAIMER,
|
| 462 |
+
"errors": errors,
|
| 463 |
+
}
|
app/ui/frontend/src/App.tsx
CHANGED
|
@@ -20,6 +20,7 @@ import type {
|
|
| 20 |
StepResponse,
|
| 21 |
TaskPreset,
|
| 22 |
} from "./lib/types";
|
|
|
|
| 23 |
import MetaverseBackdrop from "./components/MetaverseBackdrop";
|
| 24 |
|
| 25 |
type WorkbenchMode = "agent" | "env";
|
|
@@ -887,6 +888,18 @@ export default function App() {
|
|
| 887 |
const activeInfo = mode === "agent" ? agentInfo : envInfo;
|
| 888 |
const activeTerminationReason = shortValue(activeInfo?.termination_reason);
|
| 889 |
const terminationReason = activeTerminationReason !== "-" ? activeTerminationReason : null;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 890 |
const heroStats: Array<[string, string]> = [
|
| 891 |
["Runtime", mode === "agent" ? "Agent Workbench" : "Env Explorer"],
|
| 892 |
["Scenario", taskLabel(taskId, catalog.task_presets)],
|
|
@@ -1164,6 +1177,7 @@ export default function App() {
|
|
| 1164 |
<DetailPanel title="Explanation" data={mode === "agent" ? explanation : null} />
|
| 1165 |
<DetailPanel title="Evidence" data={mode === "agent" ? (isRecord(evidence) || Array.isArray(evidence) ? evidence : null) : null} />
|
| 1166 |
<EventLog events={events} error={error} />
|
|
|
|
| 1167 |
</main>
|
| 1168 |
<QTips
|
| 1169 |
open={tipsOpen}
|
|
|
|
| 20 |
StepResponse,
|
| 21 |
TaskPreset,
|
| 22 |
} from "./lib/types";
|
| 23 |
+
import AlternativeMedicineSearch from "./components/AlternativeMedicineSearch";
|
| 24 |
import MetaverseBackdrop from "./components/MetaverseBackdrop";
|
| 25 |
|
| 26 |
type WorkbenchMode = "agent" | "env";
|
|
|
|
| 888 |
const activeInfo = mode === "agent" ? agentInfo : envInfo;
|
| 889 |
const activeTerminationReason = shortValue(activeInfo?.termination_reason);
|
| 890 |
const terminationReason = activeTerminationReason !== "-" ? activeTerminationReason : null;
|
| 891 |
+
const regimenForAltTool = useMemo(() => {
|
| 892 |
+
const meds = activeObservation?.medication_table ?? [];
|
| 893 |
+
const names: string[] = [];
|
| 894 |
+
for (const row of meds) {
|
| 895 |
+
const v = row.drug ?? row.drug_id ?? row.name;
|
| 896 |
+
if (typeof v === "string" && v.trim()) {
|
| 897 |
+
names.push(v.trim());
|
| 898 |
+
}
|
| 899 |
+
}
|
| 900 |
+
return names;
|
| 901 |
+
}, [activeObservation]);
|
| 902 |
+
|
| 903 |
const heroStats: Array<[string, string]> = [
|
| 904 |
["Runtime", mode === "agent" ? "Agent Workbench" : "Env Explorer"],
|
| 905 |
["Scenario", taskLabel(taskId, catalog.task_presets)],
|
|
|
|
| 1177 |
<DetailPanel title="Explanation" data={mode === "agent" ? explanation : null} />
|
| 1178 |
<DetailPanel title="Evidence" data={mode === "agent" ? (isRecord(evidence) || Array.isArray(evidence) ? evidence : null) : null} />
|
| 1179 |
<EventLog events={events} error={error} />
|
| 1180 |
+
<AlternativeMedicineSearch regimenDrugNames={regimenForAltTool} />
|
| 1181 |
</main>
|
| 1182 |
<QTips
|
| 1183 |
open={tipsOpen}
|
app/ui/frontend/src/components/AlternativeMedicineSearch.tsx
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { useCallback, useEffect, useState } from "react";
|
| 2 |
+
import { API_BASE } from "../lib/constants";
|
| 3 |
+
|
| 4 |
+
type Suggestion = {
|
| 5 |
+
display_name: string;
|
| 6 |
+
generic_names?: string[];
|
| 7 |
+
brand_names?: string[];
|
| 8 |
+
routes?: string[];
|
| 9 |
+
adverse_reactions_snippet?: string | null;
|
| 10 |
+
label_link?: string | null;
|
| 11 |
+
source_detail?: string;
|
| 12 |
+
};
|
| 13 |
+
|
| 14 |
+
type AlternativesResponse = {
|
| 15 |
+
focus_drug: string;
|
| 16 |
+
therapeutic_class: string | null;
|
| 17 |
+
therapeutic_class_field?: string | null;
|
| 18 |
+
suggestions: Suggestion[];
|
| 19 |
+
source: string;
|
| 20 |
+
disclaimer: string;
|
| 21 |
+
errors: string[];
|
| 22 |
+
};
|
| 23 |
+
|
| 24 |
+
type Props = {
|
| 25 |
+
regimenDrugNames: string[];
|
| 26 |
+
};
|
| 27 |
+
|
| 28 |
+
export default function AlternativeMedicineSearch({ regimenDrugNames }: Props) {
|
| 29 |
+
const [query, setQuery] = useState("");
|
| 30 |
+
/** Which row in the current episode regimen drives the FDA "focus" drug (not always the first). */
|
| 31 |
+
const [regimenFocusIndex, setRegimenFocusIndex] = useState(0);
|
| 32 |
+
/** False after user types in the box; true when select/load sets query so regimen row wins on submit. */
|
| 33 |
+
const [focusFromRegimenSelect, setFocusFromRegimenSelect] = useState(true);
|
| 34 |
+
const [loading, setLoading] = useState(false);
|
| 35 |
+
const [error, setError] = useState<string | null>(null);
|
| 36 |
+
const [result, setResult] = useState<AlternativesResponse | null>(null);
|
| 37 |
+
|
| 38 |
+
useEffect(() => {
|
| 39 |
+
if (regimenDrugNames.length === 0) {
|
| 40 |
+
setRegimenFocusIndex(0);
|
| 41 |
+
return;
|
| 42 |
+
}
|
| 43 |
+
setRegimenFocusIndex((prev) => (prev >= regimenDrugNames.length ? 0 : prev));
|
| 44 |
+
}, [regimenDrugNames]);
|
| 45 |
+
|
| 46 |
+
const runSearch = useCallback(
|
| 47 |
+
async (queryDrug: string | undefined, regimen: string[]) => {
|
| 48 |
+
setLoading(true);
|
| 49 |
+
setError(null);
|
| 50 |
+
try {
|
| 51 |
+
const res = await fetch(`${API_BASE}/tools/medication_alternatives`, {
|
| 52 |
+
method: "POST",
|
| 53 |
+
headers: { "Content-Type": "application/json" },
|
| 54 |
+
body: JSON.stringify({
|
| 55 |
+
query_drug: queryDrug?.trim() || null,
|
| 56 |
+
regimen_drugs: regimen,
|
| 57 |
+
max_suggestions: 7,
|
| 58 |
+
}),
|
| 59 |
+
});
|
| 60 |
+
if (!res.ok) {
|
| 61 |
+
const t = await res.text();
|
| 62 |
+
throw new Error(t.slice(0, 200) || `HTTP ${res.status}`);
|
| 63 |
+
}
|
| 64 |
+
setResult((await res.json()) as AlternativesResponse);
|
| 65 |
+
} catch (e) {
|
| 66 |
+
setResult(null);
|
| 67 |
+
setError(e instanceof Error ? e.message : "Request failed");
|
| 68 |
+
} finally {
|
| 69 |
+
setLoading(false);
|
| 70 |
+
}
|
| 71 |
+
},
|
| 72 |
+
[],
|
| 73 |
+
);
|
| 74 |
+
|
| 75 |
+
const safeRegimenIndex =
|
| 76 |
+
regimenDrugNames.length > 0
|
| 77 |
+
? Math.min(Math.max(regimenFocusIndex, 0), regimenDrugNames.length - 1)
|
| 78 |
+
: 0;
|
| 79 |
+
|
| 80 |
+
/** Never send null focus when a regimen exists — avoids API defaulting to regimen[0] (always benzo if first). */
|
| 81 |
+
const resolvedFocusDrug = (): string | undefined => {
|
| 82 |
+
const typed = query.trim();
|
| 83 |
+
const fromList = regimenDrugNames[safeRegimenIndex]?.trim() ?? "";
|
| 84 |
+
if (focusFromRegimenSelect && regimenDrugNames.length > 0) {
|
| 85 |
+
return fromList || typed || undefined;
|
| 86 |
+
}
|
| 87 |
+
return typed || fromList || undefined;
|
| 88 |
+
};
|
| 89 |
+
|
| 90 |
+
const onSubmit = () => {
|
| 91 |
+
void runSearch(resolvedFocusDrug(), regimenDrugNames);
|
| 92 |
+
};
|
| 93 |
+
|
| 94 |
+
const onLoadRegimen = () => {
|
| 95 |
+
const names = regimenDrugNames.length ? regimenDrugNames : [];
|
| 96 |
+
if (!names.length) {
|
| 97 |
+
setError("Reset an episode first so the regimen list is available.");
|
| 98 |
+
return;
|
| 99 |
+
}
|
| 100 |
+
const idx = Math.min(Math.max(regimenFocusIndex, 0), names.length - 1);
|
| 101 |
+
const focus = names[idx] ?? "";
|
| 102 |
+
setRegimenFocusIndex(idx);
|
| 103 |
+
setQuery(focus);
|
| 104 |
+
setFocusFromRegimenSelect(true);
|
| 105 |
+
void runSearch(focus, names);
|
| 106 |
+
};
|
| 107 |
+
|
| 108 |
+
const onRegimenSelectChange = (index: number) => {
|
| 109 |
+
setRegimenFocusIndex(index);
|
| 110 |
+
const name = regimenDrugNames[index]?.trim() ?? "";
|
| 111 |
+
setQuery(name);
|
| 112 |
+
setFocusFromRegimenSelect(true);
|
| 113 |
+
};
|
| 114 |
+
|
| 115 |
+
return (
|
| 116 |
+
<section
|
| 117 |
+
className="panel-surface alt-med-tool panel-wide"
|
| 118 |
+
aria-label="FDA alternatives: openFDA class neighbors"
|
| 119 |
+
>
|
| 120 |
+
<div className="panel-heading">
|
| 121 |
+
<h2>FDA alternatives</h2>
|
| 122 |
+
<span>Tool</span>
|
| 123 |
+
</div>
|
| 124 |
+
{regimenDrugNames.length > 0 ? (
|
| 125 |
+
<label className="field alt-med-tool-regimen-select">
|
| 126 |
+
<span>Focus drug from current regimen</span>
|
| 127 |
+
<select
|
| 128 |
+
aria-label="Select regimen drug for alternatives search"
|
| 129 |
+
value={safeRegimenIndex}
|
| 130 |
+
onChange={(e) => onRegimenSelectChange(Number(e.target.value))}
|
| 131 |
+
>
|
| 132 |
+
{regimenDrugNames.map((name, i) => (
|
| 133 |
+
<option key={`${name}-${i}`} value={i}>
|
| 134 |
+
{name}
|
| 135 |
+
</option>
|
| 136 |
+
))}
|
| 137 |
+
</select>
|
| 138 |
+
</label>
|
| 139 |
+
) : null}
|
| 140 |
+
<div className="alt-med-tool-row">
|
| 141 |
+
<label className="field alt-med-tool-field">
|
| 142 |
+
<span>Drug name (focus)</span>
|
| 143 |
+
<input
|
| 144 |
+
value={query}
|
| 145 |
+
onChange={(e) => {
|
| 146 |
+
setQuery(e.target.value);
|
| 147 |
+
setFocusFromRegimenSelect(false);
|
| 148 |
+
}}
|
| 149 |
+
placeholder="e.g. diazepam, ibuprofen, benzodiazepine (or simulator token like benzodiazepine_like)"
|
| 150 |
+
autoComplete="off"
|
| 151 |
+
/>
|
| 152 |
+
</label>
|
| 153 |
+
<div className="alt-med-tool-actions">
|
| 154 |
+
<button type="button" className="secondary" onClick={onLoadRegimen} disabled={loading}>
|
| 155 |
+
Load from episode
|
| 156 |
+
</button>
|
| 157 |
+
<button type="button" onClick={onSubmit} disabled={loading}>
|
| 158 |
+
{loading ? "Searching…" : "Suggest alternatives"}
|
| 159 |
+
</button>
|
| 160 |
+
</div>
|
| 161 |
+
</div>
|
| 162 |
+
<p className="muted small-print alt-med-tool-hint">
|
| 163 |
+
Pick a regimen row, then search. Up to 7 results — scroll the list below.
|
| 164 |
+
</p>
|
| 165 |
+
{error && <div className="error-banner">{error}</div>}
|
| 166 |
+
{result && (
|
| 167 |
+
<div className="alt-med-tool-results">
|
| 168 |
+
{result.errors?.length ? (
|
| 169 |
+
<ul className="alt-med-tool-errors">
|
| 170 |
+
{result.errors.map((msg) => (
|
| 171 |
+
<li key={msg}>{msg}</li>
|
| 172 |
+
))}
|
| 173 |
+
</ul>
|
| 174 |
+
) : null}
|
| 175 |
+
<p className="muted">
|
| 176 |
+
<strong>Focus:</strong> {result.focus_drug || "—"} · <strong>Class:</strong>{" "}
|
| 177 |
+
{result.therapeutic_class ?? "—"}{" "}
|
| 178 |
+
{result.therapeutic_class_field ? <span>({result.therapeutic_class_field})</span> : null} ·{" "}
|
| 179 |
+
<strong>Source:</strong> {result.source}
|
| 180 |
+
</p>
|
| 181 |
+
<div className="alt-med-suggestions-scroll" role="region" aria-label="FDA alternative suggestions, scrollable">
|
| 182 |
+
<ul className="alt-med-suggestion-list">
|
| 183 |
+
{result.suggestions?.length ? (
|
| 184 |
+
result.suggestions.map((s, idx) => (
|
| 185 |
+
<li key={`${s.display_name}-${idx}`} className="alt-med-suggestion">
|
| 186 |
+
<div>
|
| 187 |
+
<strong>{s.display_name}</strong>
|
| 188 |
+
<span className="muted"> · {s.source_detail ?? "openfda"}</span>
|
| 189 |
+
</div>
|
| 190 |
+
{s.routes?.length ? (
|
| 191 |
+
<div className="muted small-print">Route: {s.routes.join(", ")}</div>
|
| 192 |
+
) : null}
|
| 193 |
+
{s.generic_names?.length ? (
|
| 194 |
+
<div className="muted small-print">Generic: {s.generic_names.join(", ")}</div>
|
| 195 |
+
) : null}
|
| 196 |
+
{s.adverse_reactions_snippet ? (
|
| 197 |
+
<div className="alt-med-ar">ADR label excerpt: {s.adverse_reactions_snippet}</div>
|
| 198 |
+
) : null}
|
| 199 |
+
{s.label_link ? (
|
| 200 |
+
<a className="alt-med-link" href={s.label_link} target="_blank" rel="noreferrer">
|
| 201 |
+
DailyMed / label
|
| 202 |
+
</a>
|
| 203 |
+
) : null}
|
| 204 |
+
</li>
|
| 205 |
+
))
|
| 206 |
+
) : (
|
| 207 |
+
<li className="muted">No suggestions yet — try another spelling or load from episode.</li>
|
| 208 |
+
)}
|
| 209 |
+
</ul>
|
| 210 |
+
</div>
|
| 211 |
+
</div>
|
| 212 |
+
)}
|
| 213 |
+
</section>
|
| 214 |
+
);
|
| 215 |
+
}
|
app/ui/frontend/src/styles/theme.css
CHANGED
|
@@ -1138,6 +1138,108 @@ td {
|
|
| 1138 |
}
|
| 1139 |
}
|
| 1140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1141 |
::-webkit-scrollbar {
|
| 1142 |
width: 7px;
|
| 1143 |
height: 7px;
|
|
|
|
| 1138 |
}
|
| 1139 |
}
|
| 1140 |
|
| 1141 |
+
.small-print {
|
| 1142 |
+
font-size: 0.78rem;
|
| 1143 |
+
line-height: 1.35;
|
| 1144 |
+
}
|
| 1145 |
+
|
| 1146 |
+
.alt-med-tool {
|
| 1147 |
+
margin-top: 10px;
|
| 1148 |
+
border: 1px dashed rgba(155, 124, 255, 0.35);
|
| 1149 |
+
background: rgba(8, 11, 27, 0.55);
|
| 1150 |
+
}
|
| 1151 |
+
|
| 1152 |
+
.alt-med-tool .panel-heading h2 {
|
| 1153 |
+
font-size: 1.05rem;
|
| 1154 |
+
}
|
| 1155 |
+
|
| 1156 |
+
.alt-med-tool-regimen-select {
|
| 1157 |
+
margin: 0 0 10px;
|
| 1158 |
+
max-width: min(520px, 100%);
|
| 1159 |
+
}
|
| 1160 |
+
|
| 1161 |
+
.alt-med-tool-regimen-select select {
|
| 1162 |
+
width: 100%;
|
| 1163 |
+
}
|
| 1164 |
+
|
| 1165 |
+
.alt-med-tool-hint {
|
| 1166 |
+
margin: 8px 0 0;
|
| 1167 |
+
max-width: 960px;
|
| 1168 |
+
}
|
| 1169 |
+
|
| 1170 |
+
.alt-med-tool-row {
|
| 1171 |
+
display: flex;
|
| 1172 |
+
flex-wrap: wrap;
|
| 1173 |
+
gap: 12px;
|
| 1174 |
+
align-items: flex-end;
|
| 1175 |
+
}
|
| 1176 |
+
|
| 1177 |
+
.alt-med-tool-field {
|
| 1178 |
+
flex: 1 1 220px;
|
| 1179 |
+
margin: 0;
|
| 1180 |
+
}
|
| 1181 |
+
|
| 1182 |
+
.alt-med-tool-actions {
|
| 1183 |
+
display: flex;
|
| 1184 |
+
flex-wrap: wrap;
|
| 1185 |
+
gap: 8px;
|
| 1186 |
+
}
|
| 1187 |
+
|
| 1188 |
+
.alt-med-tool-results {
|
| 1189 |
+
margin-top: 12px;
|
| 1190 |
+
}
|
| 1191 |
+
|
| 1192 |
+
.alt-med-tool-errors {
|
| 1193 |
+
color: var(--warning);
|
| 1194 |
+
font-size: 0.85rem;
|
| 1195 |
+
}
|
| 1196 |
+
|
| 1197 |
+
.alt-med-suggestions-scroll {
|
| 1198 |
+
margin-top: 8px;
|
| 1199 |
+
max-height: 17.5rem;
|
| 1200 |
+
overflow-y: auto;
|
| 1201 |
+
overflow-x: hidden;
|
| 1202 |
+
padding-right: 4px;
|
| 1203 |
+
border-radius: 12px;
|
| 1204 |
+
border: 1px solid var(--line-soft);
|
| 1205 |
+
background: rgba(5, 8, 20, 0.35);
|
| 1206 |
+
}
|
| 1207 |
+
|
| 1208 |
+
.alt-med-suggestion-list {
|
| 1209 |
+
list-style: none;
|
| 1210 |
+
margin: 0;
|
| 1211 |
+
padding: 8px;
|
| 1212 |
+
display: flex;
|
| 1213 |
+
flex-direction: column;
|
| 1214 |
+
gap: 6px;
|
| 1215 |
+
}
|
| 1216 |
+
|
| 1217 |
+
.alt-med-suggestion {
|
| 1218 |
+
padding: 8px 10px;
|
| 1219 |
+
border-radius: 10px;
|
| 1220 |
+
border: 1px solid var(--line-soft);
|
| 1221 |
+
background: rgba(13, 16, 35, 0.45);
|
| 1222 |
+
flex-shrink: 0;
|
| 1223 |
+
}
|
| 1224 |
+
|
| 1225 |
+
.alt-med-ar {
|
| 1226 |
+
margin-top: 4px;
|
| 1227 |
+
font-size: 0.76rem;
|
| 1228 |
+
color: var(--muted);
|
| 1229 |
+
line-height: 1.35;
|
| 1230 |
+
display: -webkit-box;
|
| 1231 |
+
-webkit-box-orient: vertical;
|
| 1232 |
+
-webkit-line-clamp: 2;
|
| 1233 |
+
overflow: hidden;
|
| 1234 |
+
}
|
| 1235 |
+
|
| 1236 |
+
.alt-med-link {
|
| 1237 |
+
display: inline-block;
|
| 1238 |
+
margin-top: 6px;
|
| 1239 |
+
font-size: 0.82rem;
|
| 1240 |
+
color: var(--accent-2);
|
| 1241 |
+
}
|
| 1242 |
+
|
| 1243 |
::-webkit-scrollbar {
|
| 1244 |
width: 7px;
|
| 1245 |
height: 7px;
|
docker/space/README.md
CHANGED
|
@@ -12,28 +12,46 @@ Never commit or paste Hugging Face tokens into chat or the repo. If a token was
|
|
| 12 |
|
| 13 |
```bash
|
| 14 |
cd polyguard-rl
|
| 15 |
-
docker build -
|
| 16 |
```
|
| 17 |
|
| 18 |
-
3. Push the Space repo
|
| 19 |
|
| 20 |
-
|
| 21 |
|
| 22 |
-
|
| 23 |
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
## Runtime
|
| 27 |
|
| 28 |
- **Port:** Space sets `PORT` (default `7860`). Nginx listens on `PORT` and routes `/api/*` → API, `/ws` → OpenEnv WebSocket, `/` → built React app.
|
| 29 |
-
- **First boot:** If `checkpoints/active/grpo_adapter` is missing, `entrypoint.sh` runs `scripts/install_hf_active_bundle.py`
|
| 30 |
- **CORS:** Set via `POLYGUARD_ALLOW_HF_SPACE_CORS=true` (default in the Space Dockerfile).
|
| 31 |
|
| 32 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
| Name
|
| 35 |
-
|-----------|-----|
|
| 36 |
-
| `HF_TOKEN` |
|
| 37 |
|
| 38 |
## Local smoke (same as Space)
|
| 39 |
|
|
|
|
| 12 |
|
| 13 |
```bash
|
| 14 |
cd polyguard-rl
|
| 15 |
+
docker build -t polyguard-space .
|
| 16 |
```
|
| 17 |
|
| 18 |
+
3. Push the Space repo. The root **`Dockerfile`** is the full demo (Vite UI + nginx + API + OpenEnv). Hugging Face uses it automatically when **Dockerfile path** is empty. If your Space was created earlier with a different Dockerfile, trigger **Factory reboot** after pushing so the new image builds.
|
| 19 |
|
| 20 |
+
4. Commit and push to the Space repository. HF builds the image on their builders (you do not need to `docker push` to Docker Hub for standard Spaces).
|
| 21 |
|
| 22 |
+
## FDA panel / latest UI missing on the live Space
|
| 23 |
|
| 24 |
+
Pushing code to GitHub alone does **not** refresh `huggingface.co/spaces/...` unless that Space is connected to the same repo **and** rebuilds from the branch that has your UI (for example `fda` vs `main`). This repo’s usual demo path is **upload via Hub API**:
|
| 25 |
+
|
| 26 |
+
```bash
|
| 27 |
+
cd polyguard-rl
|
| 28 |
+
export HF_TOKEN="hf_..." # write token; never commit it
|
| 29 |
+
uv run python scripts/deploy_space_api.py --repo-id TheJackBright/polyguard-openenv
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
Wait for **Build** in the Space logs to finish, then use **Factory reboot** or a hard browser refresh if the page still looks old. **Dockerfile path** should be empty (default `Dockerfile`) or `Dockerfile` / `Dockerfile.space`. If the Space uses the **full monorepo** as its Git root, set Dockerfile path to the repo-root `Dockerfile` or to `polyguard-rl/Dockerfile`.
|
| 33 |
|
| 34 |
## Runtime
|
| 35 |
|
| 36 |
- **Port:** Space sets `PORT` (default `7860`). Nginx listens on `PORT` and routes `/api/*` → API, `/ws` → OpenEnv WebSocket, `/` → built React app.
|
| 37 |
+
- **First boot:** If `checkpoints/active/grpo_adapter` is missing, `entrypoint.sh` runs `scripts/install_hf_active_bundle.py`. That pulls `TheJackBright/polyguard-openenv-training-full-artifacts` (slow, ~1.1 GB).
|
| 38 |
- **CORS:** Set via `POLYGUARD_ALLOW_HF_SPACE_CORS=true` (default in the Space Dockerfile).
|
| 39 |
|
| 40 |
+
## If logs show `401` / `RepositoryNotFoundError` on startup
|
| 41 |
+
|
| 42 |
+
The artifact **model repo is private, gated, or needs a license click** while anonymous downloads are blocked. The UI can still “work” using the **heuristic ranker** and public base models, but **your trained bundle is not installed**.
|
| 43 |
+
|
| 44 |
+
**Fix (pick one):**
|
| 45 |
+
|
| 46 |
+
1. **Space secret (recommended):** Space → **Settings** → **Secrets** → add **`HF_TOKEN`** = a [read token](https://huggingface.co/settings/tokens) that can access `polyguard-openenv-training-full-artifacts`. Restart the Space.
|
| 47 |
+
2. **Hub settings:** Make that model repo **public**, or ensure **gated** access allows the token you use in (1).
|
| 48 |
+
3. **Ignore:** Leave as-is if ranker-only behavior is enough for the demo.
|
| 49 |
+
|
| 50 |
+
## Secrets
|
| 51 |
|
| 52 |
+
| Name | Use |
|
| 53 |
+
|------------|-----|
|
| 54 |
+
| `HF_TOKEN` | **Required** if the artifact repo is not anonymously readable; `huggingface_hub` reads it automatically. |
|
| 55 |
|
| 56 |
## Local smoke (same as Space)
|
| 57 |
|
docs/DEMO_RECORDING_SCRIPT.md
ADDED
|
@@ -0,0 +1,493 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PolyGuard Space UI — demo recording script (shot-by-shot)
|
| 2 |
+
|
| 3 |
+
Use this document while screen-recording the Hugging Face Space (or local Docker). Target length: **8–14 minutes** for a full pass, or **3–5 minutes** for a highlights reel.
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## Before you hit record
|
| 8 |
+
|
| 9 |
+
1. **Open the Space** in a clean browser profile or incognito (fewer extensions → fewer glitches).
|
| 10 |
+
2. **Set resolution**: 1920×1080 or 1440×900; browser zoom **100%**.
|
| 11 |
+
3. **Fullscreen** the Space iframe or use HF “Open in new tab” so the URL bar shows the Space domain.
|
| 12 |
+
4. **Wait for cold start**: first load may download the model bundle (several minutes). The **Event Log** and **Model Truth** panel will tell you if the policy failed to load (heuristic fallback is still usable for env steps).
|
| 13 |
+
5. **Optional**: hide mouse cursor in OBS if you prefer; otherwise move slowly and pause **2 seconds** on each panel after major clicks.
|
| 14 |
+
|
| 15 |
+
**Primary Space (product):** `https://huggingface.co/spaces/TheJackBright/polyguard-openenv`
|
| 16 |
+
Runtime: nginx fronts the **product API** (default `8200`) and **OpenEnv service** (`8100`); see `docker/space/entrypoint.sh`.
|
| 17 |
+
|
| 18 |
+
---
|
| 19 |
+
|
| 20 |
+
## Where the model lives (Qwen and artifacts)
|
| 21 |
+
|
| 22 |
+
This matters for what you say on camera.
|
| 23 |
+
|
| 24 |
+
| Location | What it is |
|
| 25 |
+
| --- | --- |
|
| 26 |
+
| **On the Space container** | Working directory `/app` (see `entrypoint.sh`: `cd /app`). |
|
| 27 |
+
| **Downloaded bundle** | If `checkpoints/active/grpo_adapter/adapter_config.json` is missing at boot, `scripts/install_hf_active_bundle.py` pulls the **HF usable model bundle** into `checkpoints/active/`. |
|
| 28 |
+
| **Typical layout after install** | `checkpoints/active/active_model_manifest.json` — which artifact is active (often **GRPO adapter** on top of base). |
|
| 29 |
+
| **Weights** | `checkpoints/active/grpo_adapter/` (LoRA/PEFT), optionally `checkpoints/active/merged/` (full merged weights), `checkpoints/active/sft_adapter/`. |
|
| 30 |
+
| **Base model name** | Usually **`Qwen/Qwen2.5-0.5B-Instruct`** as the Transformers base for adapters (set via env e.g. `POLYGUARD_HF_MODEL`). |
|
| 31 |
+
|
| 32 |
+
**What the UI proves:** the **Model Truth** panel calls **`GET /policy/model_status`** (product API). It shows `model_id` / `base_model`, `run_id`, `preferred_artifact` / `loaded_source`, and availability flags. Say on camera: *“This is live from the API, not hard-coded in the frontend.”*
|
| 33 |
+
|
| 34 |
+
---
|
| 35 |
+
|
| 36 |
+
## UI map (what appears on screen)
|
| 37 |
+
|
| 38 |
+
| Region | Purpose |
|
| 39 |
+
| --- | --- |
|
| 40 |
+
| **Hero** (“PolyGuard neural safety cockpit”) | Marketing copy + quick stats. |
|
| 41 |
+
| **Top bar** | **Agent Workbench** vs **Env Explorer**, **Task** dropdown, **Reset Episode**, **Q Tips**. |
|
| 42 |
+
| **Status chips** | “Live” / model line; in Env mode one chip reads **ws env** (WebSocket to OpenEnv). |
|
| 43 |
+
| **Model Truth** | Qwen / artifact / run / availability. |
|
| 44 |
+
| **Advanced strip** | Only if Task = **Advanced** — pick raw `difficulty` + `sub_environment`. |
|
| 45 |
+
| **Episode Overview** | Mode, task, difficulty, environment, step budget, last reward, patient id, **Patient Summary**, **Risk Delta**. |
|
| 46 |
+
| **Candidate Actions** | Legal moves: `candidate_id`, action type, target/replacement, estimated safety delta (or **Blocked**). |
|
| 47 |
+
| **Action Console** | Confidence, rationale, **Submit** vs **Run Agent** (Agent mode only for Run Agent). |
|
| 48 |
+
| **Reward Channels** | Bars for total + primary + component scores (see below). |
|
| 49 |
+
| **Current Medications** | Cards from observation. |
|
| 50 |
+
| **Action History / Warnings** | Step trace and env warnings. |
|
| 51 |
+
| **Decision / Explanation / Evidence** | **Agent mode only** (filled after API steps that return those fields). |
|
| 52 |
+
| **Event Log** | Human-readable trace of resets, steps, rewards, errors. |
|
| 53 |
+
|
| 54 |
+
---
|
| 55 |
+
|
| 56 |
+
## Feature encyclopedia — every panel, branch, and agent
|
| 57 |
+
|
| 58 |
+
Use this section as a **script appendix** or **judge handout**. It mirrors the React workbench in `app/ui/frontend/src/App.tsx`, the API in `app/api/`, and the orchestrator in `app/agents/orchestrator.py`.
|
| 59 |
+
|
| 60 |
+
### A. How the Space is wired (under the hood)
|
| 61 |
+
|
| 62 |
+
| Piece | Role |
|
| 63 |
+
| --- | --- |
|
| 64 |
+
| **Browser → nginx** | HF Space exposes one origin; nginx routes paths. |
|
| 65 |
+
| **Product API** | Vite uses `API_BASE` (default **`/api`**). FastAPI serves catalog, reset, step_candidate, orchestrate, model_status, reward_breakdown, etc. |
|
| 66 |
+
| **OpenEnv HTTP/WS** | `ENV_BASE` defaults to **same origin** on Spaces (not localhost). Web UI opens **`ws(s)://<origin>/ws`** for Env Explorer. |
|
| 67 |
+
| **Two Python processes** | `entrypoint.sh` starts **uvicorn** for `app.env.fastapi_app` (env, port **8100**) and **uvicorn** for `app.api` (product API, port **8200**). Agent mode reset/step still use the **API’s** in-process `PolyGuardEnv`; Env mode uses the **separate** env service over WebSocket. |
|
| 68 |
+
| **Important** | Agent and Env UIs maintain **separate React state** (`agentObservation` vs `envObservation`). Toggling mode **clears the Event Log** and clears the inactive branch’s episode state so you always know which backend path you are exercising. |
|
| 69 |
+
|
| 70 |
+
### B. Hero (“PolyGuard neural safety cockpit”)
|
| 71 |
+
|
| 72 |
+
| Stat | Source | What to say on camera |
|
| 73 |
+
| --- | --- | --- |
|
| 74 |
+
| **Runtime** | `mode === "agent"` → “Agent Workbench”; else “Env Explorer”. | “This is which transport I am using right now.” |
|
| 75 |
+
| **Scenario** | Human label for current `taskId` from catalog presets or Advanced. | “Which curriculum preset is bound to difficulty + sub-environment.” |
|
| 76 |
+
| **Candidates** | `candidate_action_set.length` from the **active** observation. | “How many legal moves the env is offering after the last reset/step.” |
|
| 77 |
+
| **Reward** | Last scalar reward for the active branch (`null` → shown as `-`). | “Verifier scalar after the last step in this mode only.” |
|
| 78 |
+
|
| 79 |
+
### C. Top bar — every control
|
| 80 |
+
|
| 81 |
+
| Control | Behavior |
|
| 82 |
+
| --- | --- |
|
| 83 |
+
| **Agent Workbench** | Sets `mode` to `agent`. Clears env state, event log, error; clears agent panels if switching from env (see `handleModeChange`). |
|
| 84 |
+
| **Env Explorer** | Sets `mode` to `env`. Clears agent-specific observation/reward/decision/evidence. |
|
| 85 |
+
| **Task** `<select>` | Options: each **task preset** from `GET /env/catalog` (`task_presets`), plus **Advanced**. Changing a preset updates internal `difficulty` + `sub_environment` to match the preset. |
|
| 86 |
+
| **Reset Episode** | **Agent:** `POST /env/reset` with body from preset (`{ task_id }`) or `{ difficulty, sub_environment }`. Refreshes **Model Truth** first. Clears reward breakdown, decision, explanation, evidence, sets default candidate. **Env:** WebSocket `reset` with `{ difficulty, sub_environment }` only (no `task_id` in WS path—preset is flattened to those two fields). **Always** clears `events` at start of reset handler, then appends one “Reset … in agent/env” line. |
|
| 87 |
+
| **Q Tips** | Opens modal walkthrough; highlights DOM nodes with `[data-guide="…"]`. **Skip** stores `polyguard.qtips.v2.seen` in localStorage so first visit auto-opens tips. |
|
| 88 |
+
| **Status chips** | First chip: **Live** if observation loaded and not done, else **Complete** / **Ready**. Second chip: in Agent mode, derived from **`modelSignal()`** (Qwen verified or not); in Env mode shows **`ws env`**. |
|
| 89 |
+
|
| 90 |
+
### D. Model Truth panel — field by field
|
| 91 |
+
|
| 92 |
+
Data from **`GET /policy/model_status`** (`PolicyProviderRouter` / `active_model_status`).
|
| 93 |
+
|
| 94 |
+
| Field in UI | Typical meaning |
|
| 95 |
+
| --- | --- |
|
| 96 |
+
| **Heading label** | “Qwen 0.5B active” only when Space config matches a strict check (enabled + active + availability + model id regex for **Qwen2.5-0.5B-Instruct**); else “Qwen not verified” or Ollama-specific text if Ollama wins locally. |
|
| 97 |
+
| **Detail paragraph** | Human sentence: model name, artifact, `run_id`, optional **load_error**. |
|
| 98 |
+
| **Model** | `model_id` or `base_model` — HF id of the loaded or configured base. |
|
| 99 |
+
| **Run** | `run_id` from manifest / sweep activation (which training bundle). |
|
| 100 |
+
| **Artifact** | `loaded_source` or `preferred_artifact` — e.g. **`grpo_adapter`**, **`merged`**, **`sft_adapter`**. |
|
| 101 |
+
| **Availability** | Key/value pairs from `availability` dict (which load stages succeeded). |
|
| 102 |
+
|
| 103 |
+
**Ollama branch (local dev):** If `status.ollama.enabled && available`, the UI labels **Ollama Qwen active** and mentions `POLYGUARD_PROVIDER_PREFERENCE` order. Spaces Dockerfile sets **`POLYGUARD_ENABLE_OLLAMA=false`** by default.
|
| 104 |
+
|
| 105 |
+
### E. Advanced strip (Task = Advanced)
|
| 106 |
+
|
| 107 |
+
Only rendered when `taskId === "advanced"`. Two selects:
|
| 108 |
+
|
| 109 |
+
1. **Difficulty:** `easy` \| `medium` \| `hard` — passed to reset as `difficulty`.
|
| 110 |
+
2. **Environment:** every string in `catalog.sub_environments` (DDI, BANDIT_MINING, REGIMEN_RISK, PRECISION_DOSING, LONGITUDINAL_DEPRESCRIBING, WEB_SEARCH_MISSING_DATA, ALTERNATIVE_SUGGESTION, NEW_DRUG_DECOMPOSITION).
|
| 111 |
+
|
| 112 |
+
**What each sub-environment stresses (one line each):**
|
| 113 |
+
|
| 114 |
+
| Sub-environment | What the episode emphasizes |
|
| 115 |
+
| --- | --- |
|
| 116 |
+
| **DDI** | Drug–drug interaction exposure and pair risk. |
|
| 117 |
+
| **BANDIT_MINING** | Policy / bandit exploration style scenario (see preset “Bandit Mining”). |
|
| 118 |
+
| **REGIMEN_RISK** | Overall regimen burden and safety tradeoffs. |
|
| 119 |
+
| **PRECISION_DOSING** | Dose buckets, organ-sensitive flags in observation. |
|
| 120 |
+
| **LONGITUDINAL_DEPRESCRIBING** | Multi-step taper / stop sequences over time. |
|
| 121 |
+
| **WEB_SEARCH_MISSING_DATA** | Rewards process fidelity for evidence-fetch actions. |
|
| 122 |
+
| **ALTERNATIVE_SUGGESTION** | Substitution / alternative action types rewarded more. |
|
| 123 |
+
| **NEW_DRUG_DECOMPOSITION** | Hard track: decompose novel drug string into components. |
|
| 124 |
+
|
| 125 |
+
### F. Episode Overview — every KPI and subsection
|
| 126 |
+
|
| 127 |
+
**KPI grid (always eight rows):**
|
| 128 |
+
|
| 129 |
+
| KPI | Source |
|
| 130 |
+
| --- | --- |
|
| 131 |
+
| **Mode** | Literal “Agent Workbench” or “Env Explorer”. |
|
| 132 |
+
| **Task** | Preset label or “Advanced”. |
|
| 133 |
+
| **Difficulty** | `observation.deterministic_contract.difficulty` or `-`. |
|
| 134 |
+
| **Environment** | `deterministic_contract.sub_environment` or `observation.sub_environment`. |
|
| 135 |
+
| **Step Budget** | `observation.step_budget_remaining`. |
|
| 136 |
+
| **Last Reward** | Active branch’s last reward (after reset, Agent clears to `-` until first step). |
|
| 137 |
+
| **Patient** | `patient_summary.patient_id` or `patient_summary.id`. |
|
| 138 |
+
| **Status** | Complete if `done`, else Live if observation exists, else Ready. |
|
| 139 |
+
|
| 140 |
+
**Patient Summary `<dl>`:** First **8** keys of `observation.patient_summary` (keys humanized: underscores → spaces, title case). Typical keys include demographics, allergies, high-level clinical flags—whatever the backend puts on `PolyGuardObservation`.
|
| 141 |
+
|
| 142 |
+
**Risk Delta `<dl>`:** First **8** entries of `observation.burden_score_summary` — burden-related scalars the env uses for reward deltas.
|
| 143 |
+
|
| 144 |
+
### G. Candidate Actions list — each column
|
| 145 |
+
|
| 146 |
+
Each row is one **`CandidateAction`** from `candidate_action_set`.
|
| 147 |
+
|
| 148 |
+
| Column / concept | Meaning |
|
| 149 |
+
| --- | --- |
|
| 150 |
+
| **`candidate_id`** | Stable id (e.g. `cand_…`) — must match when submitting. |
|
| 151 |
+
| **Action label** | Humanized `action_type` (STOP_DRUG, SUBSTITUTE_WITHIN_CLASS, …). |
|
| 152 |
+
| **Third column** | `target_drug` **or** `replacement_drug` **or** `mode` — whichever is most informative. |
|
| 153 |
+
| **Right column** | `estimated_safety_delta` formatted to 3 decimals, or **Blocked** if `legality_precheck === false`. |
|
| 154 |
+
| **Disabled rows** | You cannot select illegal candidates; click does nothing. |
|
| 155 |
+
| **Default selection** | **Agent:** first candidate in list. **Env:** first **legal** candidate that is not `KEEP_REGIMEN` and not `REQUEST_*`, else first legal non–KEEP_REGIMEN, else first in list (`defaultCandidateForMode`). |
|
| 156 |
+
|
| 157 |
+
**Hidden fields you can mention if showing JSON elsewhere:** `dose_bucket`, `taper_days`, `monitoring_plan`, `evidence_query`, `new_drug_name`, `candidate_components`, `uncertainty_score`, `rationale_tags`, `required_monitoring`, `burden_delta`, `disease_stability_estimate`.
|
| 158 |
+
|
| 159 |
+
### H. Action Console — every input and button
|
| 160 |
+
|
| 161 |
+
| UI element | Effect |
|
| 162 |
+
| --- | --- |
|
| 163 |
+
| **Type / Mode / Target / Replacement / Dose / Uncertainty** | Read-only snapshot of the **currently selected** candidate. |
|
| 164 |
+
| **Confidence** | Number input **0.001–0.999** step 0.001; sent as `confidence` on **Submit Candidate** (Agent) or embedded in WS payload (Env). |
|
| 165 |
+
| **Rationale** | Free text → `rationale_brief` / rationale on the action. |
|
| 166 |
+
| **Submit Candidate** (Agent) | Calls `POST /env/step_candidate` with `{ candidate_id, confidence, rationale_brief }`. API finds matching legal action and calls `env.step`. |
|
| 167 |
+
| **Submit Env Step** (Env) | Same confidence/rationale + full action payload built by `buildActionPayload` → WS `step`. |
|
| 168 |
+
| **Run Agent** | **Only when** `mode === "agent"` **and** observation exists **and** not `done`. Calls `POST /agents/orchestrate` with empty JSON body. **Disabled** in Env mode. |
|
| 169 |
+
| **Done notice** | If `done`, shows which mode completed and `termination_reason` from `info` if present. Primary button becomes **Reset Episode** (shortcut). |
|
| 170 |
+
|
| 171 |
+
### I. Reward Channels — every bar (exact keys)
|
| 172 |
+
|
| 173 |
+
The UI renders **exactly these keys** in order (`REWARD_KEYS` in `App.tsx` — **14** rows):
|
| 174 |
+
|
| 175 |
+
| # | Key | Role |
|
| 176 |
+
| --- | --- | --- |
|
| 177 |
+
| 1 | `total_reward` | Weighted aggregate of component scores (`aggregate_rewards` in `reward_scaling.py`). |
|
| 178 |
+
| 2 | `primary_safety_legality` | Roll-up: legality, candidate alignment, anti-cheat, uncertainty calibration (`reward_router.compute_primary_reward_channels`). |
|
| 179 |
+
| 3 | `primary_clinical_improvement` | Roll-up: safety delta, burden improvement, disease stability. |
|
| 180 |
+
| 4 | `primary_dosing_quality` | Roll-up: dosing quality + abstention quality. |
|
| 181 |
+
| 5 | `primary_process_integrity` | Roll-up: format compliance, efficiency, process fidelity, explanation grounding. |
|
| 182 |
+
| 6 | `legality_score` | Action legal per safety verifier. |
|
| 183 |
+
| 7 | `safety_delta_score` | Movement on severe DDI / risk proxy vs pre-step state. |
|
| 184 |
+
| 8 | `burden_improvement_score` | Medication burden before vs after. |
|
| 185 |
+
| 9 | `disease_stability_score` | Stability heuristic vs disruptive action types. |
|
| 186 |
+
| 10 | `dosing_quality_score` | Dose-mode and bucket appropriateness. |
|
| 187 |
+
| 11 | `process_fidelity_score` | Follows intended workflow for sub-environment (e.g. fetch evidence when required). |
|
| 188 |
+
| 12 | `explanation_grounding_score` | Rationale present / grounded. |
|
| 189 |
+
| 13 | `anti_cheat_score` | Collapses when anti-cheat triggers. |
|
| 190 |
+
| 14 | `uncertainty_calibration_score` | Confidence vs uncertainty alignment. |
|
| 191 |
+
|
| 192 |
+
**Note:** `total_reward` is row 1; rows 2–5 are **primary** channels; rows 6–14 are **exposed component** scores. Other components (`format_compliance_score`, `efficiency_score`, `candidate_alignment_score`, `abstention_quality_score`) still exist **in the backend** `RewardBreakdown` and feed primaries + total, but this UI **does not** give them their own bar rows.
|
| 193 |
+
|
| 194 |
+
Bars show **`-`** when the value is missing (no step yet or breakdown not returned). Bar width = value × 100% with value clamped to `[0.001, 0.999]`.
|
| 195 |
+
|
| 196 |
+
**Agent vs breakdown source:** After a step, UI prefers `info.reward_breakdown`; it may also call **`GET /env/reward_breakdown`**. **Env:** uses `info.reward_breakdown` from the WebSocket step packet; if empty, the UI clears the reward panel.
|
| 197 |
+
|
| 198 |
+
### J. Current Medications cards
|
| 199 |
+
|
| 200 |
+
Built from `observation.medication_table[]`. Each card:
|
| 201 |
+
|
| 202 |
+
- **Title:** `drug` / `drug_id` / `name`.
|
| 203 |
+
- **High-risk ribbon:** if `high_risk` or `is_high_risk_elderly` or Beers / warning flags.
|
| 204 |
+
- **Body:** `indication` or `class_name` or `atc_class`.
|
| 205 |
+
- **Meta row:** dose bucket or mg dose; taper vs `monitoring` or `route`.
|
| 206 |
+
|
| 207 |
+
### K. Action History vs Warnings
|
| 208 |
+
|
| 209 |
+
| Panel | Source |
|
| 210 |
+
| --- | --- |
|
| 211 |
+
| **Action History** | `observation.action_history` — each item shows step index and `action_type` / `candidate_id` / reward snippet. |
|
| 212 |
+
| **Warnings** | `observation.warning_summary` — list of human-readable env warnings (DDIs, constraints, etc.). |
|
| 213 |
+
|
| 214 |
+
### L. Decision / Explanation / Evidence (Agent only)
|
| 215 |
+
|
| 216 |
+
Rendered as JSON `<pre>` blocks:
|
| 217 |
+
|
| 218 |
+
| Title | When populated | Content origin |
|
| 219 |
+
| --- | --- | --- |
|
| 220 |
+
| **Decision** | Agent mode only. | **`final_action`** on the packet. For **`step_candidate`**, the API returns the standard **step** payload — **typically no `final_action` field**, so this panel may stay **empty after manual submit**. For **`orchestrate`**, **`final_action`** is the **`PolyGuardAction`** after critic (what actually hit `env.step`). |
|
| 221 |
+
| **Explanation** | Agent mode only. | **`explanation`** — output of **`ExplainerAgent`** after the step (`orchestrate` returns it). Usually **empty** after raw `step_candidate` unless API adds it. |
|
| 222 |
+
| **Evidence** | Agent mode only. | **`evidence`** key on packet. **`orchestrate`** returns **`evidence_out`** from **`EvidenceAgent.run(state)`** (retrieval / web-fallback bundle). **`step_candidate`** does not attach orchestrator evidence — panel often **empty** on manual clicks. |
|
| 223 |
+
|
| 224 |
+
**Demo takeaway:** Tell viewers: *“To populate Decision / Explanation / Evidence in the UI, use **Run Agent** (orchestrate). Manual **Submit Candidate** updates the env and rewards but does not replay the full multi-agent JSON into those three panels.”*
|
| 225 |
+
|
| 226 |
+
### M. Event Log vs Q Tips
|
| 227 |
+
|
| 228 |
+
| Feature | Behavior |
|
| 229 |
+
| --- | --- |
|
| 230 |
+
| **Event Log** | Prepends timestamped strings: resets, each step’s reward line, errors. **Capped** at 24 lines. Cleared when you click **Reset Episode** (handler starts with `setEvents([])` then appends) — *not* the same as mode switch clearing. |
|
| 231 |
+
| **Q Tips** | 10-step overlay; does not mutate env. |
|
| 232 |
+
|
| 233 |
+
### N. Orchestrator — every agent in order (`Run Agent`)
|
| 234 |
+
|
| 235 |
+
When **`POST /agents/orchestrate`** runs, `Orchestrator.run_step` executes:
|
| 236 |
+
|
| 237 |
+
| Step | Agent class | What it does (operator language) |
|
| 238 |
+
| --- | --- | --- |
|
| 239 |
+
| 1 | **`MedRecAgent`** | Summarizes current medication list / reconciliation view for downstream modules. Output key: `medrec`. |
|
| 240 |
+
| 2 | **`EvidenceAgent`** | Retrieves **local evidence** (and optional web fallback) for missing or thin context. Shown in UI **`evidence`** when orchestrating. |
|
| 241 |
+
| 3 | **`GraphSafetyAgent`** | Graph-style **DDI / duplicate therapy** style signals. Output: `graph`. |
|
| 242 |
+
| 4 | **`DosingAgent`** | Flags **dose-sensitive** windows and dosing opportunities. Feeds **`dosing_active`** into supervisor. |
|
| 243 |
+
| 5 | **`CandidateAgent`** | Wraps env **candidate builder** — produces the legal `CandidateAction` list. |
|
| 244 |
+
| 6 | **`SupervisorAgent`** | Chooses planner **mode**: regimen vs dose vs **REVIEW** (conservative routing). |
|
| 245 |
+
| 7 | **Contextual bandit** | **`ContextualBanditPolicy`** (LinUCB or Thompson sampling via `POLYGUARD_BANDIT_ALGO`) proposes **top-k** (`POLYGUARD_BANDIT_TOP_K`) candidates for the planner to consider. |
|
| 246 |
+
| 8 | **`PlannerAgent`** | Calls **`PolicyProviderRouter.select_candidate`** — this is where **Transformers + Qwen + PEFT** (or Ollama, or **safety ranker fallback**) picks a **`candidate_id`** and rationale. |
|
| 247 |
+
| 9 | **`CriticAgent`** | Safety veto / repair. May replace proposed action with a safer **`final_action`**. |
|
| 248 |
+
| 10 | **Replan / debate** (optional) | If `coordination_mode` is `replan_on_veto` or `lightweight_debate` and critic rejects, planner may rerun on **review** candidates; `debate_rounds` increments. |
|
| 249 |
+
| 11 | **`PolyGuardEnv.step`** | Commits **`final_action`**, returns `observation`, `reward`, `done`, `info`. |
|
| 250 |
+
| 12 | **Bandit `update`** | If the chosen candidate was in the bandit pool, **updates bandit statistics with the reward** (learning signal for next orchestrate). |
|
| 251 |
+
| 13 | **`ExplainerAgent`** | Builds **`explanation`** object for audit / UI. |
|
| 252 |
+
|
| 253 |
+
**Environment variables (mention for power users):**
|
| 254 |
+
|
| 255 |
+
| Variable | Effect |
|
| 256 |
+
| --- | --- |
|
| 257 |
+
| **`POLYGUARD_POLICY_STACK`** | `llm+bandit` (default): planner sees **bandit-shortlisted** candidates. `llm-only`: all supervisor-filtered candidates. `bandit-only`: **no LLM** — first bandit pick with fixed rationale. |
|
| 258 |
+
| **`POLYGUARD_BANDIT_*`** | Algorithm, alpha, epsilon, seed, top-k. |
|
| 259 |
+
| **`POLYGUARD_PROVIDER_PREFERENCE`** | e.g. `transformers` vs `ollama` order. |
|
| 260 |
+
| **`POLYGUARD_ENABLE_ACTIVE_MODEL`** | Must be true on Space for bundle path; **`POLYGUARD_HF_MODEL`** sets base id for adapters. |
|
| 261 |
+
|
| 262 |
+
### O. Qwen and fallbacks (planner path)
|
| 263 |
+
|
| 264 |
+
`PolicyProviderRouter` (`app/models/policy/provider_runtime.py`):
|
| 265 |
+
|
| 266 |
+
1. Builds a **JSON instruction** listing candidates and asks for `candidate_id=…; rationale=…`.
|
| 267 |
+
2. Tries providers in **`POLYGUARD_PROVIDER_PREFERENCE`** (default **Transformers** on Space).
|
| 268 |
+
3. Parses model text for a legal `candidate_id`; on failure uses **`safety_ranker`** deterministic ordering.
|
| 269 |
+
|
| 270 |
+
**So:** Even without Qwen load, **Run Agent** still completes using **ranker / bandit** — mention that if Model Truth is red.
|
| 271 |
+
|
| 272 |
+
### P. Full observation contract (API / types)
|
| 273 |
+
|
| 274 |
+
The TypeScript type `EnvObservation` (`lib/types.ts`) lists fields the backend **may** send. The main workbench **highlights** patient summary, medication table, candidates, burden summary, action history, warnings, step budget, and sub-environment. **Not all fields get their own panel** — if you open browser DevTools → Network → `reset` / `step` response, you can narrate extras:
|
| 275 |
+
|
| 276 |
+
| Field | Typical use |
|
| 277 |
+
| --- | --- |
|
| 278 |
+
| `comorbidity_summary` | Comorbidity list for the patient. |
|
| 279 |
+
| `organ_function_summary` | eGFR / hepatic flags for dosing scenarios. |
|
| 280 |
+
| `labs_vitals_summary` | Labs relevant to risk scoring. |
|
| 281 |
+
| `graph_safety_summary` | Aggregated graph / DDI context. |
|
| 282 |
+
| `precision_dosing_flags` | Tags when sub-environment is dosing-heavy. |
|
| 283 |
+
| `unresolved_conflicts` | Specialist conflict strings. |
|
| 284 |
+
| `abstention_indicators` | When the env suggests review / abstain. |
|
| 285 |
+
| `deterministic_contract` | Difficulty + sub-environment + scenario id contract for reproducibility. |
|
| 286 |
+
|
| 287 |
+
### Q. Q Tips — copy for each slide (matches `GUIDE_STEPS`)
|
| 288 |
+
|
| 289 |
+
| # | Title | Body (read aloud or paraphrase) |
|
| 290 |
+
| --- | --- | --- |
|
| 291 |
+
| 1 | Start here | PolyGuard is an interactive OpenEnv workbench; top bar picks runtime, scenario, reset. |
|
| 292 |
+
| 2 | Choose the runtime | Agent Workbench = REST API + reward breakdown + Qwen path; Env Explorer = WebSocket to OpenEnv. |
|
| 293 |
+
| 3 | Pick a scenario | Presets load real patient/regimen state from backend. |
|
| 294 |
+
| 4 | Check the model truth | `/policy/model_status`; Qwen only “verified” when API says adapters live. |
|
| 295 |
+
| 5 | Read the episode state | Task, patient, step budget, reward, risk delta from latest env response. |
|
| 296 |
+
| 6 | Review legal actions | Candidate rows = legal moves; inspect safety delta and mode. |
|
| 297 |
+
| 7 | Submit or ask the agent | Submit Candidate vs Run Agent; check model panel before claiming LLM. |
|
| 298 |
+
| 8 | Inspect reward channels | Real scorer output per channel; empty = no step yet. |
|
| 299 |
+
| 9 | Track regimen changes | Medication cards + history + warnings = not canned. |
|
| 300 |
+
| 10 | Follow the run | Event log shows resets, steps, rewards, errors plainly. |
|
| 301 |
+
|
| 302 |
+
---
|
| 303 |
+
|
| 304 |
+
## Agent Workbench vs Env Explorer (say this exactly on camera)
|
| 305 |
+
|
| 306 |
+
| | **Agent Workbench** | **Env Explorer** |
|
| 307 |
+
| --- | --- | --- |
|
| 308 |
+
| **Reset** | `POST /env/reset` with task preset (e.g. `{ "task_id": "easy_screening" }`) via product API. | WebSocket `reset` message to OpenEnv **`/ws`** with `{ difficulty, sub_environment }`. |
|
| 309 |
+
| **Submit** | `POST /env/step_candidate` — product API resolves `candidate_id` + your confidence + rationale into a full action and steps the **same** in-process `PolyGuardEnv`. | WebSocket `step` — payload built from selected candidate; talks **directly** to OpenEnv service. |
|
| 310 |
+
| **Run Agent** | **`POST /agents/orchestrate`** — runs the full **orchestrator** (med rec, evidence, graph, dosing, candidates, supervisor, bandit, **planner/LLM**, critic, env step, explainer). | Button **disabled** — there is no orchestrator path over raw WS-only mode in this UI. |
|
| 311 |
+
| **Decision / Explanation / Evidence panels** | **Populated** after orchestrate or after steps that echo `final_action` / `explanation` / `evidence` (orchestrate returns rich `evidence` from `EvidenceAgent` pipeline). | **Always empty** in the UI by design — those panels are `null` in Env mode (`App.tsx` only passes agent-mode state to DetailPanels). |
|
| 312 |
+
| **Reward breakdown** | From step `info.reward_breakdown` or fallback `GET /env/reward_breakdown`. | From WS step packet `info.reward_breakdown` when present. |
|
| 313 |
+
| **Switching mode** | Clears the **Event Log** and resets the other mode’s transient state — mention that so viewers don’t think it’s a bug. | Same. |
|
| 314 |
+
|
| 315 |
+
**One-liner for judges:** *“Agent Workbench is the full product API plus optional LLM-orchestrated policy; Env Explorer is the raw OpenEnv WebSocket contract for the same underlying environment.”*
|
| 316 |
+
|
| 317 |
+
---
|
| 318 |
+
|
| 319 |
+
## Reward channels — what they mean and how they’re computed (talk track)
|
| 320 |
+
|
| 321 |
+
Rewards are **verifier-backed**, **bounded** to roughly **`[0.001, 0.999]`** (3 decimal places in UI).
|
| 322 |
+
|
| 323 |
+
### Four primary channels (high level)
|
| 324 |
+
|
| 325 |
+
These are **averages of component groups** (`app/env/reward_router.py` — `compute_primary_reward_channels`):
|
| 326 |
+
|
| 327 |
+
1. **`primary_safety_legality`** — legality, candidate id alignment, anti-cheat, uncertainty calibration.
|
| 328 |
+
2. **`primary_clinical_improvement`** — safety delta vs severe pairs, burden improvement, disease stability.
|
| 329 |
+
3. **`primary_dosing_quality`** — dosing quality + abstention (e.g. appropriate review requests under uncertainty).
|
| 330 |
+
4. **`primary_process_integrity`** — format compliance, efficiency (step budget), process fidelity, explanation grounding.
|
| 331 |
+
|
| 332 |
+
### Components (examples — `compute_reward_breakdown`)
|
| 333 |
+
|
| 334 |
+
The environment builds scores such as:
|
| 335 |
+
|
| 336 |
+
- **`legality_score`**: high if the action is legal per safety report.
|
| 337 |
+
- **`safety_delta_score` / `burden_improvement_score`**: from **before/after** burden and severe DDI pair counts (`_delta_to_reward`).
|
| 338 |
+
- **`anti_cheat_score`**: collapses if anti-cheat flags the trajectory.
|
| 339 |
+
- **`uncertainty_calibration_score`**: penalizes overconfidence vs modeled uncertainty.
|
| 340 |
+
- **Sub-environment tweaks**: e.g. `WEB_SEARCH_MISSING_DATA` boosts process fidelity when using `FETCH_EXTERNAL_EVIDENCE`; `NEW_DRUG_DECOMPOSITION` rewards decomposition actions with components.
|
| 341 |
+
|
| 342 |
+
Then components are **scaled/clamped**, **primary channels** recomputed, and **`total_reward`** = weighted aggregate (`aggregate_rewards`).
|
| 343 |
+
|
| 344 |
+
**Demo line:** *“Bars update only after a real step — empty fields mean we haven’t stepped yet, not fake filler.”*
|
| 345 |
+
|
| 346 |
+
---
|
| 347 |
+
|
| 348 |
+
## Built-in **Q Tips** (on-screen tour)
|
| 349 |
+
|
| 350 |
+
Click **Q Tips** in the top bar. The app cycles **10 slides** (`App.tsx` → `GUIDE_STEPS`):
|
| 351 |
+
|
| 352 |
+
1. Start here — top bar, scenarios, reset.
|
| 353 |
+
2. Choose the runtime — Agent vs Env.
|
| 354 |
+
3. Pick a scenario — presets load real patient/regimen state.
|
| 355 |
+
4. Check the model truth — `/policy/model_status`.
|
| 356 |
+
5. Read episode state — overview + patient summary.
|
| 357 |
+
6. Review legal actions — candidates.
|
| 358 |
+
7. Submit or ask the agent — Submit vs Run Agent.
|
| 359 |
+
8. Inspect reward channels.
|
| 360 |
+
9. Medications + history/warnings.
|
| 361 |
+
10. Event log — errors and connectivity.
|
| 362 |
+
|
| 363 |
+
**Recording tip:** Record **Q Tips** once in full voiceover (“I’ll use the in-app tour…”) then dismiss and do the live walkthrough below.
|
| 364 |
+
|
| 365 |
+
---
|
| 366 |
+
|
| 367 |
+
## Shot-by-shot recording script
|
| 368 |
+
|
| 369 |
+
### Scene 0 — Intro (30–45 s)
|
| 370 |
+
|
| 371 |
+
**Action:** Scroll slightly so hero + top bar are visible.
|
| 372 |
+
**Say:** *“This is PolyGuard on Hugging Face Spaces: an OpenEnv workbench for polypharmacy safety. The backend runs a real `PolyGuardEnv` with verifiable rewards; the UI can drive it through the product API or raw OpenEnv WebSockets.”*
|
| 373 |
+
|
| 374 |
+
---
|
| 375 |
+
|
| 376 |
+
### Scene 1 — Model Truth (45–60 s)
|
| 377 |
+
|
| 378 |
+
**Action:** Stay on **Agent Workbench**. Click nothing yet; point at **Model Truth**.
|
| 379 |
+
**Say:** *“Model Truth is live from `/policy/model_status`. Here we see the base model—typically Qwen 2.5 0.5B Instruct—which artifact is loaded—often the GRPO adapter—and the run id. On Spaces, weights are under `/app/checkpoints/active` after the bundle installer runs.”*
|
| 380 |
+
|
| 381 |
+
**If panel shows unavailable:** *“Cold start or CPU load can delay the bundle; the environment still works for manual candidate submission; Run Agent may fall back to non-LLM routing depending on config.”*
|
| 382 |
+
|
| 383 |
+
---
|
| 384 |
+
|
| 385 |
+
### Scene 2 — Easy task, manual submit (Agent) (90–120 s)
|
| 386 |
+
|
| 387 |
+
**Action:** Task → **Easy Screening** (DDI, easy). **Reset Episode.**
|
| 388 |
+
**Say:** *“Easy Screening fixes difficulty easy and sub-environment DDI—drug–drug interaction screening.”*
|
| 389 |
+
|
| 390 |
+
**Action:** Pan **Episode Overview** — read **Patient Summary** and **Risk Delta** aloud briefly.
|
| 391 |
+
**Say:** *“This patient block and risk delta come straight from the observation object.”*
|
| 392 |
+
|
| 393 |
+
**Action:** **Candidate Actions** — click 2–3 rows; show **Blocked** vs legal. Select a **legal** row.
|
| 394 |
+
**Say:** *“Candidates are legal moves from the env; illegal rows are disabled.”*
|
| 395 |
+
|
| 396 |
+
**Action:** **Action Console** — tweak **Confidence** and **Rationale** slightly. Click **Submit Candidate**.
|
| 397 |
+
**Say:** *“Submit Candidate hits `/env/step_candidate` with my chosen legal action, confidence, and rationale.”*
|
| 398 |
+
|
| 399 |
+
**Action:** After response, pause on **Reward Channels** and **Last Reward** in overview.
|
| 400 |
+
**Say:** *“These bars are the verifier breakdown; total reward is the scalar GRPO-style signal we train on.”*
|
| 401 |
+
|
| 402 |
+
**Action:** **Action History** — show one new line. **Event Log** — show the new reward line.
|
| 403 |
+
**Say:** *“History and event log give an audit trail—not a canned animation.”*
|
| 404 |
+
|
| 405 |
+
---
|
| 406 |
+
|
| 407 |
+
### Scene 3 — Run Agent (orchestrator + LLM path) (90–120 s)
|
| 408 |
+
|
| 409 |
+
**Prerequisite:** Prefer recording when Model Truth shows **enabled** and **active** with Qwen artifacts.
|
| 410 |
+
|
| 411 |
+
**Action:** **Reset Episode** again (same or different task). Click **Run Agent**. Wait for completion.
|
| 412 |
+
**Say:** *“Run Agent calls `/agents/orchestrate`. That runs med reconciliation, evidence retrieval, graph safety, dosing hints, candidate generation, supervisor mode, a contextual bandit shortlist, then the planner—here that’s where the loaded Qwen policy can choose among candidates—the critic veto, environment step, and explainer.”*
|
| 413 |
+
|
| 414 |
+
**Action:** Scroll to **Decision**, **Explanation**, **Evidence** JSON panels.
|
| 415 |
+
**Say:** *“These three panels are only populated in Agent Workbench mode. Env Explorer deliberately hides them because the raw WebSocket client doesn’t run the full orchestrator response bundle.”*
|
| 416 |
+
|
| 417 |
+
**Action:** Point at **Evidence** — mention structured retriever output vs empty object if task didn’t fetch.
|
| 418 |
+
**Say:** *“Evidence is whatever the evidence agent produced for this state—grounding for clinician trust.”*
|
| 419 |
+
|
| 420 |
+
---
|
| 421 |
+
|
| 422 |
+
### Scene 4 — Env Explorer contrast (60–90 s)
|
| 423 |
+
|
| 424 |
+
**Action:** Click **Env Explorer**. **Reset Episode** (same task: Easy Screening).
|
| 425 |
+
**Say:** *“Now the UI resets over WebSocket `reset` to the OpenEnv service on port 8100—same scenarios, different transport.”*
|
| 426 |
+
|
| 427 |
+
**Action:** Select a candidate, **Submit Env Step**.
|
| 428 |
+
**Say:** *“Submit Env Step sends a WebSocket `step` with the action payload—no `/agents/orchestrate`.”*
|
| 429 |
+
|
| 430 |
+
**Action:** Scroll to **Decision / Explanation / Evidence** — show they stay **empty** or “No data.”
|
| 431 |
+
**Say:** *“This is intentional: I’m proving the low-level env API, not the full agent stack.”*
|
| 432 |
+
|
| 433 |
+
**Action:** **Event Log** — note new lines tagged from env step.
|
| 434 |
+
|
| 435 |
+
---
|
| 436 |
+
|
| 437 |
+
### Scene 5 — Task variety (2–3 minutes, optional montage)
|
| 438 |
+
|
| 439 |
+
For each preset, do **Reset** + **one** legal **Submit** (Agent mode is enough):
|
| 440 |
+
|
| 441 |
+
| Task | Difficulty | Sub-environment | What to say |
|
| 442 |
+
| --- | --- | --- | --- |
|
| 443 |
+
| **Easy Screening** | easy | DDI | “Fast DDI-focused episode.” |
|
| 444 |
+
| **Budgeted Screening** | medium | REGIMEN_RISK | “More steps, regimen-risk tradeoffs.” |
|
| 445 |
+
| **Complex Tradeoff** | hard | REGIMEN_RISK | “Harder patient draw, tighter budgets.” |
|
| 446 |
+
| **Bandit Mining** | hard | BANDIT_MINING | “Bandit-style policy mining scenario.” |
|
| 447 |
+
|
| 448 |
+
**Action:** Switch Task to **Advanced**. Set e.g. **hard** + **PRECISION_DOSING**. Reset.
|
| 449 |
+
**Say:** *“Advanced exposes every sub-environment enum the backend supports—precision dosing, deprescribing, web-search missing data, alternatives, new-drug decomposition.”*
|
| 450 |
+
|
| 451 |
+
---
|
| 452 |
+
|
| 453 |
+
### Scene 6 — Medications + warnings (45 s)
|
| 454 |
+
|
| 455 |
+
**Action:** After any step with regimen change, show **Current Medications** cards (high-risk styling).
|
| 456 |
+
**Say:** *“Cards mirror `medication_table` from the observation; warnings list is explicit env output.”*
|
| 457 |
+
|
| 458 |
+
---
|
| 459 |
+
|
| 460 |
+
### Scene 7 — Closing (30 s)
|
| 461 |
+
|
| 462 |
+
**Say:** *“That’s the full loop: HF Space hosts OpenEnv + API, Qwen adapters live under checkpoints/active, Agent Workbench demonstrates orchestrated LLM decisions with evidence and explanations, and Env Explorer proves the same environment over raw WebSockets for OpenEnv compatibility.”*
|
| 463 |
+
|
| 464 |
+
---
|
| 465 |
+
|
| 466 |
+
## OBS / QuickTime checklist
|
| 467 |
+
|
| 468 |
+
- [ ] Capture **system audio** if you add voiceover in post; or record mic in OBS.
|
| 469 |
+
- [ ] **1920×1080**, 30 fps (or 60 if you want smooth cursor).
|
| 470 |
+
- [ ] **2 s pause** after each button click before scrolling away.
|
| 471 |
+
- [ ] If Space sleeps, **mouse jiggle** or refresh before recording.
|
| 472 |
+
- [ ] Export **MP4 H.264** for YouTube / HF dataset card.
|
| 473 |
+
|
| 474 |
+
---
|
| 475 |
+
|
| 476 |
+
## Quick troubleshooting on camera (if something breaks)
|
| 477 |
+
|
| 478 |
+
| Symptom | What to say / do |
|
| 479 |
+
| --- | --- |
|
| 480 |
+
| WebSocket errors in Event Log | “Env service reconnect—refresh page; WS URL is derived from the Space origin.” |
|
| 481 |
+
| Run Agent fails | “Check Model Truth—model may still be downloading or Ollama disabled on Space.” |
|
| 482 |
+
| Reward bars all dash | “No step yet—reset and submit once.” |
|
| 483 |
+
| Candidates empty | “Reset episode—env didn’t initialize.” |
|
| 484 |
+
|
| 485 |
+
---
|
| 486 |
+
|
| 487 |
+
## Related docs
|
| 488 |
+
|
| 489 |
+
- [UI overview](ui.md)
|
| 490 |
+
- [Deployment](deployment.md)
|
| 491 |
+
- [Environment design](environment_design.md)
|
| 492 |
+
- [Reward design](reward_design.md)
|
| 493 |
+
- [Architecture](architecture.md)
|
docs/deployment.md
CHANGED
|
@@ -30,7 +30,7 @@ uv run python -c "from huggingface_hub import HfApi; print(HfApi().space_info('$
|
|
| 30 |
openenv validate --url "https://thejackbright-polyguard-openenv.hf.space"
|
| 31 |
```
|
| 32 |
|
| 33 |
-
`scripts/deploy_space_api.py` is the preferred deployment path for this repo because it uploads a valid Docker Space README frontmatter bundle through `huggingface_hub.HfApi`. `scripts/deploy_space.sh` remains available, but the current OpenEnv CLI path may fail with invalid generated `colorFrom`/`colorTo` metadata.
|
| 34 |
|
| 35 |
Useful `scripts/deploy_space.sh` flags:
|
| 36 |
|
|
|
|
| 30 |
openenv validate --url "https://thejackbright-polyguard-openenv.hf.space"
|
| 31 |
```
|
| 32 |
|
| 33 |
+
`scripts/deploy_space_api.py` is the preferred deployment path for this repo because it uploads a valid Docker Space README frontmatter bundle through `huggingface_hub.HfApi`. `scripts/deploy_space.sh` remains available, but the current OpenEnv CLI path may fail with invalid generated `colorFrom`/`colorTo` metadata. Pushing to GitHub alone does not change the Hub Space unless that Space is configured to rebuild from that repo and branch; run the deploy script (with `HF_TOKEN`) after UI or API changes so the Docker image rebuilds. See `docker/space/README.md` for Dockerfile path, monorepo layout, and cache/reboot notes.
|
| 34 |
|
| 35 |
Useful `scripts/deploy_space.sh` flags:
|
| 36 |
|
docs/final_submission_audit.md
CHANGED
|
@@ -23,7 +23,7 @@ The only known judge-facing blocker is external storytelling: the README blog UR
|
|
| 23 |
| Product/demo | Implemented | FastAPI product API, React/Vite workbench, policy lab, training monitor, replay, dosing, and safety views. |
|
| 24 |
| Results and plots | Implemented | Tracked `docs/results/*.json` and PNG plots, including SFT baseline sweep evidence and top-level environment-backed GRPO evidence. |
|
| 25 |
| HF Space deployment | Implemented | Public Space is running on CPU basic, Space metadata is available, and tracked `docs/results/hf_space_verification.json` reports OpenEnv validation passed. |
|
| 26 |
-
| Colab notebook | Implemented | README
|
| 27 |
| Story artifact | Pending external publication | `docs/hf_blog_draft.md` exists, but the README blog URL returns 404 until published. |
|
| 28 |
| Full public per-model GRPO sweep | Not claimed | Current public/tracked evidence is a 3-model SFT-baseline sweep plus a top-level GRPO run. Private training artifact repos require auth and must be mirrored before being used as public evidence. |
|
| 29 |
|
|
|
|
| 23 |
| Product/demo | Implemented | FastAPI product API, React/Vite workbench, policy lab, training monitor, replay, dosing, and safety views. |
|
| 24 |
| Results and plots | Implemented | Tracked `docs/results/*.json` and PNG plots, including SFT baseline sweep evidence and top-level environment-backed GRPO evidence. |
|
| 25 |
| HF Space deployment | Implemented | Public Space is running on CPU basic, Space metadata is available, and tracked `docs/results/hf_space_verification.json` reports OpenEnv validation passed. |
|
| 26 |
+
| Colab notebook | Implemented | README Colab URL targets `PolyGuard_SFT_GRPO_One_Run_Runner.ipynb`; `notebooks/09_training_loop.ipynb` is the modular alternative. |
|
| 27 |
| Story artifact | Pending external publication | `docs/hf_blog_draft.md` exists, but the README blog URL returns 404 until published. |
|
| 28 |
| Full public per-model GRPO sweep | Not claimed | Current public/tracked evidence is a 3-model SFT-baseline sweep plus a top-level GRPO run. Private training artifact repos require auth and must be mirrored before being used as public evidence. |
|
| 29 |
|
docs/idea_document_traceability.md
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Idea document and participant guide — implementation map
|
| 2 |
+
|
| 3 |
+
This ties your polypharmacy / OpenEnv design notes and typical hackathon submission requirements to files in this repository.
|
| 4 |
+
|
| 5 |
+
## Submission narrative (required bullets)
|
| 6 |
+
|
| 7 |
+
| Requirement | Status | Where |
|
| 8 |
+
| --- | --- | --- |
|
| 9 |
+
| Problem statement | Documented + implemented | Root [`README.md`](../../README.md), `polyguard-rl/README.md`, `docs/safety.md` |
|
| 10 |
+
| Environment (agent operates here) | Implemented | `PolyGuardEnv`, `app/env/env_core.py`, `app/env/fastapi_app.py`, `openenv.yaml`, `server/app.py` |
|
| 11 |
+
| Agent capabilities | Implemented | `app/agents/`, `docs/agents.md` |
|
| 12 |
+
| Tasks | Implemented | Scenario JSONL under `data/scenarios/`, presets in `app/env/catalog.py` |
|
| 13 |
+
| Reward / evaluation logic | Implemented | `app/env/reward_router.py`, `app/env/verifier.py`, `configs/rewards.yaml`, `docs/reward_design.md`, `docs/evaluation.md` |
|
| 14 |
+
| Post-training / self-improvement | Implemented | `scripts/train_sft_trl.py`, `scripts/train_grpo_trl.py`, `app/training/grpo_trl.py`, `docs/training.md` |
|
| 15 |
+
|
| 16 |
+
## Your “Plan” sections vs codebase
|
| 17 |
+
|
| 18 |
+
| Plan item | Status | Notes |
|
| 19 |
+
| --- | --- | --- |
|
| 20 |
+
| OpenEnv `reset` / `step` / `state`, timeouts, safety | Done | `env_core.py`, `fastapi_app.py`, max steps per sub-env, `anti_cheat.py` |
|
| 21 |
+
| Local + remote execution | Done | Local FastAPI + `docker-compose.yml`, HF Space via `scripts/deploy_space_api.py`, `Dockerfile.space`, `docker/space/` |
|
| 22 |
+
| Specific envs: DDI, bandit mining, regimen risk | Done | `SubEnvironment` enum, transitions in `app/env/transition.py` |
|
| 23 |
+
| Precision dosing, deprescribing, web search, alternatives, new drug (hard) | Done | Matching enum values + scenario tracks; “new drug” is `NEW_DRUG_DECOMPOSITION` |
|
| 24 |
+
| Multiple reward functions + anti-hacking | Done | 13 components → 4 channels; anti-cheat and tests in `tests/` |
|
| 25 |
+
| TRL + Unsloth, metrics, generations | Done | TRL scripts + reports; Unsloth optional (`--use-unsloth`); `app/training/metrics.py` |
|
| 26 |
+
| Post-training + inference | Done | merge + `test_inference_postsave.py`, active manifest / API path |
|
| 27 |
+
| Product / Space demo, UI | Done | FastAPI `app/api/`, React `app/ui/frontend/`, Space deployment scripts |
|
| 28 |
+
| Benchmarks + plots + sample generations | Done | `scripts/evaluate_*.py`, `docs/results/`, `scripts/generate_submission_evidence.py` |
|
| 29 |
+
| Deploy: OpenEnv, container, HF Space | Done | See `docs/deployment.md` |
|
| 30 |
+
| Easy / medium / hard | Done | `scenarios_easy.jsonl`, `scenarios_medium.jsonl`, `scenarios_hard.jsonl` |
|
| 31 |
+
|
| 32 |
+
## Themes (world modeling, multi-agent, self-improvement)
|
| 33 |
+
|
| 34 |
+
| Theme | Status | Notes |
|
| 35 |
+
| --- | --- | --- |
|
| 36 |
+
| World modeling / professional tasks | Primary fit | Stateful regimen, verifiers, tool-like actions |
|
| 37 |
+
| Multi-agent | Partial | Supervisor/orchestrator and policy stack (`app/agents/orchestrator.py`, `supervisor_agent.py`); not a separate multi-player env |
|
| 38 |
+
| Self-improving systems | Via GRPO | Environment-backed RLVR-style training, not online self-play |
|
| 39 |
+
|
| 40 |
+
## “What to submit” checklist
|
| 41 |
+
|
| 42 |
+
| Deliverable | Status |
|
| 43 |
+
| --- | --- |
|
| 44 |
+
| GitHub repo + URLs in README | Root + `polyguard-rl/README.md` |
|
| 45 |
+
| HF Space URL | In README |
|
| 46 |
+
| Points from doc | `docs/participant_guide_traceability.md`, this file |
|
| 47 |
+
| Colab | `PolyGuard_SFT_GRPO_One_Run_Runner.ipynb`, `notebooks/09_training_loop.ipynb` |
|
| 48 |
+
| Video or blog | README links blog; **publish** draft in `docs/hf_blog_draft.md` or swap URL |
|
| 49 |
+
|
| 50 |
+
## Future ideas from your notes (not claimed as done)
|
| 51 |
+
|
| 52 |
+
- Medicine images / barcodes: listed under Future Work in README.
|
| 53 |
+
- Web search agents: sub-env `WEB_SEARCH_MISSING_DATA` exists; “full web agent product” is beyond current scope.
|
| 54 |
+
|
| 55 |
+
## Fresh clone reminder
|
| 56 |
+
|
| 57 |
+
Generated data and many `outputs/` reports are produced by scripts (see `scripts/bootstrap_data.py`, `scripts/acceptance_gate.py` `REQUIRED_ARTIFACTS`). Run the bootstrap/build pipeline before expecting strict `POLYGUARD_ENFORCE_SUBMISSION_LINKS=true` acceptance to pass on an empty workspace.
|
docs/participant_guide_traceability.md
CHANGED
|
@@ -18,7 +18,7 @@ This audit maps the hackathon guide, FAQ, and judging criteria to concrete PolyG
|
|
| 18 |
| Export adapters safely and test inference | `scripts/merge_adapters_safe.py` and `scripts/test_inference_postsave.py` |
|
| 19 |
| Show results with plots and reports | `docs/results/*.json`, tracked reward/process/legal/success/sweep plot PNGs, a 3-model SFT-baseline sweep, and a top-level environment-backed GRPO run |
|
| 20 |
| Host the environment on Hugging Face Spaces | `scripts/deploy_space_api.py`, `scripts/deploy_space.sh`, Docker runtime, `docs/results/hf_space_verification.json`, and live Space health/metadata checks |
|
| 21 |
-
| Include a Colab training notebook | `notebooks/09_training_loop.ipynb` |
|
| 22 |
| Link story material from README | README links the selected Hugging Face blog/story URL; publish it before final hand-in if the external URL is still 404 |
|
| 23 |
|
| 24 |
## Current Evidence Status
|
|
|
|
| 18 |
| Export adapters safely and test inference | `scripts/merge_adapters_safe.py` and `scripts/test_inference_postsave.py` |
|
| 19 |
| Show results with plots and reports | `docs/results/*.json`, tracked reward/process/legal/success/sweep plot PNGs, a 3-model SFT-baseline sweep, and a top-level environment-backed GRPO run |
|
| 20 |
| Host the environment on Hugging Face Spaces | `scripts/deploy_space_api.py`, `scripts/deploy_space.sh`, Docker runtime, `docs/results/hf_space_verification.json`, and live Space health/metadata checks |
|
| 21 |
+
| Include a Colab training notebook | [`PolyGuard_SFT_GRPO_One_Run_Runner.ipynb`](../PolyGuard_SFT_GRPO_One_Run_Runner.ipynb) (README Colab link) and [`notebooks/09_training_loop.ipynb`](../notebooks/09_training_loop.ipynb) (modular walkthrough) |
|
| 22 |
| Link story material from README | README links the selected Hugging Face blog/story URL; publish it before final hand-in if the external URL is still 404 |
|
| 23 |
|
| 24 |
## Current Evidence Status
|
docs/ui.md
CHANGED
|
@@ -1,3 +1,7 @@
|
|
| 1 |
# UI
|
| 2 |
|
| 3 |
React/Vite workbench provides operator-oriented pages for patient workbench, replay, policy lab, dosing, safety, and training monitoring.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# UI
|
| 2 |
|
| 3 |
React/Vite workbench provides operator-oriented pages for patient workbench, replay, policy lab, dosing, safety, and training monitoring.
|
| 4 |
+
|
| 5 |
+
For a **panel-by-panel and agent-by-agent** reference (Model Truth, Agent vs Env, Qwen, orchestrator stack, every reward bar, Q Tips copy), see [DEMO_RECORDING_SCRIPT.md](DEMO_RECORDING_SCRIPT.md) § *Feature encyclopedia*.
|
| 6 |
+
|
| 7 |
+
The **FDA alternatives** panel (last block in the Home workbench, full width) calls `POST /tools/medication_alternatives` (up to 7 rows, scrollable list). Optional env: `POLYGUARD_OPENFDA_API_KEY`, `POLYGUARD_MED_TOOL_URL` + `POLYGUARD_MED_TOOL_TOKEN` (server-side only).
|
scripts/deploy_space_api.py
CHANGED
|
@@ -8,8 +8,9 @@ still shipping the same OpenEnv/FastAPI runtime.
|
|
| 8 |
from __future__ import annotations
|
| 9 |
|
| 10 |
import argparse
|
| 11 |
-
from pathlib import Path
|
| 12 |
import shutil
|
|
|
|
|
|
|
| 13 |
|
| 14 |
from huggingface_hub import HfApi
|
| 15 |
|
|
@@ -17,6 +18,18 @@ from huggingface_hub import HfApi
|
|
| 17 |
ROOT = Path(__file__).resolve().parents[1]
|
| 18 |
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
def parse_args() -> argparse.Namespace:
|
| 21 |
parser = argparse.ArgumentParser(description="Deploy PolyGuard OpenEnv Space with valid HF metadata.")
|
| 22 |
parser.add_argument("--repo-id", default="TheJackBright/polyguard-openenv")
|
|
@@ -64,7 +77,8 @@ def build_bundle(bundle_dir: Path) -> None:
|
|
| 64 |
"colorFrom: blue",
|
| 65 |
"colorTo: green",
|
| 66 |
"sdk: docker",
|
| 67 |
-
|
|
|
|
| 68 |
"pinned: false",
|
| 69 |
"---",
|
| 70 |
"",
|
|
@@ -83,6 +97,9 @@ def main() -> None:
|
|
| 83 |
print(f"bundle_dir={bundle_dir}")
|
| 84 |
return
|
| 85 |
|
|
|
|
|
|
|
|
|
|
| 86 |
api = HfApi()
|
| 87 |
api.create_repo(
|
| 88 |
repo_id=args.repo_id,
|
|
@@ -95,7 +112,7 @@ def main() -> None:
|
|
| 95 |
repo_id=args.repo_id,
|
| 96 |
repo_type="space",
|
| 97 |
folder_path=str(bundle_dir),
|
| 98 |
-
commit_message=
|
| 99 |
ignore_patterns=[
|
| 100 |
".git/*",
|
| 101 |
".venv/*",
|
|
@@ -109,6 +126,11 @@ def main() -> None:
|
|
| 109 |
print(f"space_url=https://huggingface.co/spaces/{args.repo_id}")
|
| 110 |
print(f"runtime_url=https://{args.repo_id.replace('/', '-').lower()}.hf.space")
|
| 111 |
print(f"bundle_dir={bundle_dir}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
|
| 114 |
if __name__ == "__main__":
|
|
|
|
| 8 |
from __future__ import annotations
|
| 9 |
|
| 10 |
import argparse
|
|
|
|
| 11 |
import shutil
|
| 12 |
+
import subprocess
|
| 13 |
+
from pathlib import Path
|
| 14 |
|
| 15 |
from huggingface_hub import HfApi
|
| 16 |
|
|
|
|
| 18 |
ROOT = Path(__file__).resolve().parents[1]
|
| 19 |
|
| 20 |
|
| 21 |
+
def _git_revision() -> str:
|
| 22 |
+
try:
|
| 23 |
+
return subprocess.check_output(
|
| 24 |
+
["git", "rev-parse", "--short", "HEAD"],
|
| 25 |
+
cwd=str(ROOT),
|
| 26 |
+
stderr=subprocess.DEVNULL,
|
| 27 |
+
text=True,
|
| 28 |
+
).strip()
|
| 29 |
+
except (subprocess.CalledProcessError, FileNotFoundError, OSError):
|
| 30 |
+
return "unknown"
|
| 31 |
+
|
| 32 |
+
|
| 33 |
def parse_args() -> argparse.Namespace:
|
| 34 |
parser = argparse.ArgumentParser(description="Deploy PolyGuard OpenEnv Space with valid HF metadata.")
|
| 35 |
parser.add_argument("--repo-id", default="TheJackBright/polyguard-openenv")
|
|
|
|
| 77 |
"colorFrom: blue",
|
| 78 |
"colorTo: green",
|
| 79 |
"sdk: docker",
|
| 80 |
+
# Must match nginx / EXPOSE in Dockerfile (Space UI + /api proxy); 8100/8200 are loopback-only.
|
| 81 |
+
"app_port: 7860",
|
| 82 |
"pinned: false",
|
| 83 |
"---",
|
| 84 |
"",
|
|
|
|
| 97 |
print(f"bundle_dir={bundle_dir}")
|
| 98 |
return
|
| 99 |
|
| 100 |
+
rev = _git_revision()
|
| 101 |
+
commit_message = f"Deploy PolyGuard OpenEnv Space (polyguard-rl @ {rev})"
|
| 102 |
+
|
| 103 |
api = HfApi()
|
| 104 |
api.create_repo(
|
| 105 |
repo_id=args.repo_id,
|
|
|
|
| 112 |
repo_id=args.repo_id,
|
| 113 |
repo_type="space",
|
| 114 |
folder_path=str(bundle_dir),
|
| 115 |
+
commit_message=commit_message,
|
| 116 |
ignore_patterns=[
|
| 117 |
".git/*",
|
| 118 |
".venv/*",
|
|
|
|
| 126 |
print(f"space_url=https://huggingface.co/spaces/{args.repo_id}")
|
| 127 |
print(f"runtime_url=https://{args.repo_id.replace('/', '-').lower()}.hf.space")
|
| 128 |
print(f"bundle_dir={bundle_dir}")
|
| 129 |
+
print(f"deployed_src_revision={rev}")
|
| 130 |
+
print(
|
| 131 |
+
"If the live UI still looks old: open the Space → Settings → Factory reboot, "
|
| 132 |
+
"or hard-refresh the browser (Vite hashes usually bust cache after rebuild)."
|
| 133 |
+
)
|
| 134 |
|
| 135 |
|
| 136 |
if __name__ == "__main__":
|
scripts/install_hf_active_bundle.py
CHANGED
|
@@ -77,13 +77,24 @@ def main() -> None:
|
|
| 77 |
allow = f"{args.bundle_path.strip('/')}/**"
|
| 78 |
|
| 79 |
print(f"Downloading snapshot of {args.repo_id}@{args.revision} (pattern {allow}) …", flush=True)
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
bundle_root = snap_root / args.bundle_path
|
| 89 |
ckpt_src = bundle_root / "checkpoints"
|
|
|
|
| 77 |
allow = f"{args.bundle_path.strip('/')}/**"
|
| 78 |
|
| 79 |
print(f"Downloading snapshot of {args.repo_id}@{args.revision} (pattern {allow}) …", flush=True)
|
| 80 |
+
try:
|
| 81 |
+
snapshot_download(
|
| 82 |
+
repo_id=args.repo_id,
|
| 83 |
+
repo_type="model",
|
| 84 |
+
revision=args.revision,
|
| 85 |
+
local_dir=str(snap_root),
|
| 86 |
+
allow_patterns=[allow],
|
| 87 |
+
)
|
| 88 |
+
except Exception as exc:
|
| 89 |
+
err = f"{type(exc).__name__}: {exc}"
|
| 90 |
+
hint = (
|
| 91 |
+
"\n[install_hf_active_bundle] Hub returned an error (401/404 often means the artifact repo is private or gated).\n"
|
| 92 |
+
" • Hugging Face Space: Space Settings → Secrets → add HF_TOKEN (read access to that model repo).\n"
|
| 93 |
+
" • Or change the repo to public / accept the license on the model card while logged in.\n"
|
| 94 |
+
" • Without a successful download, POLYGUARD falls back to heuristics / ranker (no local GRPO weights).\n"
|
| 95 |
+
)
|
| 96 |
+
print(f"{hint} • Raw error: {err}\n", flush=True)
|
| 97 |
+
raise SystemExit(1) from exc
|
| 98 |
|
| 99 |
bundle_root = snap_root / args.bundle_path
|
| 100 |
ckpt_src = bundle_root / "checkpoints"
|
submission_bundle/README.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PolyGuard Submission Bundle
|
| 2 |
+
|
| 3 |
+
This folder contains push-friendly training evidence from the Hugging Face runs.
|
| 4 |
+
|
| 5 |
+
Included:
|
| 6 |
+
- `grpo_training_cycle/`: GRPO run plots, reports, and docs result JSONs.
|
| 7 |
+
- `sft_baseline/`: SFT baseline sweep plots, reports, and docs result JSONs.
|
| 8 |
+
- `local_results/`: Current local result mirrors from `outputs/plots`, `outputs/reports`, and `docs/results`.
|
| 9 |
+
- `notebooks/`: Project notebooks, including `09_training_loop.ipynb`.
|
| 10 |
+
|
| 11 |
+
Not included:
|
| 12 |
+
- Model weights, adapters, checkpoints, optimizer states, or tokenizer/model binary payloads.
|
| 13 |
+
- These were intentionally removed after download cancellation so the repository can be pushed without large model artifacts.
|
submission_bundle/grpo_training_cycle/docs_results/README.md
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Result Artifacts
|
| 2 |
+
|
| 3 |
+
These tracked files mirror the latest local smoke/evaluation artifacts so the README can show stable evidence even though `outputs/` and `checkpoints/` are intentionally git-ignored.
|
| 4 |
+
|
| 5 |
+
Current status:
|
| 6 |
+
|
| 7 |
+
- OpenEnv structure/runtime validation passes locally.
|
| 8 |
+
- Test suite passes locally.
|
| 9 |
+
- Frontend production build passes locally.
|
| 10 |
+
- SFT and GRPO artifacts in this folder are non-fallback TRL Transformers evidence from a tiny local compliance run.
|
| 11 |
+
- `postsave_inference.json` loads the merged artifact rather than the fallback policy.
|
| 12 |
+
- `improvement_report.json` shows positive average-reward improvement against the no-change baseline.
|
| 13 |
+
- `hf_space_verification.json` records a live Hugging Face Space validation pass.
|
| 14 |
+
|
| 15 |
+
For a stronger final pitch, replace these artifacts after a larger Colab/HF GPU run:
|
| 16 |
+
|
| 17 |
+
- `sft_trl_run.json`
|
| 18 |
+
- `grpo_trl_run.json`
|
| 19 |
+
- `postsave_inference.json`
|
| 20 |
+
- `improvement_report.json`
|
| 21 |
+
- all plot PNGs
|
| 22 |
+
- `hf_space_verification.json`
|
submission_bundle/grpo_training_cycle/docs_results/acceptance_gate.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"missing_files": [],
|
| 3 |
+
"missing_artifacts": [],
|
| 4 |
+
"missing_readme_markers": [],
|
| 5 |
+
"missing_readme_links": [],
|
| 6 |
+
"strict_submission_links": true,
|
| 7 |
+
"missing_submission_env": [],
|
| 8 |
+
"strict_submission_failures": [],
|
| 9 |
+
"submission_ready": true,
|
| 10 |
+
"status": "ok"
|
| 11 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/avg_process_fidelity.png
ADDED
|
submission_bundle/grpo_training_cycle/docs_results/avg_reward.png
ADDED
|
submission_bundle/grpo_training_cycle/docs_results/baselines.json
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"no_change": {
|
| 3 |
+
"mode": "REGIMEN_OPT",
|
| 4 |
+
"action_type": "KEEP_REGIMEN",
|
| 5 |
+
"target_drug": null,
|
| 6 |
+
"replacement_drug": null,
|
| 7 |
+
"dose_bucket": "NA",
|
| 8 |
+
"taper_days": null,
|
| 9 |
+
"monitoring_plan": null,
|
| 10 |
+
"evidence_query": null,
|
| 11 |
+
"new_drug_name": null,
|
| 12 |
+
"candidate_components": [],
|
| 13 |
+
"candidate_id": "cand_01",
|
| 14 |
+
"confidence": 0.8,
|
| 15 |
+
"rationale_brief": "Baseline no-change policy."
|
| 16 |
+
},
|
| 17 |
+
"rules_only": {
|
| 18 |
+
"mode": "REGIMEN_OPT",
|
| 19 |
+
"action_type": "SUBSTITUTE_WITHIN_CLASS",
|
| 20 |
+
"target_drug": "opioid_like",
|
| 21 |
+
"replacement_drug": "non_opioid_analgesic",
|
| 22 |
+
"dose_bucket": "NA",
|
| 23 |
+
"taper_days": null,
|
| 24 |
+
"monitoring_plan": null,
|
| 25 |
+
"evidence_query": null,
|
| 26 |
+
"new_drug_name": null,
|
| 27 |
+
"candidate_components": [],
|
| 28 |
+
"candidate_id": "cand_04",
|
| 29 |
+
"confidence": 0.75,
|
| 30 |
+
"rationale_brief": "Rules-only selected top legal candidate."
|
| 31 |
+
},
|
| 32 |
+
"greedy": {
|
| 33 |
+
"mode": "REGIMEN_OPT",
|
| 34 |
+
"action_type": "SUBSTITUTE_WITHIN_CLASS",
|
| 35 |
+
"target_drug": "opioid_like",
|
| 36 |
+
"replacement_drug": "non_opioid_analgesic",
|
| 37 |
+
"dose_bucket": "NA",
|
| 38 |
+
"taper_days": null,
|
| 39 |
+
"monitoring_plan": null,
|
| 40 |
+
"evidence_query": null,
|
| 41 |
+
"new_drug_name": null,
|
| 42 |
+
"candidate_components": [],
|
| 43 |
+
"candidate_id": "cand_04",
|
| 44 |
+
"confidence": 0.72,
|
| 45 |
+
"rationale_brief": "Greedy safety/burden improvement baseline."
|
| 46 |
+
},
|
| 47 |
+
"contextual_bandit": {
|
| 48 |
+
"mode": "REGIMEN_OPT",
|
| 49 |
+
"action_type": "SUBSTITUTE_WITHIN_CLASS",
|
| 50 |
+
"target_drug": "opioid_like",
|
| 51 |
+
"replacement_drug": "non_opioid_analgesic",
|
| 52 |
+
"dose_bucket": "NA",
|
| 53 |
+
"taper_days": null,
|
| 54 |
+
"monitoring_plan": null,
|
| 55 |
+
"evidence_query": null,
|
| 56 |
+
"new_drug_name": null,
|
| 57 |
+
"candidate_components": [],
|
| 58 |
+
"candidate_id": "cand_04",
|
| 59 |
+
"confidence": 0.68,
|
| 60 |
+
"rationale_brief": "Contextual bandit selected candidate."
|
| 61 |
+
},
|
| 62 |
+
"contextual_bandit_topk": [
|
| 63 |
+
{
|
| 64 |
+
"candidate_id": "cand_09",
|
| 65 |
+
"score": 1.1532307878304324,
|
| 66 |
+
"exploration_bonus": 1.1532307878304324,
|
| 67 |
+
"algorithm": "linucb"
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"candidate_id": "cand_10",
|
| 71 |
+
"score": 1.1489735636645433,
|
| 72 |
+
"exploration_bonus": 1.1489735636645433,
|
| 73 |
+
"algorithm": "linucb"
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"candidate_id": "cand_08",
|
| 77 |
+
"score": 1.1447401451857973,
|
| 78 |
+
"exploration_bonus": 1.1447401451857973,
|
| 79 |
+
"algorithm": "linucb"
|
| 80 |
+
}
|
| 81 |
+
],
|
| 82 |
+
"beam_search": {
|
| 83 |
+
"mode": "REGIMEN_OPT",
|
| 84 |
+
"action_type": "SUBSTITUTE_WITHIN_CLASS",
|
| 85 |
+
"target_drug": "opioid_like",
|
| 86 |
+
"replacement_drug": "non_opioid_analgesic",
|
| 87 |
+
"dose_bucket": "NA",
|
| 88 |
+
"taper_days": null,
|
| 89 |
+
"monitoring_plan": null,
|
| 90 |
+
"evidence_query": null,
|
| 91 |
+
"new_drug_name": null,
|
| 92 |
+
"candidate_components": [],
|
| 93 |
+
"candidate_id": "cand_04",
|
| 94 |
+
"confidence": 0.74,
|
| 95 |
+
"rationale_brief": "Beam-search(3) top candidate."
|
| 96 |
+
},
|
| 97 |
+
"baseline_policy": "no_change_candidate",
|
| 98 |
+
"episodes": 8,
|
| 99 |
+
"avg_reward": 0.747,
|
| 100 |
+
"legality_rate": 1.0,
|
| 101 |
+
"success_rate": 0.0,
|
| 102 |
+
"policy_stack_ablations": {
|
| 103 |
+
"bandit-only": {
|
| 104 |
+
"avg_reward": 0.7616666666666667,
|
| 105 |
+
"legality_rate": 1.0,
|
| 106 |
+
"steps": 3.0
|
| 107 |
+
},
|
| 108 |
+
"llm-only": {
|
| 109 |
+
"avg_reward": 0.7753333333333333,
|
| 110 |
+
"legality_rate": 1.0,
|
| 111 |
+
"steps": 3.0
|
| 112 |
+
},
|
| 113 |
+
"llm+bandit": {
|
| 114 |
+
"avg_reward": 0.7753333333333333,
|
| 115 |
+
"legality_rate": 1.0,
|
| 116 |
+
"steps": 3.0
|
| 117 |
+
}
|
| 118 |
+
}
|
| 119 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/benchmark_report.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"offline_policy_eval": {
|
| 3 |
+
"avg_reward": 0.772833,
|
| 4 |
+
"legal_rate": 1.0,
|
| 5 |
+
"success_rate": 0.0
|
| 6 |
+
},
|
| 7 |
+
"safety_eval": {
|
| 8 |
+
"severe_violation_rate": 0.0,
|
| 9 |
+
"illegal_step_rate": 0.0
|
| 10 |
+
},
|
| 11 |
+
"dosing_eval": {
|
| 12 |
+
"target_attainment": 0.75,
|
| 13 |
+
"toxicity_avoidance": 1.0
|
| 14 |
+
},
|
| 15 |
+
"robustness_eval": {
|
| 16 |
+
"missing_labs_safety_rate": 0.666667,
|
| 17 |
+
"noisy_dose_info_safety_rate": 1.0,
|
| 18 |
+
"conflicting_meds_safety_rate": 1.0,
|
| 19 |
+
"alias_noise_safety_rate": 1.0,
|
| 20 |
+
"hidden_duplicate_detection_rate": 1.0,
|
| 21 |
+
"wrong_candidate_id_resilience": 1.0,
|
| 22 |
+
"stale_evidence_safety_rate": 1.0,
|
| 23 |
+
"delayed_ade_manifestation_safety_rate": 1.0
|
| 24 |
+
},
|
| 25 |
+
"calibration_eval": {
|
| 26 |
+
"ece_proxy": 0.08625
|
| 27 |
+
},
|
| 28 |
+
"abstention_eval": {
|
| 29 |
+
"appropriate_abstention_rate": 0.0
|
| 30 |
+
},
|
| 31 |
+
"process_eval": {
|
| 32 |
+
"process_fidelity": 0.92,
|
| 33 |
+
"avg_invalid_actions": 0.333333
|
| 34 |
+
},
|
| 35 |
+
"subgroup_eval": {
|
| 36 |
+
"renal_compromise": {
|
| 37 |
+
"avg_reward": 0.774,
|
| 38 |
+
"legal_rate": 1.0
|
| 39 |
+
},
|
| 40 |
+
"hepatic_compromise": {
|
| 41 |
+
"avg_reward": 0.779333,
|
| 42 |
+
"legal_rate": 1.0
|
| 43 |
+
},
|
| 44 |
+
"frail": {
|
| 45 |
+
"avg_reward": 0.781667,
|
| 46 |
+
"legal_rate": 1.0
|
| 47 |
+
}
|
| 48 |
+
},
|
| 49 |
+
"explainability_eval": {
|
| 50 |
+
"grounding_rate": 0.8
|
| 51 |
+
}
|
| 52 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/benchmark_report.txt
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"offline_policy_eval": {
|
| 3 |
+
"avg_reward": 0.772833,
|
| 4 |
+
"legal_rate": 1.0,
|
| 5 |
+
"success_rate": 0.0
|
| 6 |
+
},
|
| 7 |
+
"safety_eval": {
|
| 8 |
+
"severe_violation_rate": 0.0,
|
| 9 |
+
"illegal_step_rate": 0.0
|
| 10 |
+
},
|
| 11 |
+
"dosing_eval": {
|
| 12 |
+
"target_attainment": 0.75,
|
| 13 |
+
"toxicity_avoidance": 1.0
|
| 14 |
+
},
|
| 15 |
+
"robustness_eval": {
|
| 16 |
+
"missing_labs_safety_rate": 0.666667,
|
| 17 |
+
"noisy_dose_info_safety_rate": 1.0,
|
| 18 |
+
"conflicting_meds_safety_rate": 1.0,
|
| 19 |
+
"alias_noise_safety_rate": 1.0,
|
| 20 |
+
"hidden_duplicate_detection_rate": 1.0,
|
| 21 |
+
"wrong_candidate_id_resilience": 1.0,
|
| 22 |
+
"stale_evidence_safety_rate": 1.0,
|
| 23 |
+
"delayed_ade_manifestation_safety_rate": 1.0
|
| 24 |
+
},
|
| 25 |
+
"calibration_eval": {
|
| 26 |
+
"ece_proxy": 0.08625
|
| 27 |
+
},
|
| 28 |
+
"abstention_eval": {
|
| 29 |
+
"appropriate_abstention_rate": 0.0
|
| 30 |
+
},
|
| 31 |
+
"process_eval": {
|
| 32 |
+
"process_fidelity": 0.92,
|
| 33 |
+
"avg_invalid_actions": 0.333333
|
| 34 |
+
},
|
| 35 |
+
"subgroup_eval": {
|
| 36 |
+
"renal_compromise": {
|
| 37 |
+
"avg_reward": 0.774,
|
| 38 |
+
"legal_rate": 1.0
|
| 39 |
+
},
|
| 40 |
+
"hepatic_compromise": {
|
| 41 |
+
"avg_reward": 0.779333,
|
| 42 |
+
"legal_rate": 1.0
|
| 43 |
+
},
|
| 44 |
+
"frail": {
|
| 45 |
+
"avg_reward": 0.781667,
|
| 46 |
+
"legal_rate": 1.0
|
| 47 |
+
}
|
| 48 |
+
},
|
| 49 |
+
"explainability_eval": {
|
| 50 |
+
"grounding_rate": 0.8
|
| 51 |
+
}
|
| 52 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/dose_train.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_size": 120.0,
|
| 3 |
+
"status": "trained",
|
| 4 |
+
"train_mae": 0.0025,
|
| 5 |
+
"model_path": "outputs/models/dose_model.pkl"
|
| 6 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/dosing_grpo.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"avg_reward": 0.7785555555555557,
|
| 3 |
+
"legality_rate": 1.0,
|
| 4 |
+
"severe_violation_rate": 0.0,
|
| 5 |
+
"abstention_rate": 0.0,
|
| 6 |
+
"avg_episode_length": 2.0,
|
| 7 |
+
"success_rate": 0.0,
|
| 8 |
+
"avg_burden_delta": 0.0,
|
| 9 |
+
"avg_safety_delta": 0.5,
|
| 10 |
+
"avg_dosing_quality": 0.75,
|
| 11 |
+
"avg_process_fidelity": 0.9200000000000002,
|
| 12 |
+
"exploit_detection_count": 3.0,
|
| 13 |
+
"reward_columns": {
|
| 14 |
+
"format_compliance_score": 0.999,
|
| 15 |
+
"candidate_alignment_score": 0.999,
|
| 16 |
+
"legality_score": 0.999,
|
| 17 |
+
"safety_delta_score": 0.5,
|
| 18 |
+
"burden_improvement_score": 0.5,
|
| 19 |
+
"disease_stability_score": 0.9000000000000001,
|
| 20 |
+
"dosing_quality_score": 0.75,
|
| 21 |
+
"abstention_quality_score": 0.56,
|
| 22 |
+
"efficiency_score": 0.77,
|
| 23 |
+
"process_fidelity_score": 0.9200000000000002,
|
| 24 |
+
"explanation_grounding_score": 0.7999999999999999,
|
| 25 |
+
"anti_cheat_score": 0.6663333333333333,
|
| 26 |
+
"uncertainty_calibration_score": 0.87
|
| 27 |
+
}
|
| 28 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/frontier_ready.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"frontier_models": [
|
| 3 |
+
"qwen2.5:7b-instruct",
|
| 4 |
+
"qwen2.5:14b-instruct"
|
| 5 |
+
],
|
| 6 |
+
"deployment_mode": "hf_or_vllm_ready",
|
| 7 |
+
"notes": "Baseline complete; ready for larger model sweep."
|
| 8 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/graph_train.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"num_samples": 180,
|
| 3 |
+
"status": "trained",
|
| 4 |
+
"model_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/models/graph_model.pkl"
|
| 5 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/grpo_ablation_report.json
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"status": "ok",
|
| 3 |
+
"ablations": {
|
| 4 |
+
"bandit_only": {
|
| 5 |
+
"avg_reward": 0.779625,
|
| 6 |
+
"legality_rate": 1.0,
|
| 7 |
+
"severe_violation_rate": 0.0,
|
| 8 |
+
"abstention_rate": 0.0,
|
| 9 |
+
"avg_episode_length": 2.8125,
|
| 10 |
+
"success_rate": 0.0,
|
| 11 |
+
"avg_burden_delta": 0.0,
|
| 12 |
+
"avg_safety_delta": 0.483125,
|
| 13 |
+
"avg_dosing_quality": 0.75,
|
| 14 |
+
"avg_process_fidelity": 0.9056250000000008,
|
| 15 |
+
"exploit_detection_count": 2.0,
|
| 16 |
+
"timeout_rate": 0.0,
|
| 17 |
+
"failure_visible_rate": 0.0625,
|
| 18 |
+
"avg_invalid_actions": 0.0625,
|
| 19 |
+
"reward_columns": {
|
| 20 |
+
"format_compliance_score": 0.9989999999999996,
|
| 21 |
+
"candidate_alignment_score": 0.9989999999999996,
|
| 22 |
+
"legality_score": 0.9989999999999996,
|
| 23 |
+
"safety_delta_score": 0.483125,
|
| 24 |
+
"burden_improvement_score": 0.5,
|
| 25 |
+
"disease_stability_score": 0.8999999999999995,
|
| 26 |
+
"dosing_quality_score": 0.75,
|
| 27 |
+
"abstention_quality_score": 0.5600000000000002,
|
| 28 |
+
"efficiency_score": 0.5855625,
|
| 29 |
+
"process_fidelity_score": 0.9056250000000008,
|
| 30 |
+
"explanation_grounding_score": 0.8000000000000004,
|
| 31 |
+
"anti_cheat_score": 0.9366249999999997,
|
| 32 |
+
"uncertainty_calibration_score": 0.8531250000000004
|
| 33 |
+
},
|
| 34 |
+
"primary_reward_channels": {
|
| 35 |
+
"safety_legality": 0.9469062499999998,
|
| 36 |
+
"clinical_improvement": 0.6273749999999997,
|
| 37 |
+
"dosing_quality": 0.6550000000000001,
|
| 38 |
+
"process_integrity": 0.8225937500000001
|
| 39 |
+
},
|
| 40 |
+
"policy_stack": "bandit-only",
|
| 41 |
+
"failure_mining": {
|
| 42 |
+
"total_rows": 32,
|
| 43 |
+
"failure_rows": 2,
|
| 44 |
+
"top_failure_reasons": [
|
| 45 |
+
{
|
| 46 |
+
"reason": "repeated_action_loop",
|
| 47 |
+
"count": 2
|
| 48 |
+
}
|
| 49 |
+
]
|
| 50 |
+
}
|
| 51 |
+
},
|
| 52 |
+
"llm_only": {
|
| 53 |
+
"avg_reward": 0.7723913043478261,
|
| 54 |
+
"legality_rate": 1.0,
|
| 55 |
+
"severe_violation_rate": 0.0,
|
| 56 |
+
"abstention_rate": 0.0,
|
| 57 |
+
"avg_episode_length": 1.9565217391304348,
|
| 58 |
+
"success_rate": 0.0,
|
| 59 |
+
"avg_burden_delta": 0.0,
|
| 60 |
+
"avg_safety_delta": 0.4882608695652174,
|
| 61 |
+
"avg_dosing_quality": 0.75,
|
| 62 |
+
"avg_process_fidelity": 0.9000000000000005,
|
| 63 |
+
"exploit_detection_count": 7.0,
|
| 64 |
+
"timeout_rate": 0.0,
|
| 65 |
+
"failure_visible_rate": 0.30434782608695654,
|
| 66 |
+
"avg_invalid_actions": 0.30434782608695654,
|
| 67 |
+
"reward_columns": {
|
| 68 |
+
"format_compliance_score": 0.9989999999999999,
|
| 69 |
+
"candidate_alignment_score": 0.9989999999999999,
|
| 70 |
+
"legality_score": 0.9989999999999999,
|
| 71 |
+
"safety_delta_score": 0.4882608695652174,
|
| 72 |
+
"burden_improvement_score": 0.5,
|
| 73 |
+
"disease_stability_score": 0.8999999999999998,
|
| 74 |
+
"dosing_quality_score": 0.75,
|
| 75 |
+
"abstention_quality_score": 0.5600000000000004,
|
| 76 |
+
"efficiency_score": 0.7027826086956522,
|
| 77 |
+
"process_fidelity_score": 0.9000000000000005,
|
| 78 |
+
"explanation_grounding_score": 0.8000000000000003,
|
| 79 |
+
"anti_cheat_score": 0.6952608695652175,
|
| 80 |
+
"uncertainty_calibration_score": 0.8482608695652176
|
| 81 |
+
},
|
| 82 |
+
"primary_reward_channels": {
|
| 83 |
+
"safety_legality": 0.8853478260869562,
|
| 84 |
+
"clinical_improvement": 0.6290869565217388,
|
| 85 |
+
"dosing_quality": 0.6549999999999998,
|
| 86 |
+
"process_integrity": 0.8504782608695656
|
| 87 |
+
},
|
| 88 |
+
"policy_stack": "llm-only",
|
| 89 |
+
"failure_mining": {
|
| 90 |
+
"total_rows": 23,
|
| 91 |
+
"failure_rows": 7,
|
| 92 |
+
"top_failure_reasons": [
|
| 93 |
+
{
|
| 94 |
+
"reason": "repeated_action_loop",
|
| 95 |
+
"count": 7
|
| 96 |
+
}
|
| 97 |
+
]
|
| 98 |
+
}
|
| 99 |
+
},
|
| 100 |
+
"llm_bandit": {
|
| 101 |
+
"avg_reward": 0.7647391304347826,
|
| 102 |
+
"legality_rate": 1.0,
|
| 103 |
+
"severe_violation_rate": 0.0,
|
| 104 |
+
"abstention_rate": 0.0,
|
| 105 |
+
"avg_episode_length": 1.9565217391304348,
|
| 106 |
+
"success_rate": 0.0,
|
| 107 |
+
"avg_burden_delta": 0.0,
|
| 108 |
+
"avg_safety_delta": 0.48982608695652174,
|
| 109 |
+
"avg_dosing_quality": 0.717391304347826,
|
| 110 |
+
"avg_process_fidelity": 0.9000000000000005,
|
| 111 |
+
"exploit_detection_count": 7.0,
|
| 112 |
+
"timeout_rate": 0.0,
|
| 113 |
+
"failure_visible_rate": 0.30434782608695654,
|
| 114 |
+
"avg_invalid_actions": 0.30434782608695654,
|
| 115 |
+
"reward_columns": {
|
| 116 |
+
"format_compliance_score": 0.9989999999999999,
|
| 117 |
+
"candidate_alignment_score": 0.9989999999999999,
|
| 118 |
+
"legality_score": 0.9989999999999999,
|
| 119 |
+
"safety_delta_score": 0.48982608695652174,
|
| 120 |
+
"burden_improvement_score": 0.5043478260869565,
|
| 121 |
+
"disease_stability_score": 0.8582608695652173,
|
| 122 |
+
"dosing_quality_score": 0.717391304347826,
|
| 123 |
+
"abstention_quality_score": 0.5600000000000004,
|
| 124 |
+
"efficiency_score": 0.7027826086956522,
|
| 125 |
+
"process_fidelity_score": 0.9000000000000005,
|
| 126 |
+
"explanation_grounding_score": 0.8000000000000003,
|
| 127 |
+
"anti_cheat_score": 0.6952608695652175,
|
| 128 |
+
"uncertainty_calibration_score": 0.8126086956521739
|
| 129 |
+
},
|
| 130 |
+
"primary_reward_channels": {
|
| 131 |
+
"safety_legality": 0.8765217391304347,
|
| 132 |
+
"clinical_improvement": 0.6171739130434781,
|
| 133 |
+
"dosing_quality": 0.6386956521739129,
|
| 134 |
+
"process_integrity": 0.8504782608695656
|
| 135 |
+
},
|
| 136 |
+
"policy_stack": "llm+bandit",
|
| 137 |
+
"failure_mining": {
|
| 138 |
+
"total_rows": 23,
|
| 139 |
+
"failure_rows": 7,
|
| 140 |
+
"top_failure_reasons": [
|
| 141 |
+
{
|
| 142 |
+
"reason": "repeated_action_loop",
|
| 143 |
+
"count": 7
|
| 144 |
+
}
|
| 145 |
+
]
|
| 146 |
+
}
|
| 147 |
+
}
|
| 148 |
+
}
|
| 149 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"status": "ok",
|
| 3 |
+
"backend": "trl_transformers",
|
| 4 |
+
"model_id": "Qwen/Qwen2.5-0.5B-Instruct",
|
| 5 |
+
"records": 2000,
|
| 6 |
+
"prompts_path": "/app/data/processed/training_corpus_grpo_prompts.jsonl",
|
| 7 |
+
"reward_summary": {
|
| 8 |
+
"count": 4000,
|
| 9 |
+
"avg_reward": 0.782178,
|
| 10 |
+
"avg_reward_components": {
|
| 11 |
+
"format_compliance_score": 0.999,
|
| 12 |
+
"candidate_alignment_score": 0.999,
|
| 13 |
+
"legality_score": 0.985277,
|
| 14 |
+
"safety_delta_score": 0.496104,
|
| 15 |
+
"burden_improvement_score": 0.494346,
|
| 16 |
+
"disease_stability_score": 0.8912,
|
| 17 |
+
"dosing_quality_score": 0.511938,
|
| 18 |
+
"abstention_quality_score": 0.56,
|
| 19 |
+
"efficiency_score": 0.84942,
|
| 20 |
+
"process_fidelity_score": 0.905268,
|
| 21 |
+
"explanation_grounding_score": 0.800248,
|
| 22 |
+
"anti_cheat_score": 0.48004,
|
| 23 |
+
"uncertainty_calibration_score": 0.730195
|
| 24 |
+
},
|
| 25 |
+
"avg_primary_reward_channels": {
|
| 26 |
+
"safety_legality": 0.798661,
|
| 27 |
+
"clinical_improvement": 0.62689,
|
| 28 |
+
"dosing_quality": 0.535969,
|
| 29 |
+
"process_integrity": 0.888448
|
| 30 |
+
}
|
| 31 |
+
},
|
| 32 |
+
"reward_log": "/app/checkpoints/grpo_reward_components.jsonl",
|
| 33 |
+
"train_metrics": {
|
| 34 |
+
"train_runtime": 6960.8084,
|
| 35 |
+
"train_samples_per_second": 0.287,
|
| 36 |
+
"train_steps_per_second": 0.287,
|
| 37 |
+
"total_flos": 0.0,
|
| 38 |
+
"train_loss": 2.3633859725151752e-06
|
| 39 |
+
},
|
| 40 |
+
"artifact_path": "/app/checkpoints/grpo_adapter",
|
| 41 |
+
"unsloth_available": false
|
| 42 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_auto.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"status": "fallback",
|
| 3 |
+
"backend": "env_reward_fallback",
|
| 4 |
+
"model_id": "Qwen/Qwen2.5-1.5B-Instruct",
|
| 5 |
+
"records": 2,
|
| 6 |
+
"prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl",
|
| 7 |
+
"reward_summary": {
|
| 8 |
+
"count": 2,
|
| 9 |
+
"avg_reward": 0.798,
|
| 10 |
+
"avg_reward_components": {
|
| 11 |
+
"format_compliance_score": 0.999,
|
| 12 |
+
"candidate_alignment_score": 0.999,
|
| 13 |
+
"legality_score": 0.999,
|
| 14 |
+
"safety_delta_score": 0.671,
|
| 15 |
+
"burden_improvement_score": 0.525,
|
| 16 |
+
"disease_stability_score": 0.74,
|
| 17 |
+
"dosing_quality_score": 0.5,
|
| 18 |
+
"abstention_quality_score": 0.56,
|
| 19 |
+
"efficiency_score": 0.857,
|
| 20 |
+
"process_fidelity_score": 0.92,
|
| 21 |
+
"explanation_grounding_score": 0.8,
|
| 22 |
+
"anti_cheat_score": 0.5,
|
| 23 |
+
"uncertainty_calibration_score": 0.74
|
| 24 |
+
},
|
| 25 |
+
"avg_primary_reward_channels": {
|
| 26 |
+
"safety_legality": 0.8095,
|
| 27 |
+
"clinical_improvement": 0.645,
|
| 28 |
+
"dosing_quality": 0.53,
|
| 29 |
+
"process_integrity": 0.894
|
| 30 |
+
}
|
| 31 |
+
},
|
| 32 |
+
"reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl",
|
| 33 |
+
"train_metrics": {
|
| 34 |
+
"steps_executed": 2.0
|
| 35 |
+
},
|
| 36 |
+
"artifact_path": "",
|
| 37 |
+
"unsloth_available": false,
|
| 38 |
+
"trl_runtime_error": "We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.\nCheck your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'."
|
| 39 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_fallback_check.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"status": "fallback",
|
| 3 |
+
"backend": "env_reward_fallback",
|
| 4 |
+
"model_id": "Qwen/Qwen2.5-1.5B-Instruct",
|
| 5 |
+
"records": 1,
|
| 6 |
+
"prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl",
|
| 7 |
+
"reward_summary": {
|
| 8 |
+
"count": 1,
|
| 9 |
+
"avg_reward": 0.764,
|
| 10 |
+
"avg_reward_components": {
|
| 11 |
+
"format_compliance_score": 0.999,
|
| 12 |
+
"candidate_alignment_score": 0.999,
|
| 13 |
+
"legality_score": 0.999,
|
| 14 |
+
"safety_delta_score": 0.5,
|
| 15 |
+
"burden_improvement_score": 0.5,
|
| 16 |
+
"disease_stability_score": 0.9,
|
| 17 |
+
"dosing_quality_score": 0.5,
|
| 18 |
+
"abstention_quality_score": 0.56,
|
| 19 |
+
"efficiency_score": 0.857,
|
| 20 |
+
"process_fidelity_score": 0.92,
|
| 21 |
+
"explanation_grounding_score": 0.8,
|
| 22 |
+
"anti_cheat_score": 0.001,
|
| 23 |
+
"uncertainty_calibration_score": 0.7
|
| 24 |
+
},
|
| 25 |
+
"avg_primary_reward_channels": {
|
| 26 |
+
"safety_legality": 0.675,
|
| 27 |
+
"clinical_improvement": 0.633,
|
| 28 |
+
"dosing_quality": 0.53,
|
| 29 |
+
"process_integrity": 0.894
|
| 30 |
+
}
|
| 31 |
+
},
|
| 32 |
+
"reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl",
|
| 33 |
+
"train_metrics": {
|
| 34 |
+
"steps_executed": 1.0
|
| 35 |
+
},
|
| 36 |
+
"artifact_path": "",
|
| 37 |
+
"unsloth_available": false,
|
| 38 |
+
"trl_runtime_error": "forced_fallback"
|
| 39 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_smoke.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"status": "fallback",
|
| 3 |
+
"backend": "env_reward_fallback",
|
| 4 |
+
"model_id": "Qwen/Qwen2.5-1.5B-Instruct",
|
| 5 |
+
"records": 1,
|
| 6 |
+
"prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl",
|
| 7 |
+
"reward_summary": {
|
| 8 |
+
"count": 1,
|
| 9 |
+
"avg_reward": 0.764,
|
| 10 |
+
"avg_reward_components": {
|
| 11 |
+
"format_compliance_score": 0.999,
|
| 12 |
+
"candidate_alignment_score": 0.999,
|
| 13 |
+
"legality_score": 0.999,
|
| 14 |
+
"safety_delta_score": 0.5,
|
| 15 |
+
"burden_improvement_score": 0.5,
|
| 16 |
+
"disease_stability_score": 0.9,
|
| 17 |
+
"dosing_quality_score": 0.5,
|
| 18 |
+
"abstention_quality_score": 0.56,
|
| 19 |
+
"efficiency_score": 0.857,
|
| 20 |
+
"process_fidelity_score": 0.92,
|
| 21 |
+
"explanation_grounding_score": 0.8,
|
| 22 |
+
"anti_cheat_score": 0.001,
|
| 23 |
+
"uncertainty_calibration_score": 0.7
|
| 24 |
+
},
|
| 25 |
+
"avg_primary_reward_channels": {
|
| 26 |
+
"safety_legality": 0.675,
|
| 27 |
+
"clinical_improvement": 0.633,
|
| 28 |
+
"dosing_quality": 0.53,
|
| 29 |
+
"process_integrity": 0.894
|
| 30 |
+
}
|
| 31 |
+
},
|
| 32 |
+
"reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl",
|
| 33 |
+
"train_metrics": {
|
| 34 |
+
"steps_executed": 1.0
|
| 35 |
+
},
|
| 36 |
+
"artifact_path": "",
|
| 37 |
+
"unsloth_available": false,
|
| 38 |
+
"trl_runtime_error": "forced_fallback"
|
| 39 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_strict_check.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"status": "fallback",
|
| 3 |
+
"backend": "env_reward_fallback",
|
| 4 |
+
"model_id": "Qwen/Qwen2.5-1.5B-Instruct",
|
| 5 |
+
"records": 1,
|
| 6 |
+
"prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl",
|
| 7 |
+
"reward_summary": {
|
| 8 |
+
"count": 1,
|
| 9 |
+
"avg_reward": 0.764,
|
| 10 |
+
"avg_reward_components": {
|
| 11 |
+
"format_compliance_score": 0.999,
|
| 12 |
+
"candidate_alignment_score": 0.999,
|
| 13 |
+
"legality_score": 0.999,
|
| 14 |
+
"safety_delta_score": 0.5,
|
| 15 |
+
"burden_improvement_score": 0.5,
|
| 16 |
+
"disease_stability_score": 0.9,
|
| 17 |
+
"dosing_quality_score": 0.5,
|
| 18 |
+
"abstention_quality_score": 0.56,
|
| 19 |
+
"efficiency_score": 0.857,
|
| 20 |
+
"process_fidelity_score": 0.92,
|
| 21 |
+
"explanation_grounding_score": 0.8,
|
| 22 |
+
"anti_cheat_score": 0.001,
|
| 23 |
+
"uncertainty_calibration_score": 0.7
|
| 24 |
+
},
|
| 25 |
+
"avg_primary_reward_channels": {
|
| 26 |
+
"safety_legality": 0.675,
|
| 27 |
+
"clinical_improvement": 0.633,
|
| 28 |
+
"dosing_quality": 0.53,
|
| 29 |
+
"process_integrity": 0.894
|
| 30 |
+
}
|
| 31 |
+
},
|
| 32 |
+
"reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl",
|
| 33 |
+
"train_metrics": {
|
| 34 |
+
"steps_executed": 1.0
|
| 35 |
+
},
|
| 36 |
+
"artifact_path": "",
|
| 37 |
+
"unsloth_available": false,
|
| 38 |
+
"trl_runtime_error": "forced_fallback"
|
| 39 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/hf_space_verification.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"passed": true,
|
| 3 |
+
"status": "running",
|
| 4 |
+
"checked_on": "2026-04-26",
|
| 5 |
+
"repo_id": "TheJackBright/polyguard-openenv",
|
| 6 |
+
"space_url": "https://huggingface.co/spaces/TheJackBright/polyguard-openenv",
|
| 7 |
+
"runtime_url": "https://thejackbright-polyguard-openenv.hf.space",
|
| 8 |
+
"space_sha": "877add7878fbdf2011ed3d5d378cdca5fe7bac4b",
|
| 9 |
+
"space_private": false,
|
| 10 |
+
"runtime": {
|
| 11 |
+
"stage": "RUNNING",
|
| 12 |
+
"hardware": {
|
| 13 |
+
"current": "cpu-basic",
|
| 14 |
+
"requested": "cpu-basic"
|
| 15 |
+
},
|
| 16 |
+
"replicas": {
|
| 17 |
+
"current": 1,
|
| 18 |
+
"requested": 1
|
| 19 |
+
},
|
| 20 |
+
"domain": "thejackbright-polyguard-openenv.hf.space"
|
| 21 |
+
},
|
| 22 |
+
"openenv_validation": {
|
| 23 |
+
"command": "uv run openenv validate --url https://thejackbright-polyguard-openenv.hf.space",
|
| 24 |
+
"passed": true,
|
| 25 |
+
"passed_count": 6,
|
| 26 |
+
"total_count": 6,
|
| 27 |
+
"failed_criteria": []
|
| 28 |
+
}
|
| 29 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/hf_training_status.json
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"status": "running",
|
| 3 |
+
"started_at": 1777161126.3536248,
|
| 4 |
+
"finished_at": null,
|
| 5 |
+
"commands": [
|
| 6 |
+
{
|
| 7 |
+
"args": [
|
| 8 |
+
"python",
|
| 9 |
+
"scripts/bootstrap_data.py"
|
| 10 |
+
],
|
| 11 |
+
"returncode": 0,
|
| 12 |
+
"elapsed_seconds": 0.821
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"args": [
|
| 16 |
+
"python",
|
| 17 |
+
"scripts/build_training_corpus.py",
|
| 18 |
+
"--profile",
|
| 19 |
+
"massive",
|
| 20 |
+
"--with-local",
|
| 21 |
+
"--with-synthetic",
|
| 22 |
+
"--with-hf"
|
| 23 |
+
],
|
| 24 |
+
"returncode": 0,
|
| 25 |
+
"elapsed_seconds": 4.367
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"args": [
|
| 29 |
+
"python",
|
| 30 |
+
"scripts/train_sft_trl.py",
|
| 31 |
+
"--model-id",
|
| 32 |
+
"Qwen/Qwen2.5-0.5B-Instruct",
|
| 33 |
+
"--dataset-path",
|
| 34 |
+
"data/processed/training_corpus_sft.json",
|
| 35 |
+
"--epochs",
|
| 36 |
+
"1",
|
| 37 |
+
"--max-steps",
|
| 38 |
+
"20",
|
| 39 |
+
"--batch-size",
|
| 40 |
+
"2",
|
| 41 |
+
"--max-seq-len",
|
| 42 |
+
"512",
|
| 43 |
+
"--use-unsloth"
|
| 44 |
+
],
|
| 45 |
+
"returncode": 0,
|
| 46 |
+
"elapsed_seconds": 24.564
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"args": [
|
| 50 |
+
"reuse_artifact",
|
| 51 |
+
"grpo_adapter",
|
| 52 |
+
"/app/checkpoints/grpo_adapter"
|
| 53 |
+
],
|
| 54 |
+
"returncode": 0,
|
| 55 |
+
"elapsed_seconds": 0.0
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"args": [
|
| 59 |
+
"python",
|
| 60 |
+
"scripts/merge_adapters_safe.py",
|
| 61 |
+
"--adapter-dir",
|
| 62 |
+
"checkpoints/sft_adapter",
|
| 63 |
+
"--output-dir",
|
| 64 |
+
"checkpoints/merged"
|
| 65 |
+
],
|
| 66 |
+
"returncode": 0,
|
| 67 |
+
"elapsed_seconds": 9.014
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"args": [
|
| 71 |
+
"python",
|
| 72 |
+
"scripts/test_inference_postsave.py",
|
| 73 |
+
"--samples",
|
| 74 |
+
"3",
|
| 75 |
+
"--base-model",
|
| 76 |
+
"Qwen/Qwen2.5-0.5B-Instruct"
|
| 77 |
+
],
|
| 78 |
+
"returncode": 0,
|
| 79 |
+
"elapsed_seconds": 14.811
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"args": [
|
| 83 |
+
"python",
|
| 84 |
+
"scripts/evaluate_policy_ablations.py",
|
| 85 |
+
"--episodes",
|
| 86 |
+
"8"
|
| 87 |
+
],
|
| 88 |
+
"returncode": 0,
|
| 89 |
+
"elapsed_seconds": 4.458
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"args": [
|
| 93 |
+
"python",
|
| 94 |
+
"scripts/evaluate_baselines.py"
|
| 95 |
+
],
|
| 96 |
+
"returncode": 0,
|
| 97 |
+
"elapsed_seconds": 4.603
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"args": [
|
| 101 |
+
"python",
|
| 102 |
+
"scripts/evaluate_all.py"
|
| 103 |
+
],
|
| 104 |
+
"returncode": 0,
|
| 105 |
+
"elapsed_seconds": 4.271
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"args": [
|
| 109 |
+
"python",
|
| 110 |
+
"scripts/evaluate_compare_runs.py",
|
| 111 |
+
"--baseline",
|
| 112 |
+
"outputs/reports/baselines.json",
|
| 113 |
+
"--candidate",
|
| 114 |
+
"outputs/reports/benchmark_report.json",
|
| 115 |
+
"--output",
|
| 116 |
+
"outputs/reports/improvement_report.json"
|
| 117 |
+
],
|
| 118 |
+
"returncode": 0,
|
| 119 |
+
"elapsed_seconds": 0.037
|
| 120 |
+
}
|
| 121 |
+
],
|
| 122 |
+
"artifact_repo_id": "TheJackBright/polyguard-openenv-training-artifacts"
|
| 123 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/improvement_report.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"status": "ok",
|
| 3 |
+
"baseline": "outputs/reports/baselines.json",
|
| 4 |
+
"candidate": "outputs/reports/benchmark_report.json",
|
| 5 |
+
"deltas": {
|
| 6 |
+
"avg_reward": 0.025833,
|
| 7 |
+
"legality_rate": 0.0,
|
| 8 |
+
"success_rate": 0.0,
|
| 9 |
+
"avg_process_fidelity": 0.92,
|
| 10 |
+
"timeout_rate": 0.0,
|
| 11 |
+
"failure_visible_rate": 0.0
|
| 12 |
+
},
|
| 13 |
+
"gate": {
|
| 14 |
+
"avg_reward_up": true,
|
| 15 |
+
"legality_up": true,
|
| 16 |
+
"success_up": true
|
| 17 |
+
},
|
| 18 |
+
"improved": true
|
| 19 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/improvement_report_benchmark.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"status": "ok",
|
| 3 |
+
"baseline": "outputs/reports/baselines.json",
|
| 4 |
+
"candidate": "outputs/reports/benchmark_report.json",
|
| 5 |
+
"deltas": {
|
| 6 |
+
"avg_reward": -0.0025,
|
| 7 |
+
"legality_rate": 0.0,
|
| 8 |
+
"success_rate": 0.0,
|
| 9 |
+
"avg_process_fidelity": 0.92,
|
| 10 |
+
"timeout_rate": 0.0,
|
| 11 |
+
"failure_visible_rate": 0.0
|
| 12 |
+
},
|
| 13 |
+
"gate": {
|
| 14 |
+
"avg_reward_up": false,
|
| 15 |
+
"legality_up": true,
|
| 16 |
+
"success_up": true
|
| 17 |
+
},
|
| 18 |
+
"improved": false
|
| 19 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/inference_benchmark.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"status": "ok",
|
| 3 |
+
"runs": [
|
| 4 |
+
{
|
| 5 |
+
"run": 0,
|
| 6 |
+
"provider": "transformers",
|
| 7 |
+
"candidate_id": "cand_04",
|
| 8 |
+
"latency_ms": 2313.731,
|
| 9 |
+
"rationale": "Transformers fallback selected cand_04 via local ranker."
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"run": 1,
|
| 13 |
+
"provider": "transformers",
|
| 14 |
+
"candidate_id": "cand_02",
|
| 15 |
+
"latency_ms": 0.012,
|
| 16 |
+
"rationale": "Transformers fallback selected cand_02 via local ranker."
|
| 17 |
+
}
|
| 18 |
+
],
|
| 19 |
+
"avg_latency_ms": 1156.872,
|
| 20 |
+
"provider_requested": "transformers",
|
| 21 |
+
"model": "Qwen/Qwen2.5-0.5B-Instruct"
|
| 22 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/legality_rate.png
ADDED
|
submission_bundle/grpo_training_cycle/docs_results/planner_grpo.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"avg_reward": 0.77625,
|
| 3 |
+
"legality_rate": 1.0,
|
| 4 |
+
"severe_violation_rate": 0.0,
|
| 5 |
+
"abstention_rate": 0.0,
|
| 6 |
+
"avg_episode_length": 2.0,
|
| 7 |
+
"success_rate": 0.0,
|
| 8 |
+
"avg_burden_delta": 0.0,
|
| 9 |
+
"avg_safety_delta": 0.5,
|
| 10 |
+
"avg_dosing_quality": 0.75,
|
| 11 |
+
"avg_process_fidelity": 0.92,
|
| 12 |
+
"exploit_detection_count": 4.0,
|
| 13 |
+
"reward_columns": {
|
| 14 |
+
"format_compliance_score": 0.9990000000000001,
|
| 15 |
+
"candidate_alignment_score": 0.9990000000000001,
|
| 16 |
+
"legality_score": 0.9990000000000001,
|
| 17 |
+
"safety_delta_score": 0.5,
|
| 18 |
+
"burden_improvement_score": 0.5,
|
| 19 |
+
"disease_stability_score": 0.9000000000000002,
|
| 20 |
+
"dosing_quality_score": 0.75,
|
| 21 |
+
"abstention_quality_score": 0.5600000000000002,
|
| 22 |
+
"efficiency_score": 0.73,
|
| 23 |
+
"process_fidelity_score": 0.92,
|
| 24 |
+
"explanation_grounding_score": 0.7999999999999999,
|
| 25 |
+
"anti_cheat_score": 0.6663333333333333,
|
| 26 |
+
"uncertainty_calibration_score": 0.8699999999999998
|
| 27 |
+
}
|
| 28 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/plot_index.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"plots": [
|
| 3 |
+
"/app/outputs/plots/avg_reward.png",
|
| 4 |
+
"/app/outputs/plots/legality_rate.png",
|
| 5 |
+
"/app/outputs/plots/success_rate.png",
|
| 6 |
+
"/app/outputs/plots/avg_process_fidelity.png",
|
| 7 |
+
"/app/outputs/plots/policy_stack_avg_reward.png"
|
| 8 |
+
]
|
| 9 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/policy_stack_avg_reward.png
ADDED
|
submission_bundle/grpo_training_cycle/docs_results/postsave_inference.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"status": "ok",
|
| 3 |
+
"model_source": "merged",
|
| 4 |
+
"model_load_error": "",
|
| 5 |
+
"samples": 3,
|
| 6 |
+
"valid_rate": 1.0,
|
| 7 |
+
"avg_env_reward": 0.712333,
|
| 8 |
+
"results": [
|
| 9 |
+
{
|
| 10 |
+
"idx": 0,
|
| 11 |
+
"prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
|
| 12 |
+
"completion": "}\n\nTo choose a candidate ID, I will follow these steps:\n\n1. **Review the list of candidates**: The provided list includes six candidate IDs: `cand_01`, `cand_02`, `cand_03`, `cand_04`, `cand_05`, `cand_08`, `cand_09`, and",
|
| 13 |
+
"candidate_id": "cand_01",
|
| 14 |
+
"selected_candidate": "cand_01",
|
| 15 |
+
"env_reward": 0.717,
|
| 16 |
+
"done": true,
|
| 17 |
+
"valid": true,
|
| 18 |
+
"termination_reason": "exploit_detection"
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"idx": 1,
|
| 22 |
+
"prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
|
| 23 |
+
"completion": "}\n\nSure, I can help you choose a candidate ID based on the information provided. However, I need to know which of the given candidates is your patient ID so that I can provide a suitable rationale for choosing that specific candidate.\n\nPlease let me know which candidate ID you are referring to, and I will proceed with selecting the appropriate candidate ID and providing the rationale accordingly. If you don't have a",
|
| 24 |
+
"candidate_id": null,
|
| 25 |
+
"selected_candidate": "cand_01",
|
| 26 |
+
"env_reward": 0.703,
|
| 27 |
+
"done": true,
|
| 28 |
+
"valid": true,
|
| 29 |
+
"termination_reason": "exploit_detection"
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"idx": 2,
|
| 33 |
+
"prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
|
| 34 |
+
"completion": "}\n\nTo choose a candidate ID for the patient with the identifier `patient_8002`, I will follow these steps:\n\n1. **Identify the Patient Identifier**: The patient's identifier is `patient_8002`.\n\n2. **List of Candidate IDs**: The list of candidate IDs provided is:\n - cand_07\n - cand_01\n -",
|
| 35 |
+
"candidate_id": "cand_07",
|
| 36 |
+
"selected_candidate": "cand_01",
|
| 37 |
+
"env_reward": 0.717,
|
| 38 |
+
"done": true,
|
| 39 |
+
"valid": true,
|
| 40 |
+
"termination_reason": "exploit_detection"
|
| 41 |
+
}
|
| 42 |
+
]
|
| 43 |
+
}
|
submission_bundle/grpo_training_cycle/docs_results/risk_train.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_size": 180.0,
|
| 3 |
+
"status": "trained",
|
| 4 |
+
"train_mae": 0.0033,
|
| 5 |
+
"model_path": "outputs/models/tabular_risk.pkl"
|
| 6 |
+
}
|