Siddharaj Shirke commited on
Commit
df97e68
·
0 Parent(s):

deploy: clean code-only snapshot for HF Space

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +73 -0
  2. .env.example +46 -0
  3. .gitignore +67 -0
  4. Dockerfile +51 -0
  5. README.md +191 -0
  6. app/README.md +23 -0
  7. app/__init__.py +20 -0
  8. app/api_gateway.py +257 -0
  9. app/baselines.py +161 -0
  10. app/config.py +87 -0
  11. app/engine.py +1712 -0
  12. app/env.py +553 -0
  13. app/event_engine.py +101 -0
  14. app/graders.py +176 -0
  15. app/main.py +0 -0
  16. app/models.py +509 -0
  17. app/persistence.py +335 -0
  18. app/reward.py +108 -0
  19. app/sector_profiles.py +183 -0
  20. app/signal_computer.py +81 -0
  21. app/simulator.py +1106 -0
  22. app/state_machine.py +107 -0
  23. app/story_router.py +407 -0
  24. app/tasks.py +144 -0
  25. app/training_jobs.py +634 -0
  26. app/utils.py +25 -0
  27. audit.py +367 -0
  28. baseline_openai.py +983 -0
  29. client.py +134 -0
  30. docs/FRONTEND_WORKFLOW.md +48 -0
  31. docs/PHASE2_IMPLEMENTATION.md +41 -0
  32. docs/PHASE3_IMPLEMENTATION.md +39 -0
  33. docs/PROJECT_STRUCTURE.md +41 -0
  34. frontend/react/.gitignore +2 -0
  35. frontend/react/README.md +24 -0
  36. frontend/react/index.html +16 -0
  37. frontend/react/package-lock.json +2050 -0
  38. frontend/react/package.json +22 -0
  39. frontend/react/postcss.config.js +6 -0
  40. frontend/react/src/App.jsx +21 -0
  41. frontend/react/src/api/client.js +131 -0
  42. frontend/react/src/components/Charts.jsx +142 -0
  43. frontend/react/src/components/Layout.jsx +33 -0
  44. frontend/react/src/components/story-ui/Dashboard.jsx +1589 -0
  45. frontend/react/src/components/story-ui/TrainingTabV2.jsx +1760 -0
  46. frontend/react/src/hooks/useStorySimulation.js +474 -0
  47. frontend/react/src/main.jsx +15 -0
  48. frontend/react/src/styles.css +525 -0
  49. frontend/react/tailwind.config.js +100 -0
  50. frontend/react/vite.config.js +20 -0
.dockerignore ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # VCS / local env
2
+ .git/
3
+ .gitignore
4
+ .venv/
5
+ .venv313/
6
+ .venv311/
7
+ .env
8
+ .env.*
9
+ !.env.example
10
+
11
+ # Python cache/build
12
+ __pycache__/
13
+ *.pyc
14
+ *.pyo
15
+ *.egg-info/
16
+ dist/
17
+ build/
18
+
19
+ # Frontend cache/deps
20
+ frontend/react/node_modules/
21
+ frontend/react/.vite/
22
+ frontend/react/.vite-temp/
23
+ frontend/react/dist/
24
+ .npm-cache/
25
+ .vite/
26
+
27
+ # Runtime/generated data not needed in image build context
28
+ logs/
29
+ reports/
30
+ outputs/
31
+ data/
32
+ results/training_runs/
33
+ results/runs/
34
+ results/eval_logs/
35
+ results/best_model/archived/
36
+ artifacts/
37
+ results/prevalidation_*.log
38
+
39
+ # Test/dev-only assets
40
+ .pytest_cache/
41
+ .tmp/
42
+ docs/
43
+ examples/
44
+ tests/
45
+ gov_workflow_openenv_tests/
46
+ pip_bootstrap/
47
+ test_results.txt
48
+ test_rl_output*.txt
49
+ tests/test_output*.txt
50
+ tests/test_run.txt
51
+ phase1_validation.py
52
+ test_phase2.py
53
+ old_simulator.py
54
+ restore_simulator.py
55
+
56
+ # Non-runtime docs/notebooks
57
+ GovWorkflow_RL_ENV.ipynb
58
+ Blog.md
59
+ uv.lock
60
+ *.backup
61
+
62
+ # IDE/OS noise
63
+ .vscode/
64
+ .idea/
65
+ *.swp
66
+ Thumbs.db
67
+ .DS_Store
68
+
69
+ # Legacy static shell not used in deployed image
70
+ app/web/app.js
71
+ app/web/index.html
72
+ app/web/react_app.js
73
+ app/web/styles.css
.env.example ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Standard OpenEnv / inference variables
2
+ API_BASE_URL=https://integrate.api.nvidia.com/v1
3
+ MODEL_NAME=meta/llama-3.3-70b-instruct
4
+ HF_TOKEN=
5
+ OPENAI_API_KEY=
6
+ API_KEY=
7
+ LOCAL_IMAGE_NAME=gov-workflow-openenv:latest
8
+ MAX_STEPS=80
9
+ SUCCESS_SCORE_THRESHOLD=0.50
10
+
11
+ # Provider-specific API base URLs (used by frontend simulation bridge)
12
+ OPENAI_API_BASE_URL=https://api.openai.com/v1
13
+ NVIDIA_API_BASE_URL=https://integrate.api.nvidia.com/v1
14
+
15
+ # Optional fallback model lists (comma-separated)
16
+ MODEL_FALLBACKS=
17
+ NVIDIA_MODEL_FALLBACKS=
18
+
19
+ # NVIDIA Build API (fallback / internal)
20
+ # Copy this file to .env and fill in your values
21
+ # Get your key at: https://build.nvidia.com/explore/discover
22
+ NVIDIA_API_KEY=nvapi-your-key-here
23
+ NVIDIA_API_KEY_2=
24
+
25
+ # LLM Model Selection
26
+ NVIDIA_MODEL=meta/llama-3.3-70b-instruct
27
+
28
+ # Server Settings
29
+ SERVER_HOST=0.0.0.0
30
+ SERVER_PORT=7860
31
+ SERVER_LOG_LEVEL=info
32
+ SERVER_WORKERS=1
33
+
34
+ # Environment Settings
35
+ ENV_DEFAULT_TASK_ID=district_backlog_easy
36
+ ENV_DEFAULT_SEED=11
37
+ ENV_MAX_SESSIONS=100
38
+ ENV_MAX_STEPS_PER_EPISODE=500
39
+
40
+ # API Throttling
41
+ LLM_CALL_DELAY=12.0
42
+
43
+ # Persistence (SQLite + filesystem)
44
+ # For Hugging Face persistent storage, set OPENENV_DATA_DIR=/data/openenv_rl
45
+ STORAGE_ENABLED=true
46
+ OPENENV_DATA_DIR=
.gitignore ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment secrets - NEVER commit .env
2
+ .env
3
+ .env.local
4
+ .env.production
5
+
6
+ # Python
7
+ __pycache__/
8
+ *.pyc
9
+ *.pyo
10
+ .venv/
11
+ .venv313/
12
+ .venv311/
13
+ *.egg-info/
14
+ dist/
15
+ build/
16
+
17
+ # pytest
18
+ .pytest_cache/
19
+
20
+ # Local temp/bootstrap
21
+ .tmp/
22
+ pip_bootstrap/
23
+
24
+ # Runtime outputs
25
+ outputs/
26
+ logs/
27
+ reports/
28
+ data/
29
+ results/training_runs/
30
+ results/runs/
31
+ results/eval_logs/
32
+ results/best_model/archived/
33
+ artifacts/
34
+
35
+ # Frontend build cache/deps
36
+ frontend/react/node_modules/
37
+ frontend/react/.vite/
38
+ frontend/react/.vite-temp/
39
+ frontend/react/dist/
40
+ .vite/
41
+ .npm-cache/
42
+
43
+ # Docker/local deployment overrides
44
+ docker-compose.override.yml
45
+ *.local.env
46
+ *.backup
47
+
48
+ # Local test artifacts
49
+ test_results.txt
50
+ test_rl_output*.txt
51
+ tests/test_output*.txt
52
+ tests/test_run.txt
53
+
54
+ # Pre-submission validation artifacts
55
+ scripts/validate-submission.sh
56
+ results/prevalidation_docker_build.log
57
+ results/prevalidation_*.log
58
+
59
+ # Keep benchmark Phase 1 model in Git for Colab/Kaggle transfer
60
+ !results/best_model/phase1/phase1_final.zip
61
+
62
+ # Legacy static shell (superseded by Vite bundle)
63
+ app/web/app.js
64
+ app/web/index.html
65
+ app/web/react_app.js
66
+ app/web/styles.css
67
+
Dockerfile ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Gov Workflow OpenEnv
2
+ # Multi-stage image:
3
+ # 1) build Vite frontend assets
4
+ # 2) run FastAPI backend and serve built UI under /ui
5
+
6
+ FROM node:20-slim AS frontend-build
7
+ WORKDIR /web
8
+
9
+ COPY frontend/react/package.json frontend/react/package-lock.json ./frontend/react/
10
+ RUN cd frontend/react && npm ci --no-audit --no-fund
11
+
12
+ COPY frontend/react ./frontend/react
13
+ RUN cd frontend/react && npm run build
14
+
15
+
16
+ FROM python:3.11-slim AS runtime
17
+
18
+ ENV PYTHONDONTWRITEBYTECODE=1 \
19
+ PYTHONUNBUFFERED=1 \
20
+ PIP_NO_CACHE_DIR=1 \
21
+ OPENENV_DATA_DIR=/data/openenv_rl \
22
+ STORAGE_ENABLED=true \
23
+ PORT=7860
24
+
25
+ WORKDIR /app
26
+
27
+ # Runtime OS dependencies (torch/sb3 commonly require libgomp at runtime)
28
+ RUN apt-get update \
29
+ && apt-get install -y --no-install-recommends libgomp1 \
30
+ && rm -rf /var/lib/apt/lists/*
31
+
32
+ COPY requirements.txt requirements_rl.txt ./
33
+ RUN python -m pip install --upgrade pip \
34
+ && python -m pip install -r requirements.txt \
35
+ && python -m pip install -r requirements_rl.txt
36
+
37
+ COPY . .
38
+ COPY --from=frontend-build /web/frontend/react/dist ./app/web/vite_dist
39
+
40
+ RUN mkdir -p /data/openenv_rl \
41
+ && useradd --create-home --uid 10001 appuser \
42
+ && chown -R appuser:appuser /app /data/openenv_rl
43
+
44
+ USER appuser
45
+
46
+ EXPOSE 7860
47
+
48
+ HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
49
+ CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:7860/health', timeout=3)" || exit 1
50
+
51
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
README.md ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Gov Workflow OpenEnv
3
+ sdk: docker
4
+ app_port: 7860
5
+ pinned: false
6
+ ---
7
+
8
+ # Gov Workflow OpenEnv
9
+
10
+ ## Quick Links
11
+
12
+ - Hugging Face Space URL (Dummy, update later): [https://huggingface.co/spaces/your-username/your-space-name](https://huggingface.co/spaces/your-username/your-space-name)
13
+ This placeholder will be replaced with the final deployed demo link.
14
+ - Blog path in codebase: `OPENENV_RL/Blog.md`
15
+ Project write-up and narrative documentation for design choices and outcomes.
16
+ - Notebook path: `OPENENV_RL/GovWorkflow_RL_ENV.ipynb`
17
+ Main OpenEnv RL government workflow notebook used as the judge-facing criteria book. It contains the practical judging context, environment setup, and the full end-to-end flow in one place.
18
+ - Notebook Colab URL: [https://colab.research.google.com/drive/1ssTnxKoU1nOfSNA3nOeiNM8S4fKFpkby?usp=sharing](https://colab.research.google.com/drive/1ssTnxKoU1nOfSNA3nOeiNM8S4fKFpkby?usp=sharing)
19
+ Cloud version of the same notebook so judges can run and review the complete workflow without local setup.
20
+ - GRPO Phase 1 training link: [https://colab.research.google.com/drive/1ND_DZ6xcT2JuH7uGB2AYbiZ1dcHKFfIw?usp=sharing](https://colab.research.google.com/drive/1ND_DZ6xcT2JuH7uGB2AYbiZ1dcHKFfIw?usp=sharing)
21
+ First-stage GRPO training run where the LLM agent starts learning policy behavior inside the RL environment.
22
+ - GRPO Phase 2 training link: [https://colab.research.google.com/drive/1ofxEADct_gTX5DGhcnk8lW6p31gFCIFV?usp=sharing](https://colab.research.google.com/drive/1ofxEADct_gTX5DGhcnk8lW6p31gFCIFV?usp=sharing)
23
+ Second-stage GRPO continuation where the same LLM agent is further trained and refined on the RL environment.
24
+ - PPO Phase 1 training (local): `rl/train_ppo.py`
25
+ Phase 1 PPO baseline training was executed on the local system to establish the RL algorithm baseline before phase-2 progression.
26
+ - PPO Phase 2 training link: [https://colab.research.google.com/drive/1RVXQs-QAuXLBw0YXJtN4cbEootCTfHO7?usp=sharing](https://colab.research.google.com/drive/1RVXQs-QAuXLBw0YXJtN4cbEootCTfHO7?usp=sharing)
27
+ PPO phase 2 training notebook where the RL algorithm is further trained on the same environment for improved policy performance.
28
+
29
+ Gov Workflow OpenEnv is a FastAPI-first simulation environment for public service workflow operations.
30
+ It models queue prioritization, officer allocation, missing-document recovery, escalation usage, and fairness-aware SLA management across government services.
31
+
32
+ This repository is productionized for:
33
+ - local development (FastAPI + Vite)
34
+ - Docker runtime
35
+ - Hugging Face Spaces (Docker SDK)
36
+
37
+ ## Current Main-Branch Status
38
+
39
+ This README is aligned to the current `main` branch code paths, including:
40
+ - `app.main:app` as primary server runtime
41
+ - React UI served at `/ui` from built Vite assets when available
42
+ - OpenEnv contract endpoints (`/reset`, `/step`, `/state`, `/grade`)
43
+ - frontend API aliases (`/api/*`) and versioned aliases (`/api/v1/*`)
44
+ - training story endpoints (`/training/*`)
45
+ - simulation, RL, persistence, compliance, and history endpoints
46
+
47
+ ## End-to-End Architecture
48
+
49
+ ```mermaid
50
+ flowchart LR
51
+ UI["React UI"] --> API["FastAPI app.main"]
52
+ API --> ENV["GovWorkflowEnv app/env.py"]
53
+ API --> SIM["Simulation runtime app/simulator.py"]
54
+ API --> RL["RL train/eval rl/*"]
55
+ API --> STORE["PersistenceStore SQLite + filesystem"]
56
+ API --> STORY["Training Story router /training/*"]
57
+ API --> OPENENV["Optional OpenEnv adapter /openenv/*"]
58
+ ```
59
+
60
+ ## Core Runtime Components
61
+
62
+ - API server: `app/main.py`
63
+ - Environment kernel: `app/env.py`
64
+ - Typed models: `app/models.py`
65
+ - Task registry: `app/tasks.py`
66
+ - Reward shaping: `app/reward.py`
67
+ - Deterministic graders: `app/graders.py`
68
+ - Simulation runtime: `app/simulator.py`
69
+ - Training jobs manager: `app/training_jobs.py`
70
+ - Persistence layer: `app/persistence.py`
71
+ - Transport gateway: `app/api_gateway.py`
72
+ - React frontend: `frontend/react`
73
+
74
+ ## Task Set (Current Runtime)
75
+
76
+ Configured in `app/tasks.py`:
77
+ - `district_backlog_easy`
78
+ - `mixed_urgency_medium`
79
+ - `cross_department_hard`
80
+ - `district_backlog_easy_extreme`
81
+
82
+ Benchmark list used by APIs:
83
+ - `district_backlog_easy`
84
+ - `mixed_urgency_medium`
85
+ - `cross_department_hard`
86
+
87
+ ## Service Coverage
88
+
89
+ `ServiceType` includes:
90
+ - `passport`
91
+ - `driving_license`
92
+ - `aadhaar_card`
93
+ - `gst_registration`
94
+ - `income_certificate`
95
+ - `caste_certificate`
96
+ - `birth_certificate`
97
+ - `land_registration`
98
+
99
+ Medium and hard tasks currently run with:
100
+ - `income_certificate`
101
+ - `land_registration`
102
+ - `passport`
103
+ - `driving_license`
104
+ - `aadhaar_card`
105
+
106
+
107
+
108
+ ## Local Development
109
+
110
+ ### Prerequisites
111
+
112
+ - Python 3.11+
113
+ - Node 20+
114
+ - Docker
115
+
116
+ ### Install dependencies
117
+
118
+ ```bash
119
+ pip install -r requirements.txt
120
+ pip install -r requirements_rl.txt
121
+ pip install pytest pytest-asyncio
122
+ npm --prefix frontend/react install
123
+ ```
124
+
125
+ ### Configure environment
126
+
127
+ ```bash
128
+ cp .env.example .env   # Windows (cmd): copy .env.example .env
129
+ ```
130
+
131
+ Populate as needed:
132
+ - `API_BASE_URL`
133
+ - `MODEL_NAME`
134
+ - `HF_TOKEN` or `OPENAI_API_KEY`/`API_KEY`
135
+ - optional NVIDIA keys (`NVIDIA_API_KEY`, `NVIDIA_API_KEY_2`)
136
+ - storage settings (`STORAGE_ENABLED`, `OPENENV_DATA_DIR`)
137
+
138
+ ### Run backend
139
+
140
+ ```bash
141
+ python scripts/run_local.py --host 127.0.0.1 --port 7860 --reload
142
+ ```
143
+
144
+ ### Run frontend
145
+
146
+ ```bash
147
+ npm --prefix frontend/react run dev
148
+ ```
149
+
150
+ Open:
151
+ - UI: `http://127.0.0.1:5173/ui`
152
+ - API docs: `http://127.0.0.1:7860/docs`
153
+
154
+
155
+
156
+
157
+ ## Repository Layout
158
+
159
+ ```text
160
+ app/
161
+ main.py FastAPI app + API routing + compatibility aliases
162
+ env.py GovWorkflowEnv kernel
163
+ models.py Typed Pydantic contracts
164
+ tasks.py Runtime task registry
165
+ reward.py Reward shaping
166
+ graders.py Deterministic graders
167
+ simulator.py Simulation runtime and live sessions
168
+ training_jobs.py Background RL training manager
169
+ persistence.py SQLite/filesystem persistence
170
+ api_gateway.py direct/http/auto environment transport layer
171
+ story_router.py training story endpoints
172
+ rl/
173
+ gov_workflow_env.py Gym adapter
174
+ train_ppo.py PPO phase training entrypoint
175
+ evaluate.py Checkpoint evaluator
176
+ feature_builder.py RL feature engineering
177
+ action_mask.py Action mask logic
178
+ frontend/react/
179
+ src/ React modules/components/api hooks
180
+ scripts/
181
+ run_local.py Local FastAPI launcher
182
+ convert_grpo_csv.py Training CSV to JSON converter for story endpoints
183
+ openenv.yaml OpenEnv manifest metadata
184
+ baseline_openai.py Baseline and LLM runner
185
+ inference.py Submission-style inference runner
186
+ Dockerfile Docker image definition
187
+ ```
188
+
189
+ ## License
190
+
191
+ BSD-3-Clause
app/README.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/
2
+
3
+ Core environment and API layer.
4
+
5
+ - `main.py`: FastAPI app and endpoints
6
+ - `env.py`: GovWorkflowEnv simulation kernel
7
+ - `models.py`: Pydantic action/observation/reward/state models
8
+ - `tasks.py`: easy/medium/hard deterministic task configs
9
+ - `graders.py`: deterministic task scoring (0.0 to 1.0)
10
+ - `reward.py`: dense reward breakdown
11
+ - `baselines.py`: heuristic baseline policies
12
+ - `web/`: frontend assets served by FastAPI at `/ui`
13
+ - `vite_dist/`: production Vite build output copied during Docker build
14
+ - legacy files (`index.html`, `react_app.js`, `styles.css`) remain as local fallback
15
+
16
+ Additional frontend-focused APIs in `main.py`:
17
+ - `/api/workflows/components`
18
+ - `/api/workflows/run`
19
+ - `/api/rl/models`
20
+ - `/api/rl/run`
21
+ - `/api/rl/evaluate`
22
+ - `/api/simulation/run`
23
+ - `/api/training/jobs`
app/__init__.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Public package surface for `app`.
#
# NOTE(review): the env kernel import is deliberately left commented out —
# presumably to keep `import app` lightweight; confirm before re-enabling.
# from app.env import GovWorkflowEnv
from app.models import ActionModel, ObservationModel, RewardModel

# The top-level `client` module is optional; when it is absent the package
# still imports and exposes a None placeholder instead of raising.
try:
    from client import GovWorkflowClient
except ModuleNotFoundError:
    GovWorkflowClient = None  # type: ignore[assignment]

# Aliases exposing the core models under the GovWorkflow* names.
GovWorkflowAction = ActionModel
GovWorkflowObservation = ObservationModel

__all__ = [
    "ActionModel",
    "ObservationModel",
    "RewardModel",
    "GovWorkflowAction",
    "GovWorkflowObservation",
    # "GovWorkflowEnv",
    "GovWorkflowClient",
]
app/api_gateway.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unified environment transport layer.
3
+
4
+ This module centralizes environment access so callers can use:
5
+ - FastAPI HTTP transport
6
+ - direct in-process transport
7
+ - dynamic auto selection
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass
13
+ import os
14
+ from typing import Literal, Protocol
15
+
16
+ from app.env import GovWorkflowEnv
17
+ from app.graders import grade_episode
18
+ from app.models import ActionModel, ObservationModel, StepInfoModel
19
+
20
+
21
+ TransportMode = Literal["auto", "http", "direct"]
22
+
23
+
24
class EnvGateway(Protocol):
    """Structural contract implemented by every environment transport.

    Implementations (DirectEnvGateway, HttpEnvGateway) expose the same
    reset/step/grade/close surface plus three attributes:
      - transport: the TransportMode this gateway was created with
      - terminated / truncated: flags mirroring the most recent step() result
    """

    transport: TransportMode
    terminated: bool
    truncated: bool

    def reset(self) -> ObservationModel: ...

    def step(
        self, action: ActionModel
    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]: ...

    def grade(self) -> tuple[float, str, dict[str, float]]: ...

    def close(self) -> None: ...
38
+
39
+
40
@dataclass
class DirectEnvGateway:
    """In-process transport: drives a GovWorkflowEnv instance directly."""

    task_id: str
    seed: int
    transport: TransportMode = "direct"

    def __post_init__(self) -> None:
        # Done-flags mirror the most recent step() result.
        self.terminated = False
        self.truncated = False
        self._env = GovWorkflowEnv(task_id=self.task_id)

    def reset(self) -> ObservationModel:
        """Begin a fresh episode using the configured seed."""
        observation, _info = self._env.reset(seed=self.seed)
        self.terminated = False
        self.truncated = False
        return observation

    def step(
        self, action: ActionModel
    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]:
        """Advance one step and mirror the done-flags on the gateway."""
        observation, reward, done, cut_off, info = self._env.step(action)
        self.terminated = bool(done)
        self.truncated = bool(cut_off)
        return observation, float(reward), self.terminated, self.truncated, info

    def grade(self) -> tuple[float, str, dict[str, float]]:
        """Score the current episode state via the deterministic grader."""
        graded = grade_episode(self._env.state())
        return float(graded.score), str(graded.grader_name), dict(graded.metrics)

    def close(self) -> None:
        """Release env resources when the kernel exposes a close() hook."""
        maybe_close = getattr(self._env, "close", None)
        if callable(maybe_close):
            maybe_close()
73
+
74
+
75
@dataclass
class HttpEnvGateway:
    """HTTP transport that talks to the FastAPI server's OpenEnv endpoints.

    The API prefix (e.g. "/api/v1") is resolved lazily by probing /health
    under a list of candidate prefixes, then cached for the session.
    """

    task_id: str
    seed: int
    base_url: str
    api_prefix: str | None = None
    transport: TransportMode = "http"

    def __post_init__(self) -> None:
        try:
            import requests as _requests
        except ImportError as exc:
            raise ImportError("requests is required for HTTP transport.") from exc
        self._requests = _requests
        self._session_id: str | None = None
        self.terminated = False
        self.truncated = False
        self.base_url = self.base_url.rstrip("/")
        # None means "not resolved yet". "" is a VALID resolved value (root
        # prefix), so it must not double as the unresolved sentinel.
        self._resolved_prefix: str | None = self._normalize_prefix(self.api_prefix) or None

    @staticmethod
    def _normalize_prefix(prefix: str | None) -> str:
        """Normalize a prefix to "" or "/seg[/seg]" with no trailing slash."""
        if prefix is None:
            return ""
        p = str(prefix).strip()
        if not p:
            return ""
        if not p.startswith("/"):
            p = "/" + p
        return p.rstrip("/")

    @staticmethod
    def _candidate_prefixes(explicit_prefix: str | None) -> list[str]:
        """Ordered, de-duplicated prefixes to probe for the API root."""
        normalized_explicit = HttpEnvGateway._normalize_prefix(explicit_prefix)
        if normalized_explicit:
            return [normalized_explicit]

        env_prefix = HttpEnvGateway._normalize_prefix(os.getenv("OPENENV_ENV_API_PREFIX", ""))
        configured_candidates = os.getenv("OPENENV_ENV_API_PREFIX_CANDIDATES", "")

        candidates: list[str] = []
        for item in [env_prefix, *configured_candidates.split(",")]:
            normalized = HttpEnvGateway._normalize_prefix(item)
            if normalized not in candidates:
                candidates.append(normalized)

        # Ordered fallbacks: versioned API -> frontend API -> root OpenEnv API.
        for fallback in ["/api/v1", "/api", ""]:
            if fallback not in candidates:
                candidates.append(fallback)
        return candidates

    def _resolve_prefix(self) -> str:
        # BUG FIX: the previous guard (`if self._resolved_prefix:`) treated a
        # cached "" (root prefix) as "unresolved", re-probing /health on every
        # request against a root-mounted API. With the None sentinel the first
        # successful probe — including an empty result — is cached for good.
        if self._resolved_prefix is not None:
            return self._resolved_prefix
        for prefix in self._candidate_prefixes(self.api_prefix):
            try:
                response = self._requests.get(
                    f"{self.base_url}{prefix}/health",
                    timeout=3,
                )
                if response.ok:
                    self._resolved_prefix = prefix
                    return prefix
            except Exception:
                continue
        # No candidate answered: fall back to the root prefix and stop probing.
        self._resolved_prefix = ""
        return ""

    def _url(self, path: str) -> str:
        return f"{self.base_url}{self._resolve_prefix()}{path}"

    def _post(self, path: str, body: dict) -> dict:
        """POST JSON and return the decoded response; raises on HTTP errors."""
        response = self._requests.post(
            self._url(path),
            json=body,
            timeout=30,
        )
        response.raise_for_status()
        return response.json()

    def reset(self) -> ObservationModel:
        """Open a server-side session for (task_id, seed) and return the first observation."""
        payload = {"task_id": self.task_id, "seed": self.seed}
        data = self._post("/reset", payload)
        self._session_id = str(data["session_id"])
        self.terminated = False
        self.truncated = False
        return ObservationModel(**data["observation"])

    def step(
        self, action: ActionModel
    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]:
        """Send one action to the session; mirrors done-flags on the gateway."""
        if not self._session_id:
            raise RuntimeError("Session is not initialized. Call reset() first.")
        data = self._post(
            "/step",
            {
                "session_id": self._session_id,
                "action": action.model_dump(exclude_none=True, mode="json"),
            },
        )
        obs = ObservationModel(**data["observation"])
        info = StepInfoModel(**data["info"])
        self.terminated = bool(data["terminated"])
        self.truncated = bool(data["truncated"])
        return (
            obs,
            float(data["reward"]),
            bool(data["terminated"]),
            bool(data["truncated"]),
            info,
        )

    def grade(self) -> tuple[float, str, dict[str, float]]:
        """Ask the server to grade the current session."""
        if not self._session_id:
            raise RuntimeError("Session is not initialized. Call reset() first.")
        data = self._post("/grade", {"session_id": self._session_id})
        return (
            float(data["score"]),
            str(data["grader_name"]),
            dict(data.get("metrics", {})),
        )

    def close(self) -> None:
        """Best-effort server-side session teardown; never raises."""
        if not self._session_id:
            return
        try:
            self._requests.delete(self._url(f"/sessions/{self._session_id}"), timeout=10)
        except Exception:
            pass
        self._session_id = None
206
+
207
+
208
+ def _http_reachable(base_url: str) -> bool:
209
+ try:
210
+ import requests
211
+ r = requests.get(f"{base_url.rstrip('/')}/health", timeout=3)
212
+ return bool(r.ok)
213
+ except Exception:
214
+ return False
215
+
216
+
217
def create_env_gateway(
    *,
    task_id: str,
    seed: int,
    mode: TransportMode = "auto",
    base_url: str = "http://127.0.0.1:7860",
    api_prefix: str | None = None,
    enforce_fastapi: bool = False,
) -> EnvGateway:
    """
    Build an environment gateway, choosing the transport dynamically.

    Selection rules:
      - mode="http"   -> always HTTP
      - mode="direct" -> always in-process (rejected when enforce_fastapi=True)
      - mode="auto"   -> HTTP when /health responds, else direct fallback

    Raises:
        RuntimeError: when enforce_fastapi forbids the direct transport.
    """
    if mode == "direct" and enforce_fastapi:
        raise RuntimeError("Direct transport is disabled. Set mode to 'http' or 'auto'.")

    if mode == "http":
        return HttpEnvGateway(task_id=task_id, seed=seed, base_url=base_url, api_prefix=api_prefix)

    if mode == "direct":
        return DirectEnvGateway(task_id=task_id, seed=seed)

    # mode == "auto": prefer HTTP whenever the server answers its health check.
    if _http_reachable(base_url):
        return HttpEnvGateway(
            task_id=task_id,
            seed=seed,
            base_url=base_url,
            api_prefix=api_prefix,
            transport="auto",
        )

    if enforce_fastapi:
        raise RuntimeError(
            f"FastAPI gateway is required but unavailable at {base_url}. "
            "Start the API server or disable FORCE_FASTAPI_GATEWAY."
        )
    return DirectEnvGateway(task_id=task_id, seed=seed, transport="auto")
app/baselines.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from collections.abc import Callable
3
+ from types import SimpleNamespace
4
+ from app.env import GovWorkflowEnv
5
+ from app.graders import grade_episode
6
+ from app.models import ActionModel, ActionType, ObservationModel, PriorityMode, ServiceType
7
+
8
+ PolicyFn = Callable[[ObservationModel], ActionModel]
9
+
10
+
11
+ def _snapshots(obs: ObservationModel):
12
+ """Return queue snapshots as a list regardless of Phase 1 (list) or Phase 2 (dict)."""
13
+ qs = obs.queue_snapshots
14
+ if isinstance(qs, dict):
15
+ return list(qs.values())
16
+ return list(qs)
17
+
18
+
19
+ def _service_attr(q, *attrs):
20
+ """Return the first attribute that exists on a QueueSnapshot (Phase 1 vs Phase 2 names)."""
21
+ for attr in attrs:
22
+ val = getattr(q, attr, None)
23
+ if val is not None:
24
+ return val
25
+ return 0
26
+
27
+
28
+ def _service_name(q) -> ServiceType:
29
+ """Return ServiceType regardless of Phase 1 (.service) or Phase 2 (.service_type)."""
30
+ return getattr(q, "service_type", None) or getattr(q, "service", None)
31
+
32
+
33
def _service_with_max(obs: ObservationModel, *attrs) -> ServiceType | None:
    """Service whose snapshot maximizes *attrs; None when nothing is above 0."""
    snaps = _snapshots(obs)
    if not snaps:
        return None
    # max() picks the first snapshot among ties, matching queue order.
    busiest = max(snaps, key=lambda s: _service_attr(s, *attrs))
    if _service_attr(busiest, *attrs) > 0:
        return _service_name(busiest)
    return None
39
+
40
+
41
+ def _reserve_officers(obs: ObservationModel) -> int:
42
+ pool = obs.officer_pool
43
+ # Phase 2: idle_officers property
44
+ if hasattr(pool, "idle_officers"):
45
+ return int(pool.idle_officers)
46
+ # Phase 1 fallback
47
+ return int(getattr(pool, "reserve_officers", 0))
48
+
49
+
50
+ def _alloc_for(obs: ObservationModel, service: ServiceType) -> int:
51
+ pool = obs.officer_pool
52
+ # Phase 2 uses 'allocated'; Phase 1 used 'allocations'
53
+ alloc_dict = getattr(pool, "allocated", None) or getattr(pool, "allocations", {})
54
+ raw = alloc_dict.get(service)
55
+ if raw is None:
56
+ raw = alloc_dict.get(service.value if hasattr(service, "value") else str(service), 0)
57
+ return int(raw or 0)
58
+
59
+
60
def urgent_first_policy(obs: ObservationModel) -> ActionModel:
    """Target the service with the most urgent pending work.

    NOTE: this name is rebound to ``greedy_sla_policy`` later in this module,
    so the POLICIES table never dispatches to this body directly.
    """
    busiest = _service_with_max(obs, "urgent_pending", "urgent_cases")
    if not busiest:
        return ActionModel(action_type=ActionType.ADVANCE_TIME)
    return ActionModel(action_type=ActionType.REQUEST_MISSING_DOCUMENTS, service_target=busiest)
65
+
66
+
67
def oldest_first_policy(obs: ObservationModel) -> ActionModel:
    """Passive baseline: takes no intervention, only advances simulated time."""
    return ActionModel(action_type=ActionType.ADVANCE_TIME)
69
+
70
+
71
def backlog_clearance_policy(obs: ObservationModel) -> ActionModel:
    """Heuristic that attacks backlog in priority order.

    Decision order (first match wins):
      1. Idle officers available -> assign one to the most backlogged service.
      2. Missing-document bottleneck exists -> request documents for it.
      3. Load imbalance >= 3 between hottest and coldest service, and the
         cold service keeps more than one officer -> shift one officer over.
      4. Otherwise just advance time.
    """
    snaps = _snapshots(obs)

    # Assign idle officers to the most backlogged service
    if _reserve_officers(obs) > 0:
        target = _service_with_max(obs, "total_pending", "active_cases")
        if target:
            return ActionModel(
                action_type=ActionType.ASSIGN_CAPACITY,
                service_target=target,
                capacity_assignment={target.value: 1},
            )

    # Clear missing-doc bottlenecks
    target = _service_with_max(obs, "blocked_missing_docs", "missing_docs_cases")
    if target:
        return ActionModel(action_type=ActionType.REQUEST_MISSING_DOCUMENTS, service_target=target)

    # Reallocate from least-loaded to most-loaded
    if len(snaps) >= 2:
        hot = sorted(snaps, key=lambda s: _service_attr(s, "total_pending", "active_cases"), reverse=True)
        cold = sorted(snaps, key=lambda s: _service_attr(s, "total_pending", "active_cases"))
        hot_svc = _service_name(hot[0])
        cold_svc = _service_name(cold[0])
        hot_load = _service_attr(hot[0], "total_pending", "active_cases")
        cold_load = _service_attr(cold[0], "total_pending", "active_cases")
        # Move an officer only when the gap is meaningful (>= 3 cases) and the
        # donor service would still keep at least one allocated officer.
        if (
            hot_svc and cold_svc and hot_svc != cold_svc
            and hot_load - cold_load >= 3
            and _alloc_for(obs, cold_svc) > 1
        ):
            return ActionModel(
                action_type=ActionType.REALLOCATE_OFFICERS,
                service_target=cold_svc,
                reallocation_delta={cold_svc.value: -1, hot_svc.value: 1},
            )

    return ActionModel(action_type=ActionType.ADVANCE_TIME)
109
+
110
+
111
def greedy_sla_policy(obs: ObservationModel) -> ActionModel:
    """SLA-focused policy: unblock the most SLA-threatened service first.

    Falls back to backlog_clearance_policy when nothing is urgent/breached.
    """
    threatened = _service_with_max(obs, "urgent_pending", "urgent_cases", "breached_cases")
    if not threatened:
        return backlog_clearance_policy(obs)
    return ActionModel(action_type=ActionType.REQUEST_MISSING_DOCUMENTS, service_target=threatened)
117
+
118
+
119
def random_policy(obs: ObservationModel) -> ActionModel:
    """Deterministic stand-in registered under the "random_policy" name.

    FIX: removed a dead ``import random`` — the module was imported but never
    used. NOTE(review): despite its name this baseline always advances time;
    confirm whether genuinely random actions were intended before changing it,
    since recorded baseline scores depend on the current deterministic choice.
    """
    return ActionModel(action_type=ActionType.ADVANCE_TIME)
122
+
123
# Rebind historical names: urgent_first_policy is intentionally shadowed by
# greedy_sla_policy (the def above is kept for reference), and
# fairness_aware_policy reuses backlog_clearance_policy.
urgent_first_policy = greedy_sla_policy
fairness_aware_policy = backlog_clearance_policy

# Name -> policy registry used by run_policy_episode (and API callers).
POLICIES: dict[str, PolicyFn] = {
    "urgent_first": greedy_sla_policy,
    "oldest_first": oldest_first_policy,
    "backlog_clearance": backlog_clearance_policy,
    "random_policy": random_policy,
    "greedy_sla_policy": greedy_sla_policy,
    "fairness_aware_policy": fairness_aware_policy,
}
134
+
135
+
136
def run_policy_episode(task_id: str, policy_name: str, seed: int | None = None, max_steps: int = 500) -> SimpleNamespace:
    """Run a single scripted-policy episode and summarize the outcome.

    FIX: the return annotation previously claimed ``dict`` while the function
    returns a SimpleNamespace (attribute access like ``result.score`` is
    relied on by main.py). The policy lookup now happens before the env is
    built so an unknown name fails fast without side effects.

    Args:
        task_id: Registered task used to instantiate GovWorkflowEnv.
        policy_name: Key into POLICIES; unknown names raise KeyError.
        seed: Optional reset seed (None lets the env choose).
        max_steps: Hard cap on steps before the episode is cut off.

    Returns:
        SimpleNamespace with task/policy identity, reward_sum, grade score,
        grader name, metrics, and step/completion/backlog counters.
    """
    policy = POLICIES[policy_name]  # fail fast before creating the env
    env = GovWorkflowEnv(task_id=task_id)
    obs, _ = env.reset(seed=seed)
    reward_sum = 0.0
    for _ in range(max_steps):
        action = policy(obs)
        obs, reward, terminated, truncated, _ = env.step(action)
        reward_sum += reward
        if terminated or truncated:
            break
    state = env.state()
    grade = grade_episode(state)
    # SimpleNamespace so attribute access (result.score) works in main.py
    return SimpleNamespace(
        task_id=task_id,
        policy=policy_name,
        seed=state.seed,
        reward_sum=round(reward_sum, 4),
        score=float(grade.score),
        grader=grade.grader_name,
        metrics=grade.metrics,
        steps=int(state.total_steps),
        completed=int(state.total_completed),
        backlog=int(state.total_backlog),
    )
app/config.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ── Path bootstrap ─────────────────────────────────────────────────────────────
from __future__ import annotations
from pathlib import Path

# Load .env file if it exists — must happen before Pydantic Settings reads env vars
try:
    from dotenv import load_dotenv
except (ImportError, AttributeError):
    # Keep runtime functional even when python-dotenv is not installed
    # or when a conflicting `dotenv` package is present.
    def load_dotenv(*args, **kwargs):  # type: ignore[no-redef]
        # No-op stand-in: reports "nothing loaded" like the real function.
        return False
# .env is expected two levels up from this file (the repository root).
_ENV_FILE = Path(__file__).resolve().parent.parent / ".env"
load_dotenv(dotenv_path=_ENV_FILE, override=False)
# override=False means real environment variables always win over .env values
# ──────────────────────────────────────────────────────────────────────────────
17
+
18
+ from pydantic import Field
19
+ from pydantic_settings import BaseSettings, SettingsConfigDict
20
+
21
+
22
class ServerSettings(BaseSettings):
    """
    HTTP-server configuration.
    Read from environment variables prefixed SERVER_.
    Example: SERVER_PORT=8080 SERVER_LOG_LEVEL=debug

    Intentionally isolated from EnvSettings — changing server bind
    options never affects simulation behaviour, and vice-versa.
    Both classes are instantiated once at import and treated as
    read-only singletons for the lifetime of the process.
    """

    # 0.0.0.0 binds every interface — required inside containers (HF Spaces).
    host: str = Field("0.0.0.0", description="Bind host")
    port: int = Field(7860, description="Bind port — HF Spaces default is 7860")
    log_level: str = Field(
        "info", description="Uvicorn log level: debug | info | warning | error"
    )
    # Wide-open CORS is deliberate here; tighten for non-embedded deployments.
    cors_origins: list[str] = Field(
        default=["*"],
        description="Allowed CORS origins. '*' is required for HF Spaces embedding.",
    )
    # NOTE: Keep at 1 when using the in-memory session store.
    # Multiple workers do NOT share process memory.
    # Use Redis + a shared store before increasing workers in production.
    workers: int = Field(
        1, description="Uvicorn worker count — keep at 1 for in-memory sessions"
    )

    # extra="ignore" lets unrelated SERVER_* variables coexist without raising.
    model_config = SettingsConfigDict(env_prefix="SERVER_", extra="ignore")
51
+
52
+
53
class EnvSettings(BaseSettings):
    """
    Simulation-environment defaults.
    Read from environment variables prefixed ENV_.
    Example: ENV_DEFAULT_TASK_ID=mixed_urgency_medium ENV_MAX_SESSIONS=50

    Controls the environment kernel only. No effect on network
    binding, logging, or CORS — those belong to ServerSettings.
    """

    default_task_id: str = Field(
        "district_backlog_easy",
        description="Task used when POST /reset is called without an explicit task_id",
    )
    default_seed: int = Field(
        11,
        description="Seed used when POST /reset is called without an explicit seed",
    )
    # Episodes hitting this cap are truncated rather than terminated.
    max_steps_per_episode: int = Field(
        500,
        description="Hard cap on step() calls per session before episode is truncated",
    )
    # Eviction policy (oldest-first) is enforced by the session store, not here.
    max_sessions: int = Field(
        100,
        description="Maximum concurrent in-memory sessions. Oldest is evicted when exceeded.",
    )

    # extra="ignore" lets unrelated ENV_* variables coexist without raising.
    model_config = SettingsConfigDict(env_prefix="ENV_", extra="ignore")
81
+
82
+
83
# ── Singletons ────────────────────────────────────────────────────────────────
# Loaded exactly once at import time. Never mutated at runtime.
# Tests may monkeypatch individual fields after import if needed.
# Instantiation reads the process environment (plus the .env loaded above).
server_settings = ServerSettings()
env_settings = EnvSettings()
app/engine.py ADDED
@@ -0,0 +1,1712 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import random
6
+ import re
7
+ from dataclasses import dataclass
8
+ from enum import Enum
9
+ from pathlib import Path
10
+ from typing import TYPE_CHECKING, Any, Literal, Optional
11
+
12
+ from openai import OpenAI
13
+
14
+ from app.event_engine import EventEngine
15
+ from app.models import (
16
+ ActionModel,
17
+ ActionType,
18
+ ApplicationCase,
19
+ DelayedEffect,
20
+ EventType,
21
+ IntakeChannel,
22
+ InternalSubstate,
23
+ ObservationModel,
24
+ PriorityMode,
25
+ QueueSnapshot,
26
+ ServiceType,
27
+ StageType,
28
+ )
29
+ from app.sector_profiles import get_sector_profile
30
+ from app.state_machine import can_advance
31
+
32
+ if TYPE_CHECKING:
33
+ from app.models import TaskConfig
34
+
35
+
36
# Pool of NVIDIA-hosted model identifiers kept under a "legacy" name —
# presumably consumed by the LLM-inference agent mode; confirm against the
# code that reads it before pruning entries.
LEGACY_NVIDIA_MODEL_POOL = [
    "meta/llama-3.3-70b-instruct",
    "qwen/qwen3-next-80b-a3b-instruct",
    "moonshotai/kimi-k2-instruct-0905",
    "meta/llama-3.1-405b-instruct",
    "deepseek-ai/deepseek-v3.2",
    "qwen/qwq-32b",
    "mistralai/mixtral-8x22b-instruct-v0.1",
    "google/gemma-3-27b-it",
    "microsoft/phi-4-mini-instruct",
    "meta/llama-3.1-8b-instruct",
]

# Process-wide cache of constructed model clients, keyed by a (str, str) pair.
# NOTE(review): the tuple layout (likely base-url/provider + model name) is set
# by whichever code populates this — verify before relying on it.
_MODEL_CACHE: dict[tuple[str, str], Any] = {}
50
+
51
+
52
+ # ─────────────────────────────────────────────
53
+ # DAY RESULT
54
+ # ─────────────────────────────────────────────
55
+
56
+
57
class DayResult:
    """Mutable tally of everything that happened during one simulated day.

    All counters start at zero and are incremented in place by DaySimulator
    as the day's phases (arrivals, unblocking, processing, SLA checks) run.
    """

    def __init__(self) -> None:
        # Case flow
        self.new_arrivals = 0
        self.new_completions = 0
        self.new_sla_breaches = 0
        # Officer capacity accounting (officer-days offered vs. left idle)
        self.total_capacity_days = 0
        self.idle_officer_days = 0
        # Pipeline movement
        self.stage_advances = 0
        self.newly_unblocked_missing = 0
        self.newly_blocked_missing = 0
        self.newly_unblocked_enrich = 0
        self.field_verif_completed = 0
        self.urgent_completed = 0
        # Intake channel + world events
        self.digital_arrivals = 0
        self.active_events: list[EventType] = []
72
+
73
+
74
+ # ─────────────────────────────────────────────
75
+ # DAY SIMULATOR
76
+ # ─────────────────────────────────────────────
77
+
78
+
79
class DaySimulator:
    """
    Core daily simulation engine.

    Accepts TWO calling conventions so both env.py and tests work:

    Convention A (tests):
        DaySimulator(task_config=task, rng=rng, event_engine=engine)

    Convention B (env.py legacy):
        DaySimulator(seed=42, task_config=task, sector_registry={})
        — in this case rng and event_engine are built internally.
    """

    def __init__(
        self,
        task_config: "TaskConfig",
        rng: Optional[random.Random] = None,
        event_engine: Optional[EventEngine] = None,
        seed: Optional[int] = None,
        sector_registry: Optional[dict] = None,
    ) -> None:
        # `task` is a legacy alias for `task_config`; keep both in sync.
        self.task_config = task_config
        self.task = task_config

        # RNG precedence: explicit rng > explicit seed > task_config.seed.
        if rng is not None:
            self.rng = rng
        elif seed is not None:
            self.rng = random.Random(seed)
        else:
            self.rng = random.Random(task_config.seed)

        # Event engine follows the same precedence: injected instance wins,
        # otherwise one is built from the effective seed.
        if event_engine is not None:
            self.event_engine = event_engine
        else:
            _seed = seed if seed is not None else task_config.seed
            self.event_engine = EventEngine(
                seed=_seed,
                scenario_mode=task_config.scenario_mode,
            )

        self.sector_registry = sector_registry or {}
        # Internal case bookkeeping (used when the simulator owns the lists).
        self.active_cases: list[ApplicationCase] = []
        self.pending_effects: list[DelayedEffect] = []
        # Monotonic counter used to mint unique case ids in _new_case().
        self.case_counter: int = 0

    def simulate_day(
        self,
        day: int,
        active_cases: list[ApplicationCase],
        completed_cases: list[ApplicationCase],
        priority_mode: PriorityMode,
        officer_allocations: dict,
    ) -> DayResult:
        """Advance the world by one day, mutating the caller's case lists.

        Phase order matters (events → arrivals → unblocking → processing →
        SLA aging) and is relied on by tests; do not reorder.
        Returns a DayResult with the day's counters.
        """
        result = DayResult()

        # 1. World events for today reshape the day's effective parameters.
        events = self.event_engine.get_events_for_day(day, self.task_config)
        params = self.event_engine.apply_events(events, self.task_config)
        result.active_events = list(params.active_events)

        # 2. New applications arrive (possibly boosted/damped by events).
        new_cases = self._spawn_arrivals(day, params, result)
        active_cases.extend(new_cases)

        # 3. Events may temporarily remove officers from the allocation.
        effective_alloc = self._apply_officer_reduction(officer_allocations, params)

        # 4. Resolve cases whose external waits (field visits, documents)
        #    finish today, before capacity is spent.
        self._resolve_field_verification(day, active_cases, result)
        self._resolve_doc_requests(day, active_cases, result)

        newly_completed: list[ApplicationCase] = []

        # 5. Spend officer capacity per service, in priority order.
        for service in self.task_config.enabled_services:
            # Allocation may be keyed by the enum or by its string value.
            capacity = effective_alloc.get(service, effective_alloc.get(service.value, 0))
            result.total_capacity_days += int(capacity)

            service_cases = [
                c
                for c in active_cases
                if c.service_type == service and not c.completed and not c.rejected
            ]

            if not service_cases:
                # Officers assigned to an empty queue are counted as idle.
                result.idle_officer_days += int(capacity)
                continue

            sorted_cases = self._sort_queue(service_cases, priority_mode)

            for case in sorted_cases:
                if capacity <= 0:
                    break

                # Local import avoids a circular dependency at module load.
                from app.state_machine import advance_case

                advanced, final = advance_case(case, day)

                if advanced:
                    # Each stage advance consumes one officer-day.
                    capacity -= 1
                    result.stage_advances += 1
                    if final:
                        newly_completed.append(case)
                        if case.is_urgent:
                            result.urgent_completed += 1

        # 6. Move finished cases out of the active list (in place, so the
        #    caller's list object is preserved).
        if newly_completed:
            done_ids = {c.case_id for c in newly_completed}
            still_active = [c for c in active_cases if c.case_id not in done_ids]
            active_cases.clear()
            active_cases.extend(still_active)
            completed_cases.extend(newly_completed)
            result.new_completions = len(newly_completed)

        # 7. Age every still-active case and flag new SLA breaches once.
        for case in active_cases:
            case.current_day = day
            case.waiting_days += 1
            if day > case.sla_deadline_day and not case.sla_breached:
                case.sla_breached = True
                result.new_sla_breaches += 1

        return result

    def _apply_officer_reduction(self, allocations: dict, params: Any) -> dict:
        """Return a copy of `allocations` with `params.officer_reduction`
        officers removed, always taking from the best-staffed service first."""
        reduction = int(getattr(params, "officer_reduction", 0))
        if reduction <= 0:
            return dict(allocations)

        effective = dict(allocations)
        for _ in range(reduction):
            target = max(effective, key=lambda k: effective[k], default=None)
            if target is None or effective[target] <= 0:
                break
            effective[target] -= 1
        return effective

    def _spawn_arrivals(
        self,
        day: int,
        params: Any,
        result: DayResult,
    ) -> list[ApplicationCase]:
        """Create today's new cases per service.

        The fractional part of the (event-adjusted) arrival rate is realised
        stochastically, so expected arrivals match the rate over time.
        """
        new_cases: list[ApplicationCase] = []

        for service in self.task_config.enabled_services:
            base_rate = self.task_config.arrival_rate_per_day.get(
                service,
                self.task_config.arrival_rate_per_day.get(service.value, 0.0),
            )
            effective_rate = float(base_rate) * float(getattr(params, "arrival_multiplier", 1.0))
            count = int(effective_rate)
            # Bernoulli draw for the fractional remainder of the rate.
            if self.rng.random() < (effective_rate - count):
                count += 1

            for _ in range(count):
                case = self._new_case(service, day, params)
                new_cases.append(case)
                if case.intake_channel == IntakeChannel.DIGITAL:
                    result.digital_arrivals += 1

        result.new_arrivals = len(new_cases)
        return new_cases

    def _new_case(self, service: ServiceType, day: int, params: Any) -> ApplicationCase:
        """Mint one new ApplicationCase for `service`, rolling its SLA window,
        intake channel, missing-docs status, field-verification need, and
        urgency from the sector profile plus event modifiers."""
        self.case_counter += 1
        profile = get_sector_profile(service)

        # Events can stretch/shrink the SLA window multiplicatively.
        sla_days = int(profile.sla_days * getattr(params, "sla_window_multiplier", 1.0))
        sla_deadline_day = day + sla_days

        digital_ratio = self.task_config.digital_intake_ratio
        channel = (
            IntakeChannel.DIGITAL
            if self.rng.random() < digital_ratio
            else IntakeChannel.PAPER
        )

        # Missing-docs probability: profile default, unless the task overrides
        # it (override may be keyed by enum or by string value).
        base_missing = profile.missing_docs_probability
        override = (self.task_config.missing_docs_probability_override or {}).get(
            service,
            (self.task_config.missing_docs_probability_override or {}).get(service.value),
        )
        if override is not None:
            base_missing = override

        # Channel-specific defect rate scales any event-driven defect boost.
        defect_rate = (
            profile.doc_defect_rate_digital
            if channel == IntakeChannel.DIGITAL
            else profile.doc_defect_rate_paper
        )
        eff_missing = min(
            1.0,
            base_missing + getattr(params, "doc_defect_rate_boost", 0.0) * defect_rate,
        )
        has_missing = self.rng.random() < eff_missing

        # Field-verification probability with optional task override.
        base_fv = profile.field_verification_probability
        fv_override = (self.task_config.field_verification_probability_override or {}).get(
            service,
            (self.task_config.field_verification_probability_override or {}).get(service.value),
        )
        if fv_override is not None:
            base_fv = fv_override

        eff_fv = min(1.0, base_fv + getattr(params, "field_verification_boost", 0.0))
        has_fv = self.rng.random() < eff_fv
        field_completion_day = day + profile.field_verification_days if has_fv else None

        # Local import avoids a circular dependency at module load.
        from app.models import UrgencyProfile

        # Urgency odds depend on the sector's urgency profile:
        # HIGH → 20% urgent, MODERATE → 8%, anything else → never urgent.
        urgency_profile = profile.urgency_profile
        is_urgent = (
            urgency_profile == UrgencyProfile.HIGH and self.rng.random() < 0.20
        ) or (
            urgency_profile == UrgencyProfile.MODERATE and self.rng.random() < 0.08
        )

        return ApplicationCase(
            case_id=f"case-{self.case_counter:06d}",
            service_type=service,
            arrival_day=day,
            current_day=day,
            sla_deadline_day=sla_deadline_day,
            intake_channel=channel,
            # Cases with missing documents start blocked; others go straight
            # to pre-scrutiny.
            internal_substate=(
                InternalSubstate.BLOCKED_MISSING_DOCS
                if has_missing
                else InternalSubstate.PRE_SCRUTINY
            ),
            public_stage=StageType.SUBMISSION,
            is_urgent=is_urgent,
            has_missing_docs=has_missing,
            field_verification_required=has_fv,
            field_verification_completion_day=field_completion_day,
        )

    def _resolve_field_verification(
        self,
        day: int,
        active_cases: list[ApplicationCase],
        result: DayResult,
    ) -> None:
        """Unblock cases whose scheduled field verification finishes today
        (or earlier), returning them to pre-scrutiny."""
        for case in active_cases:
            if (
                case.internal_substate == InternalSubstate.FIELD_VERIFICATION_PENDING
                and case.field_verification_completion_day is not None
                and day >= case.field_verification_completion_day
            ):
                case.internal_substate = InternalSubstate.PRE_SCRUTINY
                case.field_verification_completion_day = None
                result.field_verif_completed += 1

    def _resolve_doc_requests(
        self,
        day: int,
        active_cases: list[ApplicationCase],
        result: DayResult,
    ) -> None:
        """Unblock cases whose requested documents arrive today (or earlier);
        doc_resolution_day is set elsewhere when documents are requested."""
        for case in active_cases:
            if (
                case.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
                and case.doc_resolution_day is not None
                and day >= case.doc_resolution_day
            ):
                case.internal_substate = InternalSubstate.PRE_SCRUTINY
                case.doc_resolution_day = None
                result.newly_unblocked_missing += 1

    def _sort_queue(
        self,
        cases: list[ApplicationCase],
        priority_mode: PriorityMode,
    ) -> list[ApplicationCase]:
        """Order advance-eligible cases for processing under `priority_mode`.

        Blocked cases (per can_advance) are dropped entirely. Unrecognised
        modes fall through to the default ordering at the bottom, which only
        prioritises SLA risk above the 0.8 threshold.
        """
        eligible = [c for c in cases if can_advance(c)]

        if priority_mode == PriorityMode.URGENT_FIRST:
            # Urgent first, then highest SLA risk, then oldest arrival.
            return sorted(
                eligible,
                key=lambda c: (not c.is_urgent, -c.sla_risk, c.arrival_day),
            )

        if priority_mode == PriorityMode.OLDEST_FIRST:
            return sorted(eligible, key=lambda c: c.arrival_day)

        if priority_mode == PriorityMode.BACKLOG_CLEARANCE:
            # SLA risk dominates, urgency breaks ties, then age.
            return sorted(
                eligible,
                key=lambda c: (-c.sla_risk, not c.is_urgent, c.arrival_day),
            )

        return sorted(
            eligible,
            key=lambda c: (
                -c.sla_risk if c.sla_risk > 0.8 else 0,
                not c.is_urgent,
                c.arrival_day,
            ),
        )

    def build_queue_snapshot(
        self,
        service: ServiceType,
        active_cases: list[ApplicationCase],
        day: int,
    ) -> QueueSnapshot:
        """Aggregate the live cases of one service into a QueueSnapshot.

        `total_completed_today` is intentionally left at 0 here; per-day
        completion counts live in DayResult, not in the snapshot.
        """
        cases = [
            c
            for c in active_cases
            if c.service_type == service and not c.completed and not c.rejected
        ]

        # Seed every public stage with 0 so consumers see a stable key set.
        stage_counts = {s.value: 0 for s in StageType}
        for c in cases:
            stage_counts[c.public_stage.value] = stage_counts.get(c.public_stage.value, 0) + 1

        oldest_age = max((c.waiting_days for c in cases), default=0)
        avg_wait = sum(c.waiting_days for c in cases) / len(cases) if cases else 0.0
        sla_risk = sum(c.sla_risk for c in cases) / len(cases) if cases else 0.0

        return QueueSnapshot(
            service_type=service,
            public_stage_counts=stage_counts,
            total_pending=len(cases),
            total_completed_today=0,
            total_sla_breached=sum(1 for c in cases if c.sla_breached),
            urgent_pending=sum(1 for c in cases if c.is_urgent),
            blocked_missing_docs=sum(
                1
                for c in cases
                if c.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
            ),
            field_verification_pending=sum(
                1
                for c in cases
                if c.internal_substate == InternalSubstate.FIELD_VERIFICATION_PENDING
            ),
            oldest_case_age_days=oldest_age,
            avg_waiting_days=round(avg_wait, 2),
            current_sla_risk=round(min(1.0, sla_risk), 3),
        )
+
416
+
417
+ # ─────────────────────────────────────────────
418
+ # HIGH-LEVEL SIMULATION ORCHESTRATION
419
+ # ─────────────────────────────────────────────
420
+
421
+
422
class SimulationAgentMode(str, Enum):
    """How actions are chosen during a high-level simulation run."""

    BASELINE_POLICY = "baseline_policy"  # scripted heuristic policy
    LLM_INFERENCE = "llm_inference"      # actions proposed by an LLM
    TRAINED_RL = "trained_rl"            # actions from a trained RL policy
+
427
+
428
@dataclass
class SimulationRun:
    """Summary record of one completed simulation episode."""

    task_id: str
    agent_mode: SimulationAgentMode
    seed: int
    total_reward: float            # sum of per-step rewards over the episode
    score: float                   # grader-assigned episode score
    grader_name: str               # which grader produced `score`
    summary: dict[str, Any]        # aggregate episode metrics
    trace: list[dict[str, Any]]    # per-step trace entries for replay/debug
+
439
+
440
+ def _dedupe(values: list[str | None]) -> list[str]:
441
+ out: list[str] = []
442
+ for value in values:
443
+ if value is None:
444
+ continue
445
+ v = str(value).strip()
446
+ if v and v not in out:
447
+ out.append(v)
448
+ return out
449
+
450
+
451
+ def _env_csv_list(name: str) -> list[str]:
452
+ raw = os.getenv(name, "").strip()
453
+ if not raw:
454
+ return []
455
+ return [x.strip() for x in raw.split(",") if x.strip()]
456
+
457
+
458
+ def _extract_json_object(text: str) -> dict[str, Any] | None:
459
+ text = (text or "").strip()
460
+ if not text:
461
+ return None
462
+ try:
463
+ parsed = json.loads(text)
464
+ if isinstance(parsed, dict):
465
+ return parsed
466
+ except json.JSONDecodeError:
467
+ pass
468
+
469
+ match = re.search(r"\{.*\}", text, flags=re.DOTALL)
470
+ if not match:
471
+ return None
472
+ try:
473
+ parsed = json.loads(match.group(0))
474
+ except json.JSONDecodeError:
475
+ return None
476
+ return parsed if isinstance(parsed, dict) else None
477
+
478
+
479
def _enum_service(value: Any) -> ServiceType | None:
    """Coerce a raw value (enum, string, or empty) into a ServiceType.

    Returns None for None/"" inputs and for strings that do not name a
    valid ServiceType member.
    """
    if value is None or value == "":
        return None
    if isinstance(value, ServiceType):
        return value
    try:
        parsed = ServiceType(str(value))
    except Exception:
        return None
    return parsed
+
489
+
490
def _enum_priority(value: Any) -> PriorityMode | None:
    """Coerce a raw value (enum, string, or empty) into a PriorityMode.

    Returns None for None/"" inputs and for strings that do not name a
    valid PriorityMode member.
    """
    if value is None or value == "":
        return None
    if isinstance(value, PriorityMode):
        return value
    try:
        parsed = PriorityMode(str(value))
    except Exception:
        return None
    return parsed
+
500
+
501
def _action_model_from_kwargs(action_type: ActionType, **kwargs: Any) -> ActionModel:
    """Build an ActionModel from loosely-typed kwargs, tolerating schema drift.

    For each action type a list of candidate field spellings is tried in
    order (newer field names first, legacy/aggregate shapes after), and the
    first candidate that validates as an ActionModel wins. If nothing
    validates — or required fields are missing — the safe fallback is an
    ADVANCE_TIME action, never an exception.
    """
    # Normalise the loosely-typed inputs up front.
    service = _enum_service(kwargs.get("service") or kwargs.get("service_target"))
    target_service = _enum_service(kwargs.get("target_service"))
    escalation_target = _enum_service(kwargs.get("escalation_target"))
    priority_mode = _enum_priority(kwargs.get("priority_mode"))
    officer_delta = kwargs.get("officer_delta")
    case_id = kwargs.get("case_id")

    candidates: list[dict[str, Any]] = []

    if action_type == ActionType.ADVANCE_TIME:
        candidates.append({"action_type": action_type})

    elif action_type == ActionType.SET_PRIORITY_MODE:
        candidates.extend(
            [
                {"action_type": action_type, "priority_mode": priority_mode},
            ]
        )

    elif action_type == ActionType.ASSIGN_CAPACITY:
        if service is not None:
            # Deltas are clamped to at least 1 officer.
            delta = max(1, int(officer_delta or 1))
            candidates.extend(
                [
                    {"action_type": action_type, "service": service, "officer_delta": delta},
                    {"action_type": action_type, "service_target": service, "officer_delta": delta},
                    # Legacy aggregate shape: mapping of service value -> delta.
                    {
                        "action_type": action_type,
                        "capacity_assignment": {service.value: delta},
                    },
                ]
            )

    elif action_type == ActionType.REQUEST_MISSING_DOCUMENTS:
        if service is not None:
            candidates.extend(
                [
                    {"action_type": action_type, "service": service},
                    {"action_type": action_type, "service_target": service},
                ]
            )

    elif action_type == ActionType.ESCALATE_SERVICE:
        # An explicit escalation target takes precedence over `service`.
        svc = escalation_target or service
        candidates.extend(
            [
                {"action_type": action_type, "service": svc, "case_id": case_id},
                {"action_type": action_type, "service_target": svc, "case_id": case_id},
                {"action_type": action_type, "escalation_target": svc, "case_id": case_id},
            ]
        )

    elif action_type == ActionType.REALLOCATE_OFFICERS:
        if service is not None and target_service is not None:
            delta = max(1, int(officer_delta or 1))
            candidates.extend(
                [
                    {
                        "action_type": action_type,
                        "service": service,
                        "target_service": target_service,
                        "officer_delta": delta,
                    },
                    # Legacy aggregate shape: signed per-service delta map
                    # (negative = source, positive = destination).
                    {
                        "action_type": action_type,
                        "reallocation_delta": {
                            service.value: -delta,
                            target_service.value: delta,
                        },
                    },
                ]
            )

    # First candidate that the model accepts wins; validation errors of any
    # kind simply move on to the next spelling.
    for candidate in candidates:
        try:
            return ActionModel(**candidate)
        except Exception:
            continue

    # Nothing validated — fall back to the always-legal no-op action.
    return ActionModel(action_type=ActionType.ADVANCE_TIME)
+
583
+
584
def _coerce_action(payload: dict[str, Any] | None) -> ActionModel:
    """Turn an untrusted dict (e.g. parsed LLM output) into an ActionModel.

    Accepts both snake_case and camelCase field spellings, plus two legacy
    aggregate shapes (`capacity_assignment`, `reallocation_delta`). Any
    unparseable input degrades to a safe ADVANCE_TIME action.
    """
    if not payload:
        return ActionModel(action_type=ActionType.ADVANCE_TIME)

    raw_action_type = payload.get("action_type") or payload.get("actionType")
    try:
        action_type = ActionType(str(raw_action_type))
    except Exception:
        # Unknown action type — fail safe with the no-op action.
        return ActionModel(action_type=ActionType.ADVANCE_TIME)

    # Accept snake_case first, then camelCase fallbacks.
    service = payload.get("service") or payload.get("service_target") or payload.get("serviceTarget")
    target_service = payload.get("target_service") or payload.get("targetService")
    escalation_target = payload.get("escalation_target") or payload.get("escalationTarget")
    priority_mode = payload.get("priority_mode") or payload.get("priorityMode")
    officer_delta = payload.get("officer_delta") or payload.get("officerDelta")
    case_id = payload.get("case_id") or payload.get("caseId")

    # Legacy shape: {"capacity_assignment": {service: delta}} — take the
    # first entry as the (service, delta) pair.
    if action_type == ActionType.ASSIGN_CAPACITY and not service:
        assignment = payload.get("capacity_assignment") or {}
        if isinstance(assignment, dict) and assignment:
            service, officer_delta = next(iter(assignment.items()))

    # Legacy shape: {"reallocation_delta": {src: -n, dst: +n}} — the first
    # negative entry is the source, the first positive one the destination.
    if action_type == ActionType.REALLOCATE_OFFICERS and (not service or not target_service):
        delta_map = payload.get("reallocation_delta") or {}
        if isinstance(delta_map, dict) and len(delta_map) >= 2:
            negatives = [k for k, v in delta_map.items() if int(v) < 0]
            positives = [k for k, v in delta_map.items() if int(v) > 0]
            if negatives and positives:
                service = negatives[0]
                target_service = positives[0]
                officer_delta = abs(int(delta_map[service]))

    # Field-level validation and the final fallback happen downstream.
    return _action_model_from_kwargs(
        action_type,
        service=service,
        target_service=target_service,
        escalation_target=escalation_target,
        priority_mode=priority_mode,
        officer_delta=officer_delta,
        case_id=case_id,
    )
+
626
+
627
+ def _recommended_min_steps(task_id: str) -> int:
628
+ if task_id == "cross_department_hard":
629
+ return 70
630
+ if task_id == "mixed_urgency_medium":
631
+ return 60
632
+ return 40
633
+
634
+
635
+ def _queue_snapshot_iter(obs: ObservationModel) -> list[Any]:
636
+ raw = getattr(obs, "queue_snapshots", [])
637
+ if isinstance(raw, dict):
638
+ return list(raw.values())
639
+ if isinstance(raw, list):
640
+ return list(raw)
641
+ try:
642
+ return list(raw)
643
+ except Exception:
644
+ return []
645
+
646
+
647
def _queue_service(q: Any) -> ServiceType | None:
    """Resolve a snapshot's service from either `service` or `service_type`."""
    return _enum_service(getattr(q, "service", None) or getattr(q, "service_type", None))
649
+
650
+
651
+ def _queue_active_cases(q: Any) -> int:
652
+ return int(getattr(q, "active_cases", getattr(q, "total_pending", 0)) or 0)
653
+
654
+
655
+ def _queue_missing_docs(q: Any) -> int:
656
+ return int(getattr(q, "missing_docs_cases", getattr(q, "blocked_missing_docs", 0)) or 0)
657
+
658
+
659
+ def _queue_urgent_cases(q: Any) -> int:
660
+ return int(getattr(q, "urgent_cases", getattr(q, "urgent_pending", 0)) or 0)
661
+
662
+
663
+ def _queue_breached_cases(q: Any) -> int:
664
+ return int(getattr(q, "breached_cases", getattr(q, "total_sla_breached", 0)) or 0)
665
+
666
+
667
+ def _queue_avg_age(q: Any) -> float:
668
+ if hasattr(q, "avg_age_days"):
669
+ return float(getattr(q, "avg_age_days") or 0.0)
670
+ if hasattr(q, "oldest_case_age_days"):
671
+ return float(getattr(q, "oldest_case_age_days") or 0.0)
672
+ return float(getattr(q, "avg_waiting_days", 0.0) or 0.0)
673
+
674
+
675
def _queue_rows(obs: ObservationModel) -> list[dict[str, Any]]:
    """Flatten queue snapshots into plain dict rows with canonical keys.

    Snapshots whose service cannot be resolved are skipped.
    """
    return [
        {
            "service": service.value,
            "active_cases": _queue_active_cases(q),
            "missing_docs_cases": _queue_missing_docs(q),
            "urgent_cases": _queue_urgent_cases(q),
            "breached_cases": _queue_breached_cases(q),
            "avg_age_days": _queue_avg_age(q),
        }
        for q in _queue_snapshot_iter(obs)
        if (service := _queue_service(q)) is not None
    ]
692
+
693
+
694
+ def _pool_allocations(obs: ObservationModel) -> dict[Any, Any]:
695
+ pool = getattr(obs, "officer_pool", None)
696
+ if pool is None:
697
+ return {}
698
+ return getattr(pool, "allocations", getattr(pool, "allocated", {})) or {}
699
+
700
+
701
+ def _reserve_officers(obs: ObservationModel) -> int:
702
+ pool = getattr(obs, "officer_pool", None)
703
+ if pool is None:
704
+ return 0
705
+ for name in ("reserve_officers", "idle_officers", "available_officers"):
706
+ if hasattr(pool, name):
707
+ try:
708
+ return int(getattr(pool, name) or 0)
709
+ except Exception:
710
+ pass
711
+ return 0
712
+
713
+
714
def _alloc_for(obs: ObservationModel, service: ServiceType) -> int:
    """Officers currently allocated to `service`.

    The allocation map may be keyed by the enum itself or by its string
    value; None/missing entries count as 0.
    """
    allocs = _pool_allocations(obs)
    for key in (service, service.value):
        value = allocs.get(key)
        if value is not None:
            return int(value or 0)
    return 0
720
+
721
+
722
def _top_backlog_service(
    obs: ObservationModel,
    *,
    exclude: ServiceType | None = None,
) -> ServiceType | None:
    """Service whose queue carries the heaviest load, or None when empty.

    Load = active cases + 2x SLA-breached + urgent cases; average age
    breaks ties. `exclude` removes one service from consideration.
    """
    def load(q: Any) -> tuple[Any, Any]:
        pressure = (
            _queue_active_cases(q)
            + 2 * _queue_breached_cases(q)
            + _queue_urgent_cases(q)
        )
        return (pressure, _queue_avg_age(q))

    candidates = [
        q
        for q in _queue_snapshot_iter(obs)
        if (svc := _queue_service(q)) is not None and svc != exclude
    ]
    if not candidates:
        return None
    return _queue_service(max(candidates, key=load))
743
+
744
+
745
def _service_with_missing_docs(obs: ObservationModel) -> ServiceType | None:
    """Service with the largest missing-documents backlog, or None.

    Ties on the missing-docs count are broken by total active cases.
    """
    blocked = [q for q in _queue_snapshot_iter(obs) if _queue_missing_docs(q) > 0]
    if not blocked:
        return None
    worst = max(blocked, key=lambda q: (_queue_missing_docs(q), _queue_active_cases(q)))
    return _queue_service(worst)
751
+
752
+
753
def _service_with_officers(obs: ObservationModel) -> ServiceType | None:
    """Service currently holding the most allocated officers, or None.

    Returns None when no snapshot resolves to a service or when even the
    best-staffed service has zero officers.
    """
    services = [
        s for s in (_queue_service(q) for q in _queue_snapshot_iter(obs)) if s is not None
    ]
    if not services:
        return None
    best = max(services, key=lambda s: _alloc_for(obs, s))
    return best if _alloc_for(obs, best) > 0 else None
760
+
761
+
762
def _compute_action_mask(obs: ObservationModel) -> dict[ActionType, bool]:
    """Return which action types are currently sensible for this observation.

    SET_PRIORITY_MODE and ADVANCE_TIME are always allowed; the rest depend
    on current reserves, backlogs, missing documents, escalation budget,
    and staffing. Used to constrain agent choices / prompt hints.
    """
    has_reserve = _reserve_officers(obs) > 0
    snapshots = _queue_snapshot_iter(obs)
    has_missing = any(_queue_missing_docs(q) > 0 for q in snapshots)
    has_backlog = any(_queue_active_cases(q) > 0 for q in snapshots)
    has_budget = int(getattr(obs, "escalation_budget_remaining", 0) or 0) > 0
    # A snapshot counts as "staffed" when its service resolves and has >0 officers.
    staffed_services = [q for q in snapshots if (_queue_service(q) is not None and _alloc_for(obs, _queue_service(q)) > 0)]
    # Reallocation needs at least one staffed source and a second queue to move to.
    can_reallocate = len(staffed_services) >= 1 and len(snapshots) >= 2
    return {
        ActionType.SET_PRIORITY_MODE: True,
        ActionType.ADVANCE_TIME: True,
        ActionType.ASSIGN_CAPACITY: has_reserve and has_backlog,
        ActionType.REQUEST_MISSING_DOCUMENTS: has_missing,
        ActionType.ESCALATE_SERVICE: has_budget and has_backlog,
        ActionType.REALLOCATE_OFFICERS: can_reallocate,
    }
778
+
779
+
780
def _masked_action_type_hints(obs: ObservationModel) -> tuple[list[str], list[str]]:
    """Split the current action mask into (allowed, blocked) action-type names."""
    allowed: list[str] = []
    blocked: list[str] = []
    for action_type, permitted in _compute_action_mask(obs).items():
        bucket = allowed if permitted else blocked
        bucket.append(action_type.value)
    return allowed, blocked
785
+
786
+
787
def _best_high_impact_action(obs: ObservationModel) -> tuple[ActionModel, str]:
    """Pick the strongest heuristic action for the current observation.

    Preference order: (1) assign reserve capacity, (2) clear missing
    documents, (3) escalate the highest-risk queue, (4) reallocate one
    officer, (5) advance time. Returns the chosen action plus a short
    human-readable rationale used in repair/fallback notes.
    """
    top_backlog = _top_backlog_service(obs)
    top_missing = _service_with_missing_docs(obs)

    # 1) Free reserve officers are the cheapest lever: point one at the worst backlog.
    if _reserve_officers(obs) > 0 and top_backlog is not None:
        return (
            _action_model_from_kwargs(
                ActionType.ASSIGN_CAPACITY,
                service=top_backlog,
                officer_delta=1,
            ),
            "high-impact: assign reserve capacity to top backlog service",
        )

    # 2) Missing documents block case progress regardless of staffing.
    if top_missing is not None:
        return (
            _action_model_from_kwargs(
                ActionType.REQUEST_MISSING_DOCUMENTS,
                service=top_missing,
            ),
            "high-impact: clear missing-document bottleneck",
        )

    # 3) Spend remaining escalation budget on the queue under the most SLA pressure
    #    (breached cases first, then active, then urgent).
    if int(getattr(obs, "escalation_budget_remaining", 0) or 0) > 0:
        hot = sorted(
            _queue_snapshot_iter(obs),
            key=lambda q: (_queue_breached_cases(q), _queue_active_cases(q), _queue_urgent_cases(q)),
            reverse=True,
        )
        if hot and (_queue_breached_cases(hot[0]) > 0 or _queue_active_cases(hot[0]) > 0):
            service = _queue_service(hot[0])
            if service is not None:
                return (
                    _action_model_from_kwargs(
                        ActionType.ESCALATE_SERVICE,
                        service=service,
                    ),
                    "high-impact: escalate highest SLA-risk service",
                )

    # 4) No reserve or budget left: shift one officer from the best-staffed
    #    service toward the worst backlog elsewhere.
    source = _service_with_officers(obs)
    if source is not None and _alloc_for(obs, source) > 0:
        target = _top_backlog_service(obs, exclude=source)
        if target is not None and target != source:
            return (
                _action_model_from_kwargs(
                    ActionType.REALLOCATE_OFFICERS,
                    service=source,
                    target_service=target,
                    officer_delta=1,
                ),
                "high-impact: reallocate one officer toward highest backlog",
            )

    # 5) Nothing productive available — let the simulation clock advance.
    return ActionModel(action_type=ActionType.ADVANCE_TIME), "fallback: no high-impact action available"
842
+
843
+
844
def _repair_action_for_observation(
    action: ActionModel,
    obs: ObservationModel,
) -> tuple[ActionModel, str | None]:
    """Validate and normalize an agent-proposed action against the observation.

    Returns ``(action, None)`` when the action is usable as-is, or a
    ``(replacement, note)`` pair where ``note`` explains what was repaired.
    Actions whose type is masked, or whose payload cannot be made valid, are
    swapped for the best heuristic action from _best_high_impact_action.
    """
    mask = _compute_action_mask(obs)
    at = action.action_type

    # Hard-masked action types are replaced outright with a heuristic pick.
    if not bool(mask.get(at, True)):
        fallback, why = _best_high_impact_action(obs)
        return fallback, f"masked {at.value}; {why}"

    # ADVANCE_TIME carries no payload, so there is nothing to repair.
    if at == ActionType.ADVANCE_TIME:
        return action, None

    if at == ActionType.SET_PRIORITY_MODE:
        # Only repair needed here is a missing mode value.
        if getattr(action, "priority_mode", None) is None:
            return (
                _action_model_from_kwargs(
                    ActionType.SET_PRIORITY_MODE,
                    priority_mode=PriorityMode.BACKLOG_CLEARANCE,
                ),
                "missing priority_mode, defaulted to backlog_clearance",
            )
        return action, None

    if at == ActionType.ASSIGN_CAPACITY:
        reserve = _reserve_officers(obs)
        if reserve <= 0:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"reserve officers exhausted; {why}"
        # Accept either `service` or the alternate `service_target` field the
        # agent may have emitted; fall back to the worst-backlog service.
        service = _enum_service(getattr(action, "service", None) or getattr(action, "service_target", None)) or _top_backlog_service(obs)
        if service is None:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"no service available for assign_capacity; {why}"
        # Clamp the requested officer count to [1, available reserve].
        delta = max(1, int(getattr(action, "officer_delta", 1) or 1))
        delta = min(delta, reserve)
        repaired = _action_model_from_kwargs(
            ActionType.ASSIGN_CAPACITY,
            service=service,
            officer_delta=delta,
        )
        return repaired, "repaired assign_capacity payload"

    if at == ActionType.REQUEST_MISSING_DOCUMENTS:
        service = _enum_service(getattr(action, "service", None) or getattr(action, "service_target", None)) or _service_with_missing_docs(obs)
        if service is None:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"no missing-doc queue available; {why}"
        repaired = _action_model_from_kwargs(
            ActionType.REQUEST_MISSING_DOCUMENTS,
            service=service,
        )
        return repaired, "repaired request_missing_documents payload"

    if at == ActionType.ESCALATE_SERVICE:
        if int(getattr(obs, "escalation_budget_remaining", 0) or 0) <= 0:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"escalation budget exhausted; {why}"
        # Try every field name an agent may have used for the target before
        # defaulting to the worst-backlog service.
        service = (
            _enum_service(getattr(action, "service", None))
            or _enum_service(getattr(action, "service_target", None))
            or _enum_service(getattr(action, "escalation_target", None))
            or _top_backlog_service(obs)
        )
        case_id = getattr(action, "case_id", None)
        # A case-level escalation (case_id only) is still acceptable.
        if service is None and case_id is None:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"no escalation target available; {why}"
        repaired = _action_model_from_kwargs(
            ActionType.ESCALATE_SERVICE,
            service=service,
            case_id=case_id,
        )
        return repaired, "repaired escalate_service payload"

    if at == ActionType.REALLOCATE_OFFICERS:
        source = _enum_service(getattr(action, "service", None) or getattr(action, "service_target", None)) or _service_with_officers(obs)
        if source is None:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"no staffed source service; {why}"
        source_alloc = _alloc_for(obs, source)
        if source_alloc <= 0:
            # Requested source has nobody to move; retry with the best-staffed service.
            source = _service_with_officers(obs)
            source_alloc = _alloc_for(obs, source) if source is not None else 0
        if source is None or source_alloc <= 0:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"insufficient source officers; {why}"

        # Target must exist and differ from the source; prefer the worst backlog.
        target = _enum_service(getattr(action, "target_service", None))
        if target is None or target == source:
            target = _top_backlog_service(obs, exclude=source)
        if target is None or target == source:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"missing distinct target_service; {why}"

        # Clamp the move size to what the source actually has.
        delta = max(1, int(getattr(action, "officer_delta", 1) or 1))
        delta = min(delta, source_alloc)
        repaired = _action_model_from_kwargs(
            ActionType.REALLOCATE_OFFICERS,
            service=source,
            target_service=target,
            officer_delta=delta,
        )
        return repaired, "repaired reallocate_officers payload"

    # Unknown-but-unmasked action types pass through untouched.
    return action, None
950
+
951
+
952
def _model_label_for_mode(agent_mode: SimulationAgentMode) -> str:
    """Map an agent mode to the label reported in logs as the 'model' name.

    LLM inference resolves to the MODEL_NAME env var (default "llm_inference").
    """
    fixed_labels = {
        SimulationAgentMode.BASELINE_POLICY: "baseline_policy",
        SimulationAgentMode.TRAINED_RL: "trained_rl",
    }
    return fixed_labels.get(agent_mode, os.getenv("MODEL_NAME", "llm_inference"))
958
+
959
+
960
+ def _log_step_line(step_row: dict[str, Any]) -> str:
961
+ done = "true" if bool(step_row.get("done")) else "false"
962
+ error = step_row.get("last_action_error") or "null"
963
+ action = json.dumps(step_row.get("action_payload", {}), separators=(",", ":"))
964
+ source = step_row.get("decision_source") or "unknown"
965
+ model = step_row.get("model_used") or "null"
966
+ repair = step_row.get("repair_note") or "null"
967
+ switch_note = step_row.get("switch_note") or "null"
968
+ return (
969
+ f"[STEP] step={step_row.get('step', 0)} action={action} "
970
+ f"reward={float(step_row.get('reward', 0.0)):.2f} done={done} "
971
+ f"error={error} source={source} model={model} repair={repair} switch={switch_note}"
972
+ )
973
+
974
+
975
+ def _resolve_model_path_or_raise(model_path: str) -> str:
976
+ p = Path(model_path).expanduser()
977
+ if not p.is_absolute():
978
+ p = (Path.cwd() / p).resolve()
979
+
980
+ if p.is_dir():
981
+ candidates = [
982
+ p / "best_model.zip",
983
+ p / "model.zip",
984
+ p / "checkpoint.zip",
985
+ ]
986
+ zip_files = sorted(p.glob("*.zip"))
987
+ candidates.extend(zip_files)
988
+ for candidate in candidates:
989
+ if candidate.exists():
990
+ return str(candidate)
991
+
992
+ if p.exists():
993
+ return str(p)
994
+
995
+ raise FileNotFoundError(f"Model path not found: {model_path}")
996
+
997
+
998
def _load_model_cached_or_raise(model_abs: str, model_type: Literal["maskable", "recurrent"]) -> Any:
    """Load an SB3 checkpoint from disk, memoized in _MODEL_CACHE.

    Cache key is (absolute path, model type). Maskable loads fall back to
    vanilla PPO when sb3_contrib is unavailable or MaskablePPO.load fails;
    recurrent loads require sb3_contrib's RecurrentPPO.
    """
    cache_key = (model_abs, model_type)
    if cache_key in _MODEL_CACHE:
        return _MODEL_CACHE[cache_key]

    if model_type == "recurrent":
        from sb3_contrib import RecurrentPPO

        loaded = RecurrentPPO.load(model_abs)
    else:
        try:
            from sb3_contrib import MaskablePPO

            loaded = MaskablePPO.load(model_abs)
        except Exception:
            # sb3_contrib missing or incompatible checkpoint — try plain PPO.
            from stable_baselines3 import PPO

            loaded = PPO.load(model_abs)

    _MODEL_CACHE[cache_key] = loaded
    return loaded
1019
+
1020
+
1021
+ def _safe_invalid_action_count(final_state: Any) -> int:
1022
+ if hasattr(final_state, "total_invalid_actions"):
1023
+ return int(getattr(final_state, "total_invalid_actions") or 0)
1024
+ metrics = getattr(final_state, "metrics", None)
1025
+ if metrics is not None and hasattr(metrics, "total_invalid_actions"):
1026
+ return int(getattr(metrics, "total_invalid_actions") or 0)
1027
+ return 0
1028
+
1029
+
1030
class LiveSimulationSession:
    """Stepwise simulation session for one episode of the gov-workflow env.

    Supports three agent modes: baseline policy, LLM inference (with
    multi-provider routing, action repair, and automatic recovery), and a
    trained RL checkpoint. Drive it with repeated ``step_once()`` calls; the
    session finalizes itself (grading + summary) when the episode ends or
    ``max_steps`` is reached.
    """

    def __init__(
        self,
        *,
        task_id: str,
        agent_mode: SimulationAgentMode,
        max_steps: int,
        seed: int | None,
        policy_name: str | None = None,
        model_path: str | None = None,
        model_type: Literal["maskable", "recurrent"] = "maskable",
    ) -> None:
        """Configure the session and eagerly initialize the chosen backend."""
        self.task_id = task_id
        self.agent_mode = agent_mode
        recommended = _recommended_min_steps(task_id)
        # LLM runs are padded up to the task's recommended minimum so the agent
        # has enough steps to show meaningful behavior; other modes run as asked.
        self.max_steps = max(int(max_steps), int(recommended)) if agent_mode == SimulationAgentMode.LLM_INFERENCE else int(max_steps)
        self.seed = int(seed if seed is not None else random.randint(1, 999999))
        self.policy_name = policy_name or "backlog_clearance"
        self.model_path = model_path
        self.model_type = model_type
        self.trace: list[dict[str, Any]] = []
        self.total_reward = 0.0
        self.step_idx = 0
        self.done = False
        self.summary: dict[str, Any] | None = None
        self.score: float | None = None
        self.grader_name: str | None = None

        # Core (baseline/LLM) backend state.
        self.env: Any = None
        self.obs: ObservationModel | Any = None
        self.policy: Any = None

        # Trained-RL backend state (SB3 model + gym wrapper + LSTM carry).
        self.rl_env: Any = None
        self.rl_model: Any = None
        self.rl_lstm_state: Any = None
        self.rl_episode_start: Any = None

        # LLM routing state: provider runtimes, per-model health stats, and
        # the failure/recovery counters that drive auto-switching.
        self.llm_runtimes: list[dict[str, Any]] = []
        self.llm_route: list[str] = []
        self.llm_model_stats: dict[tuple[str, str], dict[str, Any]] = {}
        self.consecutive_failure_steps = 0
        self.recovery_steps_remaining = 0
        self.auto_switch_count = 0
        self.last_switch_reason: str | None = None

        if self.agent_mode == SimulationAgentMode.TRAINED_RL:
            self._init_trained()
        else:
            self._init_core()

    def start_line(self) -> dict[str, Any]:
        """Return the [START] log line plus the initial observation."""
        return {
            "log": (
                f"[START] task={self.task_id} env=gov-workflow-openenv "
                f"model={_model_label_for_mode(self.agent_mode)}"
            ),
            "observation": self.obs
        }

    def _init_core(self) -> None:
        """Create the core env and select the baseline or LLM decision policy."""
        from app.baselines import POLICIES, backlog_clearance_policy
        from app.env import GovWorkflowEnv

        self.env = GovWorkflowEnv(task_id=self.task_id)
        self.obs, _ = self.env.reset(seed=self.seed)
        if self.agent_mode == SimulationAgentMode.BASELINE_POLICY:
            # Unknown policy names silently fall back to backlog clearance.
            self.policy = POLICIES.get(self.policy_name, backlog_clearance_policy)
        else:
            self.policy = self._llm_action_with_meta
            self._init_llm_runtimes()

    def _init_llm_runtimes(self) -> None:
        """Build the provider/key/model routing table from environment variables.

        Two provider pools are supported: an OpenAI-compatible endpoint and
        NVIDIA's endpoint. A pool is only registered when at least one API key
        AND one model name resolve; per-(provider, model) health stats are
        zero-initialized for adaptive ranking.
        """
        openai_base = os.getenv("API_BASE_URL") or os.getenv("OPENAI_API_BASE_URL") or "https://api.openai.com/v1"
        nvidia_base = os.getenv("NVIDIA_API_BASE_URL", "https://integrate.api.nvidia.com/v1")

        openai_keys = _dedupe(
            [
                os.getenv("HF_TOKEN"),
                os.getenv("OPENAI_API_KEY"),
                os.getenv("API_KEY"),
            ]
        )
        nvidia_keys = _dedupe(
            [
                os.getenv("NVIDIA_API_KEY"),
                os.getenv("NVIDIA_API_KEY_2"),
            ]
        )

        openai_models = _dedupe(
            [
                os.getenv("MODEL_NAME", "meta/llama-3.3-70b-instruct"),
                *_env_csv_list("MODEL_FALLBACKS"),
            ]
        )
        nvidia_models = _dedupe(
            [
                os.getenv("NVIDIA_MODEL"),
                *_env_csv_list("NVIDIA_MODEL_FALLBACKS"),
                *LEGACY_NVIDIA_MODEL_POOL,
            ]
        )

        runtimes: list[dict[str, Any]] = []

        if openai_keys and openai_models:
            clients: list[tuple[OpenAI, str]] = []
            for idx, key in enumerate(openai_keys, start=1):
                try:
                    clients.append(
                        (
                            OpenAI(base_url=openai_base, api_key=key, timeout=8.0, max_retries=0),
                            f"openai_key_{idx}",
                        )
                    )
                except Exception:
                    # A malformed key/base URL shouldn't sink the whole pool.
                    continue
            if clients:
                runtimes.append(
                    {
                        "provider": "openai-compatible",
                        "base_url": openai_base,
                        "clients": clients,
                        "models": openai_models,
                    }
                )

        if nvidia_keys and nvidia_models:
            clients = []
            for idx, key in enumerate(nvidia_keys, start=1):
                try:
                    clients.append(
                        (
                            OpenAI(base_url=nvidia_base, api_key=key, timeout=8.0, max_retries=0),
                            f"nvidia_key_{idx}",
                        )
                    )
                except Exception:
                    continue
            if clients:
                runtimes.append(
                    {
                        "provider": "nvidia",
                        "base_url": nvidia_base,
                        "clients": clients,
                        "models": nvidia_models,
                    }
                )

        self.llm_runtimes = runtimes
        self.llm_model_stats = {}
        for runtime in runtimes:
            provider = str(runtime.get("provider"))
            for model in runtime.get("models", []):
                self.llm_model_stats[(provider, str(model))] = {
                    "calls": 0,
                    "invalid": 0,
                    "repaired": 0,
                    "failures": 0,
                    "cooldown_until_step": 0,
                }

        openai_runtime = next((rt for rt in runtimes if rt.get("provider") == "openai-compatible"), None)
        nvidia_runtime = next((rt for rt in runtimes if rt.get("provider") == "nvidia"), None)

        openai_route = (
            f"openai-compatible ({len(openai_runtime['clients'])} keys, {len(openai_runtime['models'])} models)"
            if openai_runtime is not None
            else "openai-compatible (unavailable: missing API key/model)"
        )
        nvidia_route = (
            f"nvidia ({len(nvidia_runtime['clients'])} keys, {len(nvidia_runtime['models'])} models)"
            if nvidia_runtime is not None
            else "nvidia (unavailable: missing API key/model)"
        )

        # Human-readable routing summary surfaced in snapshot()/summary.
        self.llm_route = [
            openai_route,
            nvidia_route,
            "adaptive ranking: prefer models with lower invalid/repaired rates",
            "heuristic fallback (backlog_clearance_policy)",
        ]

    def _rank_runtime_models(self, provider: str, models: list[str]) -> list[str]:
        """Order a provider's models best-first using observed health stats.

        Lower score wins: weighted invalid/repaired/failure rates plus a flat
        penalty while a model is in cooldown; ties prefer more-called models.
        """
        def _score(model_name: str) -> tuple[float, int]:
            stat = self.llm_model_stats.get((provider, model_name), {})
            calls = max(1, int(stat.get("calls", 0)))
            invalid_rate = float(stat.get("invalid", 0)) / calls
            repaired_rate = float(stat.get("repaired", 0)) / calls
            fail_rate = float(stat.get("failures", 0)) / calls
            cooldown = int(stat.get("cooldown_until_step", 0))
            cooldown_penalty = 1.0 if self.step_idx < cooldown else 0.0
            return (
                invalid_rate * 2.0 + repaired_rate * 1.25 + fail_rate * 1.5 + cooldown_penalty,
                -calls,
            )

        return sorted([str(m) for m in models], key=_score)

    def _llm_action_with_meta(self, obs: ObservationModel) -> tuple[ActionModel, dict[str, Any]]:
        """Ask the LLM pool for the next action; fall back to the heuristic.

        While in recovery mode the heuristic is used directly. Otherwise every
        (runtime, key, model) combination is tried in ranked order until one
        returns parseable JSON; failures feed the per-model stats/cooldowns.
        Returns the action plus a metadata dict describing the decision source.
        """
        if self.recovery_steps_remaining > 0:
            self.recovery_steps_remaining -= 1
            action, why = _best_high_impact_action(obs)
            return action, {
                "decision_source": "auto_recovery_policy",
                "provider": "heuristic",
                "model_used": "backlog_clearance_policy",
                "llm_attempts": 0,
                "llm_error": None,
                "llm_key_label": None,
                "repair_note": why,
            }

        attempts = 0
        last_error = ""
        allowed_actions, blocked_actions = _masked_action_type_hints(obs)
        schema_hint = {
            "required_fields": {
                "set_priority_mode": ["action_type", "priority_mode"],
                "assign_capacity": ["action_type", "service", "officer_delta"],
                "request_missing_documents": ["action_type", "service"],
                "escalate_service": ["action_type", "service"],
                "advance_time": ["action_type"],
                "reallocate_officers": ["action_type", "service", "target_service", "officer_delta"],
            },
            "allowed_priority_mode": [m.value for m in PriorityMode],
            "allowed_services": [s.value for s in ServiceType],
        }
        system_prompt = (
            "You are controlling a government workflow simulator. "
            "Return exactly one JSON object only. No markdown. No explanation. "
            "Allowed action_type: set_priority_mode, assign_capacity, request_missing_documents, "
            "escalate_service, advance_time, reallocate_officers. "
            "Rules: "
            "1) reallocate_officers requires service + target_service + officer_delta>0 and source!=target. "
            "2) assign_capacity requires service + officer_delta>0. "
            "3) request_missing_documents requires service with missing_docs_cases>0. "
            "4) set_priority_mode requires priority_mode in [urgent_first, oldest_first, balanced, backlog_clearance]. "
            "5) Always prefer high-impact actions that reduce backlog/SLA risk over no-op loops. "
            "Use lowercase enum values."
        )
        user_prompt = (
            "Observation:\n"
            f"{obs.model_dump_json() if hasattr(obs, 'model_dump_json') else json.dumps(getattr(obs, 'dict', lambda: {})())}\n"
            f"Allowed action types now: {allowed_actions}\n"
            f"Blocked action types now: {blocked_actions}\n"
            f"Action schema hints: {json.dumps(schema_hint, separators=(',', ':'))}\n"
            f"Last action validity: {getattr(obs, 'last_action_valid', True)}\n"
            f"Last action message: {getattr(obs, 'last_action_message', '')}\n"
            "Return action JSON."
        )

        for runtime in self.llm_runtimes:
            provider = str(runtime["provider"])
            ranked_models = self._rank_runtime_models(provider, list(runtime["models"]))
            for client, key_label in runtime["clients"]:
                for model in ranked_models:
                    attempts += 1
                    stat_key = (provider, model)
                    try:
                        out = client.chat.completions.create(
                            model=model,
                            messages=[
                                {"role": "system", "content": system_prompt},
                                {"role": "user", "content": user_prompt},
                            ],
                            temperature=0.0,
                            max_tokens=200,
                            stream=False,
                        )
                        content = (out.choices[0].message.content or "").strip()
                        action = _coerce_action(_extract_json_object(content))
                        if stat_key in self.llm_model_stats:
                            self.llm_model_stats[stat_key]["calls"] += 1
                        return action, {
                            "decision_source": "llm",
                            "provider": provider,
                            "model_used": model,
                            "llm_attempts": attempts,
                            "llm_error": None,
                            "llm_key_label": key_label,
                        }
                    except Exception as exc:
                        # Count the failed call; two consecutive failures put
                        # the model into a 5-step cooldown.
                        last_error = str(exc)
                        stat = self.llm_model_stats.get(stat_key)
                        if stat is not None:
                            stat["calls"] += 1
                            stat["failures"] += 1
                            if stat["failures"] >= 2:
                                stat["cooldown_until_step"] = self.step_idx + 5
                        continue

        # Every provider/key/model combination failed (or none configured).
        action, why = _best_high_impact_action(obs)
        if not self.llm_runtimes:
            last_error = "No LLM credentials configured."
        return action, {
            "decision_source": "heuristic_fallback",
            "provider": "heuristic",
            "model_used": "backlog_clearance_policy",
            "llm_attempts": attempts,
            "llm_error": last_error or None,
            "llm_key_label": None,
            "repair_note": why,
        }

    def _init_trained(self) -> None:
        """Load the RL checkpoint and create the masked gym wrapper env."""
        import numpy as np
        from rl.gov_workflow_env import GovWorkflowGymEnv

        if not self.model_path:
            raise ValueError("model_path is required for trained_rl simulation.")
        model_abs = _resolve_model_path_or_raise(self.model_path)
        self.rl_model = _load_model_cached_or_raise(model_abs, self.model_type)
        self.rl_env = GovWorkflowGymEnv(
            task_id=self.task_id,
            seed=self.seed,
            hard_action_mask=True,
        )
        self.obs, _ = self.rl_env.reset(seed=self.seed)
        self.rl_lstm_state = None
        self.rl_episode_start = np.array([True], dtype=bool)

    def step_once(self) -> tuple[dict[str, Any], str, bool]:
        """Advance one step; return (trace row, log line, finished flag).

        Raises RuntimeError when called after the episode has finished.
        """
        if self.done:
            raise RuntimeError("Simulation already finished.")

        self.step_idx += 1
        row = self._step_trained() if self.agent_mode == SimulationAgentMode.TRAINED_RL else self._step_core()
        self.trace.append(row)
        self.total_reward += float(row["reward"])
        step_log = _log_step_line(row)

        if row["done"] or self.step_idx >= self.max_steps:
            # Truncation by max_steps is reported to the caller as done=True.
            self._finalize()
            row["done"] = True
            return row, step_log, True
        return row, step_log, False

    def end_line(self) -> str:
        """Return the [END] summary log line (success threshold: score >= 0.5)."""
        if self.score is None:
            return "[END] success=false steps=0 score=0.00 rewards="
        rewards = ",".join(f"{float(x.get('reward', 0.0)):.2f}" for x in self.trace)
        success = "true" if self.score >= 0.5 else "false"
        return f"[END] success={success} steps={len(self.trace)} score={self.score:.2f} rewards={rewards}"

    def step_line(self, action: dict | ActionModel) -> dict[str, Any]:
        """Test wrapper for executing an action and returning observation + reward."""
        if isinstance(action, dict):
            action = _coerce_action(action)
        self.obs, reward, terminated, truncated, info = self.env.step(action)
        return {"observation": self.obs, "reward": reward}

    def snapshot(self) -> dict[str, Any]:
        """Return a JSON-friendly snapshot of the session's current status."""
        return {
            "task_id": self.task_id,
            "agent_mode": self.agent_mode.value,
            "seed": self.seed,
            "max_steps": self.max_steps,
            "step_idx": self.step_idx,
            "done": self.done,
            "total_reward": float(self.total_reward),
            "score": self.score,
            "grader_name": self.grader_name,
            "summary": self.summary,
            "trace_len": len(self.trace),
            "llm_route": list(self.llm_route),
        }

    def close(self) -> None:
        """Best-effort close of whichever environments were created."""
        try:
            if self.env is not None and hasattr(self.env, "close"):
                self.env.close()
        except Exception:
            pass
        try:
            if self.rl_env is not None and hasattr(self.rl_env, "close"):
                self.rl_env.close()
        except Exception:
            pass

    def _step_core(self) -> dict[str, Any]:
        """Run one baseline/LLM step: decide, mask, repair, execute, record."""
        if self.env is None:
            raise RuntimeError("Core simulation env not initialized.")

        if self.agent_mode == SimulationAgentMode.BASELINE_POLICY:
            action = self.policy(self.obs)
            meta = {
                "decision_source": "baseline_policy",
                "provider": "local_policy",
                "model_used": self.policy_name,
                "llm_attempts": 0,
                "llm_error": None,
                "llm_key_label": None,
            }
        else:
            # LLM policies may return (action, meta) or a bare action.
            raw_decision = self.policy(self.obs)
            if isinstance(raw_decision, tuple) and len(raw_decision) == 2:
                action, meta = raw_decision
            else:
                action, meta = raw_decision, {}
            if not isinstance(meta, dict):
                meta = {}

        # Coerce anything non-ActionModel into a usable action (or a no-op).
        if not isinstance(action, ActionModel):
            if isinstance(action, dict):
                action = _coerce_action(action)
            else:
                action = ActionModel(action_type=ActionType.ADVANCE_TIME)
                meta["repair_note"] = "non-action output from llm policy, coerced to advance_time"

        # Runtime mask check, then payload repair; either may swap the action.
        allowed_mask = _compute_action_mask(self.obs)
        if not bool(allowed_mask.get(action.action_type, True)):
            masked_fallback, why = _best_high_impact_action(self.obs)
            action = masked_fallback
            if meta.get("decision_source") == "llm":
                meta["decision_source"] = "llm_repaired"
            meta["repair_note"] = f"action masked at runtime; {why}"

        repaired_action, repair_note = _repair_action_for_observation(action, self.obs)
        if repair_note:
            action = repaired_action
            if meta.get("decision_source") == "llm":
                meta["decision_source"] = "llm_repaired"
            meta["repair_note"] = repair_note

        self.obs, reward, terminated, truncated, info = self.env.step(action)
        done = bool(terminated or truncated)
        # NOTE(review): `info` is read with getattr here but with .get() in
        # _step_trained — presumably the core env returns an object (e.g.
        # StepInfoModel) rather than a dict; confirm, since getattr on a plain
        # dict would always return the default.
        last_action_error = getattr(info, "last_action_error", None)
        if last_action_error is None:
            last_action_error = getattr(info, "action_explanation", None)

        row = {
            "step": self.step_idx,
            "day": self.obs.day,
            "action_type": action.action_type.value,
            "action_payload": action.model_dump(exclude_none=True, mode="json"),
            "reward": float(reward),
            "done": done,
            "backlog": getattr(self.obs, "total_backlog", 0),
            "completed": getattr(self.obs, "total_completed", 0),
            "sla_breaches": getattr(self.obs, "total_sla_breaches", 0),
            "fairness_gap": float(
                getattr(self.obs, "fairness_gap", getattr(self.obs, "fairness_index", 0.0)) or 0.0
            ),
            "escalation_budget_remaining": getattr(self.obs, "escalation_budget_remaining", 0),
            "invalid_action": bool(getattr(info, "invalid_action", False)),
            "last_action_error": last_action_error,
            "queue_rows": _queue_rows(self.obs),
        }
        row.update(meta)

        if self.agent_mode == SimulationAgentMode.LLM_INFERENCE:
            # Book-keeping for adaptive model ranking and auto-recovery:
            # repaired/invalid steps degrade the model's stats, clean steps
            # slowly forgive past failures.
            is_repaired = row.get("decision_source") in {"llm_repaired", "auto_recovery_policy"}
            is_invalid = bool(row.get("invalid_action")) or bool(row.get("last_action_error"))
            model_used = str(row.get("model_used") or "")
            provider = str(row.get("provider") or "")
            stat_key = (provider, model_used)
            stat = self.llm_model_stats.get(stat_key)
            if stat is not None:
                if is_repaired:
                    stat["repaired"] += 1
                if is_invalid:
                    stat["invalid"] += 1
                    stat["failures"] += 1
                else:
                    stat["failures"] = max(0, int(stat.get("failures", 0)) - 1)

            is_failure_pattern = is_invalid or is_repaired
            self.consecutive_failure_steps = self.consecutive_failure_steps + 1 if is_failure_pattern else 0

            # Four bad steps in a row trigger recovery mode: cool the model
            # down and hand the next few decisions to the heuristic.
            if self.consecutive_failure_steps >= 4:
                if stat is not None:
                    stat["cooldown_until_step"] = self.step_idx + 6
                self.recovery_steps_remaining = max(self.recovery_steps_remaining, 3)
                self.auto_switch_count += 1
                self.last_switch_reason = "repeated invalid/repaired pattern detected"
                row["switch_note"] = "auto-switched to recovery policy and deprioritized failing model"
                self.consecutive_failure_steps = 0

        return row

    def _step_trained(self) -> dict[str, Any]:
        """Run one RL step: predict (masked), execute, and build the trace row."""
        import numpy as np

        masks = self.rl_env.action_masks()
        if self.model_type == "recurrent":
            action, self.rl_lstm_state = self.rl_model.predict(
                self.obs,
                state=self.rl_lstm_state,
                episode_start=self.rl_episode_start,
                deterministic=True,
            )
            action_idx = int(action.item() if hasattr(action, "item") else action)
            # RecurrentPPO has no native masking support: if it picked a masked
            # index, substitute the first valid one (18 is the last-resort
            # fallback index — presumably advance_time; confirm against the
            # action table).
            if not (0 <= action_idx < masks.shape[0] and bool(masks[action_idx])):
                valid = np.flatnonzero(masks)
                action_idx = int(valid[0]) if valid.size > 0 else 18
        else:
            from sb3_contrib.common.maskable.utils import get_action_masks

            action, _ = self.rl_model.predict(
                self.obs,
                action_masks=get_action_masks(self.rl_env),
                deterministic=True,
            )
            action_idx = int(action.item() if hasattr(action, "item") else action)

        self.obs, reward, terminated, truncated, info = self.rl_env.step(action_idx)
        done = bool(terminated or truncated)
        if self.model_type == "recurrent":
            # Feed episode boundary back into the next LSTM prediction.
            self.rl_episode_start = np.array([done], dtype=bool)

        # Pull a rich observation from the wrapped core env for the trace row.
        core_env = self.rl_env.core_env
        core_obs = core_env._build_observation()
        action_model, action_label = _decode_action_idx(action_idx)

        return {
            "step": self.step_idx,
            "day": core_obs.day,
            "action_type": action_label,
            "action_payload": action_model.model_dump(exclude_none=True, mode="json"),
            "action_index": action_idx,
            "reward": float(reward),
            "done": done,
            "backlog": core_obs.total_backlog,
            "completed": core_obs.total_completed,
            "sla_breaches": core_obs.total_sla_breaches,
            "fairness_gap": float(
                getattr(core_obs, "fairness_gap", getattr(core_obs, "fairness_index", 0.0)) or 0.0
            ),
            "escalation_budget_remaining": core_obs.escalation_budget_remaining,
            "invalid_action": bool(info.get("invalid_action", False)),
            "last_action_error": info.get("last_action_error") or info.get("action_explanation"),
            "queue_rows": _queue_rows(core_obs),
            "decision_source": "trained_rl",
            "provider": "rl",
            "model_used": self.model_path or "trained_rl",
            "llm_attempts": 0,
            "llm_error": None,
            "llm_key_label": None,
        }

    def _finalize(self) -> None:
        """Grade the finished episode and assemble the summary dict (idempotent)."""
        if self.done:
            return
        self.done = True

        from app.graders import grade_episode

        if self.agent_mode == SimulationAgentMode.TRAINED_RL:
            final_state = self.rl_env.core_env.state()
        else:
            final_state = self.env.state()

        gr = grade_episode(final_state)
        self.score = float(gr.score)
        self.grader_name = gr.grader_name

        # Decision-source tallies for the LLM-quality metrics below.
        llm_steps = sum(1 for row in self.trace if row.get("decision_source") in {"llm", "llm_repaired"})
        fallback_steps = sum(
            1 for row in self.trace if row.get("decision_source") in {"heuristic_fallback", "auto_recovery_policy"}
        )
        repaired_steps = sum(
            1 for row in self.trace if row.get("decision_source") in {"llm_repaired", "auto_recovery_policy"}
        )
        total_steps = max(1, len(self.trace))
        invalid_actions = _safe_invalid_action_count(final_state)
        invalid_rate = float(invalid_actions) / float(total_steps)
        repaired_rate = float(repaired_steps) / float(total_steps)

        # Per-model performance table, best (lowest invalid rate) first.
        ranked_models: list[dict[str, Any]] = []
        if self.llm_model_stats:
            for (provider, model), stat in self.llm_model_stats.items():
                calls = int(stat.get("calls", 0))
                if calls <= 0:
                    continue
                ranked_models.append(
                    {
                        "provider": provider,
                        "model": model,
                        "calls": calls,
                        "invalid_rate": float(stat.get("invalid", 0)) / max(1, calls),
                        "repaired_rate": float(stat.get("repaired", 0)) / max(1, calls),
                    }
                )
            ranked_models.sort(key=lambda x: (x["invalid_rate"], x["repaired_rate"], -x["calls"]))

        self.summary = {
            "total_steps": getattr(final_state, "total_steps", len(self.trace)),
            "total_completed": getattr(final_state, "total_completed", 0),
            "total_backlog": getattr(final_state, "total_backlog", 0),
            "total_sla_breaches": getattr(final_state, "total_sla_breaches", 0),
            "fairness_gap": float(getattr(final_state, "fairness_gap", 0.0) or 0.0),
            "total_invalid_actions": invalid_actions,
            "invalid_action_rate": invalid_rate,
            "llm_steps": llm_steps,
            "heuristic_fallback_steps": fallback_steps,
            "llm_repaired_steps": repaired_steps,
            "repaired_action_rate": repaired_rate,
            "auto_switch_count": self.auto_switch_count,
            "last_switch_reason": self.last_switch_reason,
            "effective_max_steps": self.max_steps,
            "recommended_min_steps": _recommended_min_steps(self.task_id),
        }
        if self.agent_mode == SimulationAgentMode.LLM_INFERENCE:
            self.summary["llm_route"] = list(self.llm_route)
            self.summary["llm_model_performance"] = ranked_models
        if self.agent_mode == SimulationAgentMode.TRAINED_RL:
            self.summary["model_path"] = self.model_path
            self.summary["model_type"] = self.model_type
1639
+
1640
+
1641
def run_simulation(
    *,
    task_id: str,
    agent_mode: SimulationAgentMode,
    max_steps: int,
    seed: int | None,
    policy_name: str | None = None,
    model_path: str | None = None,
    model_type: Literal["maskable", "recurrent"] = "maskable",
) -> SimulationRun:
    """Run one full episode synchronously and return the aggregated result.

    Thin wrapper over LiveSimulationSession: steps until the session reports
    done, then packages seed/score/summary/trace into a SimulationRun. The
    session's environments are always closed, even if stepping raises.
    """
    session = LiveSimulationSession(
        task_id=task_id,
        agent_mode=agent_mode,
        max_steps=max_steps,
        seed=seed,
        policy_name=policy_name,
        model_path=model_path,
        model_type=model_type,
    )
    try:
        while not session.done:
            session.step_once()
        return SimulationRun(
            task_id=session.task_id,
            agent_mode=session.agent_mode,
            seed=session.seed,
            total_reward=float(session.total_reward),
            score=float(session.score or 0.0),
            grader_name=str(session.grader_name or "unknown"),
            summary=dict(session.summary or {}),
            trace=list(session.trace),
        )
    finally:
        session.close()
1675
+
1676
+
1677
def _decode_action_idx(action_idx: int) -> tuple[ActionModel, str]:
    """Translate a discrete RL action index into an (ActionModel, label) pair.

    Used for trace display of trained-RL steps. Falls back to ADVANCE_TIME
    with a generic ``action_<idx>`` label whenever the decode table is
    unavailable or the index/type cannot be resolved.
    """
    fallback = (ActionModel(action_type=ActionType.ADVANCE_TIME), f"action_{action_idx}")

    try:
        from rl.feature_builder import ACTION_DECODE_TABLE
    except Exception:
        return fallback

    row = ACTION_DECODE_TABLE.get(int(action_idx))
    if row is None:
        return fallback

    action_type, service, priority_mode, delta = row

    try:
        at = ActionType(str(action_type))
    except Exception:
        return fallback

    if at == ActionType.SET_PRIORITY_MODE:
        decoded = _action_model_from_kwargs(at, priority_mode=priority_mode)
    elif at == ActionType.ASSIGN_CAPACITY:
        decoded = _action_model_from_kwargs(at, service=service, officer_delta=delta or 1)
    elif at in (ActionType.REQUEST_MISSING_DOCUMENTS, ActionType.ESCALATE_SERVICE):
        decoded = _action_model_from_kwargs(at, service=service)
    elif at == ActionType.REALLOCATE_OFFICERS:
        src = _enum_service(service)
        if src is None:
            decoded = ActionModel(action_type=ActionType.ADVANCE_TIME)
        else:
            # NOTE(review): the decode table carries no target column, so
            # target_service mirrors the source here; this payload is used for
            # trace display only — confirm before executing it against the env
            # (which requires source != target).
            decoded = _action_model_from_kwargs(at, service=src, target_service=src, officer_delta=delta or 1)
    else:
        decoded = ActionModel(action_type=ActionType.ADVANCE_TIME)

    return decoded, at.value
app/env.py ADDED
@@ -0,0 +1,553 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ env.py — Gov Workflow OpenEnv
3
+ Gymnasium/OpenEnv-compatible environment aligned with Phase 1 schemas.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import random
9
+ from uuid import uuid4
10
+
11
+ from app.event_engine import EventEngine
12
+ from app.models import (
13
+ ActionModel,
14
+ ActionType,
15
+ ApplicationCase,
16
+ EpisodeStateModel,
17
+ InternalSubstate,
18
+ ObservationModel,
19
+ OfficerPool,
20
+ PriorityMode,
21
+ QueueSnapshot,
22
+ RewardModel,
23
+ ScenarioMode,
24
+ ServiceType,
25
+ StepInfoModel,
26
+ TaskConfig,
27
+ )
28
+ from app.reward import compute_reward
29
+ from app.signal_computer import SignalComputer
30
+ from app.engine import DayResult, DaySimulator
31
+ from app.tasks import get_task
32
+
33
+
34
def completion_fairness_gap(
    arrived_by_service: dict[ServiceType, int],
    completed_by_service: dict[ServiceType, int],
) -> float:
    """
    Spread between the best- and worst-served services' completion rates.

    Each service's rate is completed / max(1, arrived); with fewer than two
    services there is nothing to compare and the gap is 0.0.
    """
    if len(arrived_by_service) < 2:
        return 0.0

    # One rate per service listed in the arrivals dict; arrivals are floored
    # at 1 so a service with zero arrivals contributes a rate of completed/1.
    rates = [
        completed_by_service.get(svc, 0) / max(1, arrived_by_service.get(svc, 0))
        for svc in arrived_by_service
    ]
    return max(rates) - min(rates)
49
+
50
+
51
class EpisodeMetrics:
    """Mutable tally of every per-episode counter the environment tracks."""

    # Integer counters that start at zero each episode.
    _COUNTER_NAMES = (
        "total_arrived",
        "total_completed",
        "total_sla_breaches",
        "total_rejected",
        "total_invalid_actions",
        "total_escalations_used",
        "total_wasted_escalations",
        "total_docs_requested",
        "total_docs_cleared",
        "total_idle_officer_days",
        "total_capacity_days",
        "total_urgent_arrived",
        "total_urgent_completed",
    )

    def __init__(self):
        for name in self._COUNTER_NAMES:
            setattr(self, name, 0)
        # Running sum of per-step rewards (float, unlike the counters above).
        self.cumulative_reward: float = 0.0

    def to_reward_model(self) -> RewardModel:
        """Wrap the cumulative reward in the API-facing RewardModel."""
        return RewardModel(total_reward=self.cumulative_reward)
70
+
71
+
72
class GovWorkflowEnv:
    """
    Gymnasium/OpenEnv-style environment wrapping the government-workflow
    day simulator.

    Lifecycle: construct (or reset()) → repeatedly step(ActionModel) →
    episode ends when terminated (backlog cleared) or truncated (day/step
    budget exhausted). state() exports a flat EpisodeStateModel for graders.
    """

    def __init__(self, task_id: str = "district_backlog_easy", seed: int | None = None) -> None:
        # Resolve the task configuration up front; reset() may swap it later
        # via options["task_id"].
        self.task_id = task_id
        self.task: TaskConfig = get_task(task_id)
        self.seed = seed
        # Step budget: 10 agent steps per simulated day, at least 1.
        self.max_steps_per_episode = max(1, int(self.task.max_days) * 10)
        self._init_episode_state()

    def reset(
        self,
        seed: int | None = None,
        options: dict | None = None,
    ) -> tuple[ObservationModel, dict]:
        """
        Start a fresh episode.

        options may carry "task_id" (switch task) and "max_steps_per_episode"
        (override the default day*10 step budget). When seed is None the
        task's own seed is used, making episodes reproducible by default.
        Returns (initial observation, info dict).
        """
        task_id = (options or {}).get("task_id", self.task_id)
        self.task = get_task(task_id)
        self.task_id = self.task.task_id

        self.seed = self.task.seed if seed is None else int(seed)
        self.rng = random.Random(self.seed)
        max_steps_override = (options or {}).get("max_steps_per_episode")
        if max_steps_override is None:
            self.max_steps_per_episode = max(1, int(self.task.max_days) * 10)
        else:
            self.max_steps_per_episode = max(1, int(max_steps_override))

        # Episode id embeds task, seed and a short random suffix for uniqueness.
        self.episode_id = f"{self.task_id}-s{self.seed}-{uuid4().hex[:6]}"
        self.day = 0
        self.total_steps = 0
        self.terminated = False
        self.truncated = False
        self.priority_mode = PriorityMode.BALANCED

        # Deep-ish copy of the task's initial pool so the template stays pristine.
        pool = self.task.initial_officer_pool
        self.officer_pool = OfficerPool(
            total_officers=pool.total_officers,
            available_officers=pool.available_officers,
            allocated=dict(pool.allocated),
            pending_reallocation=dict(getattr(pool, "pending_reallocation", {})),
        )

        self.active_cases: list[ApplicationCase] = []
        self.completed_cases: list[ApplicationCase] = []
        self.escalation_budget_remaining = self.task.escalation_budget

        # Per-service counters used for the fairness gap.
        self.arrived_by_service = {s: 0 for s in self.task.enabled_services}
        self.completed_by_service = {s: 0 for s in self.task.enabled_services}

        self.metrics = EpisodeMetrics()
        self.action_history: list[dict] = []
        self.last_action_valid = True
        self.last_action_message = "reset"
        self.last_action_explanation = ""

        # Deterministic event stream + simulator share the episode seed/RNG.
        self.event_engine = EventEngine(
            seed=self.seed,
            scenario_mode=self.task.scenario_mode,
        )
        self.simulator = DaySimulator(
            task_config=self.task,
            rng=self.rng,
            event_engine=self.event_engine,
        )
        self.signal_computer = SignalComputer()

        obs = self._build_observation(active_events=[])
        info = {
            "task_id": self.task_id,
            "seed": self.seed,
            "episode_id": self.episode_id,
            "max_days": self.task.max_days,
        }
        return obs, info

    def step(
        self,
        action: ActionModel | dict,
    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]:
        """
        Apply one agent action and return (obs, reward, terminated, truncated, info).

        Invalid actions (ValueError from _apply_action) do NOT advance the
        simulation; they are counted, penalized via the reward function, and
        surfaced through last_action_* fields. Raises RuntimeError if called
        after the episode has ended.
        """
        if isinstance(action, dict):
            # Accept raw dict payloads (e.g. from an HTTP gateway).
            from app.models import ActionModel
            action = ActionModel(**action)

        if self.terminated or self.truncated:
            raise RuntimeError("Episode ended — call reset() before stepping.")

        self.total_steps += 1
        invalid_action = False
        day_result = DayResult()

        try:
            notes, day_result = self._apply_action(action, day_result)
            self.last_action_valid = True
            self.last_action_message = notes[-1] if notes else "ok"
            self.last_action_explanation = self.last_action_message
        except ValueError as exc:
            # Domain validation failure: record it but keep the episode alive.
            invalid_action = True
            self.metrics.total_invalid_actions += 1
            self.last_action_valid = False
            self.last_action_message = str(exc)
            self.last_action_explanation = f"Invalid: {exc}"

        fairness_gap = completion_fairness_gap(
            self.arrived_by_service,
            self.completed_by_service,
        )

        reward: RewardModel = compute_reward(
            stage_advances=day_result.stage_advances,
            completions=day_result.new_completions,
            active_backlog=len(self.active_cases),
            new_sla_breaches=day_result.new_sla_breaches,
            fairness_gap=fairness_gap,
            fairness_threshold=self.task.fairness_threshold or 0.0,
            invalid_action=invalid_action,
            idle_capacity=day_result.idle_officer_days,
            # Stability bonus only when the agent simply lets time pass.
            award_stability_bonus=(action.action_type == ActionType.ADVANCE_TIME),
        )
        self.metrics.cumulative_reward += reward.total_reward

        # Terminated: backlog cleared after at least one simulated day and a
        # valid action. Truncated: day or step budget hit without termination.
        self.terminated = (
            len(self.active_cases) == 0
            and self.day > 0
            and not invalid_action
        )
        self.truncated = (
            (self.day >= self.task.max_days or self.total_steps >= self.max_steps_per_episode)
            and not self.terminated
        )

        info = StepInfoModel(
            reward_breakdown=reward,
            newly_arrived_cases=day_result.new_arrivals,
            newly_completed_cases=day_result.new_completions,
            newly_sla_breached_cases=day_result.new_sla_breaches,
            newly_resolved_doc_cases=day_result.newly_unblocked_missing,
            invalid_action=invalid_action,
            action_explanation=self.last_action_explanation,
            active_events=day_result.active_events,
            grader_preview_score=0.0,
            effects_resolved_this_step=[],
        )

        self.action_history.append({
            "step": self.total_steps,
            "day": self.day,
            "action": action.model_dump(mode="json"),
            "invalid": invalid_action,
            "message": self.last_action_message,
            "reward": reward.total_reward,
        })

        obs = self._build_observation(active_events=day_result.active_events)
        return obs, reward.total_reward, self.terminated, self.truncated, info

    def count_pending_effects(self) -> int:
        """Count all pending delayed effects waiting to resolve."""
        # NOTE(review): probes three possible attribute locations for
        # compatibility; _count_pending_effects below computes a different,
        # case-derived figure — confirm which one graders should see.
        if hasattr(self, '_pending_effects') and self._pending_effects:
            return len(self._pending_effects)
        if hasattr(self, 'simulator') and hasattr(self.simulator, 'pending_effects'):
            return len(self.simulator.pending_effects)
        if hasattr(self, 'pending_effects'):
            return len(self.pending_effects)
        return 0

    def state(self) -> EpisodeStateModel:
        """Export the flat, grader-facing snapshot of the current episode."""
        fairness_gap = completion_fairness_gap(
            self.arrived_by_service, self.completed_by_service
        )

        # Compute average waiting days across completed cases
        avg_wait = (
            sum(c.waiting_days for c in self.completed_cases) / len(self.completed_cases)
            if self.completed_cases else 0.0
        )

        return EpisodeStateModel(
            episode_id=self.episode_id,
            task_id=self.task_id,
            seed=self.seed,
            scenario_mode=self.task.scenario_mode,
            day=self.day,
            max_days=self.task.max_days,
            terminated=self.terminated,
            truncated=self.truncated,
            total_steps=self.total_steps,
            total_completed=len(self.completed_cases),
            total_backlog=len(self.active_cases),
            total_sla_breaches=self.metrics.total_sla_breaches,
            total_rejected=self.metrics.total_rejected,
            action_history_count=len(self.action_history),
            cumulative_reward=self.metrics.cumulative_reward,
            officer_pool=self.officer_pool.model_copy(deep=True),
            pending_effects_count=self.count_pending_effects(),
            active_events_today=[],

            # ── Grader-facing fields ──────────────────────────────────
            fairness_gap=round(fairness_gap, 4),
            total_arrived=self.metrics.total_arrived,
            total_docs_requested=self.metrics.total_docs_requested,
            total_docs_cleared=self.metrics.total_docs_cleared,
            total_idle_officer_days=self.metrics.total_idle_officer_days,
            total_capacity_days=self.metrics.total_capacity_days,
            total_urgent_arrived=self.metrics.total_urgent_arrived,
            total_urgent_completed=self.metrics.total_urgent_completed,
            total_escalations_used=self.metrics.total_escalations_used,
            total_wasted_escalations=self.metrics.total_wasted_escalations,
            total_invalid_actions=self.metrics.total_invalid_actions,
            avg_waiting_days=round(avg_wait, 2),

            # Full action log — populated but stripped by API unless requested
            action_history=list(self.action_history),
        )

    def _apply_action(
        self,
        action: ActionModel,
        day_result: DayResult,
    ) -> tuple[list[str], DayResult]:
        """
        Execute a single validated action; returns (notes, day_result).

        Raises ValueError for any domain violation — step() converts that
        into an invalid-action penalty without advancing time. Only
        ADVANCE_TIME mutates day_result (by simulating a day).
        """
        notes: list[str] = []

        if action.action_type == ActionType.SET_PRIORITY_MODE:
            if action.priority_mode is None:
                raise ValueError("priority_mode required for set_priority_mode")
            old_mode = self.priority_mode
            self.priority_mode = action.priority_mode
            notes.append(f"Priority mode changed: {old_mode.value} -> {action.priority_mode.value}")
            return notes, day_result

        if action.action_type == ActionType.ASSIGN_CAPACITY:
            cap = action.capacity_assignment
            if not cap:
                raise ValueError("capacity_assignment dict required for assign_capacity")

            # Validate-and-apply per service; idle headcount is re-read each
            # iteration so earlier assignments shrink what remains available.
            for svc_key, delta in cap.items():
                svc = ServiceType(svc_key) if isinstance(svc_key, str) else svc_key
                if svc not in self.task.enabled_services:
                    raise ValueError(f"{svc.value} is not enabled in this task")
                if delta <= 0:
                    raise ValueError("capacity delta must be positive")
                idle = self.officer_pool.idle_officers
                if delta > idle:
                    raise ValueError(f"Only {idle} idle officers available; requested {delta}")
                self.officer_pool.allocated[svc] = self.officer_pool.allocated.get(svc, 0) + delta
                notes.append(f"Assigned {delta} officer(s) to {svc.value}")
            return notes, day_result

        if action.action_type == ActionType.REQUEST_MISSING_DOCUMENTS:
            svc = action.service_target
            if svc is None:
                raise ValueError("service_target required for request_missing_documents")

            candidates = [
                c for c in self.active_cases
                if c.service_type == svc
                and c.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
            ]
            if not candidates:
                raise ValueError(f"No BLOCKED_MISSING_DOCS cases for {svc.value}")

            # Highest SLA risk first, oldest arrivals breaking ties; at most
            # 3 requests per action, each resolving 2-3 days later.
            candidates.sort(key=lambda c: (-c.sla_risk, c.arrival_day))
            resolved = 0
            for case in candidates[:3]:
                case.doc_request_sent_day = self.day
                case.doc_resolution_day = self.day + self.rng.randint(2, 3)
                self.metrics.total_docs_requested += 1
                resolved += 1

            notes.append(f"Sent missing-doc requests for {resolved} case(s) in {svc.value}")
            return notes, day_result

        if action.action_type == ActionType.ESCALATE_SERVICE:
            if self.escalation_budget_remaining <= 0:
                # Burning an escalation with no budget counts as wasted.
                self.metrics.total_wasted_escalations += 1
                raise ValueError("Escalation budget exhausted")

            svc = action.escalation_target or action.service_target
            candidates = [
                c for c in self.active_cases
                if (svc is None or c.service_type == svc) and not c.is_urgent
            ]
            if not candidates:
                self.metrics.total_wasted_escalations += 1
                raise ValueError("No eligible non-urgent cases to escalate")

            # Pick the riskiest case; older arrival wins ties.
            best = max(candidates, key=lambda c: (c.sla_risk, -c.arrival_day))
            best.is_urgent = True
            self.escalation_budget_remaining -= 1
            self.metrics.total_escalations_used += 1
            notes.append(f"Escalated case {best.case_id} ({best.service_type.value})")
            return notes, day_result

        if action.action_type == ActionType.ADVANCE_TIME:
            day_result = self._advance_one_day()
            notes.append(f"Day {self.day} simulated")
            return notes, day_result

        if action.action_type == ActionType.REALLOCATE_OFFICERS:
            delta = action.reallocation_delta
            if not delta or len(delta) < 2:
                raise ValueError("reallocation_delta must have at least 2 entries")

            # Zero-sum move: officers only shift between services.
            total = sum(delta.values())
            if total != 0:
                raise ValueError(f"reallocation_delta must sum to 0 (got {total})")

            # First pass validates every entry so the update is all-or-nothing.
            for svc_key, change in delta.items():
                svc = ServiceType(svc_key) if isinstance(svc_key, str) else svc_key
                if svc not in self.task.enabled_services:
                    raise ValueError(f"{svc.value} not in enabled services")
                current = self.officer_pool.allocated.get(svc, 0)
                if current + change < 0:
                    raise ValueError(
                        f"Cannot reduce {svc.value} below 0 (current={current}, change={change})"
                    )

            # Second pass applies the validated deltas.
            for svc_key, change in delta.items():
                svc = ServiceType(svc_key) if isinstance(svc_key, str) else svc_key
                self.officer_pool.allocated[svc] = self.officer_pool.allocated.get(svc, 0) + change

            changes = ", ".join(f"{k}:{'+' if v > 0 else ''}{v}" for k, v in delta.items())
            notes.append(f"Officers reallocated: {changes}")
            return notes, day_result

        raise ValueError(f"Unsupported action_type: {action.action_type.value}")

    def _advance_one_day(self) -> DayResult:
        """Simulate one day and fold its results into the episode counters."""
        self.day += 1

        alloc = dict(self.officer_pool.allocated)
        result = self.simulator.simulate_day(
            day=self.day,
            active_cases=self.active_cases,
            completed_cases=self.completed_cases,
            priority_mode=self.priority_mode,
            officer_allocations=alloc,
        )

        # Count each completion exactly once via a private marker attribute.
        for case in self.completed_cases:
            if getattr(case, "_counted", False):
                continue
            case._counted = True
            svc = case.service_type
            self.completed_by_service[svc] = self.completed_by_service.get(svc, 0) + 1

        # Same once-only pattern for arrivals (cases appear in active_cases).
        for case in self.active_cases:
            if getattr(case, "_arrival_counted", False):
                continue
            case._arrival_counted = True
            svc = case.service_type
            self.arrived_by_service[svc] = self.arrived_by_service.get(svc, 0) + 1
            self.metrics.total_arrived += 1
            if case.is_urgent:
                self.metrics.total_urgent_arrived += 1

        self.metrics.total_completed = len(self.completed_cases)
        self.metrics.total_sla_breaches += result.new_sla_breaches
        self.metrics.total_idle_officer_days += result.idle_officer_days
        self.metrics.total_capacity_days += result.total_capacity_days
        self.metrics.total_urgent_completed += result.urgent_completed
        self.metrics.total_docs_cleared += result.newly_unblocked_missing

        return result

    def _build_observation(self, active_events: list = None) -> ObservationModel:
        """Assemble the agent-facing observation from current episode state."""
        active_events = active_events or []

        snapshots: dict[str, QueueSnapshot] = {}
        todays_digital = 0
        todays_arrivals = 0
        today_completed: dict[ServiceType, int] = {}

        # NOTE(review): this tallies ALL completed cases, not just today's,
        # yet feeds snap.total_completed_today — confirm intended semantics.
        for case in self.completed_cases:
            today_completed[case.service_type] = today_completed.get(case.service_type, 0) + 1

        for service in self.task.enabled_services:
            snap = self.simulator.build_queue_snapshot(service, self.active_cases, self.day)
            snap.total_completed_today = today_completed.get(service, 0)
            snapshots[service.value] = snap

        for case in self.active_cases:
            if case.arrival_day == self.day:
                todays_arrivals += 1
                if case.intake_channel.value == "digital":
                    todays_digital += 1

        sigs = self.signal_computer.compute(
            queue_snapshots=snapshots,
            officer_pool=self.officer_pool,
            todays_arrivals=todays_arrivals,
            digital_arrivals=todays_digital,
            capacity_per_day=max(1.0, float(self.officer_pool.available_officers)),
        )

        # Doc-blocked cases with a scheduled resolution day still pending.
        pending_doc = sum(
            1 for c in self.active_cases
            if c.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
            and c.doc_resolution_day is not None
        )
        pending_officer = len(getattr(self.officer_pool, "pending_reallocation", {}))

        return ObservationModel(
            task_id=self.task_id,
            episode_id=self.episode_id,
            day=self.day,
            max_days=self.task.max_days,
            scenario_mode=self.task.scenario_mode,
            officer_pool=self.officer_pool.model_copy(deep=True),
            queue_snapshots=snapshots,
            total_backlog=len(self.active_cases),
            total_completed=len(self.completed_cases),
            total_sla_breaches=self.metrics.total_sla_breaches,
            total_rejected=self.metrics.total_rejected,
            escalation_budget_remaining=self.escalation_budget_remaining,
            backlog_pressure=sigs.backlog_pressure,
            sla_risk_score=sigs.sla_risk_score,
            fairness_index=sigs.fairness_index,
            resource_utilization=sigs.resource_utilization,
            digital_intake_ratio=sigs.digital_intake_ratio,
            blocked_cases_missing_docs=sigs.blocked_cases_missing_docs,
            field_verification_load=sigs.field_verification_load,
            active_events=active_events,
            last_action_valid=self.last_action_valid,
            last_action_message=self.last_action_message,
            last_action_explanation=self.last_action_explanation,
            pending_doc_resolutions=pending_doc,
            pending_officer_reallocations=pending_officer,
        )

    def _init_episode_state(self) -> None:
        """Minimal placeholder state so the env is usable before reset()."""
        self.seed = self.task.seed
        self.rng = random.Random(self.seed)
        self.episode_id = f"{self.task_id}-s{self.seed}-init"
        self.day = 0
        self.total_steps = 0
        self.terminated = False
        self.truncated = False
        self.priority_mode = PriorityMode.BALANCED
        # Single-officer stub pool; reset() installs the task's real pool.
        self.officer_pool = OfficerPool(
            total_officers=1,
            available_officers=1,
            allocated={},
            pending_reallocation={},
        )
        self.active_cases: list[ApplicationCase] = []
        self.completed_cases: list[ApplicationCase] = []
        self.escalation_budget_remaining = 0
        self.arrived_by_service: dict[ServiceType, int] = {}
        self.completed_by_service: dict[ServiceType, int] = {}
        self.metrics = EpisodeMetrics()
        self.action_history: list[dict] = []
        self.last_action_valid = True
        self.last_action_message = ""
        self.last_action_explanation = ""
        self.event_engine = EventEngine(seed=self.seed, scenario_mode=ScenarioMode.NORMAL)
        self.simulator = DaySimulator(self.task, self.rng, self.event_engine)
        self.signal_computer = SignalComputer()

    def _count_pending_effects(self) -> int:
        """Case-derived pending effects: scheduled doc fixes + field checks."""
        doc_pending = sum(
            1 for c in self.active_cases
            if c.doc_resolution_day is not None
            and c.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
        )
        fv_pending = sum(
            1 for c in self.active_cases
            if c.internal_substate == InternalSubstate.FIELD_VERIFICATION_PENDING
            and c.field_verification_completion_day is not None
        )
        return doc_pending + fv_pending

    @property
    def fairness_gap(self) -> float:
        # Live view of the cross-service completion-rate spread.
        return completion_fairness_gap(self.arrived_by_service, self.completed_by_service)

    @property
    def total_completed(self) -> int:
        return len(self.completed_cases)

    @property
    def total_backlog(self) -> int:
        return len(self.active_cases)
app/event_engine.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ event_engine.py — Gov Workflow OpenEnv v2.0
3
+ Deterministic daily event system. Same seed + day + scenario = same events always.
4
+ """
5
+ import random
6
+ from typing import List
7
+ from app.models import EventType, ScenarioMode, TaskConfig
8
+
9
# How much harder each scenario makes event rolls: every base probability is
# scaled by this factor (capped later at 0.80 in EventEngine).
SCENARIO_MULTIPLIER = {
    ScenarioMode.NORMAL: 1.0,
    ScenarioMode.CRISIS: 2.0,
    ScenarioMode.EXTREME_OVERLOAD: 3.5,
}

# Per-day base probability that each event type fires (before scenario scaling).
BASE_PROBS = {
    EventType.SURGE_APPLICATIONS: 0.08,
    EventType.OFFICER_UNAVAILABLE: 0.07,
    EventType.DOCUMENT_REJECTION_SPIKE: 0.10,
    EventType.REVENUE_DB_DELAY: 0.06,
    EventType.SLA_ESCALATION_ORDER: 0.05,
}

# Magnitude of each event per scenario. Units differ by event type:
# SURGE → arrival multiplier; OFFICER_UNAVAILABLE → officers removed;
# DOC spike / DB delay → additive rate boosts; SLA order → window multiplier
# (smaller = tighter deadlines).
EVENT_EFFECTS = {
    EventType.SURGE_APPLICATIONS:
        {ScenarioMode.NORMAL: 1.3, ScenarioMode.CRISIS: 1.5, ScenarioMode.EXTREME_OVERLOAD: 2.0},
    EventType.OFFICER_UNAVAILABLE:
        {ScenarioMode.NORMAL: 1, ScenarioMode.CRISIS: 1, ScenarioMode.EXTREME_OVERLOAD: 2},
    EventType.DOCUMENT_REJECTION_SPIKE:
        {ScenarioMode.NORMAL: 0.15, ScenarioMode.CRISIS: 0.20, ScenarioMode.EXTREME_OVERLOAD: 0.35},
    EventType.REVENUE_DB_DELAY:
        {ScenarioMode.NORMAL: 0.30, ScenarioMode.CRISIS: 0.40, ScenarioMode.EXTREME_OVERLOAD: 0.60},
    EventType.SLA_ESCALATION_ORDER:
        {ScenarioMode.NORMAL: 0.50, ScenarioMode.CRISIS: 0.50, ScenarioMode.EXTREME_OVERLOAD: 0.40},
}
35
+
36
+
37
class DayEventParams:
    """Aggregated per-day modifiers produced by applying the day's events."""

    def __init__(self):
        # Neutral defaults: multipliers 1.0, additive boosts 0.0, no events.
        self.arrival_multiplier: float = 1.0
        self.sla_window_multiplier: float = 1.0
        self.officer_reduction: int = 0
        self.doc_defect_rate_boost: float = 0.0
        self.system_dependency_boost: float = 0.0
        self.active_events: List[EventType] = []

    def has_events(self) -> bool:
        """True when at least one event was recorded for the day."""
        return len(self.active_events) > 0
48
+
49
+
50
class EventEngine:
    """Deterministic daily event roller: same seed + day + scenario ⇒ same events."""

    def __init__(self, seed: int, scenario_mode: ScenarioMode):
        self.seed = seed
        self.scenario_mode = scenario_mode
        # Scenario severity scales every base event probability.
        self._multiplier = SCENARIO_MULTIPLIER[scenario_mode]

    def get_events_for_day(self, day: int, task_config: "TaskConfig") -> List[EventType]:
        """
        Roll the task's allowed events for one day.

        A dedicated RNG seeded from (seed, day) makes the roll reproducible
        independent of call order. Returns [NO_EVENT] when nothing fires.
        """
        roll = random.Random(self.seed + day * 31337)
        fired = [
            ev for ev in task_config.allowed_events
            if ev != EventType.NO_EVENT
            # Effective probability is scenario-scaled but capped at 0.80.
            and roll.random() < min(0.80, BASE_PROBS.get(ev, 0.0) * self._multiplier)
        ]
        return fired or [EventType.NO_EVENT]

    def apply_events(self, events: List[EventType], task_config: "TaskConfig") -> DayEventParams:
        """Fold the day's events into a single DayEventParams modifier bundle."""
        params = DayEventParams()
        for ev in events:
            if ev == EventType.NO_EVENT:
                continue
            params.active_events.append(ev)
            # Effect size depends on both the event and the scenario severity.
            size = EVENT_EFFECTS.get(ev, {}).get(self.scenario_mode, 0)
            if ev == EventType.SURGE_APPLICATIONS:
                params.arrival_multiplier *= size
            elif ev == EventType.OFFICER_UNAVAILABLE:
                params.officer_reduction += int(size)
            elif ev == EventType.DOCUMENT_REJECTION_SPIKE:
                params.doc_defect_rate_boost += size
            elif ev == EventType.REVENUE_DB_DELAY:
                params.system_dependency_boost += size
            elif ev == EventType.SLA_ESCALATION_ORDER:
                # Tightest SLA window wins when stacked.
                params.sla_window_multiplier = min(params.sla_window_multiplier, size)
        if not params.active_events:
            params.active_events = [EventType.NO_EVENT]
        return params

    def describe_events(self, events: List[EventType]) -> str:
        """One-line, human-readable summary of the day's real events."""
        labels = {
            EventType.SURGE_APPLICATIONS: "Digital surge: arrivals increased",
            EventType.OFFICER_UNAVAILABLE: "Officer absent: reduced capacity",
            EventType.DOCUMENT_REJECTION_SPIKE: "Doc rejection spike: higher defect rate",
            EventType.REVENUE_DB_DELAY: "Revenue DB delay: land records slower",
            EventType.SLA_ESCALATION_ORDER: "SLA escalation order: deadlines tightened",
            EventType.NO_EVENT: "No active events today",
        }
        real = [e for e in events if e != EventType.NO_EVENT]
        if not real:
            return "No active events today"
        return "; ".join(labels.get(e, str(e)) for e in real)
app/graders.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ graders.py — Gov Workflow OpenEnv: Deterministic Episode Graders
3
+
4
+ Rules:
5
+ - All graders read ONLY from EpisodeStateModel flat fields.
6
+ - No access to env internals, EpisodeMetrics, or reward breakdown proxies.
7
+ - GraderResult uses the aligned schema (score, grader_name, named metric fields).
8
+ - grade_episode() dispatches by task_id.
9
+
10
+ Grader weights:
11
+ Easy — completion(0.45) + SLA(0.35) + idle_efficiency(0.20) = 1.00
12
+ Medium — completion(0.35) + SLA(0.30) + doc_rework(0.20) + urgent(0.15) = 1.00
13
+ Hard — completion(0.28) + SLA(0.24) + doc_rework(0.16)
14
+ + fairness(0.16) + escalation_discipline(0.16) = 1.00
15
+ """
16
+ from __future__ import annotations
17
+ from app.models import EpisodeStateModel, GraderResult
18
+
19
+
20
+ # ─────────────────────────────────────────────────────────────────────────────
21
+ # INTERNAL HELPERS
22
+ # ─────────────────────────────────────────────────────────────────────────────
23
+
24
+ def _safe_ratio(num: float, den: float, default: float = 1.0) -> float:
25
+ """Safe division, clamped to [0.0, 1.0]. Returns `default` when den ≤ 0."""
26
+ if den <= 0:
27
+ return max(0.0, min(1.0, default))
28
+ return max(0.0, min(1.0, num / den))
29
+
30
+
31
+ def _b(value: float) -> float:
32
+ """Clamp any float to [0.0, 1.0]."""
33
+ return max(0.0, min(1.0, float(value)))
34
+
35
+
36
def _extract(state: EpisodeStateModel) -> dict[str, float]:
    """
    Derive every grader input metric from EpisodeStateModel flat fields.

    All ratios are clamped to [0, 1]; denominators are floored at 1 so a
    metric with no opportunities (no docs requested, no urgent cases, ...)
    defaults to a perfect score rather than dividing by zero.
    """
    arrivals = max(1, state.total_arrived)
    gap = float(state.fairness_gap)
    escalations = max(1.0, float(state.total_escalations_used))
    capacity = max(1.0, float(state.total_capacity_days))

    return {
        "completion_rate": _b(_safe_ratio(float(state.total_completed), arrivals, 0.0)),
        "sla_compliance": _b(1.0 - _safe_ratio(float(state.total_sla_breaches), arrivals, 0.0)),
        "document_rework_quality": _b(
            _safe_ratio(float(state.total_docs_cleared), float(state.total_docs_requested), 1.0)
        ),
        "urgent_served_rate": _b(
            _safe_ratio(float(state.total_urgent_completed), float(state.total_urgent_arrived), 1.0)
        ),
        "fairness_score": _b(1.0 - gap),
        "escalation_discipline": _b(
            1.0 - _safe_ratio(float(state.total_wasted_escalations), escalations, 0.0)
        ),
        "idle_efficiency": _b(
            1.0 - _safe_ratio(float(state.total_idle_officer_days), capacity, 0.0)
        ),
        "fairness_gap": round(gap, 4),
    }
68
+
69
+
70
def _build_result(
    state: EpisodeStateModel,
    score: float,
    grader_name: str,
    m: dict[str, float],
) -> GraderResult:
    """Assemble a fully-populated GraderResult from the metric dict and state."""
    return GraderResult(
        # Episode identity.
        task_id=state.task_id,
        episode_id=state.episode_id,
        grader_name=grader_name,
        # Final score is clamped to [0, 1] regardless of weighting errors.
        score=_b(score),
        # Named component metrics, straight from _extract().
        completion_rate=m["completion_rate"],
        sla_compliance_rate=m["sla_compliance"],
        idle_efficiency=m["idle_efficiency"],
        document_rework_quality=m["document_rework_quality"],
        urgent_served_rate=m["urgent_served_rate"],
        fairness_score=m["fairness_score"],
        escalation_discipline=m["escalation_discipline"],
        fairness_gap=m["fairness_gap"],
        # Raw episode totals for reporting.
        total_cases_arrived=max(0, state.total_arrived),
        total_completed=state.total_completed,
        total_sla_breached=state.total_sla_breaches,
        total_rejected=state.total_rejected,
        avg_waiting_days=state.avg_waiting_days,
    )
99
+
100
+
101
+ # ─────────────────────────────────────────────────────────────────────────────
102
+ # TASK GRADERS
103
+ # ─────────────────────────────────────────────────────────────────────────────
104
+
105
def grade_easy(state: EpisodeStateModel) -> GraderResult:
    """
    district_backlog_easy grader.
    Focus: raw throughput and SLA hygiene under simple single-service load.

    Weights: completion(0.45) + SLA(0.35) + idle_efficiency(0.20)
    """
    m = _extract(state)
    # Insertion order matches the documented weight order.
    weights = {
        "completion_rate": 0.45,
        "sla_compliance": 0.35,
        "idle_efficiency": 0.20,
    }
    score = sum(w * m[key] for key, w in weights.items())
    return _build_result(state, score, "easy", m)
119
+
120
+
121
def grade_medium(state: EpisodeStateModel) -> GraderResult:
    """
    mixed_urgency_medium grader.
    Focus: throughput + SLA + document quality + prioritizing urgent cases.

    Weights: completion(0.35) + SLA(0.30) + doc_rework(0.20) + urgent(0.15)
    """
    m = _extract(state)
    # Insertion order matches the documented weight order.
    weights = {
        "completion_rate": 0.35,
        "sla_compliance": 0.30,
        "document_rework_quality": 0.20,
        "urgent_served_rate": 0.15,
    }
    score = sum(w * m[key] for key, w in weights.items())
    return _build_result(state, score, "medium", m)
136
+
137
+
138
def grade_hard(state: EpisodeStateModel) -> GraderResult:
    """
    cross_department_hard grader.
    Focus: all-round excellence including cross-service fairness and
    restrained escalation use under crisis conditions.

    Weights: completion(0.28) + SLA(0.24) + doc_rework(0.16)
             + fairness(0.16) + escalation_discipline(0.16)
    """
    m = _extract(state)
    # Insertion order matches the documented weight order.
    weights = {
        "completion_rate": 0.28,
        "sla_compliance": 0.24,
        "document_rework_quality": 0.16,
        "fairness_score": 0.16,
        "escalation_discipline": 0.16,
    }
    score = sum(w * m[key] for key, w in weights.items())
    return _build_result(state, score, "hard", m)
156
+
157
+
158
+ # ─────────────────────────────────────────────────────────────────────────────
159
+ # DISPATCHER
160
+ # ─────────────────────────────────────────────────────────────────────────────
161
+
162
# Task-id → grader dispatch table. The "_extreme" variant deliberately reuses
# the easy grader; unknown task ids fall back to grade_hard in grade_episode().
_GRADER_MAP = {
    "district_backlog_easy": grade_easy,
    "district_backlog_easy_extreme": grade_easy,
    "mixed_urgency_medium": grade_medium,
    "cross_department_hard": grade_hard,
}
168
+
169
+
170
def grade_episode(state: EpisodeStateModel) -> GraderResult:
    """
    Dispatch to the correct task grader.
    Falls back to grade_hard for unknown task IDs (safe default for new tasks).
    """
    return _GRADER_MAP.get(state.task_id, grade_hard)(state)
app/main.py ADDED
The diff for this file is too large to render. See raw diff
 
app/models.py ADDED
@@ -0,0 +1,509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ models.py — Gov Workflow OpenEnv v2.0 — Phase 2 FULL FILE
3
+ Adds: DocEnrichmentType, doc_enrichment fields on ApplicationCase,
4
+ blocked_cases_enrichment / pending_enrichment_lookups on observation,
5
+ INTERNAL_TO_PUBLIC_STAGE mapping,
6
+ SectorProfile enrichment fields.
7
+ """
8
+
9
+ from __future__ import annotations
10
+ from enum import Enum
11
+ from typing import Dict, List, Optional
12
+ from pydantic import BaseModel, Field
13
+ import uuid
14
+
15
+
16
+ # ─────────────────────────────────────────────
17
+ # ENUMS
18
+ # ─────────────────────────────────────────────
19
+
20
class ServiceType(str, Enum):
    """Government service a case belongs to — one processing queue per service."""
    PASSPORT = "passport"
    DRIVING_LICENSE = "driving_license"
    AADHAAR_CARD = "aadhaar_card"
    GST_REGISTRATION = "gst_registration"
    INCOME_CERTIFICATE = "income_certificate"
    CASTE_CERTIFICATE = "caste_certificate"
    BIRTH_CERTIFICATE = "birth_certificate"
    LAND_REGISTRATION = "land_registration"
29
+
30
+
31
class StageType(str, Enum):
    """Citizen-facing pipeline stage (coarser than InternalSubstate)."""
    SUBMISSION = "submission"
    DOCUMENT_VERIFICATION = "document_verification"
    FIELD_VERIFICATION = "field_verification"
    APPROVAL = "approval"
    ISSUANCE = "issuance"
37
+
38
+
39
class InternalSubstate(str, Enum):
    """Fine-grained internal processing state of a case.

    Mapped onto the coarser public StageType via INTERNAL_TO_PUBLIC_STAGE.
    """
    PRE_SCRUTINY = "pre_scrutiny"
    DOC_VALIDATION = "doc_validation"
    SERVICE_SPECIFIC_VALIDATION = "service_specific_validation"
    FIELD_VERIFICATION_PENDING = "field_verification_pending"
    DECISION_PENDING = "decision_pending"
    ISSUANCE_READY = "issuance_ready"
    # Blocked states: case cannot advance until the blocker resolves.
    BLOCKED_MISSING_DOCS = "blocked_missing_docs"
    BLOCKED_ENRICHMENT = "blocked_enrichment"
    # Terminal states.
    COMPLETED = "completed"
    REJECTED = "rejected"
50
+
51
+
52
# ── Phase 2 addition ──────────────────────────────────────────────────────────
class DocEnrichmentType(str, Enum):
    """External lookup needed for document verification."""
    NONE = "none"
    PAST_LAND_RECORDS = "past_land_records"  # Land Registration — Revenue DB
    FAMILY_CASTE_HISTORY = "family_caste_history"  # Caste Certificate — Caste Registry
    POLICE_VERIFICATION = "police_verification"  # Passport — Police Station
    TAX_RECORD_CROSS_CHECK= "tax_record_cross_check"  # GST Registration — Tax DB
60
+
61
+
62
# Public stage mapping — used by state_machine.build_public_stage.
# Keys are InternalSubstate values; values are StageType values.
# Blocked states surface as "document_verification" so citizens see the stage
# that is waiting on them, not the internal blocker.
INTERNAL_TO_PUBLIC_STAGE: Dict[str, str] = {
    "pre_scrutiny": "submission",
    "doc_validation": "document_verification",
    "service_specific_validation": "document_verification",
    "field_verification_pending": "field_verification",
    "decision_pending": "approval",
    "issuance_ready": "issuance",
    "blocked_missing_docs": "document_verification",
    "blocked_enrichment": "document_verification",
    "completed": "issuance",
    "rejected": "approval",
}
75
+
76
+
77
class PriorityMode(str, Enum):
    """Queue-ordering policy the agent can set per service."""
    URGENT_FIRST = "urgent_first"
    OLDEST_FIRST = "oldest_first"
    BALANCED = "balanced"
    BACKLOG_CLEARANCE = "backlog_clearance"
82
+
83
+
84
class ActionType(str, Enum):
    """Discrete action verbs available to the agent each step."""
    SET_PRIORITY_MODE = "set_priority_mode"
    ASSIGN_CAPACITY = "assign_capacity"
    REQUEST_MISSING_DOCUMENTS = "request_missing_documents"
    ESCALATE_SERVICE = "escalate_service"
    ADVANCE_TIME = "advance_time"
    REALLOCATE_OFFICERS = "reallocate_officers"
91
+
92
+
93
class EventType(str, Enum):
    """Exogenous events the simulator can inject during an episode."""
    SURGE_APPLICATIONS = "surge_applications"
    OFFICER_UNAVAILABLE = "officer_unavailable"
    DOCUMENT_REJECTION_SPIKE = "document_rejection_spike"
    REVENUE_DB_DELAY = "revenue_db_delay"
    SLA_ESCALATION_ORDER = "sla_escalation_order"
    NO_EVENT = "no_event"
100
+
101
+
102
class ScenarioMode(str, Enum):
    """Overall episode difficulty regime."""
    NORMAL = "normal"
    CRISIS = "crisis"
    EXTREME_OVERLOAD = "extreme_overload"
106
+
107
+
108
class UrgencyProfile(str, Enum):
    """How often a sector produces urgent cases."""
    LOW = "low"
    MODERATE = "moderate"
    HIGH = "high"
    LOW_BUT_STICKY = "low_but_sticky"
113
+
114
+
115
class IntakeChannel(str, Enum):
    """How an application entered the system (affects doc defect rates)."""
    DIGITAL = "digital"
    PAPER = "paper"
    HYBRID = "hybrid"
119
+
120
+
121
class DelayedEffectType(str, Enum):
    """Kinds of scheduled effects that resolve on a future day."""
    DOC_REQUEST_RESOLUTION = "doc_request_resolution"
    OFFICER_REALLOCATION = "officer_reallocation"
    ESCALATION_RELIEF = "escalation_relief"
125
+
126
+
127
+ # ─────────────────────────────────────────────
128
+ # SECTOR / SERVICE CONFIGURATION
129
+ # ─────────────────────────────────────────────
130
+
131
class SectorProfile(BaseModel):
    """Static per-service simulation parameters (probabilities, SLAs, rates).

    All probability fields are constrained to [0, 1]; day counts are >= 1.
    """
    service_type: ServiceType
    sector_name: str
    missing_docs_probability: float = Field(ge=0.0, le=1.0)
    # Doc defect rates differ by intake channel (paper is typically worse).
    doc_defect_rate_digital: float = Field(ge=0.0, le=1.0)
    doc_defect_rate_paper: float = Field(ge=0.0, le=1.0)
    field_verification_probability: float = Field(ge=0.0, le=1.0)
    manual_scrutiny_intensity: float = Field(ge=0.0, le=1.0)
    decision_backlog_sensitivity: float = Field(ge=0.0, le=1.0)
    system_dependency_risk: float = Field(ge=0.0, le=1.0)
    sla_days: int = Field(ge=1)
    urgency_profile: UrgencyProfile
    base_processing_rate: float = Field(ge=0.1)
    field_verification_days: int = Field(ge=1)
    # ── Phase 2: enrichment ─────────────────────────────────────────
    # External-lookup behavior; delay is sampled from [min, max] days.
    # NOTE(review): no validator enforces min <= max — callers must keep
    # these consistent.
    doc_enrichment_type: DocEnrichmentType = DocEnrichmentType.NONE
    doc_enrichment_probability: float = Field(default=0.0, ge=0.0, le=1.0)
    doc_enrichment_delay_days_min: int = Field(default=1, ge=1)
    doc_enrichment_delay_days_max: int = Field(default=3, ge=1)
150
+
151
+
152
class OfficerPool(BaseModel):
    """Shared officer head-count and its per-service allocation."""
    total_officers: int = Field(ge=1)
    available_officers: int = Field(ge=0)
    # service-type value → officers currently allocated to that service
    allocated: Dict[str, int] = Field(default_factory=dict)
    # service-type value → officers in transit (reallocation not yet landed)
    pending_reallocation: Dict[str, int] = Field(default_factory=dict)

    @property
    def idle_officers(self) -> int:
        """Officers available but not allocated to any service.

        Can go negative if allocations exceed availability; callers should
        treat negative values as over-allocation.
        """
        return self.available_officers - sum(self.allocated.values())
161
+
162
+
163
+ # ─────────────────────────────────────────────
164
+ # CASE MODEL (Phase 2: enrichment fields added)
165
+ # ─────────────────────────────────────────────
166
+
167
class ApplicationCase(BaseModel):
    """A single citizen application flowing through the pipeline."""
    case_id: str = Field(default_factory=lambda: str(uuid.uuid4())[:8])
    service_type: ServiceType
    internal_substate: InternalSubstate = InternalSubstate.PRE_SCRUTINY
    public_stage: StageType = StageType.SUBMISSION

    # ── Timeline (all values are simulation-day indices) ───────────
    arrival_day: int = Field(ge=0)
    current_day: int = Field(ge=0)
    sla_deadline_day: int = Field(ge=0)
    days_in_current_stage: int = Field(default=0, ge=0)
    waiting_days: int = Field(default=0, ge=0)

    # ── Case attributes / blockers ─────────────────────────────────
    is_urgent: bool = False
    intake_channel: IntakeChannel = IntakeChannel.DIGITAL
    has_missing_docs: bool = False
    doc_request_sent_day: Optional[int] = None
    doc_resolution_day: Optional[int] = None
    field_verification_required: bool = False
    field_verification_completion_day: Optional[int] = None

    # ── Outcome flags ──────────────────────────────────────────────
    sla_breached: bool = False
    completed: bool = False
    rejected: bool = False

    # ── Phase 2: enrichment ─────────────────────────────────────────
    doc_enrichment_type: DocEnrichmentType = DocEnrichmentType.NONE
    doc_enrichment_triggered: bool = False
    enrichment_resolution_day: Optional[int] = None
    doc_enrichment_reason: Optional[str] = None

    @property
    def days_until_sla(self) -> int:
        """Whole days remaining before the SLA deadline (never negative)."""
        return max(0, self.sla_deadline_day - self.current_day)

    @property
    def sla_risk(self) -> float:
        """Fraction of the SLA window already consumed, clamped to [0, 1].

        A non-positive window (deadline at or before arrival) is treated
        as maximal risk.
        """
        total_window = self.sla_deadline_day - self.arrival_day
        if total_window <= 0:
            return 1.0
        elapsed = self.current_day - self.arrival_day
        return min(1.0, elapsed / total_window)
208
+
209
+
210
class QueueSnapshot(BaseModel):
    """Per-service daily aggregate exposed in observations."""
    service_type: ServiceType
    # public StageType value → number of pending cases in that stage
    public_stage_counts: Dict[str, int] = Field(default_factory=dict)
    total_pending: int = Field(default=0, ge=0)
    total_completed_today: int = Field(default=0, ge=0)
    total_sla_breached: int = Field(default=0, ge=0)
    urgent_pending: int = Field(default=0, ge=0)
    blocked_missing_docs: int = Field(default=0, ge=0)
    blocked_enrichment: int = Field(default=0, ge=0)  # Phase 2
    field_verification_pending: int = Field(default=0, ge=0)
    oldest_case_age_days: int = Field(default=0, ge=0)
    avg_waiting_days: float = Field(default=0.0, ge=0.0)
    current_sla_risk: float = Field(default=0.0, ge=0.0, le=1.0)
223
+
224
+
225
+ # ─────────────────────────────────────────────
226
+ # DELAYED EFFECT MODEL
227
+ # ─────────────────────────────────────────────
228
+
229
class DelayedEffect(BaseModel):
    """A scheduled effect that fires when the simulation reaches resolution_day."""
    effect_id: str = Field(default_factory=lambda: str(uuid.uuid4())[:8])
    effect_type: DelayedEffectType
    # Either a whole service or a specific case may be targeted.
    target_service: Optional[ServiceType] = None
    target_case_id: Optional[str] = None
    resolution_day: int = Field(ge=0)
    magnitude: float = Field(default=1.0)
    description: str = Field(default="")
237
+
238
+
239
+ # ─────────────────────────────────────────────
240
+ # OBSERVATION MODEL (Phase 2: enrichment signals added)
241
+ # ─────────────────────────────────────────────
242
+
243
class ObservationModel(BaseModel):
    """Full observation returned to the agent after reset/step."""
    task_id: str
    episode_id: str
    day: int = Field(ge=0)
    max_days: int = Field(ge=1)
    scenario_mode: ScenarioMode = ScenarioMode.NORMAL
    officer_pool: OfficerPool
    # service-type value → per-service queue aggregate
    queue_snapshots: Dict[str, QueueSnapshot] = Field(default_factory=dict)

    # ── Episode-wide counters ──────────────────────────────────────
    total_backlog: int = Field(default=0, ge=0)
    total_completed: int = Field(default=0, ge=0)
    total_sla_breaches: int = Field(default=0, ge=0)
    total_rejected: int = Field(default=0, ge=0)
    escalation_budget_remaining: int = Field(default=0, ge=0)

    # Compressed signals (all normalized to [0, 1] unless noted)
    backlog_pressure: float = Field(default=0.0, ge=0.0, le=1.0)
    sla_risk_score: float = Field(default=0.0, ge=0.0, le=1.0)
    fairness_index: float = Field(default=1.0, ge=0.0, le=1.0)
    resource_utilization: float = Field(default=0.0, ge=0.0, le=1.0)
    digital_intake_ratio: float = Field(default=0.5, ge=0.0, le=1.0)
    blocked_cases_missing_docs: int = Field(default=0, ge=0)
    blocked_cases_enrichment: int = Field(default=0, ge=0)  # Phase 2
    field_verification_load: float = Field(default=0.0, ge=0.0, le=1.0)

    active_events: List[EventType] = Field(default_factory=list)

    # ── Feedback about the agent's previous action ─────────────────
    last_action_valid: bool = True
    last_action_message: str = ""
    last_action_explanation: str = Field(default="")

    # ── Pending delayed effects, by kind ───────────────────────────
    pending_doc_resolutions: int = Field(default=0, ge=0)
    pending_enrichment_lookups: int = Field(default=0, ge=0)  # Phase 2
    pending_officer_reallocations: int = Field(default=0, ge=0)
277
+
278
+
279
+ # ─────────────────────────────────────────────
280
+ # ACTION / REWARD / STATE MODELS (unchanged)
281
+ # ─────────────────────────────────────────────
282
+
283
class ActionModel(BaseModel):
    """Agent action; which optional fields are required depends on action_type."""
    action_type: ActionType
    service_target: Optional[ServiceType] = None
    priority_mode: Optional[PriorityMode] = None
    # service-type value → signed officer delta (for REALLOCATE_OFFICERS)
    reallocation_delta: Optional[Dict[str, int]] = None
    escalation_target: Optional[ServiceType] = None
    # service-type value → absolute officer count (for ASSIGN_CAPACITY)
    capacity_assignment: Optional[Dict[str, int]] = None
    notes: Optional[str] = None
291
+
292
+
293
class RewardModel(BaseModel):
    """Per-step reward breakdown; total_reward is the sum of components.

    Penalty fields are stored as magnitudes and subtracted when totalling.
    """
    total_reward: float = 0.0
    # Positive components
    progress_reward: float = 0.0
    completion_reward: float = 0.0
    recovery_reward: float = 0.0
    stability_bonus: float = 0.0
    # Penalty components (magnitudes)
    waiting_penalty: float = 0.0
    sla_penalty: float = 0.0
    fairness_penalty: float = 0.0
    invalid_action_penalty: float = 0.0
    idle_capacity_penalty: float = 0.0
    oscillation_penalty: float = 0.0
305
+
306
+
307
class EpisodeStateModel(BaseModel):
    """Internal episode state exposed via GET /state and POST /state endpoints."""
    episode_id: str
    task_id: str
    seed: int
    scenario_mode: ScenarioMode
    day: int = Field(ge=0)
    max_days: int = Field(ge=1)
    terminated: bool = False
    truncated: bool = False
    total_steps: int = Field(default=0, ge=0)
    total_completed: int = Field(default=0, ge=0)
    total_backlog: int = Field(default=0, ge=0)
    total_sla_breaches: int = Field(default=0, ge=0)
    total_rejected: int = Field(default=0, ge=0)
    action_history_count: int = Field(default=0, ge=0)
    cumulative_reward: float = 0.0
    cumulative_reward_breakdown: RewardModel = Field(default_factory=RewardModel)
    officer_pool: Optional[OfficerPool] = None
    pending_effects_count: int = Field(default=0, ge=0)
    active_events_today: List[EventType] = Field(default_factory=list)

    # ── Grader-facing fields ──────────────────────────────────────
    # These are populated by env.state() so graders never need to
    # reach into private EpisodeMetrics.
    fairness_gap: float = Field(
        default=0.0, ge=0.0, le=1.0,
        description="Cross-service completion fairness gap at episode end"
    )
    total_arrived: int = Field(
        default=0, ge=0,
        description="Total cases that arrived across all services"
    )
    total_docs_requested: int = Field(
        default=0, ge=0,
        description="Total missing-doc requests sent"
    )
    total_docs_cleared: int = Field(
        default=0, ge=0,
        description="Total missing-doc cases subsequently resolved"
    )
    total_idle_officer_days: int = Field(
        default=0, ge=0,
        description="Cumulative officer-days wasted idle"
    )
    total_capacity_days: int = Field(
        default=0, ge=0,
        description="Cumulative total officer-days available"
    )
    total_urgent_arrived: int = Field(
        default=0, ge=0,
        description="Total urgent cases that arrived"
    )
    total_urgent_completed: int = Field(
        default=0, ge=0,
        description="Total urgent cases completed"
    )
    total_escalations_used: int = Field(
        default=0, ge=0,
        description="Total escalation actions consumed"
    )
    total_wasted_escalations: int = Field(
        default=0, ge=0,
        description="Escalations used on already-urgent or ineligible cases"
    )
    total_invalid_actions: int = Field(
        default=0, ge=0,
        description="Total invalid actions submitted by agent"
    )
    avg_waiting_days: float = Field(
        default=0.0, ge=0.0,
        description="Mean waiting days across all completed cases"
    )

    # ── Full action log (optional, stripped by default) ──────────
    action_history: Optional[List[dict]] = Field(
        default=None,
        description="Step-by-step action log. Stripped in normal API responses."
    )
386
+
387
+
388
class StepInfoModel(BaseModel):
    """Diagnostic info attached to each step response (not part of the reward)."""
    reward_breakdown: RewardModel = Field(default_factory=RewardModel)
    newly_arrived_cases: int = Field(default=0, ge=0)
    newly_completed_cases: int = Field(default=0, ge=0)
    newly_sla_breached_cases: int = Field(default=0, ge=0)
    newly_resolved_doc_cases: int = Field(default=0, ge=0)
    invalid_action: bool = False
    action_explanation: str = ""
    active_events: List[EventType] = Field(default_factory=list)
    # Running grader estimate for the episode so far, in [0, 1].
    grader_preview_score: float = Field(default=0.0, ge=0.0, le=1.0)
    effects_resolved_this_step: List[str] = Field(default_factory=list)
399
+
400
+
401
class TaskConfig(BaseModel):
    """Static definition of a benchmark task (scenario, services, budgets)."""
    task_id: str
    display_name: str
    difficulty: str
    scenario_mode: ScenarioMode
    seed: int
    max_days: int = Field(ge=1)
    enabled_services: List[ServiceType]
    # service-type value → expected arrivals per simulated day
    arrival_rate_per_day: Dict[str, float]
    digital_intake_ratio: float = Field(default=0.6, ge=0.0, le=1.0)
    initial_officer_pool: OfficerPool
    # Optional per-service overrides of the sector-profile defaults.
    missing_docs_probability_override: Optional[Dict[str, float]] = None
    field_verification_probability_override: Optional[Dict[str, float]] = None
    escalation_budget: int = Field(ge=0)
    fairness_threshold: Optional[float] = Field(default=None, ge=0.0, le=1.0)
    event_probability: float = Field(default=0.1, ge=0.0, le=1.0)
    allowed_events: List[EventType] = Field(default_factory=list)
418
+
419
+
420
class GraderResult(BaseModel):
    """
    Final deterministic score for a completed or in-progress episode.
    Range: [0.0, 1.0].

    Design decision: exposes .score and .grader_name as convenience aliases,
    plus a .metrics dict for easy serialization to JSON by main.py endpoints.
    The named fields (completion_rate, sla_compliance_rate, etc.) remain
    for typed access in tests and baselines.
    """
    task_id: str = ""
    episode_id: str = ""
    grader_name: str = ""  # "easy" | "medium" | "hard"

    # Primary scalar — use result.score everywhere
    score: float = Field(default=0.0, ge=0.0, le=1.0)

    # Named metric components (all normalized to [0, 1])
    completion_rate: float = Field(default=0.0, ge=0.0, le=1.0)
    sla_compliance_rate: float = Field(default=0.0, ge=0.0, le=1.0)
    idle_efficiency: float = Field(default=1.0, ge=0.0, le=1.0)
    document_rework_quality: float = Field(default=1.0, ge=0.0, le=1.0)
    urgent_served_rate: float = Field(default=1.0, ge=0.0, le=1.0)
    fairness_score: float = Field(default=1.0, ge=0.0, le=1.0)
    escalation_discipline: float = Field(default=1.0, ge=0.0, le=1.0)
    fairness_gap: float = Field(default=0.0, ge=0.0, le=1.0)

    # Episode counters — populated from EpisodeStateModel
    total_cases_arrived: int = 0
    total_completed: int = 0
    total_sla_breached: int = 0
    total_rejected: int = 0
    avg_waiting_days: float = 0.0

    @property
    def metrics(self) -> dict:
        """
        Convenience dict for JSON serialization in API endpoints.
        main.py uses result.metrics directly in GradeResponse.

        Floats are rounded (4 dp for rates, 2 dp for waiting days) so the
        JSON payload stays stable and readable.
        """
        return {
            "completion_rate": round(self.completion_rate, 4),
            "sla_compliance_rate": round(self.sla_compliance_rate, 4),
            "idle_efficiency": round(self.idle_efficiency, 4),
            "document_rework_quality": round(self.document_rework_quality, 4),
            "urgent_served_rate": round(self.urgent_served_rate, 4),
            "fairness_score": round(self.fairness_score, 4),
            "escalation_discipline": round(self.escalation_discipline, 4),
            "fairness_gap": round(self.fairness_gap, 4),
            "total_cases_arrived": self.total_cases_arrived,
            "total_completed": self.total_completed,
            "total_sla_breached": self.total_sla_breached,
            "total_rejected": self.total_rejected,
            "avg_waiting_days": round(self.avg_waiting_days, 2),
        }
475
+
476
+
477
class ResetRequest(BaseModel):
    """POST /reset payload; seed/scenario_mode default to the task's values."""
    task_id: str
    seed: Optional[int] = None
    scenario_mode: Optional[ScenarioMode] = None
481
+
482
+
483
class ResetResponse(BaseModel):
    """POST /reset response: initial observation plus the new episode id."""
    observation: ObservationModel
    info: dict
    episode_id: str
487
+
488
+
489
class StepRequest(BaseModel):
    """POST /step payload: target episode and the action to apply."""
    episode_id: str
    action: ActionModel
492
+
493
+
494
class StepResponse(BaseModel):
    """POST /step response following the Gym step convention."""
    observation: ObservationModel
    reward: float
    terminated: bool
    truncated: bool
    info: StepInfoModel
500
+
501
+
502
class StateResponse(BaseModel):
    """GET /state response wrapper."""
    state: EpisodeStateModel
504
+
505
+
506
class HealthResponse(BaseModel):
    """GET /health response."""
    status: str = "ok"
    version: str = "2.0.0"
    active_episodes: int = 0
app/persistence.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import sqlite3
6
+ import time
7
+ from pathlib import Path
8
+ from threading import Lock
9
+ from typing import Any
10
+ from uuid import uuid4
11
+
12
+
13
def _now() -> float:
    """Current wall-clock time as a Unix timestamp (seconds)."""
    return time.time()
15
+
16
+
17
+ def _as_json(payload: dict[str, Any]) -> str:
18
+ return json.dumps(payload, separators=(",", ":"), ensure_ascii=True)
19
+
20
+
21
+ def _from_json(payload: str) -> dict[str, Any]:
22
+ data = json.loads(payload)
23
+ return data if isinstance(data, dict) else {}
24
+
25
+
26
+ def _resolve_data_dir(repo_root: Path) -> Path:
27
+ configured = os.getenv("OPENENV_DATA_DIR") or os.getenv("STORAGE_DATA_DIR")
28
+ if configured:
29
+ return Path(configured).expanduser().resolve()
30
+ if Path("/data").exists():
31
+ return Path("/data/openenv_rl").resolve()
32
+ return (repo_root / "outputs" / "persist").resolve()
33
+
34
+
35
+ def _default_fallback_data_dirs(repo_root: Path) -> list[Path]:
36
+ return [
37
+ (repo_root / "outputs" / "persist").resolve(),
38
+ Path("/tmp/openenv_rl").resolve(),
39
+ ]
40
+
41
+
42
+ def _storage_enabled() -> bool:
43
+ raw = str(os.getenv("STORAGE_ENABLED", "true")).strip().lower()
44
+ return raw not in {"0", "false", "no", "off"}
45
+
46
+
47
class PersistenceStore:
    """
    SQLite-backed persistence for training jobs, simulation runs and
    comparison runs.

    Each record is stored as a JSON payload column plus a few indexed
    metadata columns. All DB access is serialized through a single
    threading.Lock. If no candidate directory is writable at startup
    the store disables itself (every method then becomes a no-op /
    empty result) rather than crashing service startup.
    """

    def __init__(self, repo_root: Path) -> None:
        self.repo_root = repo_root.resolve()
        self.enabled = _storage_enabled()
        self.data_dir = _resolve_data_dir(self.repo_root)
        self.db_path = self.data_dir / "openenv_state.sqlite3"
        self.training_runs_dir = self.data_dir / "training_runs"
        self._lock = Lock()

        if not self.enabled:
            return

        self._initialize_storage_dirs()

    def _initialize_storage_dirs(self) -> None:
        """Try the preferred data dir, then fallbacks; disable on total failure.

        The first candidate that can be created AND hold an initialized
        SQLite schema wins; self.data_dir/db_path/training_runs_dir are
        re-pointed at it.
        """
        candidates: list[Path] = [self.data_dir]
        for fallback in _default_fallback_data_dirs(self.repo_root):
            if fallback not in candidates:
                candidates.append(fallback)

        last_error: Exception | None = None
        for candidate in candidates:
            try:
                candidate.mkdir(parents=True, exist_ok=True)
                self.data_dir = candidate
                self.db_path = self.data_dir / "openenv_state.sqlite3"
                self.training_runs_dir = self.data_dir / "training_runs"
                self.training_runs_dir.mkdir(parents=True, exist_ok=True)
                self._init_schema()
                return
            except (OSError, sqlite3.Error) as exc:
                last_error = exc

        self.enabled = False
        # Keep service startup alive in restricted runtimes (e.g. HF Spaces without writable /data).
        print(
            f"[persistence] disabled: no writable storage directory. "
            f"requested={candidates[0]} last_error={last_error!r}"
        )

    def _connect(self) -> sqlite3.Connection:
        """Open a fresh connection (one per operation; rows as sqlite3.Row)."""
        conn = sqlite3.connect(self.db_path, timeout=30)
        conn.row_factory = sqlite3.Row
        return conn

    def _init_schema(self) -> None:
        """Create the three payload tables if they do not already exist."""
        with self._connect() as conn:
            conn.executescript(
                """
                CREATE TABLE IF NOT EXISTS training_jobs (
                    job_id TEXT PRIMARY KEY,
                    created_at REAL NOT NULL,
                    updated_at REAL NOT NULL,
                    payload_json TEXT NOT NULL
                );

                CREATE TABLE IF NOT EXISTS simulation_runs (
                    run_id TEXT PRIMARY KEY,
                    created_at REAL NOT NULL,
                    updated_at REAL NOT NULL,
                    task_id TEXT,
                    agent_mode TEXT,
                    status TEXT,
                    payload_json TEXT NOT NULL
                );

                CREATE TABLE IF NOT EXISTS comparison_runs (
                    comparison_id TEXT PRIMARY KEY,
                    created_at REAL NOT NULL,
                    updated_at REAL NOT NULL,
                    task_id TEXT,
                    payload_json TEXT NOT NULL
                );
                """
            )
            conn.commit()

    # Training jobs ---------------------------------------------------------
    def upsert_training_job(self, snapshot: dict[str, Any]) -> None:
        """Insert or update a training-job snapshot keyed by snapshot['job_id'].

        Snapshots without a job_id are silently ignored.
        """
        if not self.enabled:
            return
        job_id = str(snapshot.get("job_id") or "")
        if not job_id:
            return
        created_at = float(snapshot.get("created_at") or _now())
        updated_at = float(snapshot.get("updated_at") or _now())
        with self._lock, self._connect() as conn:
            conn.execute(
                """
                INSERT INTO training_jobs (job_id, created_at, updated_at, payload_json)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(job_id) DO UPDATE SET
                    updated_at = excluded.updated_at,
                    payload_json = excluded.payload_json
                """,
                (job_id, created_at, updated_at, _as_json(snapshot)),
            )
            conn.commit()

    def list_training_jobs(self, limit: int = 500) -> list[dict[str, Any]]:
        """Return up to `limit` training-job payloads, most recently updated first."""
        if not self.enabled:
            return []
        rows: list[dict[str, Any]] = []
        with self._lock, self._connect() as conn:
            cur = conn.execute(
                """
                SELECT payload_json FROM training_jobs
                ORDER BY updated_at DESC
                LIMIT ?
                """,
                (max(1, int(limit)),),
            )
            for row in cur.fetchall():
                rows.append(_from_json(str(row["payload_json"])))
        return rows

    def clear_training_jobs(self) -> int:
        """Delete all training jobs; returns the number of rows removed."""
        if not self.enabled:
            return 0
        with self._lock, self._connect() as conn:
            cur = conn.execute("DELETE FROM training_jobs")
            conn.commit()
            return int(cur.rowcount or 0)

    def delete_training_job(self, job_id: str) -> int:
        """Delete one training job by id; returns 1 if it existed, else 0."""
        if not self.enabled:
            return 0
        with self._lock, self._connect() as conn:
            cur = conn.execute("DELETE FROM training_jobs WHERE job_id = ?", (str(job_id),))
            conn.commit()
            return int(cur.rowcount or 0)

    # Simulation runs -------------------------------------------------------
    def upsert_simulation_run(
        self,
        *,
        run_id: str,
        task_id: str,
        agent_mode: str,
        status: str,
        payload: dict[str, Any],
    ) -> None:
        """Insert or update a simulation run.

        The caller's payload is copied (not mutated) and stamped with the
        metadata columns before serialization.
        """
        if not self.enabled:
            return
        now = _now()
        created_at = float(payload.get("created_at") or now)
        payload = dict(payload)
        payload["run_id"] = run_id
        payload["created_at"] = created_at
        payload["updated_at"] = now
        payload["task_id"] = task_id
        payload["agent_mode"] = agent_mode
        payload["status"] = status
        with self._lock, self._connect() as conn:
            conn.execute(
                """
                INSERT INTO simulation_runs (run_id, created_at, updated_at, task_id, agent_mode, status, payload_json)
                VALUES (?, ?, ?, ?, ?, ?, ?)
                ON CONFLICT(run_id) DO UPDATE SET
                    updated_at = excluded.updated_at,
                    task_id = excluded.task_id,
                    agent_mode = excluded.agent_mode,
                    status = excluded.status,
                    payload_json = excluded.payload_json
                """,
                (
                    run_id,
                    created_at,
                    now,
                    task_id,
                    agent_mode,
                    status,
                    _as_json(payload),
                ),
            )
            conn.commit()

    def list_simulation_runs(self, limit: int = 50) -> list[dict[str, Any]]:
        """List recent simulation runs, newest first.

        Large `trace` lists are stripped from each payload and replaced by
        trace_len / has_trace summary fields to keep list responses small.
        """
        if not self.enabled:
            return []
        out: list[dict[str, Any]] = []
        with self._lock, self._connect() as conn:
            cur = conn.execute(
                """
                SELECT payload_json FROM simulation_runs
                ORDER BY updated_at DESC
                LIMIT ?
                """,
                (max(1, int(limit)),),
            )
            for row in cur.fetchall():
                data = _from_json(str(row["payload_json"]))
                if isinstance(data.get("trace"), list):
                    data["trace_len"] = len(data["trace"])
                    data["has_trace"] = bool(data["trace"])
                    data.pop("trace", None)
                out.append(data)
        return out

    def get_simulation_run(self, run_id: str) -> dict[str, Any] | None:
        """Fetch one simulation run (full payload, trace included) or None."""
        if not self.enabled:
            return None
        with self._lock, self._connect() as conn:
            cur = conn.execute(
                "SELECT payload_json FROM simulation_runs WHERE run_id = ?",
                (run_id,),
            )
            row = cur.fetchone()
            if row is None:
                return None
            return _from_json(str(row["payload_json"]))

    def clear_simulation_runs(self) -> int:
        """Delete all simulation runs; returns the number of rows removed."""
        if not self.enabled:
            return 0
        with self._lock, self._connect() as conn:
            cur = conn.execute("DELETE FROM simulation_runs")
            conn.commit()
            return int(cur.rowcount or 0)

    # Comparison runs -------------------------------------------------------
    def create_comparison_run(self, payload: dict[str, Any]) -> str | None:
        """Store a comparison run; generates a UUID when no id is supplied.

        Returns the comparison_id, or None when persistence is disabled.
        """
        if not self.enabled:
            return None
        comparison_id = str(payload.get("comparison_id") or uuid4())
        now = _now()
        body = dict(payload)
        body["comparison_id"] = comparison_id
        body["created_at"] = float(body.get("created_at") or now)
        body["updated_at"] = now
        task_id = str(body.get("task_id") or "")
        with self._lock, self._connect() as conn:
            conn.execute(
                """
                INSERT INTO comparison_runs (comparison_id, created_at, updated_at, task_id, payload_json)
                VALUES (?, ?, ?, ?, ?)
                ON CONFLICT(comparison_id) DO UPDATE SET
                    updated_at = excluded.updated_at,
                    task_id = excluded.task_id,
                    payload_json = excluded.payload_json
                """,
                (
                    comparison_id,
                    float(body["created_at"]),
                    now,
                    task_id,
                    _as_json(body),
                ),
            )
            conn.commit()
        return comparison_id

    def list_comparison_runs(self, limit: int = 50) -> list[dict[str, Any]]:
        """List recent comparison runs, newest first."""
        if not self.enabled:
            return []
        out: list[dict[str, Any]] = []
        with self._lock, self._connect() as conn:
            cur = conn.execute(
                """
                SELECT payload_json FROM comparison_runs
                ORDER BY updated_at DESC
                LIMIT ?
                """,
                (max(1, int(limit)),),
            )
            for row in cur.fetchall():
                out.append(_from_json(str(row["payload_json"])))
        return out

    def get_comparison_run(self, comparison_id: str) -> dict[str, Any] | None:
        """Fetch one comparison run by id, or None when absent/disabled."""
        if not self.enabled:
            return None
        with self._lock, self._connect() as conn:
            cur = conn.execute(
                "SELECT payload_json FROM comparison_runs WHERE comparison_id = ?",
                (comparison_id,),
            )
            row = cur.fetchone()
            if row is None:
                return None
            return _from_json(str(row["payload_json"]))

    def clear_comparison_runs(self) -> int:
        """Delete all comparison runs; returns the number of rows removed."""
        if not self.enabled:
            return 0
        with self._lock, self._connect() as conn:
            cur = conn.execute("DELETE FROM comparison_runs")
            conn.commit()
            return int(cur.rowcount or 0)
app/reward.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ reward.py — Gov Workflow OpenEnv Phase 4: Dense Reward Shaping
3
+
4
+ Formula (per step):
5
+ R_t = progress_reward + completion_reward + recovery_reward + stability_bonus
6
+ - waiting_penalty - sla_penalty - fairness_penalty
7
+ - invalid_action_penalty - idle_capacity_penalty - oscillation_penalty
8
+
9
+ All coefficients are named constants — never magic numbers inline.
10
+ """
11
+ from __future__ import annotations
12
+ from app.models import RewardModel
13
+
14
# ── Positive coefficients ─────────────────────────────────────────
# All coefficients are unitless reward weights applied per step by
# compute_reward(); penalty coefficients below are applied as positive
# magnitudes and subtracted from the total.
COEFF_PROGRESS = 0.7 # per stage advance
COEFF_COMPLETION = 4.0 # per completed case
COEFF_RECOVERY = 1.5 # per unblocked missing-doc case resolved
COEFF_STABILITY = 0.1 # per step with zero SLA breaches and zero invalid actions

# ── Negative coefficients ─────────────────────────────────────────
COEFF_WAITING = 0.04 # per case per day in backlog
COEFF_SLA = 1.5 # per new SLA breach
COEFF_FAIRNESS = 2.0 # per unit of fairness excess above threshold
COEFF_INVALID = 1.5 # flat penalty per invalid action
COEFF_IDLE = 0.05 # per idle officer-day
COEFF_OSCILLATION = 0.15 # per oscillation event (repeated contradictory actions)

# ── Fairness default tolerance (when no threshold set by task) ────
# Used by compute_reward() when the task supplies no fairness_threshold.
DEFAULT_FAIRNESS_TOLERANCE = 0.40
30
+
31
+
32
def compute_reward(
    *,
    stage_advances: int,
    completions: int,
    active_backlog: int,
    new_sla_breaches: int,
    fairness_gap: float,
    fairness_threshold: float | None,
    invalid_action: bool,
    idle_capacity: int,
    newly_unblocked_docs: int = 0,
    oscillation_detected: bool = False,
    award_stability_bonus: bool = True,
) -> RewardModel:
    """Compute the dense one-step reward and its full component breakdown.

    Args:
        stage_advances: Applications that moved forward one stage today.
        completions: Applications fully completed today.
        active_backlog: Total cases still pending (waiting pressure).
        new_sla_breaches: New SLA deadline violations this step.
        fairness_gap: Cross-service completion fairness gap in [0.0, 1.0].
        fairness_threshold: Acceptable fairness gap; None selects
            DEFAULT_FAIRNESS_TOLERANCE.
        invalid_action: Whether the submitted action was invalid.
        idle_capacity: Officer-days wasted idle while backlog exists.
        newly_unblocked_docs: Cases unblocked after missing-doc resolution.
        oscillation_detected: Agent is rapidly reversing recent decisions.
        award_stability_bonus: Gate for the per-step stability bonus.

    Returns:
        RewardModel with every component rounded to 4 decimals; penalty
        fields are stored as negative values, total_reward is the scalar.
    """
    # ── Gains ─────────────────────────────────────────────────────
    progress = COEFF_PROGRESS * stage_advances
    completion = COEFF_COMPLETION * completions
    recovery = COEFF_RECOVERY * newly_unblocked_docs
    clean_step = award_stability_bonus and new_sla_breaches == 0 and not invalid_action
    stability = COEFF_STABILITY if clean_step else 0.0

    # ── Costs (magnitudes; subtracted below) ──────────────────────
    waiting = COEFF_WAITING * active_backlog
    sla = COEFF_SLA * new_sla_breaches
    tolerance = DEFAULT_FAIRNESS_TOLERANCE if fairness_threshold is None else fairness_threshold
    fairness = COEFF_FAIRNESS * max(0.0, fairness_gap - tolerance)
    invalid = COEFF_INVALID if invalid_action else 0.0
    idle = COEFF_IDLE * idle_capacity
    oscillation = COEFF_OSCILLATION if oscillation_detected else 0.0

    # Accumulate in the same order as the documented formula.
    total = (
        progress + completion + recovery + stability
        - waiting - sla - fairness
        - invalid - idle - oscillation
    )

    return RewardModel(
        total_reward=round(total, 4),
        progress_reward=round(progress, 4),
        completion_reward=round(completion, 4),
        recovery_reward=round(recovery, 4),
        stability_bonus=round(stability, 4),
        waiting_penalty=round(-waiting, 4),
        sla_penalty=round(-sla, 4),
        fairness_penalty=round(-fairness, 4),
        invalid_action_penalty=round(-invalid, 4),
        idle_capacity_penalty=round(-idle, 4),
        oscillation_penalty=round(-oscillation, 4),
    )
app/sector_profiles.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ sector_profiles.py — Phase 2 update: enrichment type, probability, delay range per service.
3
+ """
4
+
5
+ from app.models import (
6
+ DocEnrichmentType, SectorProfile, ServiceType, UrgencyProfile
7
+ )
8
+
9
# ── Per-service sector profiles ───────────────────────────────────
# Each profile parameterizes the simulator for one ServiceType:
# document-defect/missing-doc probabilities, verification load, SLA,
# base throughput, and the Phase 2 enrichment type/probability/delay.

# Revenue: frequent missing documents (0.45), moderate urgency, no enrichment.
INCOME_CERTIFICATE_PROFILE = SectorProfile(
    service_type=ServiceType.INCOME_CERTIFICATE,
    sector_name="Revenue Sector — Income Certificate",
    missing_docs_probability=0.45,
    doc_defect_rate_digital=0.30,
    doc_defect_rate_paper=0.65,
    field_verification_probability=0.30,
    manual_scrutiny_intensity=0.60,
    decision_backlog_sensitivity=0.70,
    system_dependency_risk=0.20,
    sla_days=21,
    urgency_profile=UrgencyProfile.MODERATE,
    base_processing_rate=8.0,
    field_verification_days=3,
    doc_enrichment_type=DocEnrichmentType.NONE,
    doc_enrichment_probability=0.0,
    doc_enrichment_delay_days_min=1,
    doc_enrichment_delay_days_max=2,
)

# Land: heavy field verification (0.65) and scrutiny; slowest base rate;
# most cases (0.70) need past-land-records enrichment.
LAND_REGISTRATION_PROFILE = SectorProfile(
    service_type=ServiceType.LAND_REGISTRATION,
    sector_name="Land Sector — 7/12 Mutation",
    missing_docs_probability=0.35,
    doc_defect_rate_digital=0.25,
    doc_defect_rate_paper=0.55,
    field_verification_probability=0.65,
    manual_scrutiny_intensity=0.75,
    decision_backlog_sensitivity=0.85,
    system_dependency_risk=0.55,
    sla_days=30,
    urgency_profile=UrgencyProfile.LOW_BUT_STICKY,
    base_processing_rate=4.0,
    field_verification_days=5,
    doc_enrichment_type=DocEnrichmentType.PAST_LAND_RECORDS,
    doc_enrichment_probability=0.70,
    doc_enrichment_delay_days_min=2,
    doc_enrichment_delay_days_max=5, # REVENUE_DB_DELAY event adds 1-2 more
)

# Revenue: similar to income certificate but with family-caste-history
# enrichment on roughly half of the cases.
CASTE_CERTIFICATE_PROFILE = SectorProfile(
    service_type=ServiceType.CASTE_CERTIFICATE,
    sector_name="Revenue Sector — Caste Certificate",
    missing_docs_probability=0.40,
    doc_defect_rate_digital=0.25,
    doc_defect_rate_paper=0.60,
    field_verification_probability=0.35,
    manual_scrutiny_intensity=0.65,
    decision_backlog_sensitivity=0.65,
    system_dependency_risk=0.25,
    sla_days=21,
    urgency_profile=UrgencyProfile.MODERATE,
    base_processing_rate=7.0,
    field_verification_days=3,
    doc_enrichment_type=DocEnrichmentType.FAMILY_CASTE_HISTORY,
    doc_enrichment_probability=0.55,
    doc_enrichment_delay_days_min=2,
    doc_enrichment_delay_days_max=4,
)

# Municipal: fast, high-urgency service — tight 7-day SLA, highest base
# rate, minimal verification, no enrichment.
BIRTH_CERTIFICATE_PROFILE = SectorProfile(
    service_type=ServiceType.BIRTH_CERTIFICATE,
    sector_name="Municipal Sector — Birth Certificate",
    missing_docs_probability=0.20,
    doc_defect_rate_digital=0.15,
    doc_defect_rate_paper=0.35,
    field_verification_probability=0.05,
    manual_scrutiny_intensity=0.30,
    decision_backlog_sensitivity=0.40,
    system_dependency_risk=0.30,
    sla_days=7,
    urgency_profile=UrgencyProfile.HIGH,
    base_processing_rate=15.0,
    field_verification_days=1,
    doc_enrichment_type=DocEnrichmentType.NONE,
    doc_enrichment_probability=0.0,
    doc_enrichment_delay_days_min=1,
    doc_enrichment_delay_days_max=1,
)

# National: near-universal field verification (0.90, 14 days) and slow
# police-verification enrichment on most cases (0.85).
PASSPORT_PROFILE = SectorProfile(
    service_type=ServiceType.PASSPORT,
    sector_name="National Sector — Passport",
    missing_docs_probability=0.25,
    doc_defect_rate_digital=0.20,
    doc_defect_rate_paper=0.50,
    field_verification_probability=0.90,
    manual_scrutiny_intensity=0.80,
    decision_backlog_sensitivity=0.75,
    system_dependency_risk=0.35,
    sla_days=30,
    urgency_profile=UrgencyProfile.HIGH,
    base_processing_rate=5.0,
    field_verification_days=14,
    doc_enrichment_type=DocEnrichmentType.POLICE_VERIFICATION,
    doc_enrichment_probability=0.85,
    doc_enrichment_delay_days_min=7,
    doc_enrichment_delay_days_max=14,
)

# Tax: short 7-day SLA with tax-record cross-check enrichment on half
# of the cases.
GST_REGISTRATION_PROFILE = SectorProfile(
    service_type=ServiceType.GST_REGISTRATION,
    sector_name="Tax Sector — GST Registration",
    missing_docs_probability=0.30,
    doc_defect_rate_digital=0.20,
    doc_defect_rate_paper=0.50,
    field_verification_probability=0.20,
    manual_scrutiny_intensity=0.55,
    decision_backlog_sensitivity=0.60,
    system_dependency_risk=0.45,
    sla_days=7,
    urgency_profile=UrgencyProfile.HIGH,
    base_processing_rate=10.0,
    field_verification_days=2,
    doc_enrichment_type=DocEnrichmentType.TAX_RECORD_CROSS_CHECK,
    doc_enrichment_probability=0.50,
    doc_enrichment_delay_days_min=1,
    doc_enrichment_delay_days_max=3,
)

# Transport: moderate profile, high throughput, no enrichment.
DRIVING_LICENSE_PROFILE = SectorProfile(
    service_type=ServiceType.DRIVING_LICENSE,
    sector_name="Transport Sector — Driving License",
    missing_docs_probability=0.28,
    doc_defect_rate_digital=0.18,
    doc_defect_rate_paper=0.45,
    field_verification_probability=0.40,
    manual_scrutiny_intensity=0.50,
    decision_backlog_sensitivity=0.55,
    system_dependency_risk=0.30,
    sla_days=14,
    urgency_profile=UrgencyProfile.MODERATE,
    base_processing_rate=12.0,
    field_verification_days=2,
    doc_enrichment_type=DocEnrichmentType.NONE,
    doc_enrichment_probability=0.0,
    doc_enrichment_delay_days_min=1,
    doc_enrichment_delay_days_max=1,
)

# National identity: cleanest documents and high throughput; no enrichment.
AADHAAR_CARD_PROFILE = SectorProfile(
    service_type=ServiceType.AADHAAR_CARD,
    sector_name="National Identity Sector - Aadhaar Card",
    missing_docs_probability=0.22,
    doc_defect_rate_digital=0.12,
    doc_defect_rate_paper=0.30,
    field_verification_probability=0.18,
    manual_scrutiny_intensity=0.42,
    decision_backlog_sensitivity=0.50,
    system_dependency_risk=0.38,
    sla_days=10,
    urgency_profile=UrgencyProfile.HIGH,
    base_processing_rate=13.0,
    field_verification_days=2,
    doc_enrichment_type=DocEnrichmentType.NONE,
    doc_enrichment_probability=0.0,
    doc_enrichment_delay_days_min=1,
    doc_enrichment_delay_days_max=2,
)
168
+
169
# Central lookup table: exactly one profile per supported service type.
SECTOR_REGISTRY: dict[ServiceType, SectorProfile] = {
    ServiceType.INCOME_CERTIFICATE: INCOME_CERTIFICATE_PROFILE,
    ServiceType.LAND_REGISTRATION: LAND_REGISTRATION_PROFILE,
    ServiceType.CASTE_CERTIFICATE: CASTE_CERTIFICATE_PROFILE,
    ServiceType.BIRTH_CERTIFICATE: BIRTH_CERTIFICATE_PROFILE,
    ServiceType.PASSPORT: PASSPORT_PROFILE,
    ServiceType.GST_REGISTRATION: GST_REGISTRATION_PROFILE,
    ServiceType.DRIVING_LICENSE: DRIVING_LICENSE_PROFILE,
    ServiceType.AADHAAR_CARD: AADHAAR_CARD_PROFILE,
}
179
+
180
def get_sector_profile(service_type: ServiceType) -> SectorProfile:
    """Return the registered SectorProfile for a service type.

    Raises:
        KeyError: If no profile is registered for `service_type`.
    """
    try:
        return SECTOR_REGISTRY[service_type]
    except KeyError:
        raise KeyError(f"No SectorProfile for {service_type}") from None
app/signal_computer.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ signal_computer.py — Gov Workflow OpenEnv v2.0
3
+ Computes normalized compressed state signals for observations.
4
+ All signals are deterministic and normalized to [0.0, 1.0].
5
+ """
6
+ from typing import Dict
7
+ from app.models import QueueSnapshot, OfficerPool
8
+
9
+
10
class ComputedSignals:
    """Bag of normalized state signals produced by SignalComputer.compute().

    Float fields are clamped to [0.0, 1.0] by the computer; count fields
    are non-negative integers. Values below are the defaults returned when
    there are no queue snapshots.
    """

    def __init__(self):
        # Total pending cases relative to ~5 days of processing capacity.
        self.backlog_pressure: float = 0.0
        # Pending-weighted average of per-queue SLA risk.
        self.sla_risk_score: float = 0.0
        # 1.0 = perfectly even completion rates across services
        # (1 - coefficient of variation, floored at 0).
        self.fairness_index: float = 1.0
        # Allocated officers / available officers.
        self.resource_utilization: float = 0.0
        # Share of today's arrivals via the digital channel
        # (defaults to 0.5 when there were no arrivals).
        self.digital_intake_ratio: float = 0.5
        # Absolute count of cases blocked on missing citizen documents.
        self.blocked_cases_missing_docs: int = 0
        # Absolute count of cases blocked on document enrichment.
        self.blocked_cases_enrichment: int = 0
        # Fraction of pending cases currently out for field verification.
        self.field_verification_load: float = 0.0
20
+
21
+
22
class SignalComputer:
    """Derives normalized observation signals from queues and staffing."""

    def compute(
        self,
        queue_snapshots: Dict[str, QueueSnapshot],
        officer_pool: OfficerPool,
        todays_arrivals: int = 0,
        digital_arrivals: int = 0,
        capacity_per_day: float = 1.0,
    ) -> ComputedSignals:
        """Compute all signals deterministically; defaults when no queues.

        Args:
            queue_snapshots: Per-service queue state keyed by service name.
            officer_pool: Current officer allocation/availability.
            todays_arrivals: Total new cases that arrived today.
            digital_arrivals: Subset of today's arrivals via digital intake.
            capacity_per_day: System-wide processing capacity per day.
        """
        out = ComputedSignals()
        snaps = list(queue_snapshots.values())
        if not snaps:
            return out

        pending = sum(s.total_pending for s in snaps)
        denom = max(1, pending)  # safe divisor for per-case averages

        # Backlog pressure: pending vs roughly one work-week of capacity.
        out.backlog_pressure = min(1.0, pending / max(1.0, capacity_per_day * 5.0))

        # SLA risk: pending-weighted mean risk, clamped to [0, 1].
        weighted_risk = sum(s.current_sla_risk * s.total_pending for s in snaps)
        out.sla_risk_score = min(1.0, max(0.0, weighted_risk / denom))

        # Fairness: 1 - coefficient of variation of completion rates.
        if len(snaps) < 2:
            out.fairness_index = 1.0
        else:
            rates = [
                s.total_completed_today / max(1, s.total_pending + s.total_completed_today)
                if (s.total_pending + s.total_completed_today) > 0
                else 0.0
                for s in snaps
            ]
            mean_rate = sum(rates) / len(rates)
            if mean_rate > 0:
                variance = sum((r - mean_rate) ** 2 for r in rates) / len(rates)
                cv = (variance ** 0.5) / mean_rate
                out.fairness_index = max(0.0, 1.0 - min(1.0, cv))
            else:
                out.fairness_index = 1.0

        # Resource utilization: allocated / available, capped at 1.
        out.resource_utilization = min(
            1.0,
            sum(officer_pool.allocated.values()) / max(1, officer_pool.available_officers),
        )

        # Digital intake ratio (neutral 0.5 when nothing arrived today).
        if todays_arrivals > 0:
            out.digital_intake_ratio = min(1.0, digital_arrivals / todays_arrivals)
        else:
            out.digital_intake_ratio = 0.5

        # Blocked-case totals (absolute counts).
        out.blocked_cases_missing_docs = sum(s.blocked_missing_docs for s in snaps)
        out.blocked_cases_enrichment = sum(s.blocked_enrichment for s in snaps)

        # Field verification load as a fraction of pending work.
        in_field = sum(s.field_verification_pending for s in snaps)
        out.field_verification_load = in_field / denom if denom > 0 else 0.0

        return out
app/simulator.py ADDED
@@ -0,0 +1,1106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import random
6
+ import re
7
+ from dataclasses import dataclass
8
+ from typing import Any, Literal
9
+
10
+ from openai import OpenAI
11
+
12
+ from app.baselines import POLICIES, backlog_clearance_policy
13
+ from app.env import GovWorkflowEnv
14
+ from app.graders import grade_episode
15
+ from app.models import ActionModel, ActionType, ObservationModel, PriorityMode, ServiceType
16
+ from app.engine import DayResult, DaySimulator
17
+
18
from enum import Enum


class SimulationAgentModeEnum(str, Enum):
    """Decision source driving a simulation run.

    Subclasses str so members compare equal to their literal values,
    which keeps callers that pass plain strings working unchanged.
    """

    baseline_policy = "baseline_policy"
    llm_inference = "llm_inference"
    trained_rl = "trained_rl"


# Public alias. Historically this name was bound to a typing.Literal that
# was immediately overwritten by the Enum (dead code, now removed); the
# Enum has been the effective binding all along.
SimulationAgentMode = SimulationAgentModeEnum
27
+
28
+
29
# Historical pool of NVIDIA-hosted chat model identifiers once used for
# llm_inference runs; kept for reference/back-compat. Entries are not
# validated here — availability depends on the configured API gateway.
LEGACY_NVIDIA_MODEL_POOL = [
    "meta/llama-3.3-70b-instruct",
    "qwen/qwen3-next-80b-a3b-instruct",
    "moonshotai/kimi-k2-instruct-0905",
    "meta/llama-3.1-405b-instruct",
    "deepseek-ai/deepseek-v3.2",
    "qwen/qwq-32b",
    "mistralai/mixtral-8x22b-instruct-v0.1",
    "google/gemma-3-27b-it",
    "microsoft/phi-4-mini-instruct",
    "meta/llama-3.1-8b-instruct",
]
41
+
42
+
43
@dataclass
class SimulationRun:
    """Immutable result record for one completed simulation episode."""

    task_id: str                      # benchmark task identifier
    agent_mode: SimulationAgentMode   # decision source used for the run
    seed: int                         # RNG seed for reproducibility
    total_reward: float               # cumulative dense reward over the episode
    score: float                      # grader-assigned score
    grader_name: str                  # which grader produced `score`
    summary: dict[str, Any]           # aggregate episode metrics
    trace: list[dict[str, Any]]       # per-step log rows
53
+
54
+
55
+ def _dedupe(values: list[str | None]) -> list[str]:
56
+ out: list[str] = []
57
+ for value in values:
58
+ if value is None:
59
+ continue
60
+ v = value.strip()
61
+ if v and v not in out:
62
+ out.append(v)
63
+ return out
64
+
65
+
66
+ def _env_csv_list(name: str) -> list[str]:
67
+ raw = os.getenv(name, "").strip()
68
+ if not raw:
69
+ return []
70
+ return [x.strip() for x in raw.split(",") if x.strip()]
71
+
72
+
73
+ def _extract_json_object(text: str) -> dict[str, Any] | None:
74
+ text = (text or "").strip()
75
+ if not text:
76
+ return None
77
+ try:
78
+ parsed = json.loads(text)
79
+ if isinstance(parsed, dict):
80
+ return parsed
81
+ except json.JSONDecodeError:
82
+ pass
83
+
84
+ match = re.search(r"\{.*\}", text, flags=re.DOTALL)
85
+ if not match:
86
+ return None
87
+ try:
88
+ parsed = json.loads(match.group(0))
89
+ except json.JSONDecodeError:
90
+ return None
91
+ return parsed if isinstance(parsed, dict) else None
92
+
93
+
94
def _coerce_action(payload: dict[str, Any] | None) -> ActionModel:
    """Best-effort conversion of a raw (possibly legacy Phase 1) dict
    into a Phase 2 ActionModel.

    Empty payloads and any validation failure fall back to a safe
    ADVANCE_TIME action instead of raising.
    """
    if not payload:
        return ActionModel(action_type=ActionType.ADVANCE_TIME)
    try:
        # Remap legacy Phase 1 field names to Phase 2.
        # NOTE: order matters — 'service' is folded into 'service_target'
        # first so the reallocation/capacity remaps below can consume it.
        remapped = dict(payload)
        if "service" in remapped and "service_target" not in remapped:
            remapped["service_target"] = remapped.pop("service")
        if "target_service" in remapped:
            # Phase 1 reallocation (source/target + officer_delta) becomes
            # a Phase 2 reallocation_delta map: negative at the source,
            # positive at the target. None when either endpoint is missing.
            src = remapped.pop("service_target", None)
            tgt = remapped.pop("target_service", None)
            delta = remapped.pop("officer_delta", 1)
            remapped["reallocation_delta"] = {
                (src.value if hasattr(src, 'value') else str(src)): -int(delta),
                (tgt.value if hasattr(tgt, 'value') else str(tgt)): int(delta),
            } if src and tgt else None
        if "officer_delta" in remapped and "capacity_assignment" not in remapped:
            # Phase 1 capacity assignment: single service + officer_delta.
            svc = remapped.get("service_target")
            if svc:
                svc_key = svc.value if hasattr(svc, 'value') else str(svc)
                remapped["capacity_assignment"] = {svc_key: int(remapped.pop("officer_delta"))}
            else:
                # No service to attach the delta to — drop it silently.
                remapped.pop("officer_delta", None)
        if "case_id" in remapped:
            # Case-level targeting is not part of the Phase 2 action schema.
            remapped.pop("case_id", None)
        return ActionModel(**remapped)
    except Exception:
        # Deliberate catch-all: malformed model output must never crash a run.
        return ActionModel(action_type=ActionType.ADVANCE_TIME)
122
+
123
+
124
+ def _queue_rows(obs: ObservationModel) -> list[dict[str, Any]]:
125
+ return [
126
+ {
127
+ "service": q.service_type.value,
128
+ "active_cases": q.total_pending,
129
+ "missing_docs_cases": q.blocked_missing_docs,
130
+ "urgent_cases": q.urgent_pending,
131
+ "breached_cases": q.total_sla_breached,
132
+ "avg_age_days": q.avg_waiting_days,
133
+ }
134
+ for q in obs.queue_snapshots.values()
135
+ ]
136
+
137
+
138
+ def _recommended_min_steps(task_id: str) -> int:
139
+ if task_id == "cross_department_hard":
140
+ return 70
141
+ if task_id == "mixed_urgency_medium":
142
+ return 60
143
+ return 40
144
+
145
+
146
+ def _alloc_for(obs: ObservationModel, service: ServiceType) -> int:
147
+ pool = obs.officer_pool
148
+ # Phase 2 uses 'allocated'; Phase 1 used 'allocations'
149
+ alloc_dict = getattr(pool, "allocated", None) or getattr(pool, "allocations", {})
150
+ raw = alloc_dict.get(service)
151
+ if raw is None:
152
+ raw = alloc_dict.get(service.value if hasattr(service, 'value') else str(service), 0)
153
+ return int(raw or 0)
154
+
155
+
156
+ def _top_backlog_service(
157
+ obs: ObservationModel,
158
+ *,
159
+ exclude: ServiceType | None = None,
160
+ ) -> ServiceType | None:
161
+ qs = obs.queue_snapshots
162
+ snapshots = list(qs.values()) if isinstance(qs, dict) else list(qs)
163
+ ranked = [q for q in snapshots if getattr(q, 'service_type', getattr(q, 'service', None)) != exclude]
164
+ if not ranked:
165
+ return None
166
+ ranked.sort(
167
+ key=lambda q: (
168
+ getattr(q, 'total_pending', getattr(q, 'active_cases', 0))
169
+ + 2 * getattr(q, 'total_sla_breached', getattr(q, 'breached_cases', 0))
170
+ + getattr(q, 'urgent_pending', getattr(q, 'urgent_cases', 0)),
171
+ getattr(q, 'avg_waiting_days', getattr(q, 'avg_age_days', 0)),
172
+ ),
173
+ reverse=True,
174
+ )
175
+ return getattr(ranked[0], 'service_type', getattr(ranked[0], 'service', None))
176
+
177
+
178
+ def _service_with_missing_docs(obs: ObservationModel) -> ServiceType | None:
179
+ qs = obs.queue_snapshots
180
+ snapshots = list(qs.values()) if isinstance(qs, dict) else list(qs)
181
+ candidates = [
182
+ q for q in snapshots
183
+ if getattr(q, 'blocked_missing_docs', getattr(q, 'missing_docs_cases', 0)) > 0
184
+ ]
185
+ if not candidates:
186
+ return None
187
+ candidates.sort(
188
+ key=lambda q: (
189
+ getattr(q, 'blocked_missing_docs', getattr(q, 'missing_docs_cases', 0)),
190
+ getattr(q, 'total_pending', getattr(q, 'active_cases', 0)),
191
+ ),
192
+ reverse=True,
193
+ )
194
+ return getattr(candidates[0], 'service_type', getattr(candidates[0], 'service', None))
195
+
196
+
197
def _service_with_officers(obs: ObservationModel) -> ServiceType | None:
    """Service holding the most allocated officers, or None if none staffed."""
    qs = obs.queue_snapshots
    snaps = list(qs.values()) if isinstance(qs, dict) else list(qs)
    best: ServiceType | None = None
    best_alloc = 0
    for q in snaps:
        svc = getattr(q, "service_type", getattr(q, "service", None))
        if not svc:
            continue
        alloc = _alloc_for(obs, svc)
        # Strict '>' keeps the first maximum, matching the original
        # stable reverse-sort behavior on ties.
        if alloc > best_alloc:
            best, best_alloc = svc, alloc
    return best
209
+
210
+
211
def _compute_action_mask(obs: ObservationModel) -> dict[ActionType, bool]:
    """Which action types are currently meaningful for this observation.

    Key insertion order is fixed (used downstream for prompt hints).
    """

    def _get(q, new_attr, old_attr, default=0):
        # Phase 2 attribute name with Phase 1 fallback.
        return getattr(q, new_attr, getattr(q, old_attr, default))

    qs = obs.queue_snapshots
    snaps = list(qs.values()) if isinstance(qs, dict) else list(qs)

    pool = obs.officer_pool
    has_reserve = int(getattr(pool, "idle_officers", getattr(pool, "reserve_officers", 0))) > 0
    has_missing = any(_get(q, "blocked_missing_docs", "missing_docs_cases") > 0 for q in snaps)
    has_backlog = any(_get(q, "total_pending", "active_cases") > 0 for q in snaps)
    has_budget = int(obs.escalation_budget_remaining) > 0

    staffed = [
        svc
        for svc in (_get(q, "service_type", "service", None) for q in snaps)
        if _alloc_for(obs, svc) > 0
    ]
    can_reallocate = len(staffed) >= 1 and len(snaps) >= 2

    return {
        ActionType.SET_PRIORITY_MODE: True,
        ActionType.ADVANCE_TIME: True,
        ActionType.ASSIGN_CAPACITY: has_reserve and has_backlog,
        ActionType.REQUEST_MISSING_DOCUMENTS: has_missing,
        ActionType.ESCALATE_SERVICE: has_budget and has_backlog,
        ActionType.REALLOCATE_OFFICERS: can_reallocate,
    }
239
+
240
+
241
def _masked_action_type_hints(obs: ObservationModel) -> tuple[list[str], list[str]]:
    """Split action-type values into (allowed, blocked) lists for prompting."""
    allowed: list[str] = []
    blocked: list[str] = []
    for action_type, permitted in _compute_action_mask(obs).items():
        (allowed if permitted else blocked).append(action_type.value)
    return allowed, blocked
246
+
247
+
248
def _best_high_impact_action(obs: ObservationModel) -> tuple[ActionModel, str]:
    """Pick the most impactful fallback action for the current observation.

    Preference order: assign reserve capacity -> clear missing docs ->
    escalate the hottest service -> reallocate one officer -> advance time.
    Returns the action plus a short human-readable rationale string.
    """
    top_backlog = _top_backlog_service(obs)
    top_missing = _service_with_missing_docs(obs)

    # NOTE(review): this branch builds ActionModel with Phase 1 kwargs
    # ('service', 'officer_delta') while other paths use Phase 2 names
    # ('service_target', 'capacity_assignment'); _coerce_action remaps
    # these for dict payloads only — confirm ActionModel accepts them here.
    if int(obs.officer_pool.idle_officers) > 0 and top_backlog is not None:
        return (
            ActionModel(action_type=ActionType.ASSIGN_CAPACITY, service=top_backlog, officer_delta=1),
            "high-impact: assign reserve capacity to top backlog service",
        )

    if top_missing is not None:
        return (
            ActionModel(action_type=ActionType.REQUEST_MISSING_DOCUMENTS, service=top_missing),
            "high-impact: clear missing-document bottleneck",
        )

    if int(obs.escalation_budget_remaining) > 0:
        # Rank services by SLA breaches, then backlog, then urgency.
        qs = obs.queue_snapshots
        snapshots = list(qs.values()) if isinstance(qs, dict) else list(qs)
        hot = sorted(
            snapshots,
            key=lambda q: (
                getattr(q, 'total_sla_breached', getattr(q, 'breached_cases', 0)),
                getattr(q, 'total_pending', getattr(q, 'active_cases', 0)),
                getattr(q, 'urgent_pending', getattr(q, 'urgent_cases', 0)),
            ),
            reverse=True,
        )
        if hot and (
            getattr(hot[0], 'total_sla_breached', getattr(hot[0], 'breached_cases', 0)) > 0
            or getattr(hot[0], 'total_pending', getattr(hot[0], 'active_cases', 0)) > 0
        ):
            svc = getattr(hot[0], 'service_type', getattr(hot[0], 'service', None))
            return (
                ActionModel(action_type=ActionType.ESCALATE_SERVICE, escalation_target=svc),
                "high-impact: escalate highest SLA-risk service",
            )

    # Last resort before advancing time: shift one officer from the most
    # staffed service toward the heaviest backlog elsewhere.
    source = _service_with_officers(obs)
    if source is not None and _alloc_for(obs, source) > 0:
        target = _top_backlog_service(obs, exclude=source)
        if target is not None and target != source:
            return (
                ActionModel(
                    action_type=ActionType.REALLOCATE_OFFICERS,
                    service_target=source,
                    reallocation_delta={source.value: -1, target.value: 1},
                ),
                "high-impact: reallocate one officer toward highest backlog",
            )

    return ActionModel(action_type=ActionType.ADVANCE_TIME), "fallback: no high-impact action available"
300
+
301
+
302
def _repair_action_for_observation(
    action: ActionModel,
    obs: ObservationModel,
) -> tuple[ActionModel, str | None]:
    """Validate/repair a proposed action against the current observation.

    Masked or unfillable actions are replaced by _best_high_impact_action;
    otherwise missing targets/deltas are filled in and clamped. Returns
    (possibly repaired action, note) where note is None when the action
    was accepted unchanged.
    """
    mask = _compute_action_mask(obs)
    at = action.action_type

    # Hard gate: action types the mask says are currently meaningless.
    if not bool(mask.get(at, True)):
        fallback, why = _best_high_impact_action(obs)
        return fallback, f"masked {at.value}; {why}"

    if at == ActionType.ADVANCE_TIME:
        return action, None

    if at == ActionType.SET_PRIORITY_MODE:
        if action.priority_mode is None:
            return (
                ActionModel(action_type=ActionType.SET_PRIORITY_MODE, priority_mode=PriorityMode.BACKLOG_CLEARANCE),
                "missing priority_mode, defaulted to backlog_clearance",
            )
        return action, None

    if at == ActionType.ASSIGN_CAPACITY:
        pool = obs.officer_pool
        # Phase 2 pools expose 'idle_officers'; Phase 1 'reserve_officers'.
        reserve = int(getattr(pool, 'idle_officers', getattr(pool, 'reserve_officers', 0)))
        if reserve <= 0:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"reserve officers exhausted; {why}"
        service = getattr(action, 'service_target', None) or getattr(action, 'service', None) or _top_backlog_service(obs)
        if service is None:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"no service available for assign_capacity; {why}"
        cap = action.capacity_assignment or {}
        # Accept either enum-value or str keys; clamp delta to [1, reserve].
        delta = cap.get(service.value, cap.get(str(service), 1))
        delta = max(1, min(int(delta), reserve))
        repaired = ActionModel(
            action_type=ActionType.ASSIGN_CAPACITY,
            service_target=service,
            capacity_assignment={service.value: delta},
        )
        note = None if repaired.model_dump(exclude_none=True) == action.model_dump(exclude_none=True) else "repaired assign_capacity payload"
        return repaired, note

    if at == ActionType.REQUEST_MISSING_DOCUMENTS:
        service = getattr(action, 'service_target', None) or getattr(action, 'service', None) or _service_with_missing_docs(obs)
        if service is None:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"no missing-doc queue available; {why}"
        repaired = ActionModel(
            action_type=ActionType.REQUEST_MISSING_DOCUMENTS,
            service_target=service,
        )
        note = None if repaired.model_dump(exclude_none=True) == action.model_dump(exclude_none=True) else "repaired request_missing_documents payload"
        return repaired, note

    if at == ActionType.ESCALATE_SERVICE:
        if int(obs.escalation_budget_remaining) <= 0:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"escalation budget exhausted; {why}"
        # Target precedence: explicit escalation_target, then Phase 2/1
        # service fields, then the heaviest backlog.
        service = (
            getattr(action, 'escalation_target', None)
            or getattr(action, 'service_target', None)
            or getattr(action, 'service', None)
            or _top_backlog_service(obs)
        )
        if service is None:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"no escalation target available; {why}"
        repaired = ActionModel(
            action_type=ActionType.ESCALATE_SERVICE,
            escalation_target=service,
        )
        note = None if repaired.model_dump(exclude_none=True) == action.model_dump(exclude_none=True) else "repaired escalate_service payload"
        return repaired, note

    if at == ActionType.REALLOCATE_OFFICERS:
        source = (
            getattr(action, 'service_target', None)
            or getattr(action, 'service', None)
            or _service_with_officers(obs)
        )
        if source is None:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"no staffed source service; {why}"
        source_alloc = _alloc_for(obs, source)
        if source_alloc <= 0:
            # Requested source has no officers; retry with the best-staffed one.
            source = _service_with_officers(obs)
            source_alloc = _alloc_for(obs, source) if source is not None else 0
        if source is None or source_alloc <= 0:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"insufficient source officers; {why}"

        # Phase 2: target comes from reallocation_delta; Phase 1 from target_service
        rdelta = action.reallocation_delta or {}
        target = None
        for k, v in rdelta.items():
            if v > 0:
                try:
                    target = ServiceType(k)
                except Exception:
                    # Unknown key: leave target None and fall through to
                    # the target_service / top-backlog fallbacks.
                    pass
                # NOTE(review): breaks after the FIRST positive delta even
                # when its key failed to parse — later positive entries are
                # never considered. Confirm this is intended.
                break
        if target is None:
            target = getattr(action, 'target_service', None)
        if target is None or target == source:
            target = _top_backlog_service(obs, exclude=source)
        if target is None or target == source:
            fallback, why = _best_high_impact_action(obs)
            return fallback, f"missing distinct target_service; {why}"

        # Clamp the move size to what the source actually has (min 1).
        delta = max(1, min(abs(rdelta.get(source.value, 1)), source_alloc))
        repaired = ActionModel(
            action_type=ActionType.REALLOCATE_OFFICERS,
            service_target=source,
            reallocation_delta={source.value: -delta, target.value: delta},
        )
        note = None if repaired.model_dump(exclude_none=True) == action.model_dump(exclude_none=True) else "repaired reallocate_officers payload"
        return repaired, note

    # Unknown-but-unmasked action types pass through untouched.
    return action, None
422
+
423
+ """
424
+ The high-level simulation orchestration now lives in app.engine.
425
+ This module re-exports the public runtime API so existing imports
426
+ from app.simulator continue to work unchanged.
427
+ """
428
+
429
def _model_label_for_mode(agent_mode: SimulationAgentMode) -> str:
    """Return the human-readable model label for a run.

    The two locally-executed modes use fixed labels; any other mode
    (i.e. LLM inference) reports the configured MODEL_NAME, defaulting
    to "llm_inference" when the env var is unset.
    """
    fixed_labels = {
        "baseline_policy": "baseline_policy",
        "trained_rl": "trained_rl",
    }
    label = fixed_labels.get(agent_mode)
    if label is not None:
        return label
    return os.getenv("MODEL_NAME", "llm_inference")
435
+
436
+
437
+ def _log_step_line(step_row: dict[str, Any]) -> str:
438
+ done = "true" if bool(step_row.get("done")) else "false"
439
+ error = step_row.get("last_action_error") or "null"
440
+ action = json.dumps(step_row.get("action_payload", {}), separators=(",", ":"))
441
+ source = step_row.get("decision_source") or "unknown"
442
+ model = step_row.get("model_used") or "null"
443
+ repair = step_row.get("repair_note") or "null"
444
+ switch_note = step_row.get("switch_note") or "null"
445
+ return (
446
+ f"[STEP] step={step_row.get('step', 0)} action={action} "
447
+ f"reward={float(step_row.get('reward', 0.0)):.2f} done={done} "
448
+ f"error={error} source={source} model={model} repair={repair} switch={switch_note}"
449
+ )
450
+
451
+
452
class LiveSimulationSession:
    """Stateful simulation episode driven one step at a time.

    Supports three agent modes:
      * "baseline_policy" — a named heuristic policy looked up in POLICIES.
      * "llm_inference"   — an LLM picks actions, with multi-provider
        failover, adaptive per-model ranking, runtime action repair, and a
        heuristic auto-recovery fallback.
      * "trained_rl"      — a saved SB3 model (maskable or recurrent)
        acting through the gym wrapper.

    Callers drive the episode via step_once() until it returns done=True,
    then read `summary`, `score`, and `trace`.
    """

    def __init__(
        self,
        *,
        task_id: str,
        agent_mode: SimulationAgentMode,
        max_steps: int,
        seed: int | None,
        policy_name: str | None = None,
        model_path: str | None = None,
        model_type: Literal["maskable", "recurrent"] = "maskable",
    ) -> None:
        self.task_id = task_id
        self.agent_mode = agent_mode
        recommended = _recommended_min_steps(task_id)
        # LLM runs get a floor on episode length so the grader sees a
        # meaningful horizon; other modes honor the caller's max_steps as-is.
        if agent_mode == "llm_inference":
            self.max_steps = max(int(max_steps), int(recommended))
        else:
            self.max_steps = int(max_steps)
        # A missing seed is replaced by a random one so the run is still
        # reproducible afterwards (the chosen seed is recorded on the session).
        self.seed = int(seed if seed is not None else random.randint(1, 999999))
        self.policy_name = policy_name or "backlog_clearance"
        self.model_path = model_path
        self.model_type = model_type
        self.trace: list[dict[str, Any]] = []
        self.total_reward = 0.0
        self.step_idx = 0
        self.done = False
        self.summary: dict[str, Any] | None = None
        self.score: float | None = None
        self.grader_name: str | None = None

        # Core-env state (baseline_policy / llm_inference modes).
        self.env: GovWorkflowEnv | None = None
        self.obs: ObservationModel | Any = None
        self.policy = None

        # Trained-RL state (trained_rl mode only).
        self.rl_env: Any = None
        self.rl_model: Any = None
        self.rl_lstm_state: Any = None
        self.rl_episode_start: Any = None

        # LLM routing/health bookkeeping (llm_inference mode only).
        self.llm_runtimes: list[dict[str, Any]] = []
        self.llm_route: list[str] = []
        self.llm_model_stats: dict[tuple[str, str], dict[str, Any]] = {}
        self.consecutive_failure_steps = 0
        self.recovery_steps_remaining = 0
        self.auto_switch_count = 0
        self.last_switch_reason: str | None = None

        if self.agent_mode == "trained_rl":
            self._init_trained()
        else:
            self._init_core()

    def start_line(self) -> str:
        """Log banner emitted once at the start of a run."""
        return (
            f"[START] task={self.task_id} env=gov-workflow-openenv "
            f"model={_model_label_for_mode(self.agent_mode)}"
        )

    def _init_core(self) -> None:
        """Create the core env and select the per-step decision callable."""
        self.env = GovWorkflowEnv(task_id=self.task_id)
        self.obs, _ = self.env.reset(seed=self.seed)
        if self.agent_mode == "baseline_policy":
            # Unknown policy names silently fall back to backlog clearance.
            self.policy = POLICIES.get(self.policy_name, backlog_clearance_policy)
        else:
            self.policy = self._llm_action_with_meta
            self._init_llm_runtimes()

    def _init_llm_runtimes(self) -> None:
        """Build the ordered list of LLM provider runtimes from env vars.

        Each runtime bundles a base URL, one OpenAI client per API key, and
        a candidate model list. Also seeds per-(provider, model) health
        stats and a human-readable route description for diagnostics.
        """
        openai_base = os.getenv("API_BASE_URL") or os.getenv("OPENAI_API_BASE_URL") or "https://api.openai.com/v1"
        nvidia_base = os.getenv("NVIDIA_API_BASE_URL", "https://integrate.api.nvidia.com/v1")

        # _dedupe also drops falsy entries from unset env vars (presumably —
        # confirm against its definition elsewhere in this module).
        openai_keys = _dedupe(
            [
                os.getenv("HF_TOKEN"),
                os.getenv("OPENAI_API_KEY"),
                os.getenv("API_KEY"),
            ]
        )
        nvidia_keys = _dedupe(
            [
                os.getenv("NVIDIA_API_KEY"),
                os.getenv("NVIDIA_API_KEY_2"),
            ]
        )

        openai_models = _dedupe(
            [
                os.getenv("MODEL_NAME", "meta/llama-3.3-70b-instruct"),
                *_env_csv_list("MODEL_FALLBACKS"),
            ]
        )
        nvidia_models = _dedupe(
            [
                os.getenv("NVIDIA_MODEL"),
                *_env_csv_list("NVIDIA_MODEL_FALLBACKS"),
                *LEGACY_NVIDIA_MODEL_POOL,
            ]
        )

        runtimes: list[dict[str, Any]] = []

        # A runtime is registered only when at least one client could be
        # constructed; client construction failures are skipped silently.
        if openai_keys and openai_models:
            clients: list[tuple[OpenAI, str]] = []
            for idx, key in enumerate(openai_keys, start=1):
                try:
                    clients.append((OpenAI(base_url=openai_base, api_key=key, timeout=8.0, max_retries=0), f"openai_key_{idx}"))
                except Exception:
                    continue
            if clients:
                runtimes.append(
                    {
                        "provider": "openai-compatible",
                        "base_url": openai_base,
                        "clients": clients,
                        "models": openai_models,
                    }
                )

        if nvidia_keys and nvidia_models:
            clients = []
            for idx, key in enumerate(nvidia_keys, start=1):
                try:
                    clients.append((OpenAI(base_url=nvidia_base, api_key=key, timeout=8.0, max_retries=0), f"nvidia_key_{idx}"))
                except Exception:
                    continue
            if clients:
                runtimes.append(
                    {
                        "provider": "nvidia",
                        "base_url": nvidia_base,
                        "clients": clients,
                        "models": nvidia_models,
                    }
                )

        self.llm_runtimes = runtimes
        # Fresh health counters for every advertised (provider, model) pair.
        self.llm_model_stats = {}
        for runtime in runtimes:
            provider = str(runtime.get("provider"))
            for model in runtime.get("models", []):
                self.llm_model_stats[(provider, str(model))] = {
                    "calls": 0,
                    "invalid": 0,
                    "repaired": 0,
                    "failures": 0,
                    "cooldown_until_step": 0,
                }

        openai_runtime = next((rt for rt in runtimes if rt.get("provider") == "openai-compatible"), None)
        nvidia_runtime = next((rt for rt in runtimes if rt.get("provider") == "nvidia"), None)

        if openai_runtime is not None:
            openai_route = (
                f"openai-compatible ({len(openai_runtime['clients'])} keys, "
                f"{len(openai_runtime['models'])} models)"
            )
        else:
            openai_route = "openai-compatible (unavailable: missing API key/model)"

        if nvidia_runtime is not None:
            nvidia_route = (
                f"nvidia ({len(nvidia_runtime['clients'])} keys, "
                f"{len(nvidia_runtime['models'])} models)"
            )
        else:
            nvidia_route = "nvidia (unavailable: missing API key/model)"

        self.llm_route = [
            openai_route,
            nvidia_route,
            "adaptive ranking: prefer models with lower invalid/repaired rates",
            "heuristic fallback (backlog_clearance_policy)",
        ]

    def _rank_runtime_models(self, provider: str, models: list[str]) -> list[str]:
        """Order a provider's models best-first by observed health.

        Lower score is better: invalid actions weigh 2.0, repaired actions
        1.25, API failures 1.5, and a flat +1.0 penalty applies while a
        model is in cooldown. Ties break toward the model with more calls.
        """
        def _score(model_name: str) -> tuple[float, int]:
            stat = self.llm_model_stats.get((provider, model_name), {})
            calls = max(1, int(stat.get("calls", 0)))
            invalid_rate = float(stat.get("invalid", 0)) / calls
            repaired_rate = float(stat.get("repaired", 0)) / calls
            fail_rate = float(stat.get("failures", 0)) / calls
            cooldown = int(stat.get("cooldown_until_step", 0))
            cooldown_penalty = 1.0 if self.step_idx < cooldown else 0.0
            return (invalid_rate * 2.0 + repaired_rate * 1.25 + fail_rate * 1.5 + cooldown_penalty, -calls)

        return sorted([str(m) for m in models], key=_score)

    def _llm_action_with_meta(self, obs: ObservationModel) -> tuple[ActionModel, dict[str, Any]]:
        """Pick the next action via LLM, returning (action, decision metadata).

        Order of attempts: (1) forced recovery policy while a recovery
        window is active, (2) every runtime x key x ranked model until one
        returns parseable JSON, (3) heuristic fallback. Metadata records
        the decision source, provider/model/key used, attempt count, and
        the last error if any.
        """
        # While recovering from a failure streak, bypass the LLM entirely.
        if self.recovery_steps_remaining > 0:
            self.recovery_steps_remaining -= 1
            action, why = _best_high_impact_action(obs)
            return action, {
                "decision_source": "auto_recovery_policy",
                "provider": "heuristic",
                "model_used": "backlog_clearance_policy",
                "llm_attempts": 0,
                "llm_error": None,
                "llm_key_label": None,
                "repair_note": why,
            }

        attempts = 0
        last_error = ""
        allowed_actions, blocked_actions = _masked_action_type_hints(obs)
        # Machine-readable schema the model must follow; mirrors the rules
        # spelled out in the system prompt below.
        schema_hint = {
            "required_fields": {
                "set_priority_mode": ["action_type", "priority_mode"],
                "assign_capacity": ["action_type", "service", "officer_delta"],
                "request_missing_documents": ["action_type", "service"],
                "escalate_service": ["action_type", "service"],
                "advance_time": ["action_type"],
                "reallocate_officers": ["action_type", "service", "target_service", "officer_delta"],
            },
            "allowed_priority_mode": [m.value for m in PriorityMode],
            "allowed_services": [s.value for s in ServiceType],
        }
        system_prompt = (
            "You are controlling a government workflow simulator. "
            "Return exactly one JSON object only. No markdown. No explanation. "
            "Allowed action_type: set_priority_mode, assign_capacity, request_missing_documents, "
            "escalate_service, advance_time, reallocate_officers. "
            "Rules: "
            "1) reallocate_officers requires service + target_service + officer_delta>0 and source!=target. "
            "2) assign_capacity requires service + officer_delta>0. "
            "3) request_missing_documents requires service with missing_docs_cases>0. "
            "4) set_priority_mode requires priority_mode in [urgent_first, oldest_first, balanced, backlog_clearance]. "
            "5) Always prefer high-impact actions that reduce backlog/SLA risk over no-op loops. "
            "Use lowercase enum values."
        )
        user_prompt = (
            "Observation:\n"
            f"{obs.model_dump_json()}\n"
            f"Allowed action types now: {allowed_actions}\n"
            f"Blocked action types now: {blocked_actions}\n"
            f"Action schema hints: {json.dumps(schema_hint, separators=(',', ':'))}\n"
            f"Last action validity: {obs.last_action_valid}\n"
            f"Last action message: {obs.last_action_message}\n"
            "Return action JSON."
        )

        for runtime in self.llm_runtimes:
            provider = str(runtime["provider"])
            ranked_models = self._rank_runtime_models(provider, list(runtime["models"]))
            for client, key_label in runtime["clients"]:
                for model in ranked_models:
                    attempts += 1
                    stat_key = (provider, model)
                    try:
                        out = client.chat.completions.create(
                            model=model,
                            messages=[
                                {"role": "system", "content": system_prompt},
                                {"role": "user", "content": user_prompt},
                            ],
                            temperature=0.0,
                            max_tokens=200,
                            stream=False,
                        )
                        content = (out.choices[0].message.content or "").strip()
                        # Raises if the reply is not a coercible JSON action,
                        # which routes us into the except branch below.
                        action = _coerce_action(_extract_json_object(content))
                        if stat_key in self.llm_model_stats:
                            self.llm_model_stats[stat_key]["calls"] += 1
                        return action, {
                            "decision_source": "llm",
                            "provider": provider,
                            "model_used": model,
                            "llm_attempts": attempts,
                            "llm_error": None,
                            "llm_key_label": key_label,
                        }
                    except Exception as exc:
                        last_error = str(exc)
                        stat = self.llm_model_stats.get(stat_key)
                        if stat is not None:
                            stat["calls"] += 1
                            stat["failures"] += 1
                            # Two failures put the model on a 5-step cooldown
                            # (soft: only a ranking penalty, not a hard skip).
                            if stat["failures"] >= 2:
                                stat["cooldown_until_step"] = self.step_idx + 5
                        continue

        # Every runtime/model failed (or none was configured): heuristic fallback.
        action, why = _best_high_impact_action(obs)
        if not self.llm_runtimes:
            last_error = "No LLM credentials configured."
        return action, {
            "decision_source": "heuristic_fallback",
            "provider": "heuristic",
            "model_used": "backlog_clearance_policy",
            "llm_attempts": attempts,
            "llm_error": last_error or None,
            "llm_key_label": None,
            "repair_note": why,
        }

    def _init_trained(self) -> None:
        """Load the saved RL model and build the gym env for trained_rl mode.

        Imports are local so non-RL sessions never pay for numpy/SB3.
        Raises ValueError when no model_path was provided.
        """
        import numpy as np
        from app.main import _load_model_cached_or_503, _resolve_model_path_or_422
        from rl.gym_wrapper import GovWorkflowGymEnv

        if not self.model_path:
            raise ValueError("model_path is required for trained_rl simulation.")
        model_abs = _resolve_model_path_or_422(self.model_path)
        self.rl_model = _load_model_cached_or_503(model_abs, self.model_type)
        self.rl_env = GovWorkflowGymEnv(task_id=self.task_id, seed=self.seed, hard_action_mask=True)
        self.obs, _ = self.rl_env.reset(seed=self.seed)
        self.rl_lstm_state = None
        self.rl_episode_start = np.array([True], dtype=bool)

    def step_once(self) -> tuple[dict[str, Any], str, bool]:
        """Advance the episode by one step.

        Returns (trace_row, formatted_log_line, finished). Finishing is
        triggered either by the env reporting done or by hitting max_steps;
        in both cases the row's "done" flag is forced True after _finalize.
        Raises RuntimeError when called on a finished session.
        """
        if self.done:
            raise RuntimeError("Simulation already finished.")

        self.step_idx += 1
        if self.agent_mode == "trained_rl":
            row = self._step_trained()
        else:
            row = self._step_core()
        self.trace.append(row)
        self.total_reward += float(row["reward"])
        step_log = _log_step_line(row)

        if row["done"] or self.step_idx >= self.max_steps:
            self._finalize()
            row["done"] = True
            return row, step_log, True
        return row, step_log, False

    def end_line(self) -> str:
        """Log banner for the end of a run; success means score >= 0.5."""
        if self.score is None:
            # Finalize never ran (e.g. zero steps): report an empty failure.
            return "[END] success=false steps=0 score=0.00 rewards="
        rewards = ",".join(f"{float(x.get('reward', 0.0)):.2f}" for x in self.trace)
        success = "true" if self.score >= 0.5 else "false"
        return (
            f"[END] success={success} steps={len(self.trace)} "
            f"score={self.score:.2f} rewards={rewards}"
        )

    def snapshot(self) -> dict[str, Any]:
        """Lightweight JSON-friendly view of session progress (no full trace)."""
        return {
            "task_id": self.task_id,
            "agent_mode": self.agent_mode,
            "seed": self.seed,
            "max_steps": self.max_steps,
            "step_idx": self.step_idx,
            "done": self.done,
            "total_reward": float(self.total_reward),
            "score": self.score,
            "grader_name": self.grader_name,
            "summary": self.summary,
            "trace_len": len(self.trace),
            "llm_route": list(self.llm_route),
        }

    def close(self) -> None:
        """Best-effort release of both envs; never raises."""
        try:
            if self.env is not None and hasattr(self.env, "close"):
                self.env.close()
        except Exception:
            pass
        try:
            if self.rl_env is not None and hasattr(self.rl_env, "close"):
                self.rl_env.close()
        except Exception:
            pass

    def _step_core(self) -> dict[str, Any]:
        """One step in baseline_policy or llm_inference mode.

        Gets an action from the active policy, repairs/masks it if needed,
        steps the core env, and returns the full trace row (including LLM
        decision metadata and per-model health updates).
        """
        if self.env is None:
            raise RuntimeError("Core simulation env not initialized.")
        if self.agent_mode == "baseline_policy":
            action = self.policy(self.obs)
            meta = {
                "decision_source": "baseline_policy",
                "provider": "local_policy",
                "model_used": self.policy_name,
                "llm_attempts": 0,
                "llm_error": None,
                "llm_key_label": None,
            }
        else:
            # LLM policy returns (action, meta); tolerate malformed outputs.
            raw_decision = self.policy(self.obs)
            if isinstance(raw_decision, tuple) and len(raw_decision) == 2:
                action, meta = raw_decision
            else:
                action, meta = raw_decision, {}
            if not isinstance(meta, dict):
                meta = {}
            if not isinstance(action, ActionModel):
                if isinstance(action, dict):
                    action = _coerce_action(action)
                else:
                    action = ActionModel(action_type=ActionType.ADVANCE_TIME)
                    meta["repair_note"] = "non-action output from llm policy, coerced to advance_time"
            # Runtime mask check: replace any currently-disallowed action.
            allowed_mask = _compute_action_mask(self.obs)
            if not bool(allowed_mask.get(action.action_type, True)):
                masked_fallback, why = _best_high_impact_action(self.obs)
                action = masked_fallback
                if meta.get("decision_source") == "llm":
                    meta["decision_source"] = "llm_repaired"
                meta["repair_note"] = f"action masked at runtime; {why}"
            # Payload-level repair (fills/validates fields for the chosen type).
            repaired_action, repair_note = _repair_action_for_observation(action, self.obs)
            if repair_note:
                action = repaired_action
                if meta.get("decision_source") == "llm":
                    meta["decision_source"] = "llm_repaired"
                meta["repair_note"] = repair_note

        self.obs, reward, terminated, truncated, info = self.env.step(action)
        done = bool(terminated or truncated)
        # Read observation fields safely for both Phase 1 and Phase 2 model shapes
        fairness_gap = float(
            getattr(self.obs, 'fairness_gap',
                    1.0 - getattr(self.obs, 'fairness_index', 1.0))
        )
        row = {
            "step": self.step_idx,
            "day": self.obs.day,
            "action_type": action.action_type.value,
            "action_payload": action.model_dump(exclude_none=True, mode="json"),
            "reward": float(reward),
            "done": done,
            "backlog": self.obs.total_backlog,
            "completed": self.obs.total_completed,
            "sla_breaches": self.obs.total_sla_breaches,
            "fairness_gap": fairness_gap,
            "escalation_budget_remaining": self.obs.escalation_budget_remaining,
            # NOTE(review): getattr() on `info` only works if env.step returns
            # an object; _step_trained treats its info as a dict (info.get).
            # If this env also returns a dict, both fields below always take
            # their defaults — confirm GovWorkflowEnv.step's contract.
            "invalid_action": bool(getattr(info, 'invalid_action', False)),
            "last_action_error": getattr(info, 'last_action_error', None),
            "queue_rows": _queue_rows(self.obs),
        }
        row.update(meta)

        if self.agent_mode == "llm_inference":
            # Update per-model health, then watch for sustained failure
            # streaks that should trigger an automatic recovery window.
            is_repaired = row.get("decision_source") in {"llm_repaired", "auto_recovery_policy"}
            is_invalid = bool(row.get("invalid_action")) or bool(row.get("last_action_error"))
            model_used = str(row.get("model_used") or "")
            provider = str(row.get("provider") or "")
            stat_key = (provider, model_used)
            stat = self.llm_model_stats.get(stat_key)
            if stat is not None:
                if is_repaired:
                    stat["repaired"] += 1
                if is_invalid:
                    stat["invalid"] += 1
                    stat["failures"] += 1
                else:
                    # A clean step slowly forgives past failures.
                    stat["failures"] = max(0, int(stat.get("failures", 0)) - 1)

            is_failure_pattern = is_invalid or is_repaired
            if is_failure_pattern:
                self.consecutive_failure_steps += 1
            else:
                self.consecutive_failure_steps = 0

            # Four bad steps in a row: cool the model down for 6 steps and
            # hand control to the recovery policy for the next 3 steps.
            if self.consecutive_failure_steps >= 4:
                if stat is not None:
                    stat["cooldown_until_step"] = self.step_idx + 6
                self.recovery_steps_remaining = max(self.recovery_steps_remaining, 3)
                self.auto_switch_count += 1
                self.last_switch_reason = "repeated invalid/repaired pattern detected"
                row["switch_note"] = "auto-switched to recovery policy and deprioritized failing model"
                self.consecutive_failure_steps = 0

        return row

    def _step_trained(self) -> dict[str, Any]:
        """One step in trained_rl mode.

        Recurrent models predict without a mask, so an out-of-mask action is
        corrected to the first valid index afterwards; maskable models get
        the mask passed directly into predict().
        """
        import numpy as np

        masks = self.rl_env.action_masks()
        if self.model_type == "recurrent":
            action, self.rl_lstm_state = self.rl_model.predict(
                self.obs,
                state=self.rl_lstm_state,
                episode_start=self.rl_episode_start,
                deterministic=True,
            )
            action_idx = int(action.item() if hasattr(action, "item") else action)
            if not (0 <= action_idx < masks.shape[0] and bool(masks[action_idx])):
                valid = np.flatnonzero(masks)
                # 18 is presumably the always-valid advance_time index —
                # confirm against the action table in rl.feature_builder.
                action_idx = int(valid[0]) if valid.size > 0 else 18
        else:
            from sb3_contrib.common.maskable.utils import get_action_masks

            action, _ = self.rl_model.predict(
                self.obs,
                action_masks=get_action_masks(self.rl_env),
                deterministic=True,
            )
            action_idx = int(action.item() if hasattr(action, "item") else action)

        self.obs, reward, terminated, truncated, info = self.rl_env.step(action_idx)
        done = bool(terminated or truncated)
        if self.model_type == "recurrent":
            # The LSTM state must be reset on episode boundaries.
            self.rl_episode_start = np.array([done], dtype=bool)
        # Reach into the wrapped core env for the rich observation the
        # trace rows need (the gym obs is a flattened feature vector).
        core_obs = self.rl_env._core_env._build_observation()
        action_model, action_label = _decode_action_idx(action_idx)
        return {
            "step": self.step_idx,
            "day": core_obs.day,
            "action_type": action_label,
            "action_payload": action_model.model_dump(exclude_none=True, mode="json"),
            "action_index": action_idx,
            "reward": float(reward),
            "done": done,
            "backlog": core_obs.total_backlog,
            "completed": core_obs.total_completed,
            "sla_breaches": core_obs.total_sla_breaches,
            "fairness_gap": float(core_obs.fairness_gap),
            "escalation_budget_remaining": core_obs.escalation_budget_remaining,
            "invalid_action": bool(info.get("invalid_action", False)),
            "last_action_error": info.get("last_action_error"),
            "queue_rows": _queue_rows(core_obs),
            "decision_source": "trained_rl",
            "provider": "rl",
            "model_used": self.model_path or "trained_rl",
            "llm_attempts": 0,
            "llm_error": None,
            "llm_key_label": None,
        }

    def _finalize(self) -> None:
        """Grade the finished episode and build the summary dict.

        Idempotent: repeat calls are no-ops once `done` is set.
        """
        if self.done:
            return
        self.done = True
        if self.agent_mode == "trained_rl":
            final_state = self.rl_env._core_env.state()
        else:
            final_state = self.env.state()
        gr = grade_episode(final_state)
        self.score = float(gr.score)
        self.grader_name = gr.grader_name

        # Decision-source breakdown over the recorded trace.
        llm_steps = sum(
            1 for row in self.trace if row.get("decision_source") in {"llm", "llm_repaired"}
        )
        fallback_steps = sum(
            1
            for row in self.trace
            if row.get("decision_source") in {"heuristic_fallback", "auto_recovery_policy"}
        )
        repaired_steps = sum(
            1
            for row in self.trace
            if row.get("decision_source") in {"llm_repaired", "auto_recovery_policy"}
        )
        total_steps = max(1, len(self.trace))
        invalid_actions = int(final_state.metrics.total_invalid_actions)
        invalid_rate = float(invalid_actions) / float(total_steps)
        repaired_rate = float(repaired_steps) / float(total_steps)

        # Per-model scoreboard (only models that were actually called),
        # best-first by invalid rate, then repaired rate, then call volume.
        ranked_models: list[dict[str, Any]] = []
        if self.llm_model_stats:
            for (provider, model), stat in self.llm_model_stats.items():
                calls = int(stat.get("calls", 0))
                if calls <= 0:
                    continue
                ranked_models.append(
                    {
                        "provider": provider,
                        "model": model,
                        "calls": calls,
                        "invalid_rate": float(stat.get("invalid", 0)) / max(1, calls),
                        "repaired_rate": float(stat.get("repaired", 0)) / max(1, calls),
                    }
                )
            ranked_models.sort(key=lambda x: (x["invalid_rate"], x["repaired_rate"], -x["calls"]))

        self.summary = {
            "total_steps": final_state.total_steps,
            "total_completed": final_state.total_completed,
            "total_backlog": final_state.total_backlog,
            "total_sla_breaches": final_state.total_sla_breaches,
            "fairness_gap": float(final_state.fairness_gap),
            "total_invalid_actions": final_state.metrics.total_invalid_actions,
            "invalid_action_rate": invalid_rate,
            "llm_steps": llm_steps,
            "heuristic_fallback_steps": fallback_steps,
            "llm_repaired_steps": repaired_steps,
            "repaired_action_rate": repaired_rate,
            "auto_switch_count": self.auto_switch_count,
            "last_switch_reason": self.last_switch_reason,
            "effective_max_steps": self.max_steps,
            "recommended_min_steps": _recommended_min_steps(self.task_id),
        }
        if self.agent_mode == "llm_inference":
            self.summary["llm_route"] = list(self.llm_route)
            self.summary["llm_model_performance"] = ranked_models
        if self.agent_mode == "trained_rl":
            self.summary["model_path"] = self.model_path
            self.summary["model_type"] = self.model_type
1041
+
1042
+
1043
def run_simulation(
    *,
    task_id: str,
    agent_mode: SimulationAgentMode,
    max_steps: int,
    seed: int | None,
    policy_name: str | None = None,
    model_path: str | None = None,
    model_type: Literal["maskable", "recurrent"] = "maskable",
) -> SimulationRun:
    """Run a complete episode synchronously and package it as a SimulationRun.

    Thin wrapper over LiveSimulationSession: drives step_once() to
    completion and guarantees the session is closed even on failure.
    """
    episode = LiveSimulationSession(
        task_id=task_id,
        agent_mode=agent_mode,
        max_steps=max_steps,
        seed=seed,
        policy_name=policy_name,
        model_path=model_path,
        model_type=model_type,
    )
    try:
        while not episode.done:
            episode.step_once()
        # Defensive defaults: score/grader/summary may be None only if the
        # loop above never finalized (zero-step edge case).
        run_payload = {
            "task_id": episode.task_id,
            "agent_mode": episode.agent_mode,
            "seed": episode.seed,
            "total_reward": float(episode.total_reward),
            "score": float(episode.score or 0.0),
            "grader_name": str(episode.grader_name or "unknown"),
            "summary": dict(episode.summary or {}),
            "trace": list(episode.trace),
        }
        return SimulationRun(**run_payload)
    finally:
        episode.close()
1077
+
1078
+
1079
+ def _decode_action_idx(action_idx: int) -> tuple[ActionModel, str]:
1080
+ try:
1081
+ from rl.feature_builder import ACTION_DECODE_TABLE
1082
+ from app.models import PriorityMode, ServiceType
1083
+ except Exception:
1084
+ return ActionModel(action_type=ActionType.ADVANCE_TIME), f"action_{action_idx}"
1085
+
1086
+ row = ACTION_DECODE_TABLE.get(int(action_idx))
1087
+ if row is None:
1088
+ return ActionModel(action_type=ActionType.ADVANCE_TIME), f"action_{action_idx}"
1089
+
1090
+ from app.engine import (
1091
+ DayResult,
1092
+ DaySimulator,
1093
+ LiveSimulationSession,
1094
+ SimulationAgentMode,
1095
+ SimulationRun,
1096
+ run_simulation,
1097
+ )
1098
+
1099
+ __all__ = [
1100
+ "DayResult",
1101
+ "DaySimulator",
1102
+ "SimulationAgentMode",
1103
+ "SimulationRun",
1104
+ "LiveSimulationSession",
1105
+ "run_simulation",
1106
+ ]
app/state_machine.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ state_machine.py — Gov Workflow OpenEnv
3
+ Deterministic workflow transition engine aligned with Phase 1 schemas.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from app.models import ApplicationCase, InternalSubstate, StageType
9
+
10
+
11
# Maps each fine-grained internal workflow substate onto the coarse stage
# shown publicly. Several internal states collapse to the same public stage
# (e.g. a case blocked on missing documents still reads as "document
# verification"); terminal states map to the stage where they ended.
INTERNAL_TO_PUBLIC_STAGE: dict[InternalSubstate, StageType] = {
    InternalSubstate.PRE_SCRUTINY: StageType.SUBMISSION,
    InternalSubstate.DOC_VALIDATION: StageType.DOCUMENT_VERIFICATION,
    InternalSubstate.SERVICE_SPECIFIC_VALIDATION: StageType.DOCUMENT_VERIFICATION,
    InternalSubstate.FIELD_VERIFICATION_PENDING: StageType.FIELD_VERIFICATION,
    InternalSubstate.DECISION_PENDING: StageType.APPROVAL,
    InternalSubstate.ISSUANCE_READY: StageType.ISSUANCE,
    InternalSubstate.BLOCKED_MISSING_DOCS: StageType.DOCUMENT_VERIFICATION,
    InternalSubstate.COMPLETED: StageType.ISSUANCE,
    InternalSubstate.REJECTED: StageType.APPROVAL,
}
22
+
23
+
24
def build_public_stage(substate: InternalSubstate) -> StageType:
    """Translate an internal substate to its public stage (SUBMISSION if unmapped)."""
    try:
        return INTERNAL_TO_PUBLIC_STAGE[substate]
    except KeyError:
        return StageType.SUBMISSION
26
+
27
+
28
def transition_case(case: ApplicationCase, new_substate: InternalSubstate) -> None:
    """Move `case` into `new_substate` in place.

    Also refreshes the derived public stage and resets the stage-age
    counter, so SLA/aging logic starts fresh in the new stage.
    """
    case.days_in_current_stage = 0
    case.internal_substate = new_substate
    case.public_stage = build_public_stage(new_substate)
32
+
33
+
34
def can_advance(case: ApplicationCase) -> bool:
    """A case may advance unless it is finished or waiting on missing documents."""
    terminal = case.completed or case.rejected
    blocked = case.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
    return not (terminal or blocked)
40
+
41
+
42
def advance_case(case: ApplicationCase, rng: object = None) -> tuple[bool, bool]:
    """Advance `case` by one workflow transition.

    Returns (progressed, completed). `rng` is accepted for interface
    compatibility and is not used by the deterministic engine.
    """
    if not can_advance(case):
        return False, False

    state = case.internal_substate

    # Missing documents trap the case before it clears document validation.
    if case.has_missing_docs and state in (
        InternalSubstate.PRE_SCRUTINY,
        InternalSubstate.DOC_VALIDATION,
    ):
        transition_case(case, InternalSubstate.BLOCKED_MISSING_DOCS)
        return True, False

    # Both validation stages branch on whether a field visit is required.
    if state in (
        InternalSubstate.DOC_VALIDATION,
        InternalSubstate.SERVICE_SPECIFIC_VALIDATION,
    ):
        next_state = (
            InternalSubstate.FIELD_VERIFICATION_PENDING
            if case.field_verification_required
            else InternalSubstate.DECISION_PENDING
        )
        transition_case(case, next_state)
        return True, False

    if state == InternalSubstate.PRE_SCRUTINY:
        transition_case(case, InternalSubstate.DOC_VALIDATION)
        return True, False

    if state == InternalSubstate.DECISION_PENDING:
        transition_case(case, InternalSubstate.ISSUANCE_READY)
        return True, False

    if state == InternalSubstate.ISSUANCE_READY:
        transition_case(case, InternalSubstate.COMPLETED)
        case.completed = True
        return True, True

    # FIELD_VERIFICATION_PENDING (resolved externally via
    # complete_field_verification) and any unknown state: no progress.
    return False, False
91
+
92
+
93
def unblock_missing_docs(case: ApplicationCase) -> bool:
    """Resolve missing documents and return the case to document validation.

    Returns False (no change) when the case is not currently blocked
    on documents.
    """
    blocked = case.internal_substate == InternalSubstate.BLOCKED_MISSING_DOCS
    if not blocked:
        return False
    case.has_missing_docs = False
    case.doc_resolution_day = None
    transition_case(case, InternalSubstate.DOC_VALIDATION)
    return True
100
+
101
+
102
def complete_field_verification(case: ApplicationCase) -> bool:
    """Finish field verification and move the case into the decision queue.

    Returns False (no change) when the case was not actually waiting
    on field verification.
    """
    pending = case.internal_substate == InternalSubstate.FIELD_VERIFICATION_PENDING
    if not pending:
        return False
    case.field_verification_completion_day = None
    transition_case(case, InternalSubstate.DECISION_PENDING)
    return True
app/story_router.py ADDED
@@ -0,0 +1,407 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app/story_router.py
3
+
4
+ FastAPI router that serves LLM training story data.
5
+ All 7 endpoints are READ-ONLY - they serve pre-saved JSON files.
6
+ No frontend elements are invoked from backend.
7
+ No training runs happen here - only data serving.
8
+
9
+ Mount in main.py with:
10
+ from app.story_router import router as story_router
11
+ app.include_router(story_router)
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ import json
18
+ from pathlib import Path
19
+ from typing import Optional
20
+
21
+ from fastapi import APIRouter, HTTPException
22
+ from fastapi.responses import StreamingResponse
23
+
24
router = APIRouter(prefix="/training", tags=["Training Story"])

# --- Data directory --------------------------------------------------
# Pre-converted training logs live here as <task_id>_training_log.json.
# Relative path: resolved against the process working directory.
DATA_DIR = Path("data/training_logs")

# Hard-coded heuristic (no-AI) baseline metrics per benchmark task, used by
# the /comparison endpoint as the "before" side of the before/after view.
# Values were presumably measured offline — TODO confirm provenance.
HEURISTIC_BASELINES: dict[str, dict] = {
    "district_backlog_easy": {
        "score": 0.527, "completed": 41,
        "breaches": 184, "reward": -79.86, "avg_wait": 6.9,
    },
    "mixed_urgency_medium": {
        "score": 0.454, "completed": 58,
        "breaches": 34, "reward": -684.22, "avg_wait": 12.4,
    },
    "cross_department_hard": {
        "score": 0.606, "completed": 83,
        "breaches": 723, "reward": -2318.78, "avg_wait": 15.6,
    },
}
43
+
44
+
45
+ # --- Internal helpers ------------------------------------------------
46
+
47
def _load_log(task_id: str) -> dict:
    """Load JSON training log for given task. Raises 404 if missing."""
    log_path = DATA_DIR / f"{task_id}_training_log.json"
    if not log_path.exists():
        raise HTTPException(
            status_code=404,
            detail=(
                f"Training log not found for task '{task_id}'. "
                f"Run: python scripts/convert_grpo_csv.py "
                f"--csv <your_csv> --task {task_id}"
            ),
        )
    raw = log_path.read_text(encoding="utf-8")
    return json.loads(raw)
61
+
62
+
63
+ def _dominant_action(episodes: list[dict]) -> str:
64
+ """Returns the action name with the highest total weight across episodes."""
65
+ totals: dict[str, float] = {}
66
+ for ep in episodes:
67
+ for action, val in ep.get("actions", {}).items():
68
+ totals[action] = totals.get(action, 0.0) + float(val)
69
+ return max(totals, key=totals.get) if totals else "advance_time"
70
+
71
+
72
+ def _phase_message(ep: dict) -> str:
73
+ """Returns a human-readable learning message for one episode."""
74
+ phase = ep.get("phase", "random")
75
+ reward = ep.get("total_reward", 0)
76
+ score = ep.get("score", 0)
77
+ fn1 = ep.get("fn1_valid", 1.0)
78
+ fn2 = ep.get("fn2_no_halluc", 1.0)
79
+ episode = ep.get("episode", 0)
80
+
81
+ validity_note = "" if fn1 >= 1.0 else f" WARNING: Invalid action at step {episode}."
82
+ halluc_note = "" if fn2 >= 1.0 else " WARNING: Hallucination detected."
83
+
84
+ messages = {
85
+ "random": (
86
+ f"Step {episode}: LLM is exploring. "
87
+ f"Reward={reward:.3f}, Score={score:.3f}.{validity_note}{halluc_note}"
88
+ ),
89
+ "exploring": (
90
+ f"Step {episode}: LLM finding patterns. "
91
+ f"Reward={reward:.3f}, Score={score:.3f}.{validity_note}{halluc_note}"
92
+ ),
93
+ "learning": (
94
+ f"Step {episode}: LLM reinforcing good actions. "
95
+ f"Reward={reward:.3f}, Score={score:.3f}.{validity_note}{halluc_note}"
96
+ ),
97
+ "converged": (
98
+ f"Step {episode}: LLM converged. "
99
+ f"Reward={reward:.3f}, Score={score:.3f}.{validity_note}{halluc_note}"
100
+ ),
101
+ }
102
+ return messages.get(phase, f"Step {episode}: reward={reward:.3f}")
103
+
104
+
105
+ # ================================================================
106
+ # ENDPOINT 1 - GET /training/tasks
107
+ # ================================================================
108
+ @router.get("/tasks")
109
+ async def list_trained_tasks() -> dict:
110
+ """
111
+ Returns all tasks that have a saved training log JSON file.
112
+ Frontend calls this first to populate task selector.
113
+ """
114
+ DATA_DIR.mkdir(parents=True, exist_ok=True)
115
+ available = []
116
+ for path in sorted(DATA_DIR.glob("*_training_log.json")):
117
+ task_id = path.stem.replace("_training_log", "")
118
+ try:
119
+ log = _load_log(task_id)
120
+ available.append({
121
+ "task_id": task_id,
122
+ "total_episodes": log["total_episodes"],
123
+ "final_score": log["summary"]["last_episode_score"],
124
+ "reward_improvement": log["summary"]["reward_improvement_pct"],
125
+ "base_model": log.get("base_model", ""),
126
+ "training_method": log.get("training_method", "GRPO"),
127
+ })
128
+ except HTTPException:
129
+ pass
130
+ return {"tasks": available}
131
+
132
+
133
+ # ================================================================
134
+ # ENDPOINT 2 - GET /training/summary/{task_id}
135
+ # ================================================================
136
+ @router.get("/summary/{task_id}")
137
+ async def training_summary(task_id: str) -> dict:
138
+ """Returns overview stats + narrative for the ACT 2 header card."""
139
+ log = _load_log(task_id)
140
+ eps = log["episodes"]
141
+ n = len(eps)
142
+
143
+ q1, q2, q3 = n // 4, n // 2, 3 * n // 4
144
+
145
+ p1_dom = _dominant_action(eps[:q1])
146
+ p2_dom = _dominant_action(eps[q1:q2])
147
+ p3_dom = _dominant_action(eps[q2:q3])
148
+ p4_dom = _dominant_action(eps[q3:])
149
+
150
+ avg_p1_r = sum(e["total_reward"] for e in eps[:q1]) / max(q1, 1)
151
+ avg_p4_r = sum(e["total_reward"] for e in eps[q3:]) / max(n - q3, 1)
152
+
153
+ return {
154
+ "task_id": log["task_id"],
155
+ "base_model": log.get("base_model", ""),
156
+ "training_method": log.get("training_method", "GRPO"),
157
+ "lora_rank": log.get("lora_rank", 16),
158
+ "total_episodes": n,
159
+ "reward_functions": log.get("reward_functions", {}),
160
+ "summary": log["summary"],
161
+ "narrative": {
162
+ "phase_1": (
163
+ f"Steps 1-{q1}: LLM chose '{p1_dom}' most often. "
164
+ f"Avg reward {avg_p1_r:.2f}. Still exploring randomly."
165
+ ),
166
+ "phase_2": (
167
+ f"Steps {q1}-{q2}: LLM discovered '{p2_dom}'. "
168
+ "Reward started improving as valid patterns emerged."
169
+ ),
170
+ "phase_3": (
171
+ f"Steps {q2}-{q3}: LLM reinforced '{p3_dom}'. "
172
+ "Action validity reaching near-perfect levels."
173
+ ),
174
+ "phase_4": (
175
+ f"Steps {q3}-{n}: LLM converged on '{p4_dom}'. "
176
+ f"Avg reward {avg_p4_r:.2f}. "
177
+ f"Final score {log['summary']['last_episode_score']:.1%}."
178
+ ),
179
+ },
180
+ }
181
+
182
+
183
+ # ================================================================
184
+ # ENDPOINT 3 - GET /training/curve/{task_id}
185
+ # ================================================================
186
+ @router.get("/curve/{task_id}")
187
+ async def training_curve(
188
+ task_id: str,
189
+ downsample: int = 1,
190
+ ) -> dict:
191
+ """
192
+ Returns episode-by-episode reward + score for chart rendering.
193
+ downsample=5 -> returns every 5th step.
194
+ """
195
+ log = _load_log(task_id)
196
+ eps = log["episodes"]
197
+ sampled = eps[::max(1, downsample)]
198
+ return {
199
+ "task_id": task_id,
200
+ "total_points": len(sampled),
201
+ "curve": [
202
+ {
203
+ "episode": e["episode"],
204
+ "reward": e["total_reward"],
205
+ "score": e["score"],
206
+ "fn1_valid": e.get("fn1_valid", 1.0),
207
+ "fn2_no_halluc": e.get("fn2_no_halluc", 1.0),
208
+ "fn3_env_score": e.get("fn3_env_score", 0.0),
209
+ "phase": e["phase"],
210
+ }
211
+ for e in sampled
212
+ ],
213
+ }
214
+
215
+
216
+ # ================================================================
217
+ # ENDPOINT 4 - GET /training/actions/{task_id}
218
+ # ================================================================
219
+ @router.get("/actions/{task_id}")
220
+ async def action_evolution(task_id: str) -> dict:
221
+ """Returns action distribution at 5 checkpoints across training."""
222
+ log = _load_log(task_id)
223
+ eps = log["episodes"]
224
+ n = len(eps)
225
+
226
+ idxs = [0, n // 4, n // 2, 3 * n // 4, n - 1]
227
+ result = []
228
+ for idx in idxs:
229
+ ep = eps[idx]
230
+ result.append({
231
+ "episode": ep["episode"],
232
+ "phase": ep["phase"],
233
+ "actions": ep.get("actions", {}),
234
+ "reward": ep["total_reward"],
235
+ "score": ep["score"],
236
+ })
237
+
238
+ avg_fn1_start = sum(e.get("fn1_valid", 1.0) for e in eps[:n // 4]) / max(n // 4, 1)
239
+ avg_fn1_end = sum(e.get("fn1_valid", 1.0) for e in eps[3 * n // 4:]) / max(n - 3 * n // 4, 1)
240
+
241
+ insight = (
242
+ f"Action validity improved from {avg_fn1_start:.1%} (early) "
243
+ f"to {avg_fn1_end:.1%} (final). "
244
+ "LLM learned to output valid government workflow JSON consistently."
245
+ )
246
+
247
+ return {
248
+ "task_id": task_id,
249
+ "checkpoints": result,
250
+ "insight": insight,
251
+ }
252
+
253
+
254
+ # ================================================================
255
+ # ENDPOINT 5 - GET /training/episode/{task_id}/{episode_num}
256
+ # ================================================================
257
+ @router.get("/episode/{task_id}/{episode_num}")
258
+ async def episode_detail(task_id: str, episode_num: int) -> dict:
259
+ """Returns detail for one specific training step."""
260
+ log = _load_log(task_id)
261
+ eps = log["episodes"]
262
+
263
+ if episode_num < 1 or episode_num > len(eps):
264
+ raise HTTPException(
265
+ status_code=400,
266
+ detail=f"episode_num must be 1-{len(eps)}. Got {episode_num}.",
267
+ )
268
+
269
+ ep = eps[episode_num - 1]
270
+ rewards_so_far = [e["total_reward"] for e in eps[:episode_num]]
271
+ scores_so_far = [e["score"] for e in eps[:episode_num]]
272
+
273
+ return {
274
+ "task_id": task_id,
275
+ "episode": ep["episode"],
276
+ "total_episodes": len(eps),
277
+ "reward": ep["total_reward"],
278
+ "score": ep["score"],
279
+ "fn1_valid": ep.get("fn1_valid", 1.0),
280
+ "fn2_no_halluc": ep.get("fn2_no_halluc", 1.0),
281
+ "fn3_env_score": ep.get("fn3_env_score", 0.0),
282
+ "phase": ep["phase"],
283
+ "actions": ep.get("actions", {}),
284
+ "running_best_reward": max(rewards_so_far),
285
+ "running_avg_score": round(sum(scores_so_far) / len(scores_so_far), 4),
286
+ "message": _phase_message(ep),
287
+ }
288
+
289
+
290
+ # ================================================================
291
+ # ENDPOINT 6 - GET /training/stream/{task_id} [SSE]
292
+ # ================================================================
293
+ @router.get("/stream/{task_id}")
294
+ async def stream_training_replay(
295
+ task_id: str,
296
+ delay_ms: int = 100,
297
+ start_episode: int = 1,
298
+ end_episode: Optional[int] = None,
299
+ ) -> StreamingResponse:
300
+ """Server-Sent Events endpoint for animated chart replay."""
301
+ log = _load_log(task_id)
302
+ eps = log["episodes"]
303
+ end = min(end_episode or len(eps), len(eps))
304
+ subset = eps[start_episode - 1: end]
305
+
306
+ async def generate():
307
+ meta_event = json.dumps({
308
+ "type": "meta",
309
+ "task_id": task_id,
310
+ "total_episodes": len(eps),
311
+ "summary": log["summary"],
312
+ "reward_functions": log.get("reward_functions", {}),
313
+ })
314
+ yield f"data: {meta_event}\n\n"
315
+
316
+ rewards_so_far: list[float] = []
317
+ scores_so_far: list[float] = []
318
+
319
+ for ep in subset:
320
+ rewards_so_far.append(ep["total_reward"])
321
+ scores_so_far.append(ep["score"])
322
+
323
+ event = json.dumps({
324
+ "type": "episode",
325
+ "episode": ep["episode"],
326
+ "total_episodes": len(eps),
327
+ "reward": ep["total_reward"],
328
+ "score": ep["score"],
329
+ "fn1_valid": ep.get("fn1_valid", 1.0),
330
+ "fn2_no_halluc": ep.get("fn2_no_halluc", 1.0),
331
+ "fn3_env_score": ep.get("fn3_env_score", 0.0),
332
+ "phase": ep["phase"],
333
+ "actions": ep.get("actions", {}),
334
+ "running_best": max(rewards_so_far),
335
+ "running_avg_score": round(
336
+ sum(scores_so_far) / len(scores_so_far), 4
337
+ ),
338
+ "message": _phase_message(ep),
339
+ })
340
+ yield f"data: {event}\n\n"
341
+ await asyncio.sleep(delay_ms / 1000.0)
342
+
343
+ done_event = json.dumps({
344
+ "type": "done",
345
+ "final_score": scores_so_far[-1] if scores_so_far else 0.0,
346
+ "best_reward": max(rewards_so_far) if rewards_so_far else 0.0,
347
+ "total_steps": len(subset),
348
+ })
349
+ yield f"data: {done_event}\n\n"
350
+
351
+ return StreamingResponse(
352
+ generate(),
353
+ media_type="text/event-stream",
354
+ headers={
355
+ "Cache-Control": "no-cache",
356
+ "X-Accel-Buffering": "no",
357
+ "Connection": "keep-alive",
358
+ },
359
+ )
360
+
361
+
362
+ # ================================================================
363
+ # ENDPOINT 7 - GET /training/comparison/{task_id}
364
+ # ================================================================
365
+ @router.get("/comparison/{task_id}")
366
+ async def before_after_comparison(task_id: str) -> dict:
367
+ """Returns before (heuristic) vs after (trained LLM)."""
368
+ log = _load_log(task_id)
369
+ baseline = HEURISTIC_BASELINES.get(task_id, {})
370
+ summary = log["summary"]
371
+
372
+ bef_score = baseline.get("score", 0.0)
373
+ after_score = summary["last_episode_score"]
374
+ delta = round(after_score - bef_score, 4)
375
+ pct = round((delta / bef_score) * 100, 1) if bef_score else 0.0
376
+
377
+ return {
378
+ "task_id": task_id,
379
+ "before": {
380
+ "label": "Heuristic Baseline (no AI)",
381
+ "score": bef_score,
382
+ "reward": baseline.get("reward", 0.0),
383
+ "completed": baseline.get("completed", 0),
384
+ "breaches": baseline.get("breaches", 0),
385
+ "avg_wait": baseline.get("avg_wait", 0.0),
386
+ },
387
+ "after": {
388
+ "label": f"GRPO Trained LLM ({log.get('base_model','')})",
389
+ "score": after_score,
390
+ "reward": summary["last_episode_reward"],
391
+ "avg_fn1_valid": summary.get("avg_fn1_valid", 0.0),
392
+ "avg_fn2_no_halluc": summary.get("avg_fn2_no_halluc", 0.0),
393
+ "invalid_steps": summary.get("invalid_action_steps", 0),
394
+ "hallucination_steps": summary.get("hallucination_steps", 0),
395
+ },
396
+ "improvement": {
397
+ "score_delta": delta,
398
+ "score_pct": pct,
399
+ "verdict": (
400
+ "LLM significantly outperforms baseline"
401
+ if delta > 0.10 else
402
+ "LLM moderately outperforms baseline"
403
+ if delta > 0.0 else
404
+ "LLM needs more training"
405
+ ),
406
+ },
407
+ }
app/tasks.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tasks.py — Gov Workflow OpenEnv v2.0
3
+ Three deterministic benchmark tasks: easy, medium, hard.
4
+ """
5
+ from app.models import (
6
+ TaskConfig, ServiceType, ScenarioMode, EventType, OfficerPool
7
+ )
8
+
9
# Easy benchmark: one service, a generous officer pool, and events disabled.
TASK_EASY = TaskConfig(
    task_id="district_backlog_easy",
    display_name="District Backlog Clearance — Revenue Office",
    difficulty="easy",
    scenario_mode=ScenarioMode.NORMAL,
    seed=42,  # fixed seed keeps the benchmark deterministic
    max_days=30,
    enabled_services=[ServiceType.INCOME_CERTIFICATE],
    arrival_rate_per_day={ServiceType.INCOME_CERTIFICATE: 12.0},
    digital_intake_ratio=0.65,
    initial_officer_pool=OfficerPool(
        total_officers=8, available_officers=8,
        allocated={ServiceType.INCOME_CERTIFICATE: 8},
    ),
    # Per-service probability overrides (replace the service defaults).
    missing_docs_probability_override={ServiceType.INCOME_CERTIFICATE: 0.20},
    field_verification_probability_override={ServiceType.INCOME_CERTIFICATE: 0.15},
    escalation_budget=5,
    fairness_threshold=None,  # fairness constraint disabled on easy
    event_probability=0.05,
    allowed_events=[EventType.NO_EVENT],  # effectively no disruptive events
)
30
+
31
# Medium benchmark: five services sharing 14 officers, one disruptive event type.
TASK_MEDIUM = TaskConfig(
    task_id="mixed_urgency_medium",
    display_name="Mixed Urgency Backlog — Taluka Office",
    difficulty="medium",
    scenario_mode=ScenarioMode.NORMAL,
    seed=123,  # fixed seed keeps the benchmark deterministic
    max_days=45,
    enabled_services=[
        ServiceType.INCOME_CERTIFICATE,
        ServiceType.LAND_REGISTRATION,
        ServiceType.PASSPORT,
        ServiceType.DRIVING_LICENSE,
        ServiceType.AADHAAR_CARD,
    ],
    # Mean daily arrivals per service (27 total/day).
    arrival_rate_per_day={
        ServiceType.INCOME_CERTIFICATE: 8.0,
        ServiceType.LAND_REGISTRATION: 4.0,
        ServiceType.PASSPORT: 4.0,
        ServiceType.DRIVING_LICENSE: 5.0,
        ServiceType.AADHAAR_CARD: 6.0,
    },
    digital_intake_ratio=0.72,
    initial_officer_pool=OfficerPool(
        total_officers=14, available_officers=14,
        allocated={
            ServiceType.INCOME_CERTIFICATE: 4,
            ServiceType.LAND_REGISTRATION: 2,
            ServiceType.PASSPORT: 2,
            ServiceType.DRIVING_LICENSE: 3,
            ServiceType.AADHAAR_CARD: 3,
        },
    ),
    # None => use the per-service default probabilities.
    missing_docs_probability_override=None,
    field_verification_probability_override=None,
    escalation_budget=8,
    fairness_threshold=None,
    event_probability=0.15,
    allowed_events=[EventType.DOCUMENT_REJECTION_SPIKE],
)
70
+
71
# Hard benchmark: CRISIS mode, highest arrival rates, fairness constraint,
# and the full set of disruptive events enabled.
TASK_HARD = TaskConfig(
    task_id="cross_department_hard",
    display_name="Cross-Department Crisis — District Collectorate",
    difficulty="hard",
    scenario_mode=ScenarioMode.CRISIS,
    seed=999,  # fixed seed keeps the benchmark deterministic
    max_days=60,
    enabled_services=[
        ServiceType.INCOME_CERTIFICATE,
        ServiceType.LAND_REGISTRATION,
        ServiceType.PASSPORT,
        ServiceType.DRIVING_LICENSE,
        ServiceType.AADHAAR_CARD,
    ],
    # Mean daily arrivals per service (38 total/day).
    arrival_rate_per_day={
        ServiceType.INCOME_CERTIFICATE: 11.0,
        ServiceType.LAND_REGISTRATION: 6.0,
        ServiceType.PASSPORT: 6.0,
        ServiceType.DRIVING_LICENSE: 7.0,
        ServiceType.AADHAAR_CARD: 8.0,
    },
    digital_intake_ratio=0.80,
    initial_officer_pool=OfficerPool(
        total_officers=18, available_officers=18,
        allocated={
            ServiceType.INCOME_CERTIFICATE: 5,
            ServiceType.LAND_REGISTRATION: 3,
            ServiceType.PASSPORT: 3,
            ServiceType.DRIVING_LICENSE: 3,
            ServiceType.AADHAAR_CARD: 4,
        },
    ),
    # None => use the per-service default probabilities.
    missing_docs_probability_override=None,
    field_verification_probability_override=None,
    escalation_budget=10,
    fairness_threshold=0.70,  # fairness constraint active on hard only
    event_probability=0.30,
    allowed_events=[
        EventType.SURGE_APPLICATIONS,
        EventType.OFFICER_UNAVAILABLE,
        EventType.DOCUMENT_REJECTION_SPIKE,
        EventType.REVENUE_DB_DELAY,
        EventType.SLA_ESCALATION_ORDER,
    ],
)
116
+
117
def make_extreme_variant(base_task: TaskConfig) -> TaskConfig:
    """Derive an EXTREME_OVERLOAD copy of a task.

    Event probability is tripled (capped at 1.0) and every event type
    except NO_EVENT is enabled. The base task is not modified.
    """
    extreme = base_task.model_copy(deep=True)
    extreme.task_id = f"{base_task.task_id}_extreme"
    extreme.display_name = f"{base_task.display_name} [EXTREME]"
    extreme.scenario_mode = ScenarioMode.EXTREME_OVERLOAD
    extreme.event_probability = min(1.0, base_task.event_probability * 3.0)
    extreme.allowed_events = [event for event in EventType if event != EventType.NO_EVENT]
    return extreme
125
+
126
# All runnable tasks keyed by task_id. Note only the easy task has its
# extreme variant registered here.
TASK_REGISTRY: dict = {
    "district_backlog_easy": TASK_EASY,
    "mixed_urgency_medium": TASK_MEDIUM,
    "cross_department_hard": TASK_HARD,
    "district_backlog_easy_extreme": make_extreme_variant(TASK_EASY),
}
132
+
133
def get_task(task_id: str) -> TaskConfig:
    """Look up a task configuration by id; raises ValueError for unknown ids."""
    task = TASK_REGISTRY.get(task_id)
    if task is None:
        raise ValueError(f"Unknown task_id '{task_id}'. Available: {list(TASK_REGISTRY)}")
    return task
137
+
138
def list_tasks() -> list:
    """All registered task ids, including extreme variants."""
    return [task_id for task_id in TASK_REGISTRY]
140
+
141
def list_benchmark_tasks() -> list:
    """The three core benchmark task ids (extreme variants excluded)."""
    benchmark_ids = (
        "district_backlog_easy",
        "mixed_urgency_medium",
        "cross_department_hard",
    )
    return list(benchmark_ids)
143
+
144
# Backwards-compatible alias: some callers import TASKS instead of TASK_REGISTRY.
TASKS = TASK_REGISTRY
app/training_jobs.py ADDED
@@ -0,0 +1,634 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import re
5
+ import shutil
6
+ import subprocess
7
+ import sys
8
+ import threading
9
+ import time
10
+ import math
11
+ from dataclasses import dataclass, field
12
+ from datetime import datetime, timezone
13
+ from pathlib import Path
14
+ from typing import Any, Literal
15
+ from uuid import uuid4
16
+
17
+ from app.persistence import PersistenceStore
18
+
19
# Lifecycle states of a training-job subprocess.
Status = Literal["queued", "running", "completed", "failed", "stopped"]

# "1,234/10,000"-style progress counters (commas allowed in either number).
_PROGRESS_RE = re.compile(r"(\d[\d,]*)/(\d[\d,]*)")
# One "| name | value |" row from an SB3-style metrics table.
_METRIC_ROW_RE = re.compile(r"\|\s*([a-zA-Z0-9_ ]+?)\s*\|\s*(-?\d+(?:\.\d+)?)\s*\|")
# "Eval num_timesteps=..., episode_reward=..." progress lines.
_EVAL_PROGRESS_RE = re.compile(
    r"Eval\s+num_timesteps=(\d+),\s*episode_reward=([-]?\d+(?:\.\d+)?)",
    re.IGNORECASE,
)
# "[Eval] <task> score=... reward=... completed=... sla_breaches=..." rows.
_EVAL_ROW_RE = re.compile(
    r"^\[Eval\]\s+([a-z_]+)\s+score=([0-9.]+)\s+reward=([-0-9.]+)\s+completed=(\d+)\s+sla_breaches=(\d+)$"
)
# "[Eval] Average grader score: ..." summary line.
_AVG_RE = re.compile(r"^\[Eval\]\s+Average grader score:\s+([0-9.]+)$")
# "[Eval] New best (recurrent) grader score: ..." line.
_BEST_GRADER_RE = re.compile(
    r"\[Eval\]\s+New best(?: recurrent)? grader score:\s+([0-9.]+)",
    re.IGNORECASE,
)
36
+
37
def _now() -> float:
    """Current wall-clock time as a Unix timestamp (seconds)."""
    return time.time()
39
+
40
+
41
+ def _tail_append(lines: list[str], line: str, max_size: int = 500) -> None:
42
+ lines.append(line.rstrip("\n"))
43
+ if len(lines) > max_size:
44
+ del lines[: len(lines) - max_size]
45
+
46
+
47
+ def _normalize_metric_key(raw: str) -> str:
48
+ return raw.strip().lower().replace(" ", "_")
49
+
50
+
51
def _parse_eval(stdout: str) -> tuple[list[dict[str, Any]], float | None]:
    """Extract per-task eval rows and the average grader score from trainer stdout.

    Returns (rows, avg); avg is None when no average line was printed, and
    the last average line wins when several are present.
    """
    rows: list[dict[str, Any]] = []
    avg: float | None = None
    for raw_line in stdout.splitlines():
        stripped = raw_line.strip()
        if not stripped:
            continue
        row_match = _EVAL_ROW_RE.match(stripped)
        if row_match is not None:
            rows.append(
                {
                    "task_id": row_match.group(1),
                    "grader_score": float(row_match.group(2)),
                    "total_reward": float(row_match.group(3)),
                    "total_completed": int(row_match.group(4)),
                    "total_sla_breaches": int(row_match.group(5)),
                }
            )
            continue
        avg_match = _AVG_RE.match(stripped)
        if avg_match is not None:
            avg = float(avg_match.group(1))
    return rows, avg
74
+
75
+
76
@dataclass
class TrainingJob:
    """In-memory record of one training subprocess plus its live handle.

    Mutable fields are guarded by `lock`; read a consistent view via
    snapshot().
    """

    # Identity / launch parameters.
    job_id: str
    phase: int
    timesteps: int
    n_envs: int
    seed: int
    config_path: str
    # Lifecycle timestamps (Unix seconds; None until the event happens).
    created_at: float = field(default_factory=_now)
    started_at: float | None = None
    updated_at: float = field(default_factory=_now)
    ended_at: float | None = None
    status: Status = "queued"
    progress: float = 0.0
    process_id: int | None = None
    command: list[str] = field(default_factory=list)  # argv used to launch
    output_model_path: str | None = None
    output_model_name: str | None = None
    latest_metrics: dict[str, float] = field(default_factory=dict)
    metric_history: list[dict[str, Any]] = field(default_factory=list)
    evaluation_rows: list[dict[str, Any]] = field(default_factory=list)
    evaluation_avg_score: float | None = None
    logs_tail: list[str] = field(default_factory=list)  # bounded stdout tail
    error_message: str | None = None
    return_code: int | None = None

    # Live runtime state: excluded from repr and not part of snapshot().
    process: subprocess.Popen[str] | None = field(default=None, repr=False)
    lock: threading.Lock = field(default_factory=threading.Lock, repr=False)
    last_persist_at: float = field(default_factory=lambda: 0.0, repr=False)

    def snapshot(self) -> dict[str, Any]:
        """Thread-safe, JSON-serializable copy of the persisted fields.

        NOTE: acquires self.lock (non-reentrant) — never call while
        already holding the lock.
        """
        with self.lock:
            return {
                "job_id": self.job_id,
                "phase": self.phase,
                "timesteps": self.timesteps,
                "n_envs": self.n_envs,
                "seed": self.seed,
                "config_path": self.config_path,
                "created_at": self.created_at,
                "started_at": self.started_at,
                "updated_at": self.updated_at,
                "ended_at": self.ended_at,
                "status": self.status,
                "progress": self.progress,
                "process_id": self.process_id,
                "command": self.command,
                "output_model_path": self.output_model_path,
                "output_model_name": self.output_model_name,
                # Containers are shallow-copied so callers cannot mutate
                # the live job state through the snapshot.
                "latest_metrics": dict(self.latest_metrics),
                "metric_history": list(self.metric_history),
                "evaluation_rows": list(self.evaluation_rows),
                "evaluation_avg_score": self.evaluation_avg_score,
                "logs_tail": list(self.logs_tail),
                "error_message": self.error_message,
                "return_code": self.return_code,
            }
133
+
134
+
135
+ class TrainingJobManager:
136
    def __init__(self, repo_root: Path, persistence: PersistenceStore | None = None) -> None:
        """Create the manager, resolve the artifact directory, reload persisted jobs.

        repo_root: repository root; used as cwd for spawned trainer processes.
        persistence: optional snapshot store; may be present but disabled.
        """
        self._repo_root = repo_root
        self._persistence = persistence
        self._jobs: dict[str, TrainingJob] = {}
        self._lock = threading.Lock()
        # Prefer the persistence-managed directory when available; otherwise
        # fall back to results/training_runs inside the repository.
        self._training_runs_root = (
            self._persistence.training_runs_dir
            if self._persistence is not None and self._persistence.enabled
            else self._repo_root / "results" / "training_runs"
        )
        self._load_persisted_jobs()
147
+
148
    def _load_persisted_jobs(self) -> None:
        """Rehydrate job records from the persistence store at startup.

        Malformed snapshots are skipped. Jobs that were queued/running when
        the server died are marked failed, since their OS process handles
        cannot be recovered across a restart.
        """
        if self._persistence is None or not self._persistence.enabled:
            return
        persisted = self._persistence.list_training_jobs(limit=500)
        with self._lock:
            for snap in persisted:
                try:
                    # Coerce every field defensively: stored snapshots may come
                    # from older schema versions or contain nulls.
                    job = TrainingJob(
                        job_id=str(snap["job_id"]),
                        phase=int(snap["phase"]),
                        timesteps=int(snap["timesteps"]),
                        n_envs=int(snap["n_envs"]),
                        seed=int(snap["seed"]),
                        config_path=str(snap.get("config_path") or ""),
                        created_at=float(snap.get("created_at") or _now()),
                        started_at=float(snap["started_at"]) if snap.get("started_at") is not None else None,
                        updated_at=float(snap.get("updated_at") or _now()),
                        ended_at=float(snap["ended_at"]) if snap.get("ended_at") is not None else None,
                        status=str(snap.get("status") or "failed"),
                        progress=float(snap.get("progress") or 0.0),
                        process_id=int(snap["process_id"]) if snap.get("process_id") is not None else None,
                        command=list(snap.get("command") or []),
                        output_model_path=snap.get("output_model_path"),
                        output_model_name=snap.get("output_model_name"),
                        latest_metrics=dict(snap.get("latest_metrics") or {}),
                        metric_history=list(snap.get("metric_history") or []),
                        evaluation_rows=list(snap.get("evaluation_rows") or []),
                        evaluation_avg_score=(
                            float(snap["evaluation_avg_score"])
                            if snap.get("evaluation_avg_score") is not None
                            else None
                        ),
                        logs_tail=list(snap.get("logs_tail") or []),
                        error_message=snap.get("error_message"),
                        return_code=int(snap["return_code"]) if snap.get("return_code") is not None else None,
                    )
                except Exception:
                    # Skip unreadable snapshots rather than failing startup.
                    continue

                # Process handles cannot survive a server restart. Recover to terminal state.
                if job.status in ("queued", "running"):
                    job.status = "failed"
                    msg = "Recovered after restart: previous process state unavailable."
                    job.error_message = f"{job.error_message} {msg}".strip() if job.error_message else msg
                    if job.ended_at is None:
                        job.ended_at = _now()
                job.process = None
                self._jobs[job.job_id] = job
196
+
197
    def clear_jobs(self, *, clear_artifacts: bool = False) -> int:
        """Forget every tracked job, terminating any still-live processes.

        clear_artifacts: also delete (and recreate) the training-runs dir.
        Returns the number of jobs removed.
        """
        to_stop: list[subprocess.Popen[str]] = []
        with self._lock:
            removed = len(self._jobs)
            for job in self._jobs.values():
                with job.lock:
                    proc = job.process
                    if proc is not None and job.status in ("queued", "running"):
                        to_stop.append(proc)
            self._jobs.clear()
        # Terminate outside the locks so a slow process cannot block the manager.
        for proc in to_stop:
            try:
                proc.terminate()
            except Exception:
                pass  # process may have already exited
        if self._persistence is not None and self._persistence.enabled:
            self._persistence.clear_training_jobs()
        if clear_artifacts:
            try:
                if self._training_runs_root.exists():
                    shutil.rmtree(self._training_runs_root, ignore_errors=True)
                self._training_runs_root.mkdir(parents=True, exist_ok=True)
            except Exception:
                # Best-effort cleanup; a failed delete must not fail the call.
                pass
        return removed
222
+
223
+ def _persist_job(self, job: TrainingJob) -> None:
224
+ if self._persistence is None or not self._persistence.enabled:
225
+ return
226
+ snapshot = job.snapshot()
227
+ self._persistence.upsert_training_job(snapshot)
228
+ with job.lock:
229
+ job.last_persist_at = _now()
230
+
231
+ def list_jobs(self) -> list[dict[str, Any]]:
232
+ with self._lock:
233
+ jobs = list(self._jobs.values())
234
+ jobs.sort(key=lambda x: x.created_at, reverse=True)
235
+ return [job.snapshot() for job in jobs]
236
+
237
+ def get_job(self, job_id: str) -> dict[str, Any] | None:
238
+ with self._lock:
239
+ job = self._jobs.get(job_id)
240
+ return None if job is None else job.snapshot()
241
+
242
    def start_job(
        self,
        *,
        phase: int,
        timesteps: int,
        n_envs: int,
        seed: int | None,
        config_path: str | None,
    ) -> dict[str, Any]:
        """Spawn a detached `rl.train_ppo` subprocess and register a job for it.

        seed: explicit seed, or None to derive one from the current time.
        config_path: explicit YAML config; defaults per phase when None.
        Returns the new job's snapshot.
        """
        job_id = str(uuid4())
        job_seed = int(seed if seed is not None else int(time.time()) % 1_000_000)
        cfg = config_path or (
            "rl/configs/ppo_easy.yaml" if phase == 1 else "rl/configs/curriculum.yaml"
        )
        job = TrainingJob(
            job_id=job_id,
            phase=phase,
            timesteps=timesteps,
            n_envs=n_envs,
            seed=job_seed,
            config_path=cfg,
        )

        with self._lock:
            self._jobs[job_id] = job

        # -u => unbuffered child stdout so the watcher sees lines promptly.
        cmd = [
            sys.executable,
            "-u",
            "-m",
            "rl.train_ppo",
            "--phase",
            str(phase),
            "--timesteps",
            str(timesteps),
            "--n-envs",
            str(n_envs),
            "--seed",
            str(job_seed),
        ]
        if phase == 1:
            # Keep Phase 1 UI responsive by emitting multiple eval checkpoints
            # across the requested run length instead of only near the end.
            phase1_eval_freq = max(128, int((timesteps / max(n_envs, 1)) / 15))
            cmd.extend(
                [
                    "--phase1-config",
                    cfg,
                    "--phase1-eval-freq",
                    str(phase1_eval_freq),
                ]
            )
        else:
            cmd.extend(["--phase2-config", cfg])

        env = os.environ.copy()
        env["PYTHONUNBUFFERED"] = "1"

        # stderr is merged into stdout; line-buffered text mode for streaming.
        proc = subprocess.Popen(
            cmd,
            cwd=str(self._repo_root),
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
        )

        with job.lock:
            job.command = cmd
            job.status = "running"
            job.started_at = _now()
            job.updated_at = _now()
            job.process_id = proc.pid
            job.process = proc
            _tail_append(job.logs_tail, f"[training_jobs] started pid={proc.pid}")
            _tail_append(job.logs_tail, f"[training_jobs] command: {' '.join(cmd)}")
        # NOTE(review): _persist_job() calls job.snapshot(), which re-acquires
        # the non-reentrant job.lock — it must stay OUTSIDE the block above.
        self._persist_job(job)

        t = threading.Thread(target=self._watch_job, args=(job,), daemon=True)
        t.start()

        return job.snapshot()
325
+
326
+ @staticmethod
327
+ def _append_metric_point_locked(
328
+ job: TrainingJob,
329
+ *,
330
+ timesteps: float | None,
331
+ reward: float | None = None,
332
+ score: float | None = None,
333
+ source: str | None = None,
334
+ max_points: int = 5000,
335
+ ) -> None:
336
+ """
337
+ Append (or merge) a structured metric point while holding job.lock.
338
+ """
339
+ if timesteps is None or not math.isfinite(float(timesteps)):
340
+ return
341
+
342
+ payload: dict[str, Any] = {"t": float(timesteps)}
343
+ if reward is not None and math.isfinite(float(reward)):
344
+ payload["ep_rew_mean"] = float(reward)
345
+ if score is not None and math.isfinite(float(score)):
346
+ payload["grader_score"] = float(score)
347
+ if source:
348
+ payload["source"] = str(source)
349
+
350
+ if "ep_rew_mean" not in payload and "grader_score" not in payload:
351
+ return
352
+
353
+ if job.metric_history and float(job.metric_history[-1].get("t", -1.0)) == float(payload["t"]):
354
+ job.metric_history[-1].update(payload)
355
+ else:
356
+ job.metric_history.append(payload)
357
+
358
+ if len(job.metric_history) > max_points:
359
+ del job.metric_history[: len(job.metric_history) - max_points]
360
+
361
+ def stop_job(self, job_id: str) -> dict[str, Any] | None:
362
+ with self._lock:
363
+ job = self._jobs.get(job_id)
364
+ if job is None:
365
+ return None
366
+
367
+ with job.lock:
368
+ proc = job.process
369
+ if proc is None or job.status not in ("running", "queued"):
370
+ return job.snapshot()
371
+ job.status = "stopped"
372
+ job.updated_at = _now()
373
+ self._persist_job(job)
374
+
375
+ try:
376
+ proc.terminate()
377
+ except Exception:
378
+ pass
379
+ return job.snapshot()
380
+
381
    def delete_job(self, job_id: str, *, clear_artifacts: bool = False) -> bool:
        """
        Remove a job from the registry, terminating it if still active.

        When ``clear_artifacts`` is True, also best-effort deletes the job's
        saved model file and its parent directory if that leaves it empty.
        Returns True if the job existed, False otherwise.
        """
        with self._lock:
            job = self._jobs.pop(job_id, None)
        if job is None:
            return False

        # Copy the fields we need under the job lock, then act without it.
        with job.lock:
            proc = job.process
            status = job.status
            output_model_path = job.output_model_path

        if proc is not None and status in ("queued", "running"):
            try:
                proc.terminate()
            except Exception:
                pass  # best-effort; process may already be gone

        if self._persistence is not None and self._persistence.enabled:
            self._persistence.delete_training_job(job_id)

        if clear_artifacts and output_model_path:
            try:
                out = Path(output_model_path)
                if out.exists() and out.is_file():
                    out.unlink(missing_ok=True)
                    # Tidy up the per-job directory if nothing else remains.
                    parent = out.parent
                    if parent.exists() and parent.is_dir() and not any(parent.iterdir()):
                        parent.rmdir()
            except Exception:
                pass  # artifact cleanup is best-effort only
        return True
412
+
413
    def _watch_job(self, job: TrainingJob) -> None:
        """
        Watcher-thread body: stream the training subprocess's stdout into the
        job record line by line, then reconcile the terminal status
        (completed / failed / stopped) once the process exits.
        """
        proc = job.process
        if proc is None or proc.stdout is None:
            with job.lock:
                job.status = "failed"
                job.error_message = "Training process failed to start."
                job.updated_at = _now()
                job.ended_at = _now()
                self._persist_job(job)
            return

        # Blocks until the child closes stdout (i.e. exits).
        for line in proc.stdout:
            self._update_from_line(job, line)

        return_code = proc.wait()
        with job.lock:
            job.return_code = int(return_code)
            if job.status == "stopped":
                # User-requested stop wins over whatever exit code resulted;
                # stop_job() already persisted the "stopped" status.
                job.ended_at = _now()
                job.updated_at = _now()
                job.process = None
                return
            if return_code == 0:
                job.status = "completed"
                job.progress = 1.0
            else:
                job.status = "failed"
                base_error = f"Training exited with code {return_code}."
                if not job.logs_tail:
                    # A crash before any output usually means a broken setup,
                    # not a training error — leave a hint for the operator.
                    _tail_append(
                        job.logs_tail,
                        "[training_jobs] Process ended before producing logs. "
                        "Check RL dependencies/environment and training command arguments.",
                    )
                job.error_message = base_error
            job.ended_at = _now()
            job.updated_at = _now()
            job.process = None
            self._persist_job(job)

        # Only successful runs produce a model worth copying/evaluating.
        if return_code == 0:
            self._finalize_artifacts(job)
456
+ def _update_from_line(self, job: TrainingJob, line: str) -> None:
457
+ line = line.rstrip("\n")
458
+ should_persist = False
459
+ with job.lock:
460
+ _tail_append(job.logs_tail, line)
461
+ job.updated_at = _now()
462
+
463
+ p = _PROGRESS_RE.search(line)
464
+ if p:
465
+ num = int(p.group(1).replace(",", ""))
466
+ den = int(p.group(2).replace(",", ""))
467
+ if den > 0:
468
+ job.progress = max(0.0, min(1.0, num / den))
469
+
470
+ ep = _EVAL_PROGRESS_RE.search(line)
471
+ if ep:
472
+ ts = int(ep.group(1))
473
+ rew = float(ep.group(2))
474
+ job.latest_metrics["total_timesteps"] = float(ts)
475
+ job.latest_metrics["ep_rew_mean"] = rew
476
+ self._append_metric_point_locked(
477
+ job,
478
+ timesteps=float(ts),
479
+ reward=rew,
480
+ source="eval_progress",
481
+ )
482
+ if job.timesteps > 0:
483
+ job.progress = max(0.0, min(1.0, ts / float(job.timesteps)))
484
+
485
+ m = _METRIC_ROW_RE.search(line)
486
+ if m:
487
+ key = _normalize_metric_key(m.group(1))
488
+ val = float(m.group(2))
489
+ interesting = {
490
+ "total_timesteps",
491
+ "ep_rew_mean",
492
+ "ep_len_mean",
493
+ "grader_score",
494
+ "mean_reward",
495
+ "mean_ep_length",
496
+ "episode_mean_sla_penalty",
497
+ "episode_mean_fairness_penalty",
498
+ "explained_variance",
499
+ "approx_kl",
500
+ }
501
+ if key in interesting:
502
+ job.latest_metrics[key] = val
503
+ current_ts = job.latest_metrics.get("total_timesteps")
504
+ if key == "total_timesteps":
505
+ self._append_metric_point_locked(
506
+ job,
507
+ timesteps=val,
508
+ reward=job.latest_metrics.get("ep_rew_mean"),
509
+ score=job.latest_metrics.get("grader_score") or job.latest_metrics.get("avg_grader_score"),
510
+ source="metrics_row_ts",
511
+ )
512
+ elif key in {"ep_rew_mean", "mean_reward"}:
513
+ self._append_metric_point_locked(
514
+ job,
515
+ timesteps=float(current_ts) if current_ts is not None else None,
516
+ reward=val,
517
+ source="metrics_row_reward",
518
+ )
519
+ elif key in {"grader_score", "avg_grader_score"}:
520
+ self._append_metric_point_locked(
521
+ job,
522
+ timesteps=float(current_ts) if current_ts is not None else None,
523
+ score=val,
524
+ source="metrics_row_score",
525
+ )
526
+
527
+ best = _BEST_GRADER_RE.search(line)
528
+ if best:
529
+ score = float(best.group(1))
530
+ job.latest_metrics["grader_score"] = score
531
+ fallback_ts = (
532
+ float(job.latest_metrics.get("total_timesteps"))
533
+ if "total_timesteps" in job.latest_metrics
534
+ else float(job.metric_history[-1]["t"]) if job.metric_history else 0.0
535
+ )
536
+ self._append_metric_point_locked(
537
+ job,
538
+ timesteps=fallback_ts if fallback_ts > 0 else float(len(job.metric_history) + 1),
539
+ score=score,
540
+ source="best_grader",
541
+ )
542
+
543
+ avg_line = _AVG_RE.match(line.strip())
544
+ if avg_line:
545
+ avg_score = float(avg_line.group(1))
546
+ job.latest_metrics["avg_grader_score"] = avg_score
547
+ fallback_ts = (
548
+ float(job.latest_metrics.get("total_timesteps"))
549
+ if "total_timesteps" in job.latest_metrics
550
+ else float(job.metric_history[-1]["t"]) if job.metric_history else 0.0
551
+ )
552
+ self._append_metric_point_locked(
553
+ job,
554
+ timesteps=fallback_ts if fallback_ts > 0 else float(len(job.metric_history) + 1),
555
+ score=avg_score,
556
+ source="avg_grader",
557
+ )
558
+ if job.updated_at - job.last_persist_at >= 1.5:
559
+ should_persist = True
560
+ if should_persist:
561
+ self._persist_job(job)
562
+
563
    def _finalize_artifacts(self, job: TrainingJob) -> None:
        """
        Copy the freshly trained model into a per-job artifacts directory and
        run a short evaluation pass, recording its scores on the job.

        Invoked only after a successful (exit code 0) training run.
        """
        src_name = "phase1_final.zip" if job.phase == 1 else "phase2_final.zip"
        src = self._repo_root / "results" / "best_model" / src_name
        run_dir = self._training_runs_root / job.job_id
        run_dir.mkdir(parents=True, exist_ok=True)

        # Keep a mirror under repo/results for local developer convenience.
        mirror_dir = self._repo_root / "results" / "training_runs" / job.job_id
        if mirror_dir != run_dir:
            mirror_dir.mkdir(parents=True, exist_ok=True)

        if src.exists():
            # Timestamped, job-scoped filename so repeated runs never collide.
            ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
            unique_name = f"phase{job.phase}_seed{job.seed}_{ts}_{job.job_id[:8]}.zip"
            out = run_dir / unique_name
            shutil.copy2(src, out)
            if mirror_dir != run_dir:
                try:
                    shutil.copy2(src, mirror_dir / unique_name)
                except Exception:
                    pass  # mirror copy is best-effort only
            with job.lock:
                job.output_model_path = str(out.resolve())
                job.output_model_name = unique_name
                job.updated_at = _now()

            model_type = "maskable"
            eval_cmd = [
                sys.executable,
                "-m",
                "rl.evaluate",
                "--model",
                str(out),
                "--episodes",
                "3",
                "--model-type",
                model_type,
            ]
            # Synchronous evaluation; check=False so a failing eval cannot
            # raise and kill the watcher thread.
            proc = subprocess.run(
                eval_cmd,
                cwd=str(self._repo_root),
                env=os.environ.copy(),
                capture_output=True,
                text=True,
                check=False,
            )
            rows, avg = _parse_eval(proc.stdout or "")
            with job.lock:
                job.evaluation_rows = rows
                job.evaluation_avg_score = avg
                if avg is not None:
                    job.latest_metrics["avg_grader_score"] = float(avg)
                    # Anchor the eval score at the last known timestep, or at
                    # the requested run length if none was ever parsed.
                    fallback_ts = (
                        float(job.latest_metrics.get("total_timesteps"))
                        if "total_timesteps" in job.latest_metrics
                        else float(job.timesteps)
                    )
                    self._append_metric_point_locked(
                        job,
                        timesteps=fallback_ts if fallback_ts > 0 else float(len(job.metric_history) + 1),
                        score=float(avg),
                        source="final_eval_avg",
                    )
                _tail_append(job.logs_tail, "----- EVALUATION -----")
                for ln in (proc.stdout or "").splitlines():
                    _tail_append(job.logs_tail, ln)
                if proc.returncode != 0 and not job.error_message:
                    job.error_message = f"Evaluation exited with code {proc.returncode}."
                job.updated_at = _now()
                self._persist_job(job)
        else:
            # No model produced — still persist the final job state.
            self._persist_job(job)
app/utils.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ utils.py — Shared pure-function helpers.
3
+ No imports from env.py or simulator.py (prevents circular imports).
4
+ """
5
+ from __future__ import annotations
6
+ from app.models import ServiceType
7
+
8
+
9
def completion_fairness_gap(
    arrived_by_service: dict,
    completed_by_service: dict,
) -> float:
    """
    Fairness gap = max completion rate difference across services.
    Returns 0.0 if only one service, 1.0 if perfectly unfair.
    """
    # Per-service completion rates; services with no arrivals are excluded.
    rates = [
        completed_by_service.get(svc, 0) / arrived
        for svc, arrived in arrived_by_service.items()
        if arrived > 0
    ]
    if len(rates) < 2:
        return 0.0
    return round(max(rates) - min(rates), 4)
audit.py ADDED
@@ -0,0 +1,367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import json
4
+ import inspect
5
+ import requests
6
+ import numpy as np
7
+ import yaml
8
+ import gymnasium as gym
9
+
10
+ from stable_baselines3.common.env_checker import check_env
11
+ from sb3_contrib import MaskablePPO
12
+
13
def print_result(check_num, desc, status, detail=""):
    """Emit one formatted audit-check result block to stdout."""
    report = f"[CHECK {check_num}] {desc}\nSTATUS: {status}\nDETAIL: {detail}\n"
    print(report)
15
+
16
# B1 — all 17 public schema names must resolve from app.models
try:
    from app.models import (
        ServiceType, StageType, PriorityMode, ActionType,
        OfficerPool, QueueSnapshot, ObservationModel, ActionModel,
        RewardModel, EpisodeStateModel, StepInfoModel,
        SimulationConfig, TaskConfig, GraderResult,
        BenchmarkResult, LiveRunResult, EpisodeMetrics
    )
    print_result("B1", "All 17 Schemas Present", "PASS", "All 17 names resolve")
except Exception as e:
    print_result("B1", "All 17 Schemas Present", "FAIL", str(e))

# B2 — canonical field names exist and legacy aliases are gone
try:
    fields = QueueSnapshot.model_fields
    assert 'total_pending' in fields, "total_pending missing"
    assert 'blocked_missing_docs' in fields, "blocked_missing_docs missing"
    assert 'active_cases' not in fields, "legacy field active_cases found"
    assert 'missing_docs_cases' not in fields, "legacy field found"

    m_fields = EpisodeMetrics.model_fields
    assert 'total_invalid_actions' in m_fields, "total_invalid_actions missing"
    print_result("B2", "Canonical Field Name Verification", "PASS", "Fields verified")
except Exception as e:
    print_result("B2", "Canonical Field Name Verification", "FAIL", str(e))

# B3 — enum members are UPPERCASE, with no lowercase aliases leaking through
try:
    from app.simulator import SimulationAgentMode
    assert hasattr(SimulationAgentMode, 'BASELINE_POLICY'), "BASELINE_POLICY missing"
    assert hasattr(SimulationAgentMode, 'RANDOM'), "RANDOM missing"
    assert hasattr(SimulationAgentMode, 'LLM_AGENT'), "LLM_AGENT missing"
    assert hasattr(SimulationAgentMode, 'HEURISTIC'), "HEURISTIC missing"
    # Accessing a lowercase alias must raise AttributeError for a PASS.
    try:
        _ = SimulationAgentMode.baseline_policy
        print_result("B3", "Enum Casing Check", "FAIL", "lowercase alias exists")
    except AttributeError:
        print_result("B3", "Enum Casing Check", "PASS", "No lowercase alias")
except Exception as e:
    print_result("B3", "Enum Casing Check", "FAIL", str(e))
57
+
58
# C1 — reset() must return a (observation, info) pair of non-empty dicts
try:
    from app.env import GovWorkflowEnv
    env = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
    obs, info = env.reset(seed=42)
    assert isinstance(obs, dict), f"obs is {type(obs)}, expected dict"
    assert isinstance(info, dict), f"info is {type(info)}, expected dict"
    assert len(obs) > 0, "empty observation"
    print_result("C1", "reset() Returns (observation, info)", "PASS", "Valid dicts returned")
except Exception as e:
    print_result("C1", "reset() Returns (observation, info)", "FAIL", str(e))

# C2 — step() must return the 5-tuple Gymnasium contract
try:
    from app.models import ActionModel, ActionType
    action = ActionModel(action_type=ActionType.ADVANCE_TIME)
    result = env.step(action)
    assert len(result) == 5, f"step() returned {len(result)} values, expected 5"
    obs2, reward, terminated, truncated, info2 = result
    assert isinstance(reward, float), f"reward type {type(reward)}"
    assert isinstance(terminated, bool), "terminated not bool"
    assert isinstance(truncated, bool), "truncated not bool"
    print_result("C2", "step() Returns (obs, reward, terminated, truncated, info)", "PASS", "Valid step signature")
except Exception as e:
    print_result("C2", "step() Returns (obs, reward, terminated, truncated, info)", "FAIL", str(e))

# C3 — observation dtypes on the SB3 gym wrapper (app-level env is checked
# via C1/C2; MaskablePPO consumes rl.gov_workflow_env spaces, see section J).
try:
    from rl.gov_workflow_env import GovWorkflowGymEnv
    genv = GovWorkflowGymEnv(task_id="district_backlog_easy", seed=42)
    gobs, _ = genv.reset(seed=42)
    # Recursively verify every ndarray leaf is float32 or int64.
    def check_dtype(obs_dict, path="obs"):
        for k, v in obs_dict.items():
            if isinstance(v, np.ndarray):
                assert v.dtype == np.float32 or v.dtype == np.int64, f"FAIL: {path}.{k} dtype={v.dtype}"
            elif isinstance(v, dict):
                check_dtype(v, f"{path}.{k}")
    check_dtype(gobs)
    print_result("C3", "Observation Space Dtype (SB3 Requirement)", "PASS", "Wrapper dict is fine")
except Exception as e:
    print_result("C3", "Observation Space Dtype (SB3 Requirement)", "FAIL", str(e))

# C4 — two same-seed resets must produce identical observations
# NOTE(review): C1 asserts reset() yields a dict, yet this section sets
# attributes and calls model_dump() on it — presumably reset() actually
# returns a model object; confirm against app.env.
try:
    env1 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
    env2 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
    obs1, _ = env1.reset(seed=42)
    obs2, _ = env2.reset(seed=42)

    # Strip volatile message field before comparison (as in tests)
    obs1.last_action_explanation = ""
    obs2.last_action_explanation = ""
    obs1.episode_id = ""
    obs2.episode_id = ""

    assert json.dumps(obs1.model_dump(), sort_keys=True, default=str) == json.dumps(obs2.model_dump(), sort_keys=True, default=str), "Different observations"
    print_result("C4", "Determinism Check", "PASS", "Observations match")
except Exception as e:
    print_result("C4", "Determinism Check", "FAIL", str(e))

# C5 — an episode must terminate/truncate within a bounded number of steps
try:
    env_c5 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
    obs, _ = env_c5.reset(seed=42)
    terminated = False
    truncated = False
    steps = 0
    max_steps = 500
    while not (terminated or truncated) and steps < max_steps:
        action = ActionModel(action_type=ActionType.ADVANCE_TIME)
        obs, reward, terminated, truncated, info = env_c5.step(action)
        steps += 1
    assert terminated or truncated, f"episode never ended after {max_steps} steps"
    print_result("C5", "Episode Termination Check", "PASS", f"ended at step {steps}")
except Exception as e:
    print_result("C5", "Episode Termination Check", "FAIL", str(e))
135
+
136
# D1 — reward signal must be dense (most steps carry a non-zero reward)
try:
    env_d1 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
    obs, _ = env_d1.reset(seed=42)
    rewards = []
    for _ in range(20):
        action = ActionModel(action_type=ActionType.ADVANCE_TIME)
        obs, reward, term, trunc, info = env_d1.step(action)
        rewards.append(reward)
        if term or trunc: break
    nonzero = sum(1 for r in rewards if abs(r) > 1e-6)
    assert nonzero > len(rewards) * 0.5, f"Only {nonzero}/{len(rewards)} steps had nonzero reward"
    print_result("D1", "Reward is Dense", "PASS", f"{nonzero}/{len(rewards)} steps nonzero")
except Exception as e:
    print_result("D1", "Reward is Dense", "FAIL", str(e))

# D2 — per-step rewards stay within a sane band (reuses D1's `rewards`)
try:
    for r in rewards:
        assert -100 <= r <= 100, f"reward {r} outside [-100, 100]"
    print_result("D2", "Reward Range Sanity Check", "PASS", "Rewards in bounds")
except Exception as e:
    print_result("D2", "Reward Range Sanity Check", "FAIL", str(e))

# D3 — a contextually invalid action must not yield a positive reward
try:
    from app.models import ServiceType
    env_d3 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
    obs, _ = env_d3.reset(seed=42)
    # Using a valid enum but perhaps invalid context to cause penalty.
    # Pydantic rejects arbitrary strings for enum fields, so escalate a
    # valid service that has no cases yet instead.
    bad_action = ActionModel(action_type=ActionType.ESCALATE_SERVICE, service_target=ServiceType.PASSPORT)
    obs, reward, term, trunc, info = env_d3.step(bad_action)
    assert reward <= 0, f"invalid action produced positive reward {reward}"
    print_result("D3", "Invalid Action Penalty Fires", "PASS", f"reward={reward:.3f}")
except Exception as e:
    print_result("D3", "Invalid Action Penalty Fires", "FAIL", str(e))
173
+
174
# E1 — every registered task config must load with a seed and positive horizon
try:
    from app.tasks import get_task
    for task_id in ["district_backlog_easy", "mixed_urgency_medium", "cross_department_hard"]:
        cfg = get_task(task_id)
        assert cfg.seed is not None, f"{task_id} has no seed"
        assert cfg.max_days > 0, f"{task_id} max_days={cfg.max_days}"
    print_result("E1", "All 3 Tasks Loadable", "PASS", "All config loaded")
except Exception as e:
    print_result("E1", "All 3 Tasks Loadable", "FAIL", str(e))

# E2 — grading a full passive episode must yield a float score in [0, 1]
try:
    from app.graders import grade_episode
    for task_id in ["district_backlog_easy", "mixed_urgency_medium", "cross_department_hard"]:
        env_e2 = GovWorkflowEnv(task_id=task_id, seed=42)
        obs, _ = env_e2.reset(seed=42)
        terminated = truncated = False
        while not (terminated or truncated):
            obs, reward, terminated, truncated, info = env_e2.step(ActionModel(action_type=ActionType.ADVANCE_TIME))
        episode_state = env_e2.state()
        score_res = grade_episode(episode_state)
        assert isinstance(score_res.score, float), f"grader returned {type(score_res.score)}"
        assert 0.0 <= score_res.score <= 1.0, f"score={score_res.score} outside [0.0, 1.0]"
    print_result("E2", "Graders Return [0.0, 1.0]", "PASS", "Valid scores returned")
except Exception as e:
    print_result("E2", "Graders Return [0.0, 1.0]", "FAIL", str(e))

# E3 — same seed, same policy → byte-identical grader score across two runs
try:
    scores = []
    for _ in range(2):
        env_e3 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
        obs, _ = env_e3.reset(seed=42)
        terminated = truncated = False
        while not (terminated or truncated):
            obs, r, terminated, truncated, info = env_e3.step(ActionModel(action_type=ActionType.ADVANCE_TIME))
        scores.append(grade_episode(env_e3.state()).score)
    assert scores[0] == scores[1], f"grader is non-deterministic: {scores}"
    print_result("E3", "Grader Scores Are Deterministic", "PASS", f"score={scores[0]:.4f} both runs")
except Exception as e:
    print_result("E3", "Grader Scores Are Deterministic", "FAIL", str(e))
216
+
217
# F1 — ADVANCE must walk the canonical 5-stage pipeline in order
try:
    from app.state_machine import StateMachine, StageType, WorkflowAction
    sm = StateMachine()
    stages = [StageType.SUBMISSION, StageType.DOCUMENT_VERIFICATION, StageType.FIELD_VERIFICATION, StageType.APPROVAL, StageType.ISSUANCE]
    for i in range(len(stages) - 1):
        current = stages[i]
        next_stage = stages[i + 1]
        result = sm.transition(current, WorkflowAction.ADVANCE)
        assert result == next_stage, f"{current} -> {result}, expected {next_stage}"
    print_result("F1", "All Legal Transitions Work", "PASS", "Transitions validated")
except Exception as e:
    print_result("F1", "All Legal Transitions Work", "FAIL", str(e))

# F2 — only ISSUANCE is terminal (reuses F1's `sm` instance)
try:
    assert sm.is_terminal(StageType.ISSUANCE) == True, "issuance not recognized as terminal"
    assert sm.is_terminal(StageType.SUBMISSION) == False, "submission wrongly marked terminal"
    print_result("F2", "Terminal State Recognized", "PASS", "Terminal states correct")
except Exception as e:
    print_result("F2", "Terminal State Recognized", "FAIL", str(e))
238
+
239
# G1 — app.simulator must only re-export; the class body lives in engine.py
try:
    import app.simulator as sim_module
    source = inspect.getfile(sim_module.LiveSimulationSession)
    assert 'engine' in source.lower(), f"LiveSimulationSession defined in {source}, not engine.py"
    print_result("G1", "simulator.py Is a Pure Shim", "PASS", "Shim logic confirmed")
except Exception as e:
    print_result("G1", "simulator.py Is a Pure Shim", "FAIL", str(e))

# G2 — the three engine exports must be importable and callable
try:
    from app.simulator import LiveSimulationSession, SimulationAgentMode, run_simulation
    assert callable(run_simulation), "run_simulation not callable"
    assert callable(LiveSimulationSession), "LiveSimulationSession not callable"
    print_result("G2", "All 3 Engine Exports Importable", "PASS", "Exports valid")
except Exception as e:
    print_result("G2", "All 3 Engine Exports Importable", "FAIL", str(e))

# G3 — construct a session, start it, take one step, and close it
try:
    session = LiveSimulationSession(
        task_id="district_backlog_easy",
        agent_mode=SimulationAgentMode.BASELINE_POLICY,
        seed=42,
        max_steps=10
    )
    start_info = session.start_line()
    assert isinstance(start_info, str), "start_line() did not return str"
    step_result, _, _ = session.step_once()
    assert "observation" in step_result, "step_once missing 'observation'"
    assert "reward" in step_result, "step_once missing 'reward'"
    print_result("G3", "LiveSimulationSession Full Lifecycle", "PASS", "Lifecycle valid")
    session.close()
except Exception as e:
    print_result("G3", "LiveSimulationSession Full Lifecycle", "FAIL", str(e))

# H2 / H3
# We will do H checks via curl/pytest in bash to test the live server.
277
+
278
# I1 — all four baseline policies must import and be callable
try:
    from app.baselines import (
        random_policy,
        backlog_clearance_policy as baseline_policy,
        greedy_sla_policy,
        fairness_aware_policy,
    )
    for name, fn in [
        ("random_policy", random_policy),
        ("baseline_policy", baseline_policy),
        ("greedy_sla_policy", greedy_sla_policy),
        ("fairness_aware_policy", fairness_aware_policy),
    ]:
        assert callable(fn), f"{name} is not callable"
    print_result("I1", "All 4 Policies Are Callable", "PASS", "Policies callable")
except Exception as e:
    print_result("I1", "All 4 Policies Are Callable", "FAIL", str(e))

# I2 — a policy invoked on a fresh observation must return an ActionModel
try:
    from app.baselines import greedy_sla_policy
    env_i2 = GovWorkflowEnv(task_id="district_backlog_easy", seed=42)
    obs_i2, _ = env_i2.reset(seed=42)
    action_i2 = greedy_sla_policy(obs_i2)
    assert isinstance(action_i2, ActionModel), f"policy returned {type(action_i2)}"
    print_result("I2", "Policy Returns Valid Action", "PASS", f"action_type={action_i2.action_type}")
except Exception as e:
    print_result("I2", "Policy Returns Valid Action", "FAIL", str(e))
307
+
308
# J1 — the gym wrapper must expose observation_space and action_space
try:
    env_j1 = GovWorkflowGymEnv(task_id="district_backlog_easy", seed=42)
    assert hasattr(env_j1, 'observation_space'), "no observation_space"
    assert hasattr(env_j1, 'action_space'), "no action_space"
    print_result("J1", "Gymnasium API Compliance", "PASS", "Spaces defined")
except Exception as e:
    print_result("J1", "Gymnasium API Compliance", "FAIL", str(e))

# J2 — action_masks() must exist and match the discrete action count
try:
    obs, _ = env_j1.reset(seed=42)
    assert hasattr(env_j1, 'action_masks'), "action_masks() method missing"
    masks = env_j1.action_masks()
    assert hasattr(masks, '__len__'), "action_masks() must return array-like"
    assert len(masks) == env_j1.action_space.n, f"mask length {len(masks)} != action_space.n {env_j1.action_space.n}"
    print_result("J2", "action_masks() Method Required by MaskablePPO", "PASS", f"n={len(masks)}")
except Exception as e:
    print_result("J2", "action_masks() Method Required by MaskablePPO", "FAIL", str(e))

# J3 — SB3's env_checker must accept the wrapper
try:
    check_env(env_j1, warn=True)
    print_result("J3", "SB3 VecEnv Compatibility", "PASS", "check_env passed")
except Exception as e:
    print_result("J3", "SB3 VecEnv Compatibility", "FAIL", str(e))

# J4 — MaskablePPO must construct against the wrapper without error
try:
    model = MaskablePPO("MlpPolicy", env_j1, verbose=0, seed=42)
    print_result("J4", "MaskablePPO Can Initialize", "PASS", "Model initialized")
except Exception as e:
    print_result("J4", "MaskablePPO Can Initialize", "FAIL", str(e))

# J5 — ten masked steps (always picking the first valid action) must not crash
try:
    obs, _ = env_j1.reset(seed=42)
    for step in range(10):
        masks = env_j1.action_masks()
        valid_actions = [i for i, m in enumerate(masks) if m]
        action = valid_actions[0] if valid_actions else 0
        obs, reward, terminated, truncated, info = env_j1.step(action)
        if terminated or truncated:
            obs, _ = env_j1.reset(seed=42)
    print_result("J5", "10-Step Rollout Without Crash", "PASS", "Rollout passed")
except Exception as e:
    print_result("J5", "10-Step Rollout Without Crash", "FAIL", str(e))
355
+
356
# M1 — openenv.yaml must parse and register all three benchmark tasks
try:
    with open("openenv.yaml", "r") as f:
        config = yaml.safe_load(f)
    assert "tasks" in config, "openenv.yaml missing 'tasks' key"
    task_ids = [t["id"] for t in config["tasks"]]
    for required in ["district_backlog_easy", "mixed_urgency_medium", "cross_department_hard"]:
        assert required in task_ids, f"{required} missing from openenv.yaml"
    print_result("M1", "YAML Loads and Contains All 3 Tasks", "PASS", f"{len(task_ids)} tasks registered")
except Exception as e:
    print_result("M1", "YAML Loads and Contains All 3 Tasks", "FAIL", str(e))
baseline_openai.py ADDED
@@ -0,0 +1,983 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
# ── Path bootstrap ──────────────────────────────────────────────────────────
import sys
from pathlib import Path

# Make the repo root importable regardless of the CWD this script runs from;
# must happen before any `app.*` import below.
_ROOT = Path(__file__).resolve().parent
if str(_ROOT) not in sys.path:
    sys.path.insert(0, str(_ROOT))

# ── Load .env ────────────────────────────────────────────────────────────────
from dotenv import load_dotenv
# override=False: real environment variables win over .env entries.
load_dotenv(dotenv_path=_ROOT / ".env", override=False)
14
+
15
+ import argparse
16
+ import json
17
+ import os
18
+ import random as _random
19
+ import re
20
+ import time
21
+ from dataclasses import asdict, dataclass, field
22
+ from datetime import datetime
23
+ from typing import Any
24
+
25
+ from app.env import GovWorkflowEnv
26
+ from app.models import (
27
+ ActionModel,
28
+ ActionType,
29
+ ObservationModel,
30
+ PriorityMode,
31
+ ServiceType,
32
+ StepInfoModel,
33
+ )
34
+ from app.tasks import get_task, list_tasks
35
+ from app.api_gateway import create_env_gateway, TransportMode
36
+
37
+
38
# ══════════════════════════════════════════════════════════════════════════════
# SECTION 1 — Model Registry & Per-Task Pools
# ══════════════════════════════════════════════════════════════════════════════

NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1"

# ── Global 10-Model Sequential Pool (April 2026 — Verified on NVIDIA NIM) ────
# Ordered fallback chain: entries are tried top-down, so put the preferred
# model first and the cheapest/most reliable safety nets last.
#
# CHANGES FROM PREVIOUS VERSION:
# REMOVED (invalid/unavailable IDs):
#   qwen/qwen3-next-80b-a3b-instruct     → invalid model ID
#   moonshotai/kimi-k2-instruct-0905     → not on NVIDIA NIM
#   deepseek-ai/deepseek-v3.2            → wrong ID (use deepseek-v3)
#   google/gemma-3-27b-it                → outdated (gemma-4 released)
#   mistralai/mixtral-8x22b-instruct-v0.1 → replaced by newer models
# ADDED (verified April 2026):
#   deepseek-ai/deepseek-v4-flash        → FREE endpoint, 1M context
#   deepseek-ai/deepseek-r1              → reasoning, 685B MoE
#   nvidia/nemotron-3-super-120b-a12b    → hybrid Mamba-Transformer, 1M ctx
#   minimaxai/minimax-m2.7               → FREE endpoint, 230B
#   google/gemma-4-31b-it                → latest Gemma on NVIDIA NIM
#   qwen/qwen3.5-122b-a10b               → latest Qwen on NVIDIA NIM

GLOBAL_MODEL_POOL: list[str] = [
    "meta/llama-3.3-70b-instruct",         # 1. Primary
    "deepseek-ai/deepseek-v4-flash",       # 2. FREE endpoint — 1M context
    "deepseek-ai/deepseek-r1",             # 3. Reasoning — 685B MoE
    "nvidia/nemotron-3-super-120b-a12b",   # 4. NVIDIA native — 1M ctx
    "qwen/qwen3.5-122b-a10b",              # 5. Qwen3.5 — tool calling
    "deepseek-ai/deepseek-v3",             # 6. DeepSeek V3 — hybrid mode
    "minimaxai/minimax-m2.7",              # 7. FREE endpoint — 230B
    "google/gemma-4-31b-it",               # 8. Dense 31B — agentic workflows
    "microsoft/phi-4-mini-instruct",       # 9. Reliable small — last resort
    "meta/llama-3.1-8b-instruct",          # 10. Fastest safety fallback
]

# ── Free endpoint pool (KEY 2 — NVIDIA_API_KEY_2 fallback) ───────────────────
FREE_POOL: list[str] = [
    "deepseek-ai/deepseek-v4-flash",
    "minimaxai/minimax-m2.7",
    "microsoft/phi-4-mini-instruct",
    "meta/llama-3.1-8b-instruct",
]

# ── Fixed seeds ────────────────────────────────────────────────────────────────
# One reproducible seed per benchmark task.
TASK_SEEDS: dict[str, int] = {
    "district_backlog_easy": 11,
    "mixed_urgency_medium": 22,
    "cross_department_hard": 33,
}

# Sampling parameters for every LLM call, plus a hard cap on episode length.
LLM_TEMPERATURE = 0.2
LLM_TOP_P = 0.7
LLM_MAX_TOKENS = 512
MAX_LLM_STEPS = 80

# Base delay between LLM calls (rate limiting); jitter is added on top.
LLM_CALL_DELAY = float(os.environ.get("LLM_CALL_DELAY", "12.0"))
LLM_CALL_JITTER = 1.0
96
+
97
+ # ── Enum fields that MUST be lowercase for Pydantic StrEnum ──────────────────
98
+ _ENUM_FIELDS = {"action_type", "priority_mode", "service", "target_service"}
99
+
100
+ # ── Canonical field names (Phase 2 update — do NOT use legacy names) ─────────
101
+ # CORRECT WRONG (legacy)
102
+ # snap.blocked_missing_docs ← snap.missing_docs_cases
103
+ # snap.total_pending ← snap.active_cases
104
+ # obs.fairness_gap ← obs.fairness_index
105
+
106
+
107
# ══════════════════════════════════════════════════════════════════════════════
108
+ # SECTION 2 — Model Rotator
109
+ # ══════════════════════════════════════════════════════════════════════════════
110
+
111
class ModelRotator:
    """Round-robin cursor over GLOBAL_MODEL_POOL that logs every rotation."""

    def __init__(self, task_id: str) -> None:
        self._task_id = task_id
        self._sequence: list[str] = list(GLOBAL_MODEL_POOL)
        self._index = 0
        self._rotation_log: list[dict[str, str]] = []

    @property
    def current(self) -> str:
        """Model ID to use for the next API call."""
        return self._sequence[self._index]

    @property
    def current_key_id(self) -> int:
        """API-key slot: 2 for free-tier models, 1 otherwise."""
        return 2 if self.current in FREE_POOL else 1

    @property
    def pool_exhausted(self) -> bool:
        """True once 50 rotations have been recorded — stop calling the LLM."""
        return len(self._rotation_log) >= 50

    def rotate(self, reason: str = "error") -> str | None:
        """Advance to the next model (wrapping around) and record why."""
        previous = self.current
        self._rotation_log.append({"from": previous, "reason": reason})
        self._index = (self._index + 1) % len(self._sequence)
        replacement = self._sequence[self._index]
        print(
            f"\n 🔄 Model rotated: "
            f"{previous.split('/')[-1]} → {replacement.split('/')[-1]} ({reason})"
        )
        return replacement

    def summary(self) -> list[dict]:
        """Shallow copy of the rotation log."""
        return self._rotation_log.copy()
143
+
144
+
145
+ # ══════════════════════════════════════════════════════════════════════════════
146
+ # SECTION 3 — Result Dataclasses
147
+ # ══════════════════════════════════════════════════════════════════════════════
148
+
149
@dataclass
class StepRecord:
    """One row of the per-step trace kept for reporting and JSON export."""

    step: int             # 1-based step counter within the episode
    day: int              # simulated day after the action resolved
    action_type: str      # ActionType value that was taken
    reward: float         # reward returned by the environment for this step
    invalid: bool         # True if the env flagged the action as invalid
    total_backlog: int    # pending applications after the step
    total_completed: int  # completed applications after the step
    model_used: str       # model ID (or "heuristic") that chose the action
    notes: list[str]      # de-duplicated explanations/effects from StepInfo
160
+
161
+
162
@dataclass
class EpisodeResult:
    """Aggregate outcome of one graded episode, serialisable via asdict()."""

    task_id: str
    agent: str
    primary_model: str
    seed: int
    score: float
    grader_name: str
    total_steps: int
    total_reward: float
    total_completed: int
    total_sla_breaches: int
    total_invalid_actions: int
    final_day: int
    terminated: bool
    truncated: bool
    grader_metrics: dict[str, float]
    step_log: list[StepRecord]
    elapsed_seconds: float
    model_rotations: list[dict]
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())

    def summary(self) -> str:
        """One-paragraph recap including per-model usage counts."""
        # Tally how many steps each model answered, preserving first-seen order.
        usage: dict[str, int] = {}
        for record in self.step_log:
            usage.setdefault(record.model_used, 0)
            usage[record.model_used] += 1
        usage_str = ", ".join(
            f"{model.split('/')[-1]} ({count})" for model, count in usage.items()
        )
        return (
            f"[{self.task_id}] agent={self.agent} "
            f"score={self.score:.3f} reward={self.total_reward:.2f} "
            f"completed={self.total_completed} breaches={self.total_sla_breaches} "
            f"invalid={self.total_invalid_actions} "
            f"rotations={len(self.model_rotations)} "
            f"day={self.final_day} steps={self.total_steps} "
            f"time={self.elapsed_seconds:.1f}s\n"
            f" Model usage: {usage_str}"
        )
201
+
202
+
203
+ # ══════════════════════════════════════════════════════════════════════════════
204
+ # SECTION 4 — Direct Environment Wrapper
205
+ # ══════════════════════════════════════════════════════════════════════════════
206
+
207
class DirectEnvClient:
    """
    In-process client: drives a GovWorkflowEnv directly, with no HTTP hop.

    NOTE(review): an earlier comment claimed grade() calls
    grade_episode(task_id, episode_state); the code actually passes a single
    argument — whatever self.env.state() returns. Confirm this matches
    app.graders.grade_episode's real signature.
    """

    def __init__(self, task_id: str, seed: int) -> None:
        self.env = GovWorkflowEnv(task_id=task_id)
        self._seed = seed            # fixed seed re-applied on every reset()
        self._task_id = task_id
        self.terminated = False
        self.truncated = False

    def reset(self) -> ObservationModel:
        """Start a fresh episode with the stored seed."""
        obs, _ = self.env.reset(seed=self._seed)
        self.terminated = False
        self.truncated = False
        return obs

    def step(
        self, action: ActionModel
    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]:
        """Forward one action; cache terminated/truncated for the run loop."""
        obs, reward, terminated, truncated, info = self.env.step(action)
        self.terminated = terminated
        self.truncated = truncated
        return obs, reward, terminated, truncated, info

    def grade(self) -> tuple[float, str, dict[str, float]]:
        """Score the finished episode via app.graders.grade_episode."""
        # Imported locally — presumably to avoid a module-level cycle; confirm.
        from app.graders import grade_episode
        episode_state = self.env.state()
        result = grade_episode(episode_state)
        return result.score, result.grader_name, result.metrics
240
+
241
+
242
+ # ══════════════════════════════════════════════════════════════════════════════
243
+ # SECTION 5 — HTTP Environment Wrapper
244
+ # ══════════════════════════════════════════════════════════════════════════════
245
+
246
class HttpEnvClient:
    """HTTP client for a remotely served environment (reset/step/grade)."""

    def __init__(
        self, task_id: str, seed: int, base_url: str = "http://localhost:7860"
    ) -> None:
        # `requests` is only needed for HTTP mode, so import lazily.
        try:
            import requests as _req
        except ImportError:
            raise ImportError("pip install requests — required for --mode http")
        self._req = _req
        self._task_id = task_id
        self._seed = seed
        self._base_url = base_url.rstrip("/")
        self._session_id: str | None = None
        self.terminated = False
        self.truncated = False

    def _post(self, path: str, body: dict) -> dict:
        """POST body as JSON and return the decoded reply (raises on non-2xx)."""
        url = f"{self._base_url}{path}"
        response = self._req.post(url, json=body, timeout=30)
        response.raise_for_status()
        return response.json()

    def reset(self) -> ObservationModel:
        """Open a fresh server-side session and return its first observation."""
        payload = {"task_id": self._task_id, "seed": self._seed}
        data = self._post("/reset", payload)
        self._session_id = data["session_id"]
        self.terminated = False
        self.truncated = False
        return ObservationModel(**data["observation"])

    def step(
        self, action: ActionModel
    ) -> tuple[ObservationModel, float, bool, bool, StepInfoModel]:
        """Send one action to the session; cache terminated/truncated flags."""
        body = {
            "session_id": self._session_id,
            "action": action.model_dump(exclude_none=True),
        }
        data = self._post("/step", body)
        obs = ObservationModel(**data["observation"])
        info = StepInfoModel(**data["info"])
        terminated = data["terminated"]
        truncated = data["truncated"]
        self.terminated = terminated
        self.truncated = truncated
        return obs, data["reward"], terminated, truncated, info

    def grade(self) -> tuple[float, str, dict[str, float]]:
        """Ask the server to grade the finished session."""
        data = self._post("/grade", {"session_id": self._session_id})
        return data["score"], data["grader_name"], data["metrics"]
292
+
293
+
294
+ # ══════════════════════════════════════════════════════════════════════════════
295
+ # SECTION 6 — Heuristic Baseline Agent
296
+ # ══════════════════════════════════════════════════════════════════════════════
297
+
298
class HeuristicAgent:
    """
    Rule-based agent. Requires no API key.

    Policy: at most one administrative action per simulated day, then always
    advance time. Priority order: set priority mode once → deploy idle
    officers → rebalance officers on a ~2x load imbalance → request missing
    documents (throttled) → escalate in the last 5 days → advance_time.

    FIXED field names (Phase 2 canonical):
      snap.blocked_missing_docs ← was snap.missing_docs_cases
      snap.total_pending        ← was snap.active_cases
    """

    def __init__(self) -> None:
        # Episode-local flags; _admin_action_day throttles to one admin
        # action per day, _last_doc_request_day throttles doc requests.
        self._priority_set = False
        self._admin_action_day: int | None = None
        self._last_doc_request_day: int | None = None

    def reset(self) -> None:
        """Clear all episode-local state before a new run."""
        self._priority_set = False
        self._admin_action_day = None
        self._last_doc_request_day = None

    # Class-level label so run_episode can log a "model" name uniformly
    # with LLMAgent.current_model.
    current_model = "heuristic"

    def rotation_summary(self) -> list[dict]:
        """No model pool here — the rotation log is always empty."""
        return []

    def update_reward(self, _: float) -> None:
        """Reward feedback is ignored; the rules do not adapt to reward."""
        pass

    @staticmethod
    def _svc_key(service: str | ServiceType) -> str:
        # Normalise ServiceType enums and raw strings to one string key.
        return service.value if isinstance(service, ServiceType) else str(service)

    def act(self, obs: ObservationModel) -> ActionModel:
        """Choose one action for the current observation (see class docstring)."""
        snapshots = list(obs.queue_snapshots.values())

        # One admin action per simulated day; then always advance time.
        if self._admin_action_day == obs.day:
            return ActionModel(action_type=ActionType.ADVANCE_TIME)

        # 1. Set priority mode once
        if not self._priority_set:
            self._priority_set = True
            self._admin_action_day = obs.day
            return ActionModel(
                action_type=ActionType.SET_PRIORITY_MODE,
                priority_mode=PriorityMode.URGENT_FIRST,
            )

        # 2. Allocate any idle officer to the currently most loaded service.
        if obs.officer_pool.idle_officers > 0 and snapshots:
            most_loaded = max(snapshots, key=lambda s: s.total_pending)
            self._admin_action_day = obs.day
            return ActionModel(
                action_type=ActionType.ASSIGN_CAPACITY,
                capacity_assignment={most_loaded.service_type.value: 1},
            )

        days_left = obs.max_days - obs.day

        # 3. Reallocate one officer if load/officer ratio is clearly imbalanced.
        allocated = {
            self._svc_key(svc): int(off)
            for svc, off in obs.officer_pool.allocated.items()
        }
        if snapshots and len(allocated) >= 2:
            case_counts = {s.service_type.value: s.total_pending for s in snapshots}

            best_src: tuple[str, int] | None = None
            best_tgt: tuple[str, int] | None = None
            src_ratio = float("inf")
            tgt_ratio = -1.0

            # Source: least-loaded service that can spare an officer (keeps >= 1).
            for svc, officers in allocated.items():
                if officers <= 1:
                    continue
                ratio = case_counts.get(svc, 0) / max(officers, 1)
                if ratio < src_ratio:
                    src_ratio = ratio
                    best_src = (svc, officers)

            # Target: highest load per officer.
            for svc, officers in allocated.items():
                ratio = case_counts.get(svc, 0) / max(officers, 1)
                if ratio > tgt_ratio:
                    tgt_ratio = ratio
                    best_tgt = (svc, officers)

            # Only move an officer when the imbalance exceeds 1.8x.
            if best_src and best_tgt and best_src[0] != best_tgt[0] and tgt_ratio > src_ratio * 1.8:
                self._admin_action_day = obs.day
                return ActionModel(
                    action_type=ActionType.REALLOCATE_OFFICERS,
                    reallocation_delta={best_src[0]: -1, best_tgt[0]: 1},
                )

        # 4. Request missing docs conservatively to avoid repeatedly resetting
        # resolution days for already-requested cases.
        can_request_docs = (
            any(s.blocked_missing_docs > 0 for s in snapshots)
            and (
                self._last_doc_request_day is None
                or (obs.day - self._last_doc_request_day) >= 3
                or obs.pending_doc_resolutions == 0
            )
        )
        if can_request_docs:
            # Prefer the queue with the most blocked cases, then SLA risk,
            # then overall backlog.
            target_docs = max(
                snapshots,
                key=lambda s: (s.blocked_missing_docs, s.current_sla_risk, s.total_pending),
            )
            if target_docs.blocked_missing_docs > 0:
                self._admin_action_day = obs.day
                self._last_doc_request_day = obs.day
                return ActionModel(
                    action_type=ActionType.REQUEST_MISSING_DOCUMENTS,
                    service_target=target_docs.service_type,
                )

        # 5. Escalate in the final window when urgency is present.
        if obs.escalation_budget_remaining > 0:
            urgent_snaps = [s for s in snapshots if s.urgent_pending > 0]
            if urgent_snaps and days_left <= 5:
                target = max(urgent_snaps, key=lambda s: s.urgent_pending)
                self._admin_action_day = obs.day
                return ActionModel(
                    action_type=ActionType.ESCALATE_SERVICE,
                    escalation_target=target.service_type,
                )

        # 6. Default — progress simulation.
        return ActionModel(action_type=ActionType.ADVANCE_TIME)
426
+
427
+
428
+ # ══════════════════════════════════════════════════════════════════════════════
429
+ # SECTION 7 — System Prompt
430
+ # ══════════════════════════════════════════════════════════════════════════════
431
+
432
# System prompt sent on every LLM call (prepended to the rolling history).
# NOTE(review): the JSON field names shown to the model here ("service",
# "officer_delta") differ from the ActionModel fields HeuristicAgent uses
# (capacity_assignment, service_target, escalation_target,
# reallocation_delta) — confirm ActionModel accepts these as aliases.
SYSTEM_PROMPT = """You are an expert government-office workflow manager AI.
Your job is to control a simulated government district office processing citizen
applications across multiple services.

SERVICES: passport, driving_license, gst_registration, income_certificate,
caste_certificate, birth_certificate, land_registration

WORKFLOW STAGES (in order):
submission → document_verification → field_verification → approval → issuance

YOUR GOAL: Maximise the episode score (0.0 to 1.0) by:
- Completing as many applications as possible within SLA deadlines
- Prioritising urgent cases (urgency level 3 > 2 > 1)
- Keeping all services fairly served (no service left behind)
- Using escalations sparingly — only when a case is about to breach SLA
- Keeping officers productively busy (not idle)

QUEUE STATUS FIELDS EXPLAINED:
backlog = total_pending applications in queue
missing_docs = blocked_missing_docs (stuck waiting for documents)
urgent = urgent_cases (high-urgency applications)
breached = breached_cases (already past SLA deadline)

AVAILABLE ACTIONS — return exactly ONE per turn as JSON:

1. Set queue processing order (do this FIRST on day 0 only):
{"action_type": "set_priority_mode", "priority_mode": "urgent_first"}
priority_mode options: urgent_first | oldest_first | balanced | backlog_clearance

2. Deploy a reserve officer to a service (day 0 only if reserves available):
{"action_type": "assign_capacity", "service": "driving_license", "officer_delta": 1}

3. Unblock a stuck application with missing documents:
{"action_type": "request_missing_documents", "service": "driving_license"}

4. Escalate one case to emergency priority (VERY LIMITED — use wisely):
{"action_type": "escalate_service", "service": "income_certificate"}

5. Move officer between services (only when load ratio > 4x):
{"action_type": "reallocate_officers", "service": "birth_certificate",
"target_service": "driving_license", "officer_delta": 1}

6. Let one working day pass — THE ONLY ACTION THAT PROCESSES APPLICATIONS:
{"action_type": "advance_time"}

CRITICAL RULES:
- ALL values MUST be lowercase: driving_license NOT DRIVING_LICENSE
- advance_time is the ONLY action that earns progress reward
- Do NOT chain more than 2 admin actions before calling advance_time
- Do NOT escalate before (max_days - 5) unless case already breached SLA
- Do NOT reallocate if source service has fewer than 2 officers

OPTIMAL STRATEGY:
Day 0: set_priority_mode → assign_capacity (if reserves > 0) → advance_time
Every day: request_missing_documents (ONE service, highest missing_docs) → advance_time
Final 5: escalate_service (urgent/breached only) → advance_time

RESPONSE FORMAT — return ONLY a raw JSON object, nothing else:
CORRECT: {"action_type": "advance_time"}
CORRECT: {"action_type": "request_missing_documents", "service": "driving_license"}
WRONG: ```json\n{"action_type": "ADVANCE_TIME"}```
"""
494
+
495
+
496
+ # ══════════════════════════════════════════════════════════════════════════════
497
+ # SECTION 8 — JSON Extraction with Lowercase Normaliser
498
+ # ══════════════════════════════════════════════════════════════════════════════
499
+
500
+ def _extract_json_action(raw: str) -> dict[str, Any]:
501
+ cleaned = re.sub(r"```(?:json)?", "", raw).strip()
502
+ parsed: dict[str, Any] | None = None
503
+
504
+ try:
505
+ parsed = json.loads(cleaned)
506
+ except json.JSONDecodeError:
507
+ pass
508
+
509
+ if parsed is None:
510
+ match = re.search(r"\{[^{}]*\}", cleaned, re.DOTALL)
511
+ if match:
512
+ try:
513
+ parsed = json.loads(match.group())
514
+ except json.JSONDecodeError:
515
+ pass
516
+
517
+ if parsed is None:
518
+ print(f" ⚠ JSON parse failed, falling back to advance_time. Raw: {raw[:120]!r}")
519
+ return {"action_type": "advance_time"}
520
+
521
+ for enum_field in _ENUM_FIELDS:
522
+ if enum_field in parsed and isinstance(parsed[enum_field], str):
523
+ parsed[enum_field] = parsed[enum_field].lower()
524
+
525
+ return parsed
526
+
527
+
528
+ # ══════════════════════════════════════════════════════════════════════════════
529
+ # SECTION 9 — Observation → User Message Builder
530
+ # ══════════════════════════════════════════════════════════════════════════════
531
+
532
+ def _build_user_message(
533
+ obs: ObservationModel, step_num: int, cumulative_reward: float
534
+ ) -> str:
535
+ """
536
+ FIXED field names (Phase 2 canonical):
537
+ snap.total_pending ← was snap.active_cases
538
+ snap.blocked_missing_docs ← was snap.missing_docs_cases
539
+ """
540
+ queue_lines = []
541
+ for snap in obs.queue_snapshots:
542
+ officers = obs.officer_pool.allocations.get(snap.service, 0)
543
+ queue_lines.append(
544
+ f" {snap.service:<22}: "
545
+ f"backlog={snap.total_pending:>3} "
546
+ f"officers={officers} "
547
+ f"missing_docs={snap.blocked_missing_docs:>2} "
548
+ f"urgent={snap.urgent_cases} "
549
+ f"breached={snap.breached_cases} "
550
+ f"avg_age={snap.avg_age_days:.1f}d"
551
+ )
552
+ return (
553
+ f"STEP {step_num} | Day {obs.day}/{obs.max_days} "
554
+ f"| Days remaining: {obs.max_days - obs.day}\n"
555
+ f"Cumulative reward: {cumulative_reward:.2f}\n"
556
+ f"Priority mode: {obs.priority_mode}\n"
557
+ f"Reserve officers: {obs.officer_pool.reserve_officers}\n"
558
+ f"Escalation budget remaining: {obs.escalation_budget_remaining}\n"
559
+ f"Total pending: {obs.total_backlog} "
560
+ f"| Completed: {obs.total_completed} "
561
+ f"| SLA breaches: {obs.total_sla_breaches}\n"
562
+ f"Fairness gap: {obs.fairness_gap:.3f}\n\n"
563
+ f"QUEUE STATUS:\n" + "\n".join(queue_lines) + "\n\n"
564
+ f"Return a single JSON action object. All values lowercase."
565
+ )
566
+
567
+
568
# ══════════════════════════════════════════════════════════════════════════════
569
+ # SECTION 10 — LLM Agent with Model Rotation
570
+ # ══════════════════════════════════════════════════════════════════════════════
571
+
572
class LLMAgent:
    """
    LLM-driven agent calling NVIDIA NIM's OpenAI-compatible chat API,
    rotating through GLOBAL_MODEL_POOL whenever a request fails.

    FIX: the chat response was read as `response.choices.message.content`;
    `choices` is a list, so every single call raised AttributeError, was
    caught by the generic handler, and forced a spurious model rotation.
    It now correctly reads `response.choices[0].message.content`.
    """

    def __init__(
        self,
        task_id: str,
        model_override: str | None = None,
        api_key: str | None = None,
    ) -> None:
        # `openai` is only required for the LLM agent, so import lazily.
        try:
            from openai import OpenAI
            self._OpenAI = OpenAI
        except ImportError:
            raise ImportError("pip install openai — required for LLM agent")

        resolved_key = api_key or os.environ.get("NVIDIA_API_KEY", "")
        self._api_key_2 = os.environ.get("NVIDIA_API_KEY_2", "")

        if not resolved_key:
            raise ValueError(
                "NVIDIA_API_KEY not set.\n"
                " .env file : NVIDIA_API_KEY=nvapi-xxxxxxxxxxxx\n"
                " Get free key: https://build.nvidia.com/explore/discover"
            )

        self._api_key = resolved_key
        self._task_id = task_id
        self._rotator = ModelRotator(task_id)

        # Optional pinned model: move it to the front of the rotation order.
        if model_override:
            seq = [model_override] + [
                m for m in self._rotator._sequence if m != model_override
            ]
            self._rotator._sequence = seq

        self._client = self._OpenAI(base_url=NVIDIA_BASE_URL, api_key=self._api_key)
        # Secondary client for FREE_POOL models when a second key is set.
        self._client_2 = (
            self._OpenAI(base_url=NVIDIA_BASE_URL, api_key=self._api_key_2)
            if self._api_key_2 else None
        )
        self._history: list[dict[str, str]] = []
        self._cumulative_reward = 0.0

    @property
    def current_model(self) -> str:
        """Model ID the next call will use."""
        return self._rotator.current

    def reset(self) -> None:
        """Clear chat history, reward tally, and rotation state."""
        self._history = []
        self._cumulative_reward = 0.0
        self._rotator = ModelRotator(self._task_id)

    def update_reward(self, reward: float) -> None:
        """Accumulate reward so it can be surfaced in the next prompt."""
        self._cumulative_reward += reward

    def rotation_summary(self) -> list[dict]:
        """Rotation log collected during this episode."""
        return self._rotator.summary()

    def act(self, obs: ObservationModel, step_num: int) -> ActionModel:
        """Query the current model for one action; rotate models on failure."""
        if self._rotator.pool_exhausted:
            print(" ⚠ Pool exhausted — returning advance_time")
            return ActionModel(action_type=ActionType.ADVANCE_TIME)

        user_message = _build_user_message(obs, step_num, self._cumulative_reward)
        self._history.append({"role": "user", "content": user_message})

        # Bound the rolling context to the 20 most recent messages.
        if len(self._history) > 20:
            self._history = self._history[-20:]

        messages = [{"role": "system", "content": SYSTEM_PROMPT}] + self._history
        raw_reply = ""

        while True:
            try:
                # FREE_POOL models use the secondary key/client when available.
                active_client = self._client
                if self._rotator.current_key_id == 2 and self._client_2:
                    active_client = self._client_2

                response = active_client.chat.completions.create(
                    model=self._rotator.current,
                    messages=messages,
                    temperature=LLM_TEMPERATURE,
                    top_p=LLM_TOP_P,
                    max_tokens=LLM_MAX_TOKENS,
                    timeout=30,
                )
                # FIX: `choices` is a list — read the first choice.
                raw_reply = response.choices[0].message.content or ""
                break

            except KeyboardInterrupt:
                raise

            except Exception as exc:
                err_name = type(exc).__name__
                err_msg = str(exc)[:120]
                print(f" ⚠ {err_name} on {self._rotator.current.split('/')[-1]}: {err_msg}")
                self._rotator.rotate(reason=err_name)
                time.sleep(1.0)
                if self._rotator.pool_exhausted:
                    return ActionModel(action_type=ActionType.ADVANCE_TIME)

        self._history.append({"role": "assistant", "content": raw_reply})
        action_dict = _extract_json_action(raw_reply)

        try:
            return ActionModel(**action_dict)
        except Exception as exc:
            print(f" ⚠ ActionModel parse failed ({exc}), using advance_time")
            return ActionModel(action_type=ActionType.ADVANCE_TIME)
679
+
680
+
681
+ # ══════════════════════════════════════════════════════════════════════════════
682
+ # SECTION 11 — Episode Runner
683
+ # ══════════════════════════════════════════════════════════════════════════════
684
+
685
def run_episode(
    task_id: str,
    agent_type: str,
    model_override: str | None,
    mode: TransportMode,
    server_url: str,
    api_key: str | None,
    verbose: bool,
    max_steps: int = MAX_LLM_STEPS,
    delay_override: float | None = None,
) -> EpisodeResult:
    """Run one full episode of `task_id` with the chosen agent and grade it.

    agent_type is "llm" or "heuristic"; mode selects the env transport
    (direct/http/auto via create_env_gateway). Returns an EpisodeResult
    with the per-step log, grader metrics, and model-rotation history.
    """
    # Fixed seed per task for reproducibility; fall back to the task's own.
    seed = TASK_SEEDS.get(task_id, get_task(task_id).seed)
    delay = delay_override if delay_override is not None else LLM_CALL_DELAY

    force_fastapi = os.getenv("FORCE_FASTAPI_GATEWAY", "0").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    env_api_prefix = os.getenv("OPENENV_ENV_API_PREFIX", "").strip()
    client = create_env_gateway(
        task_id=task_id,
        seed=seed,
        mode=mode,  # type: ignore[arg-type]
        base_url=server_url,
        api_prefix=env_api_prefix,
        enforce_fastapi=force_fastapi,
    )

    # Build the agent; primary_label is what the leaderboard shows.
    if agent_type == "llm":
        agent: HeuristicAgent | LLMAgent = LLMAgent(
            task_id=task_id,
            model_override=model_override,
            api_key=api_key,
        )
        primary_label = agent.current_model
    else:
        agent = HeuristicAgent()
        primary_label = "heuristic"

    agent.reset()
    obs = client.reset()

    step_log: list[StepRecord] = []
    total_reward = 0.0
    total_invalid = 0
    step_num = 0
    start = time.perf_counter()

    # ── Header banner ────────────────────────────────────────────────────
    print(f"\n{'═'*65}")
    print(f" Task : {task_id}")
    if agent_type == "llm":
        k1 = "✅ loaded" if os.environ.get("NVIDIA_API_KEY", "") else "❌ MISSING"
        k2 = "✅ loaded" if os.environ.get("NVIDIA_API_KEY_2", "") else "⚠ not set"
        print(f" KEY 1 : {k1} KEY 2 : {k2}")
        pool_short = " → ".join(m.split("/")[-1][:14] for m in GLOBAL_MODEL_POOL)
        print(f" Pool : {pool_short}")
    resolved_mode = getattr(client, "transport", mode)
    print(f" Agent : {agent_type} | Mode: {resolved_mode} | Seed: {seed}")
    print(f" Max steps: {max_steps} | Delay: {delay}s")
    print(f"{'═'*65}")

    # ── Main loop: one agent action per env step ─────────────────────────
    while not (client.terminated or client.truncated) and step_num < max_steps:
        step_num += 1
        current_model = agent.current_model

        # LLMAgent.act takes the step number; HeuristicAgent.act does not.
        if agent_type == "llm":
            action = agent.act(obs, step_num)
        else:
            action = agent.act(obs)

        obs, reward, terminated, truncated, info = client.step(action)
        agent.update_reward(reward)

        total_reward += reward
        if info.invalid_action:
            total_invalid += 1

        # Collect human-readable notes from both the legacy `notes` field
        # and the newer explanation/effects fields, de-duplicated in order.
        step_notes: list[str] = []
        legacy_notes = getattr(info, "notes", None)
        if isinstance(legacy_notes, list):
            step_notes.extend(str(n).strip() for n in legacy_notes if str(n).strip())
        elif isinstance(legacy_notes, str) and legacy_notes.strip():
            step_notes.append(legacy_notes.strip())

        if info.action_explanation.strip():
            step_notes.append(info.action_explanation.strip())
        step_notes.extend(s.strip() for s in info.effects_resolved_this_step if s.strip())
        step_notes = list(dict.fromkeys(step_notes))

        record = StepRecord(
            step=step_num,
            day=obs.day,
            action_type=action.action_type.value,
            reward=round(reward, 4),
            invalid=info.invalid_action,
            total_backlog=obs.total_backlog,
            total_completed=obs.total_completed,
            model_used=current_model,
            notes=step_notes,
        )
        step_log.append(record)

        if verbose:
            status = "❌" if info.invalid_action else "✅"
            model_tag = (
                f"[{current_model.split('/')[-1][:22]}]"
                if agent_type == "llm" else ""
            )
            print(
                f" step={step_num:3d} day={obs.day:2d} "
                f"action={action.action_type.value:<28} "
                f"reward={reward:+.3f} {status} {model_tag}"
            )
            if step_notes:
                print(f" notes: {step_notes}")

        # Throttle LLM calls (rate limits); jitter spreads request timing.
        if agent_type == "llm":
            actual_delay = delay + _random.uniform(-LLM_CALL_JITTER, LLM_CALL_JITTER)
            if not verbose:
                print(
                    f" Step {step_num}/{max_steps} — sleeping {actual_delay:.1f}s "
                    f"[{current_model.split('/')[-1][:20]}]",
                    end="\r", flush=True,
                )
            time.sleep(max(1.0, actual_delay))
            if not verbose:
                print(" " * 80, end="\r", flush=True)

    # ── Grade and report ─────────────────────────────────────────────────
    score, grader_name, grader_metrics = client.grade()
    elapsed = round(time.perf_counter() - start, 2)
    rotations = agent.rotation_summary()

    print(f"\n{'-'*65}")
    print(f" SCORE : {score:.3f} / 1.000 (grader: {grader_name})")
    print(f" Reward : {total_reward:.2f} | Steps: {step_num}")
    print(f" Completed: {obs.total_completed} | SLA breaches: {obs.total_sla_breaches}")
    print(f" Invalid actions: {total_invalid} | Model rotations: {len(rotations)}")
    print(f" Time: {elapsed}s")
    print(f" Grader metrics:")
    for metric, value in grader_metrics.items():
        # Simple 20-char bar chart; assumes metric values are in [0, 1].
        bar = "█" * int(value * 20)
        print(f" {metric:<34} {value:.3f} {bar}")
    if rotations:
        print(f" Rotation log:")
        for r in rotations:
            print(f" {r['from'].split('/')[-1]:<30} → rotated ({r['reason']})")
    print(f"{'-'*65}")

    return EpisodeResult(
        task_id=task_id,
        agent=agent_type,
        primary_model=primary_label,
        seed=seed,
        score=score,
        grader_name=grader_name,
        total_steps=step_num,
        total_reward=round(total_reward, 4),
        total_completed=obs.total_completed,
        total_sla_breaches=obs.total_sla_breaches,
        total_invalid_actions=total_invalid,
        final_day=obs.day,
        terminated=client.terminated,
        truncated=client.truncated,
        grader_metrics=grader_metrics,
        step_log=step_log,
        elapsed_seconds=elapsed,
        model_rotations=rotations,
    )
855
+
856
+
857
+ # ══════════════════════════════════════════════════════════════════════════════
858
+ # SECTION 12 — Reporter
859
+ # ══════════════════════════════════════════════════════════════════════════════
860
+
861
def save_results(results: list[EpisodeResult], out_dir: Path) -> Path:
    """Serialise episode results to out_dir/baseline_run_<timestamp>.json.

    FIX: previously divided by len(results) unconditionally, so an empty
    result list crashed with ZeroDivisionError; average_score now falls
    back to 0.0.

    Returns the path of the written file.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_path = out_dir / f"baseline_run_{ts}.json"
    avg_score = (
        round(sum(r.score for r in results) / len(results), 4) if results else 0.0
    )
    payload = {
        "run_timestamp": datetime.now().isoformat(),
        "total_episodes": len(results),
        "average_score": avg_score,
        "model_pool": GLOBAL_MODEL_POOL,
        "free_pool": FREE_POOL,
        "episodes": [asdict(r) for r in results],
    }
    out_path.write_text(json.dumps(payload, indent=2))
    return out_path
875
+
876
+
877
def print_leaderboard(results: list[EpisodeResult]) -> None:
    """Print a score-sorted leaderboard table to stdout.

    FIX: an empty result list previously crashed with ZeroDivisionError
    when computing the average; it now prints an empty table and returns.
    """
    print(f"\n{'═'*72}")
    print(" LEADERBOARD")
    print(f"{'═'*72}")
    if not results:
        print(" (no episodes)")
        print(f"{'═'*72}\n")
        return
    header = (
        f" {'TASK':<32} {'MODEL':<24} {'SCORE':>7} "
        f"{'REWARD':>8} {'DONE':>5} {'ROT':>4}"
    )
    print(header)
    print(f" {'-'*32} {'-'*24} {'-'*7} {'-'*8} {'-'*5} {'-'*4}")
    for r in sorted(results, key=lambda x: -x.score):
        model_label = r.primary_model.split("/")[-1][:23]
        print(
            f" {r.task_id:<32} {model_label:<24} {r.score:>7.3f} "
            f"{r.total_reward:>8.2f} {r.total_completed:>5} "
            f"{len(r.model_rotations):>4}"
        )
    avg = sum(r.score for r in results) / len(results)
    print(f" {'-'*32} {'-'*24} {'-'*7} {'-'*8} {'-'*5} {'-'*4}")
    print(f" {'AVERAGE':<32} {'':<24} {avg:>7.3f}")
    print(f"{'═'*72}\n")
898
+
899
+
900
+ # ══════════════════════════════════════════════════════════════════════════════
901
+ # SECTION 13 — CLI Entry Point
902
+ # ══════════════════════════════════════════════════════════════════════════════
903
+
904
def build_parser() -> argparse.ArgumentParser:
    """Construct the CLI argument parser for the baseline runner."""
    epilog = """
10-model pool (April 2026):
 llama-3.3-70b → deepseek-v4-flash → deepseek-r1 → nemotron-3-super →
 qwen3.5-122b → deepseek-v3 → minimax-m2.7 → gemma-4-31b →
 phi-4-mini → llama-3.1-8b

Examples:
 python baseline_openai.py --agent heuristic --verbose
 python baseline_openai.py --agent llm --task district_backlog_easy --verbose
 python baseline_openai.py --agent llm --task all --save-results
 python baseline_openai.py --agent llm --model deepseek-ai/deepseek-v4-flash
 python baseline_openai.py --mode http --url http://localhost:7860 --agent llm
 python baseline_openai.py --mode auto --url http://localhost:7860 --agent llm
"""
    parser = argparse.ArgumentParser(
        description="Gov Workflow OpenEnv — Multi-Model Rotating LLM Baseline",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog,
    )
    parser.add_argument("--agent", choices=["llm", "heuristic"], default="heuristic")
    parser.add_argument("--task", choices=list_tasks() + ["all"], default="all")
    parser.add_argument("--model", default=None)
    parser.add_argument("--mode", choices=["direct", "http", "auto"], default="auto")
    parser.add_argument("--url", default="http://localhost:7860")
    parser.add_argument("--max-steps", type=int, default=MAX_LLM_STEPS)
    parser.add_argument("--delay", type=float, default=None)
    parser.add_argument("--api-key", default=None)
    parser.add_argument("--verbose", action="store_true")
    parser.add_argument("--save-results", action="store_true")
    return parser
934
+
935
+
936
def main() -> None:
    """CLI entry point: parse args, run each selected task, print leaderboard."""
    args = build_parser().parse_args()
    tasks = list_tasks() if args.task == "all" else [args.task]

    # Banner describing the run configuration.
    print(f"\n{'═'*65}")
    print(" Gov Workflow OpenEnv — Baseline Runner (April 2026)")
    print(f" Agent : {args.agent.upper()}")
    if args.agent == "llm":
        pool_disp = " → ".join(m.split("/")[-1][:12] for m in GLOBAL_MODEL_POOL)
        print(f" Pool : {pool_disp}")
    print(f" Mode : {args.mode} | Tasks: {', '.join(tasks)}")
    print(f"{'═'*65}")

    # Fail fast with setup instructions if the LLM agent has no API key.
    if args.agent == "llm":
        key = args.api_key or os.environ.get("NVIDIA_API_KEY", "")
        if not key:
            print("\n❌ NVIDIA_API_KEY not set.")
            print(" .env file : NVIDIA_API_KEY=nvapi-xxxx")
            print(" PowerShell : $env:NVIDIA_API_KEY='nvapi-xxxx'")
            print(" Get free key: https://build.nvidia.com/explore/discover\n")
            sys.exit(1)
    else:
        key = None

    # Run every selected task sequentially and collect the results.
    results: list[EpisodeResult] = []
    for task_id in tasks:
        result = run_episode(
            task_id=task_id,
            agent_type=args.agent,
            model_override=args.model,
            mode=args.mode,
            server_url=args.url,
            api_key=key,
            verbose=args.verbose,
            max_steps=args.max_steps,
            delay_override=args.delay,
        )
        results.append(result)

    print_leaderboard(results)

    if args.save_results:
        out = save_results(results, Path("results"))
        print(f" Results saved → {out}\n")
980
+
981
+
982
# Script entry point.
if __name__ == "__main__":
    main()
client.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Typed HTTP client for Gov Workflow OpenEnv.
3
+
4
+ This keeps a simple OpenEnv-style client interface:
5
+ reset() -> observation wrapper
6
+ step(action) -> step wrapper
7
+ state() -> state wrapper
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass
13
+ from typing import Any, TYPE_CHECKING
14
+
15
+ import requests
16
+ try:
17
+ from openenv.core import EnvClient
18
+ from openenv.core.env_client import StepResult
19
+ except ModuleNotFoundError:
20
+ EnvClient = None # type: ignore[assignment]
21
+ StepResult = None # type: ignore[assignment]
22
+
23
+ if TYPE_CHECKING:
24
+ from app.models import ActionModel, EpisodeStateModel, ObservationModel, StepInfoModel
25
+
26
+
27
@dataclass
class ClientStepResult:
    # One step's outcome as returned by GovWorkflowClient.step().
    # Mirrors the Gymnasium-style 5-tuple plus a typed info payload;
    # all values come directly from the server's /step JSON response.
    observation: "ObservationModel"  # post-action environment observation
    reward: float                    # scalar reward for this step
    done: bool                       # episode-finished flag as reported by the server
    terminated: bool                 # server-reported terminal flag (env-rule ending)
    truncated: bool                  # server-reported truncation flag (step/time limit)
    info: "StepInfoModel"            # structured per-step diagnostics
35
+
36
+
37
class GovWorkflowClient:
    """Small typed client for the FastAPI deployment.

    Wraps the three OpenEnv-style endpoints (``/reset``, ``/step``,
    ``/state``) and tracks the server-issued session id between calls.
    """

    def __init__(self, base_url: str, timeout: float = 30.0) -> None:
        """
        Args:
            base_url: Root URL of the deployment, e.g. "http://localhost:7860".
                A trailing slash is stripped so paths can be appended directly.
            timeout: Per-request timeout in seconds. Defaults to 30.0, which
                was previously hard-coded; existing callers are unaffected.
        """
        self.base_url = base_url.rstrip("/")
        self.session_id: str | None = None
        self.timeout = timeout

    def _post(self, path: str, body: dict[str, Any]) -> dict[str, Any]:
        """POST ``body`` as JSON to ``path`` and return the decoded response.

        Raises:
            requests.HTTPError: on any non-2xx response.
        """
        response = requests.post(
            f"{self.base_url}{path}", json=body, timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()

    def reset(self, task_id: str = "district_backlog_easy", seed: int | None = None) -> "ObservationModel":
        """Start a new episode and return the initial observation.

        Stores the server-issued session id for subsequent step()/state() calls.
        ``seed`` is only sent when provided, so the server may randomize otherwise.
        """
        # Deferred import keeps module import light and avoids a hard
        # dependency on app.models until the client is actually used.
        from app.models import ObservationModel

        payload: dict[str, Any] = {"task_id": task_id}
        if seed is not None:
            payload["seed"] = seed
        data = self._post("/reset", payload)
        self.session_id = data["session_id"]
        return ObservationModel(**data["observation"])

    def step(self, action: "ActionModel") -> ClientStepResult:
        """Apply ``action`` to the current episode and return the step result.

        Raises:
            RuntimeError: if reset() has not been called first.
        """
        from app.models import ObservationModel, StepInfoModel

        if not self.session_id:
            raise RuntimeError("Session not initialized. Call reset() first.")
        data = self._post(
            "/step",
            {
                "session_id": self.session_id,
                # exclude_none keeps the wire payload minimal.
                "action": action.model_dump(exclude_none=True),
            },
        )
        return ClientStepResult(
            observation=ObservationModel(**data["observation"]),
            reward=float(data["reward"]),
            done=bool(data["done"]),
            terminated=bool(data["terminated"]),
            truncated=bool(data["truncated"]),
            info=StepInfoModel(**data["info"]),
        )

    def state(self, include_action_history: bool = False) -> "EpisodeStateModel":
        """Fetch the current episode state from the server.

        Args:
            include_action_history: when True, asks the server to embed the
                full action history in the returned state.

        Raises:
            RuntimeError: if reset() has not been called first.
        """
        from app.models import EpisodeStateModel

        if not self.session_id:
            raise RuntimeError("Session not initialized. Call reset() first.")
        data = self._post(
            "/state",
            {
                "session_id": self.session_id,
                "include_action_history": include_action_history,
            },
        )
        return EpisodeStateModel(**data["state"])
93
+
94
+
95
# Define the OpenEnv-native client only when the optional `openenv` package
# imported successfully at module top; otherwise install a placeholder that
# fails loudly on construction.
if EnvClient is not None and StepResult is not None:
    class GovWorkflowOpenEnvClient(
        EnvClient["ActionModel", "ObservationModel", "EpisodeStateModel"]
    ):
        """
        OpenEnv-native websocket client.

        This class is additive and does not replace the existing HTTP client above.

        Only the (de)serialization hooks are overridden here; transport and
        session handling are presumably inherited from EnvClient — confirm
        against the installed openenv version.
        """

        def _step_payload(self, action: "ActionModel") -> dict[str, Any]:
            # Serialize the action for the wire; exclude_none keeps the
            # payload minimal and mode="json" yields JSON-safe values.
            return action.model_dump(exclude_none=True, mode="json")

        def _parse_result(self, payload: dict[str, Any]) -> StepResult["ObservationModel"]:
            # Deferred import avoids a hard app.models dependency at module load.
            from app.models import ObservationModel

            # Missing "observation" falls back to {}, so ObservationModel's
            # field defaults (if any) apply — TODO confirm all fields optional.
            observation_payload = payload.get("observation", {})
            obs = ObservationModel(**observation_payload)
            return StepResult(
                observation=obs,
                # reward may be None when the server omits it.
                reward=payload.get("reward"),
                done=bool(payload.get("done", False)),
            )

        def _parse_state(self, payload: dict[str, Any]) -> "EpisodeStateModel":
            from app.models import EpisodeStateModel

            # Accept either a {"state": {...}} envelope or a bare state dict.
            state_payload = payload.get("state", payload)
            return EpisodeStateModel(**state_payload)
else:
    class GovWorkflowOpenEnvClient:  # type: ignore[no-redef]
        """
        Placeholder when optional `openenv` package is unavailable.
        """

        def __init__(self, *args: Any, **kwargs: Any) -> None:
            # Fail at construction time (not import time) so the rest of this
            # module stays usable without openenv installed.
            raise ModuleNotFoundError(
                "GovWorkflowOpenEnvClient requires the optional 'openenv' package. "
                "Install it to use websocket OpenEnv client features."
            )
docs/FRONTEND_WORKFLOW.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Frontend Workflow
2
+
3
+ The frontend is React-based, backend-driven, and served directly by FastAPI.
4
+
5
+ ## Access
6
+
7
+ - UI: `/ui`
8
+ - Assets: `/ui/assets/*`
9
+ - API namespace: `/api/*`
10
+
11
+ ## What Is Visible in UI
12
+
13
+ 1. OpenEnv API execution (`reset` / `step` / `state` / `grade`)
14
+ 2. Heuristic baseline agent runs (`/api/autostep`, `/api/benchmark`)
15
+ 3. Trained RL model execution (Phase 2/3 checkpoints via `/api/rl/run`)
16
+ 4. Trained RL evaluation across tasks (`/api/rl/evaluate`)
17
+ 5. Script-level workflow visibility for:
18
+ - `baseline_openai.py`
19
+ - `inference.py`
20
+
21
+ ## Frontend API Surface
22
+
23
+ - Core:
24
+ - `GET /api/health`
25
+ - `GET /api/tasks`
26
+ - `GET /api/agents`
27
+ - `POST /api/reset`
28
+ - `POST /api/step`
29
+ - `POST /api/state`
30
+ - `POST /api/grade`
31
+ - `GET /api/sessions`
32
+ - `DELETE /api/sessions/{session_id}`
33
+ - Baseline execution:
34
+ - `POST /api/autostep`
35
+ - `POST /api/benchmark`
36
+ - Workflow visibility:
37
+ - `GET /api/workflows/components`
38
+ - `POST /api/workflows/run`
39
+ - RL visibility/execution:
40
+ - `GET /api/rl/models`
41
+ - `POST /api/rl/run`
42
+ - `POST /api/rl/evaluate`
43
+
44
+ ## Deployment Notes
45
+
46
+ - No Node.js build is required for serving the current frontend.
47
+ - Backend startup remains `app.main:app`.
48
+ - Frontend does not call external LLM providers directly.
docs/PHASE2_IMPLEMENTATION.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Phase 2 Implementation Notes
2
+
3
+ Phase 2 goal: Curriculum PPO across easy, medium, and hard tasks with deterministic evaluation discipline.
4
+
5
+ ## Implemented Components
6
+
7
+ - `rl/curriculum.py`
8
+ - `CurriculumScheduler` with staged task sampling:
9
+ - Stage 1 (0%-30%): easy only
10
+ - Stage 2 (30%-70%): easy + medium
11
+ - Stage 3 (70%-100%): all 3 tasks with configurable weights
12
+ - `rl/configs/curriculum.yaml`
13
+ - curriculum fractions and weights
14
+ - PPO hyperparameters for Phase 2
15
+ - `rl/train_ppo.py`
16
+ - `--phase 2` training path wired to curriculum scheduler
17
+ - default config path uses `rl/configs/curriculum.yaml`
18
+ - backward compatibility fallback to `rl/configs/ppo_curriculum.yaml`
19
+ - explicit CLI args: `--phase1-config`, `--phase2-config`
20
+ - `tests/test_curriculum.py`
21
+ - stage transitions
22
+ - stage-1 easy-only enforcement
23
+ - stage-3 all-task sampling
24
+ - deterministic task seed invariants
25
+
26
+ ## Operational Notes
27
+
28
+ - Existing 28-action design is preserved.
29
+ - Existing task IDs and grader logic are unchanged.
30
+ - No files were deleted as part of structure cleanup.
31
+
32
+ ## Commands (using existing .venv313)
33
+
34
+ - Train Phase 1:
35
+ - `.\\.venv313\\Scripts\\python.exe -m rl.train_ppo --phase 1 --timesteps 200000 --n-envs 4 --seed 42`
36
+ - Train Phase 2:
37
+ - `.\\.venv313\\Scripts\\python.exe -m rl.train_ppo --phase 2 --timesteps 500000 --n-envs 4 --seed 42 --phase2-config rl/configs/curriculum.yaml`
38
+ - Train Phase 2 (tuned continuation):
39
+ - `.\\.venv313\\Scripts\\python.exe -m rl.train_ppo --phase 2 --timesteps 300000 --n-envs 4 --seed 42 --phase2-config rl/configs/curriculum_tuned.yaml`
40
+ - Evaluate trained model:
41
+ - `.\\.venv313\\Scripts\\python.exe -m rl.evaluate --model results/best_model/phase2_final.zip --episodes 3`
docs/PHASE3_IMPLEMENTATION.md ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Phase 3 Implementation Notes
2
+
3
+ Phase 3 goal: Recurrent PPO (LSTM policy) to capture temporal dependencies such as SLA trend and escalation history.
4
+
5
+ ## Implemented Components
6
+
7
+ - `rl/train_recurrent.py`
8
+ - RecurrentPPO training with `MlpLstmPolicy`
9
+ - LSTM hidden size configurable (default 128)
10
+ - curriculum sampling retained (easy -> medium -> hard)
11
+ - optional transfer of compatible policy tensors from best Phase 2 checkpoint
12
+ - `rl/configs/recurrent.yaml`
13
+ - declarative recurrent training and curriculum settings
14
+ - `rl/evaluate.py`
15
+ - model loading modes: `auto`, `maskable`, `recurrent`
16
+ - recurrent inference path with LSTM state handling + action-mask sanitization
17
+ - helper `compare_recurrent_vs_flat(...)`
18
+ - `rl/callbacks.py`
19
+ - `RecurrentEvalCallback` for periodic grader-based checkpointing in Phase 3
20
+ - recurrent best checkpoints saved as `best_grader_recurrent_<task>.zip` (no collision with Phase 2 files)
21
+ - `rl/gym_wrapper.py`
22
+ - optional `hard_action_mask` mode (default off) for safe action execution
23
+ - `tests/test_rl_evaluate.py`
24
+ - recurrent hidden-state persistence
25
+ - LSTM reset behavior on episode boundary
26
+ - recurrent >= flat comparison utility check
27
+
28
+ ## Commands (using existing .venv313)
29
+
30
+ - Train Phase 3:
31
+ - `.\\.venv313\\Scripts\\python.exe -m rl.train_recurrent --timesteps 600000 --n-envs 4 --seed 42 --config rl/configs/recurrent.yaml`
32
+ - Train Phase 3-v2 (recommended tuning run):
33
+ - `.\\.venv313\\Scripts\\python.exe -m rl.train_recurrent --timesteps 700000 --n-envs 4 --seed 42 --config rl/configs/recurrent_v2.yaml`
34
+ - Evaluate Phase 3 model:
35
+ - `.\\.venv313\\Scripts\\python.exe -m rl.evaluate --model results/best_model/phase3_final.zip --episodes 3 --model-type recurrent`
36
+ - Evaluate best recurrent checkpoint (saved during Phase 3 eval):
37
+ - `.\\.venv313\\Scripts\\python.exe -m rl.evaluate --model results/best_model/best_grader_recurrent_mixed_urgency_medium.zip --episodes 3 --model-type recurrent`
38
+ - Compare recurrent vs flat on medium task:
39
+ - `.\\.venv313\\Scripts\\python.exe -c "from rl.evaluate import compare_recurrent_vs_flat; print(compare_recurrent_vs_flat('results/best_model/phase2_final.zip','results/best_model/phase3_final.zip'))"`
docs/PROJECT_STRUCTURE.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project Structure (Judge-Friendly)
2
+
3
+ This repository keeps runtime-critical files in their original paths for deployment safety.
4
+ No existing files were deleted.
5
+
6
+ ## Top-Level Layout
7
+
8
+ - `app/` - core environment logic and FastAPI server
9
+ - `app/web/` - deployed React frontend assets served by backend at `/ui`
10
+ - `frontend/` - frontend ownership docs and reserved source folder for future split components
11
+ - `rl/` - reinforcement-learning wrappers, training, evaluation, configs
12
+ - `tests/` - deterministic unit/integration test suites
13
+ - `scripts/` - operational scripts (local run, validation, benchmark ladder)
14
+ - `docs/` - judge-facing documentation and phase notes
15
+ - `openenv.yaml` - OpenEnv manifest
16
+ - `inference.py` - OpenEnv inference entrypoint
17
+ - `baseline_openai.py` - CLI baseline workflow
18
+ - `Dockerfile` - deployment image
19
+
20
+ ## Deployment-Critical Paths
21
+
22
+ - API app import path: `app.main:app`
23
+ - Frontend route: `/ui` (served from `app/web/index.html`)
24
+ - RL training entrypoint: `python -m rl.train_ppo`
25
+ - RL evaluation entrypoint: `python -m rl.evaluate`
26
+ - OpenEnv config: `openenv.yaml`
27
+
28
+ ## Phase Mapping
29
+
30
+ - Phase 1: `rl/feature_builder.py`, `rl/action_mask.py`, `rl/gym_wrapper.py`, `rl/train_ppo.py`
31
+ - Phase 2: `rl/curriculum.py`, `rl/configs/curriculum.yaml`, `tests/test_curriculum.py`
32
+ - Phase 3: `rl/train_recurrent.py`, `rl/configs/recurrent.yaml`, `tests/test_rl_evaluate.py`
33
+ - Phase 3+: reserved in existing `rl/` module structure
34
+
35
+ ## Judge Quick Navigation
36
+
37
+ 1. Environment behavior: `app/env.py`, `app/reward.py`, `app/graders.py`
38
+ 2. OpenEnv compliance + inference: `openenv.yaml`, `inference.py`
39
+ 3. Frontend behavior: `app/web/react_app.js`, `docs/FRONTEND_WORKFLOW.md`
40
+ 4. RL implementation: `rl/`
41
+ 5. Validation: `tests/`, `scripts/validate_env.py`, `scripts/validate-submission.sh`
frontend/react/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ node_modules/
2
+ dist/
frontend/react/README.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # react/
2
+
3
+ Vite + React frontend for the Gov Workflow OpenEnv console.
4
+
5
+ Commands:
6
+
7
+ - `npm install`
8
+ - `npm run dev` (local dev on `http://localhost:5173`, proxies `/api` to `http://localhost:7860`)
9
+ - `npm run build` (production build for Docker/HF)
10
+ - `npm run preview`
11
+
12
+ If you see `ERR_CONNECTION_REFUSED` on `/api/*`:
13
+
14
+ - Start backend first on port `7860`
15
+ - Or set a custom dev proxy target:
16
+ - PowerShell: `$env:VITE_DEV_API_TARGET='http://127.0.0.1:7860'`
17
+ - Then run `npm run dev`
18
+
19
+ Modules:
20
+
21
+ - `Overview`: project and environment summary
22
+ - `Simulation Lab`: dynamic real-world workflow simulation (baseline / inference-like / trained RL)
23
+ - `Training Studio`: launch and monitor background RL training jobs
24
+ - `Model Comparison`: baseline vs trained model score comparison on the same task
frontend/react/index.html ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!doctype html>
<!-- Vite entry shell for the Gov Workflow OpenEnv console (React SPA). -->
<html lang="en" class="dark">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Gov Workflow OpenEnv Console</title>
    <!-- Web fonts: Manrope/Inter for text, Material Symbols for icons. -->
    <link href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;600;700;900&amp;family=Inter:wght@400;600;700&amp;display=swap" rel="stylesheet" />
    <link href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:wght,FILL@100..700,0..1&amp;display=swap" rel="stylesheet" />
  </head>
  <body>
    <!-- React mount point; the boot text is visible until src/main.jsx renders. -->
    <div id="app-root" class="app-root">
      <div class="boot">Loading frontend...</div>
    </div>
    <script type="module" src="/src/main.jsx"></script>
  </body>
</html>
frontend/react/package-lock.json ADDED
@@ -0,0 +1,2050 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "openenv-rl-frontend",
3
+ "version": "0.1.0",
4
+ "lockfileVersion": 3,
5
+ "requires": true,
6
+ "packages": {
7
+ "": {
8
+ "name": "openenv-rl-frontend",
9
+ "version": "0.1.0",
10
+ "dependencies": {
11
+ "react": "^18.3.1",
12
+ "react-dom": "^18.3.1"
13
+ },
14
+ "devDependencies": {
15
+ "@vitejs/plugin-react": "^6.0.1",
16
+ "autoprefixer": "^10.5.0",
17
+ "postcss": "^8.5.10",
18
+ "tailwindcss": "^3.4.19",
19
+ "vite": "^8.0.7"
20
+ }
21
+ },
22
+ "node_modules/@alloc/quick-lru": {
23
+ "version": "5.2.0",
24
+ "resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz",
25
+ "integrity": "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==",
26
+ "dev": true,
27
+ "license": "MIT",
28
+ "engines": {
29
+ "node": ">=10"
30
+ },
31
+ "funding": {
32
+ "url": "https://github.com/sponsors/sindresorhus"
33
+ }
34
+ },
35
+ "node_modules/@emnapi/core": {
36
+ "version": "1.9.1",
37
+ "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.9.1.tgz",
38
+ "integrity": "sha512-mukuNALVsoix/w1BJwFzwXBN/dHeejQtuVzcDsfOEsdpCumXb/E9j8w11h5S54tT1xhifGfbbSm/ICrObRb3KA==",
39
+ "dev": true,
40
+ "license": "MIT",
41
+ "optional": true,
42
+ "dependencies": {
43
+ "@emnapi/wasi-threads": "1.2.0",
44
+ "tslib": "^2.4.0"
45
+ }
46
+ },
47
+ "node_modules/@emnapi/runtime": {
48
+ "version": "1.9.1",
49
+ "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.9.1.tgz",
50
+ "integrity": "sha512-VYi5+ZVLhpgK4hQ0TAjiQiZ6ol0oe4mBx7mVv7IflsiEp0OWoVsp/+f9Vc1hOhE0TtkORVrI1GvzyreqpgWtkA==",
51
+ "dev": true,
52
+ "license": "MIT",
53
+ "optional": true,
54
+ "dependencies": {
55
+ "tslib": "^2.4.0"
56
+ }
57
+ },
58
+ "node_modules/@emnapi/wasi-threads": {
59
+ "version": "1.2.0",
60
+ "resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.2.0.tgz",
61
+ "integrity": "sha512-N10dEJNSsUx41Z6pZsXU8FjPjpBEplgH24sfkmITrBED1/U2Esum9F3lfLrMjKHHjmi557zQn7kR9R+XWXu5Rg==",
62
+ "dev": true,
63
+ "license": "MIT",
64
+ "optional": true,
65
+ "dependencies": {
66
+ "tslib": "^2.4.0"
67
+ }
68
+ },
69
+ "node_modules/@jridgewell/gen-mapping": {
70
+ "version": "0.3.13",
71
+ "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz",
72
+ "integrity": "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==",
73
+ "dev": true,
74
+ "license": "MIT",
75
+ "dependencies": {
76
+ "@jridgewell/sourcemap-codec": "^1.5.0",
77
+ "@jridgewell/trace-mapping": "^0.3.24"
78
+ }
79
+ },
80
+ "node_modules/@jridgewell/resolve-uri": {
81
+ "version": "3.1.2",
82
+ "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz",
83
+ "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==",
84
+ "dev": true,
85
+ "license": "MIT",
86
+ "engines": {
87
+ "node": ">=6.0.0"
88
+ }
89
+ },
90
+ "node_modules/@jridgewell/sourcemap-codec": {
91
+ "version": "1.5.5",
92
+ "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz",
93
+ "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==",
94
+ "dev": true,
95
+ "license": "MIT"
96
+ },
97
+ "node_modules/@jridgewell/trace-mapping": {
98
+ "version": "0.3.31",
99
+ "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.31.tgz",
100
+ "integrity": "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==",
101
+ "dev": true,
102
+ "license": "MIT",
103
+ "dependencies": {
104
+ "@jridgewell/resolve-uri": "^3.1.0",
105
+ "@jridgewell/sourcemap-codec": "^1.4.14"
106
+ }
107
+ },
108
+ "node_modules/@napi-rs/wasm-runtime": {
109
+ "version": "1.1.2",
110
+ "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-1.1.2.tgz",
111
+ "integrity": "sha512-sNXv5oLJ7ob93xkZ1XnxisYhGYXfaG9f65/ZgYuAu3qt7b3NadcOEhLvx28hv31PgX8SZJRYrAIPQilQmFpLVw==",
112
+ "dev": true,
113
+ "license": "MIT",
114
+ "optional": true,
115
+ "dependencies": {
116
+ "@tybys/wasm-util": "^0.10.1"
117
+ },
118
+ "funding": {
119
+ "type": "github",
120
+ "url": "https://github.com/sponsors/Brooooooklyn"
121
+ },
122
+ "peerDependencies": {
123
+ "@emnapi/core": "^1.7.1",
124
+ "@emnapi/runtime": "^1.7.1"
125
+ }
126
+ },
127
+ "node_modules/@nodelib/fs.scandir": {
128
+ "version": "2.1.5",
129
+ "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
130
+ "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==",
131
+ "dev": true,
132
+ "license": "MIT",
133
+ "dependencies": {
134
+ "@nodelib/fs.stat": "2.0.5",
135
+ "run-parallel": "^1.1.9"
136
+ },
137
+ "engines": {
138
+ "node": ">= 8"
139
+ }
140
+ },
141
+ "node_modules/@nodelib/fs.stat": {
142
+ "version": "2.0.5",
143
+ "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz",
144
+ "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==",
145
+ "dev": true,
146
+ "license": "MIT",
147
+ "engines": {
148
+ "node": ">= 8"
149
+ }
150
+ },
151
+ "node_modules/@nodelib/fs.walk": {
152
+ "version": "1.2.8",
153
+ "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz",
154
+ "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==",
155
+ "dev": true,
156
+ "license": "MIT",
157
+ "dependencies": {
158
+ "@nodelib/fs.scandir": "2.1.5",
159
+ "fastq": "^1.6.0"
160
+ },
161
+ "engines": {
162
+ "node": ">= 8"
163
+ }
164
+ },
165
+ "node_modules/@oxc-project/types": {
166
+ "version": "0.123.0",
167
+ "resolved": "https://registry.npmjs.org/@oxc-project/types/-/types-0.123.0.tgz",
168
+ "integrity": "sha512-YtECP/y8Mj1lSHiUWGSRzy/C6teUKlS87dEfuVKT09LgQbUsBW1rNg+MiJ4buGu3yuADV60gbIvo9/HplA56Ew==",
169
+ "dev": true,
170
+ "license": "MIT",
171
+ "funding": {
172
+ "url": "https://github.com/sponsors/Boshen"
173
+ }
174
+ },
175
+ "node_modules/@rolldown/binding-android-arm64": {
176
+ "version": "1.0.0-rc.13",
177
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-android-arm64/-/binding-android-arm64-1.0.0-rc.13.tgz",
178
+ "integrity": "sha512-5ZiiecKH2DXAVJTNN13gNMUcCDg4Jy8ZjbXEsPnqa248wgOVeYRX0iqXXD5Jz4bI9BFHgKsI2qmyJynstbmr+g==",
179
+ "cpu": [
180
+ "arm64"
181
+ ],
182
+ "dev": true,
183
+ "license": "MIT",
184
+ "optional": true,
185
+ "os": [
186
+ "android"
187
+ ],
188
+ "engines": {
189
+ "node": "^20.19.0 || >=22.12.0"
190
+ }
191
+ },
192
+ "node_modules/@rolldown/binding-darwin-arm64": {
193
+ "version": "1.0.0-rc.13",
194
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-arm64/-/binding-darwin-arm64-1.0.0-rc.13.tgz",
195
+ "integrity": "sha512-tz/v/8G77seu8zAB3A5sK3UFoOl06zcshEzhUO62sAEtrEuW/H1CcyoupOrD+NbQJytYgA4CppXPzlrmp4JZKA==",
196
+ "cpu": [
197
+ "arm64"
198
+ ],
199
+ "dev": true,
200
+ "license": "MIT",
201
+ "optional": true,
202
+ "os": [
203
+ "darwin"
204
+ ],
205
+ "engines": {
206
+ "node": "^20.19.0 || >=22.12.0"
207
+ }
208
+ },
209
+ "node_modules/@rolldown/binding-darwin-x64": {
210
+ "version": "1.0.0-rc.13",
211
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-x64/-/binding-darwin-x64-1.0.0-rc.13.tgz",
212
+ "integrity": "sha512-8DakphqOz8JrMYWTJmWA+vDJxut6LijZ8Xcdc4flOlAhU7PNVwo2MaWBF9iXjJAPo5rC/IxEFZDhJ3GC7NHvug==",
213
+ "cpu": [
214
+ "x64"
215
+ ],
216
+ "dev": true,
217
+ "license": "MIT",
218
+ "optional": true,
219
+ "os": [
220
+ "darwin"
221
+ ],
222
+ "engines": {
223
+ "node": "^20.19.0 || >=22.12.0"
224
+ }
225
+ },
226
+ "node_modules/@rolldown/binding-freebsd-x64": {
227
+ "version": "1.0.0-rc.13",
228
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-freebsd-x64/-/binding-freebsd-x64-1.0.0-rc.13.tgz",
229
+ "integrity": "sha512-4wBQFfjDuXYN/SVI8inBF3Aa+isq40rc6VMFbk5jcpolUBTe5cYnMsHZ51nFWsx3PVyyNN3vgoESki0Hmr/4BA==",
230
+ "cpu": [
231
+ "x64"
232
+ ],
233
+ "dev": true,
234
+ "license": "MIT",
235
+ "optional": true,
236
+ "os": [
237
+ "freebsd"
238
+ ],
239
+ "engines": {
240
+ "node": "^20.19.0 || >=22.12.0"
241
+ }
242
+ },
243
+ "node_modules/@rolldown/binding-linux-arm-gnueabihf": {
244
+ "version": "1.0.0-rc.13",
245
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm-gnueabihf/-/binding-linux-arm-gnueabihf-1.0.0-rc.13.tgz",
246
+ "integrity": "sha512-JW/e4yPIXLms+jmnbwwy5LA/LxVwZUWLN8xug+V200wzaVi5TEGIWQlh8o91gWYFxW609euI98OCCemmWGuPrw==",
247
+ "cpu": [
248
+ "arm"
249
+ ],
250
+ "dev": true,
251
+ "license": "MIT",
252
+ "optional": true,
253
+ "os": [
254
+ "linux"
255
+ ],
256
+ "engines": {
257
+ "node": "^20.19.0 || >=22.12.0"
258
+ }
259
+ },
260
+ "node_modules/@rolldown/binding-linux-arm64-gnu": {
261
+ "version": "1.0.0-rc.13",
262
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-gnu/-/binding-linux-arm64-gnu-1.0.0-rc.13.tgz",
263
+ "integrity": "sha512-ZfKWpXiUymDnavepCaM6KG/uGydJ4l2nBmMxg60Ci4CbeefpqjPWpfaZM7PThOhk2dssqBAcwLc6rAyr0uTdXg==",
264
+ "cpu": [
265
+ "arm64"
266
+ ],
267
+ "dev": true,
268
+ "license": "MIT",
269
+ "optional": true,
270
+ "os": [
271
+ "linux"
272
+ ],
273
+ "engines": {
274
+ "node": "^20.19.0 || >=22.12.0"
275
+ }
276
+ },
277
+ "node_modules/@rolldown/binding-linux-arm64-musl": {
278
+ "version": "1.0.0-rc.13",
279
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-musl/-/binding-linux-arm64-musl-1.0.0-rc.13.tgz",
280
+ "integrity": "sha512-bmRg3O6Z0gq9yodKKWCIpnlH051sEfdVwt+6m5UDffAQMUUqU0xjnQqqAUm+Gu7ofAAly9DqiQDtKu2nPDEABA==",
281
+ "cpu": [
282
+ "arm64"
283
+ ],
284
+ "dev": true,
285
+ "license": "MIT",
286
+ "optional": true,
287
+ "os": [
288
+ "linux"
289
+ ],
290
+ "engines": {
291
+ "node": "^20.19.0 || >=22.12.0"
292
+ }
293
+ },
294
+ "node_modules/@rolldown/binding-linux-ppc64-gnu": {
295
+ "version": "1.0.0-rc.13",
296
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-ppc64-gnu/-/binding-linux-ppc64-gnu-1.0.0-rc.13.tgz",
297
+ "integrity": "sha512-8Wtnbw4k7pMYN9B/mOEAsQ8HOiq7AZ31Ig4M9BKn2So4xRaFEhtCSa4ZJaOutOWq50zpgR4N5+L/opnlaCx8wQ==",
298
+ "cpu": [
299
+ "ppc64"
300
+ ],
301
+ "dev": true,
302
+ "license": "MIT",
303
+ "optional": true,
304
+ "os": [
305
+ "linux"
306
+ ],
307
+ "engines": {
308
+ "node": "^20.19.0 || >=22.12.0"
309
+ }
310
+ },
311
+ "node_modules/@rolldown/binding-linux-s390x-gnu": {
312
+ "version": "1.0.0-rc.13",
313
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-s390x-gnu/-/binding-linux-s390x-gnu-1.0.0-rc.13.tgz",
314
+ "integrity": "sha512-D/0Nlo8mQuxSMohNJUF2lDXWRsFDsHldfRRgD9bRgktj+EndGPj4DOV37LqDKPYS+osdyhZEH7fTakTAEcW7qg==",
315
+ "cpu": [
316
+ "s390x"
317
+ ],
318
+ "dev": true,
319
+ "license": "MIT",
320
+ "optional": true,
321
+ "os": [
322
+ "linux"
323
+ ],
324
+ "engines": {
325
+ "node": "^20.19.0 || >=22.12.0"
326
+ }
327
+ },
328
+ "node_modules/@rolldown/binding-linux-x64-gnu": {
329
+ "version": "1.0.0-rc.13",
330
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-gnu/-/binding-linux-x64-gnu-1.0.0-rc.13.tgz",
331
+ "integrity": "sha512-eRrPvat2YaVQcwwKi/JzOP6MKf1WRnOCr+VaI3cTWz3ZoLcP/654z90lVCJ4dAuMEpPdke0n+qyAqXDZdIC4rA==",
332
+ "cpu": [
333
+ "x64"
334
+ ],
335
+ "dev": true,
336
+ "license": "MIT",
337
+ "optional": true,
338
+ "os": [
339
+ "linux"
340
+ ],
341
+ "engines": {
342
+ "node": "^20.19.0 || >=22.12.0"
343
+ }
344
+ },
345
+ "node_modules/@rolldown/binding-linux-x64-musl": {
346
+ "version": "1.0.0-rc.13",
347
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-musl/-/binding-linux-x64-musl-1.0.0-rc.13.tgz",
348
+ "integrity": "sha512-PsdONiFRp8hR8KgVjTWjZ9s7uA3uueWL0t74/cKHfM4dR5zXYv4AjB8BvA+QDToqxAFg4ZkcVEqeu5F7inoz5w==",
349
+ "cpu": [
350
+ "x64"
351
+ ],
352
+ "dev": true,
353
+ "license": "MIT",
354
+ "optional": true,
355
+ "os": [
356
+ "linux"
357
+ ],
358
+ "engines": {
359
+ "node": "^20.19.0 || >=22.12.0"
360
+ }
361
+ },
362
+ "node_modules/@rolldown/binding-openharmony-arm64": {
363
+ "version": "1.0.0-rc.13",
364
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-openharmony-arm64/-/binding-openharmony-arm64-1.0.0-rc.13.tgz",
365
+ "integrity": "sha512-hCNXgC5dI3TVOLrPT++PKFNZ+1EtS0mLQwfXXXSUD/+rGlB65gZDwN/IDuxLpQP4x8RYYHqGomlUXzpO8aVI2w==",
366
+ "cpu": [
367
+ "arm64"
368
+ ],
369
+ "dev": true,
370
+ "license": "MIT",
371
+ "optional": true,
372
+ "os": [
373
+ "openharmony"
374
+ ],
375
+ "engines": {
376
+ "node": "^20.19.0 || >=22.12.0"
377
+ }
378
+ },
379
+ "node_modules/@rolldown/binding-wasm32-wasi": {
380
+ "version": "1.0.0-rc.13",
381
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-wasm32-wasi/-/binding-wasm32-wasi-1.0.0-rc.13.tgz",
382
+ "integrity": "sha512-viLS5C5et8NFtLWw9Sw3M/w4vvnVkbWkO7wSNh3C+7G1+uCkGpr6PcjNDSFcNtmXY/4trjPBqUfcOL+P3sWy/g==",
383
+ "cpu": [
384
+ "wasm32"
385
+ ],
386
+ "dev": true,
387
+ "license": "MIT",
388
+ "optional": true,
389
+ "dependencies": {
390
+ "@emnapi/core": "1.9.1",
391
+ "@emnapi/runtime": "1.9.1",
392
+ "@napi-rs/wasm-runtime": "^1.1.2"
393
+ },
394
+ "engines": {
395
+ "node": ">=14.0.0"
396
+ }
397
+ },
398
+ "node_modules/@rolldown/binding-win32-arm64-msvc": {
399
+ "version": "1.0.0-rc.13",
400
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-arm64-msvc/-/binding-win32-arm64-msvc-1.0.0-rc.13.tgz",
401
+ "integrity": "sha512-Fqa3Tlt1xL4wzmAYxGNFV36Hb+VfPc9PYU+E25DAnswXv3ODDu/yyWjQDbXMo5AGWkQVjLgQExuVu8I/UaZhPQ==",
402
+ "cpu": [
403
+ "arm64"
404
+ ],
405
+ "dev": true,
406
+ "license": "MIT",
407
+ "optional": true,
408
+ "os": [
409
+ "win32"
410
+ ],
411
+ "engines": {
412
+ "node": "^20.19.0 || >=22.12.0"
413
+ }
414
+ },
415
+ "node_modules/@rolldown/binding-win32-x64-msvc": {
416
+ "version": "1.0.0-rc.13",
417
+ "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-x64-msvc/-/binding-win32-x64-msvc-1.0.0-rc.13.tgz",
418
+ "integrity": "sha512-/pLI5kPkGEi44TDlnbio3St/5gUFeN51YWNAk/Gnv6mEQBOahRBh52qVFVBpmrnU01n2yysvBML9Ynu7K4kGAQ==",
419
+ "cpu": [
420
+ "x64"
421
+ ],
422
+ "dev": true,
423
+ "license": "MIT",
424
+ "optional": true,
425
+ "os": [
426
+ "win32"
427
+ ],
428
+ "engines": {
429
+ "node": "^20.19.0 || >=22.12.0"
430
+ }
431
+ },
432
+ "node_modules/@rolldown/pluginutils": {
433
+ "version": "1.0.0-rc.7",
434
+ "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.7.tgz",
435
+ "integrity": "sha512-qujRfC8sFVInYSPPMLQByRh7zhwkGFS4+tyMQ83srV1qrxL4g8E2tyxVVyxd0+8QeBM1mIk9KbWxkegRr76XzA==",
436
+ "dev": true,
437
+ "license": "MIT"
438
+ },
439
+ "node_modules/@tybys/wasm-util": {
440
+ "version": "0.10.1",
441
+ "resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.10.1.tgz",
442
+ "integrity": "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg==",
443
+ "dev": true,
444
+ "license": "MIT",
445
+ "optional": true,
446
+ "dependencies": {
447
+ "tslib": "^2.4.0"
448
+ }
449
+ },
450
+ "node_modules/@vitejs/plugin-react": {
451
+ "version": "6.0.1",
452
+ "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-6.0.1.tgz",
453
+ "integrity": "sha512-l9X/E3cDb+xY3SWzlG1MOGt2usfEHGMNIaegaUGFsLkb3RCn/k8/TOXBcab+OndDI4TBtktT8/9BwwW8Vi9KUQ==",
454
+ "dev": true,
455
+ "license": "MIT",
456
+ "dependencies": {
457
+ "@rolldown/pluginutils": "1.0.0-rc.7"
458
+ },
459
+ "engines": {
460
+ "node": "^20.19.0 || >=22.12.0"
461
+ },
462
+ "peerDependencies": {
463
+ "@rolldown/plugin-babel": "^0.1.7 || ^0.2.0",
464
+ "babel-plugin-react-compiler": "^1.0.0",
465
+ "vite": "^8.0.0"
466
+ },
467
+ "peerDependenciesMeta": {
468
+ "@rolldown/plugin-babel": {
469
+ "optional": true
470
+ },
471
+ "babel-plugin-react-compiler": {
472
+ "optional": true
473
+ }
474
+ }
475
+ },
476
+ "node_modules/any-promise": {
477
+ "version": "1.3.0",
478
+ "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz",
479
+ "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==",
480
+ "dev": true,
481
+ "license": "MIT"
482
+ },
483
+ "node_modules/anymatch": {
484
+ "version": "3.1.3",
485
+ "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz",
486
+ "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==",
487
+ "dev": true,
488
+ "license": "ISC",
489
+ "dependencies": {
490
+ "normalize-path": "^3.0.0",
491
+ "picomatch": "^2.0.4"
492
+ },
493
+ "engines": {
494
+ "node": ">= 8"
495
+ }
496
+ },
497
+ "node_modules/anymatch/node_modules/picomatch": {
498
+ "version": "2.3.2",
499
+ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
500
+ "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
501
+ "dev": true,
502
+ "license": "MIT",
503
+ "engines": {
504
+ "node": ">=8.6"
505
+ },
506
+ "funding": {
507
+ "url": "https://github.com/sponsors/jonschlinkert"
508
+ }
509
+ },
510
+ "node_modules/arg": {
511
+ "version": "5.0.2",
512
+ "resolved": "https://registry.npmjs.org/arg/-/arg-5.0.2.tgz",
513
+ "integrity": "sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg==",
514
+ "dev": true,
515
+ "license": "MIT"
516
+ },
517
+ "node_modules/autoprefixer": {
518
+ "version": "10.5.0",
519
+ "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.5.0.tgz",
520
+ "integrity": "sha512-FMhOoZV4+qR6aTUALKX2rEqGG+oyATvwBt9IIzVR5rMa2HRWPkxf+P+PAJLD1I/H5/II+HuZcBJYEFBpq39ong==",
521
+ "dev": true,
522
+ "funding": [
523
+ {
524
+ "type": "opencollective",
525
+ "url": "https://opencollective.com/postcss/"
526
+ },
527
+ {
528
+ "type": "tidelift",
529
+ "url": "https://tidelift.com/funding/github/npm/autoprefixer"
530
+ },
531
+ {
532
+ "type": "github",
533
+ "url": "https://github.com/sponsors/ai"
534
+ }
535
+ ],
536
+ "license": "MIT",
537
+ "dependencies": {
538
+ "browserslist": "^4.28.2",
539
+ "caniuse-lite": "^1.0.30001787",
540
+ "fraction.js": "^5.3.4",
541
+ "picocolors": "^1.1.1",
542
+ "postcss-value-parser": "^4.2.0"
543
+ },
544
+ "bin": {
545
+ "autoprefixer": "bin/autoprefixer"
546
+ },
547
+ "engines": {
548
+ "node": "^10 || ^12 || >=14"
549
+ },
550
+ "peerDependencies": {
551
+ "postcss": "^8.1.0"
552
+ }
553
+ },
554
+ "node_modules/baseline-browser-mapping": {
555
+ "version": "2.10.21",
556
+ "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.21.tgz",
557
+ "integrity": "sha512-Q+rUQ7Uz8AHM7DEaNdwvfFCTq7a43lNTzuS94eiWqwyxfV/wJv+oUivef51T91mmRY4d4A1u9rcSvkeufCVXlA==",
558
+ "dev": true,
559
+ "license": "Apache-2.0",
560
+ "bin": {
561
+ "baseline-browser-mapping": "dist/cli.cjs"
562
+ },
563
+ "engines": {
564
+ "node": ">=6.0.0"
565
+ }
566
+ },
567
+ "node_modules/binary-extensions": {
568
+ "version": "2.3.0",
569
+ "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz",
570
+ "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==",
571
+ "dev": true,
572
+ "license": "MIT",
573
+ "engines": {
574
+ "node": ">=8"
575
+ },
576
+ "funding": {
577
+ "url": "https://github.com/sponsors/sindresorhus"
578
+ }
579
+ },
580
+ "node_modules/braces": {
581
+ "version": "3.0.3",
582
+ "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
583
+ "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==",
584
+ "dev": true,
585
+ "license": "MIT",
586
+ "dependencies": {
587
+ "fill-range": "^7.1.1"
588
+ },
589
+ "engines": {
590
+ "node": ">=8"
591
+ }
592
+ },
593
+ "node_modules/browserslist": {
594
+ "version": "4.28.2",
595
+ "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.2.tgz",
596
+ "integrity": "sha512-48xSriZYYg+8qXna9kwqjIVzuQxi+KYWp2+5nCYnYKPTr0LvD89Jqk2Or5ogxz0NUMfIjhh2lIUX/LyX9B4oIg==",
597
+ "dev": true,
598
+ "funding": [
599
+ {
600
+ "type": "opencollective",
601
+ "url": "https://opencollective.com/browserslist"
602
+ },
603
+ {
604
+ "type": "tidelift",
605
+ "url": "https://tidelift.com/funding/github/npm/browserslist"
606
+ },
607
+ {
608
+ "type": "github",
609
+ "url": "https://github.com/sponsors/ai"
610
+ }
611
+ ],
612
+ "license": "MIT",
613
+ "dependencies": {
614
+ "baseline-browser-mapping": "^2.10.12",
615
+ "caniuse-lite": "^1.0.30001782",
616
+ "electron-to-chromium": "^1.5.328",
617
+ "node-releases": "^2.0.36",
618
+ "update-browserslist-db": "^1.2.3"
619
+ },
620
+ "bin": {
621
+ "browserslist": "cli.js"
622
+ },
623
+ "engines": {
624
+ "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7"
625
+ }
626
+ },
627
+ "node_modules/camelcase-css": {
628
+ "version": "2.0.1",
629
+ "resolved": "https://registry.npmjs.org/camelcase-css/-/camelcase-css-2.0.1.tgz",
630
+ "integrity": "sha512-QOSvevhslijgYwRx6Rv7zKdMF8lbRmx+uQGx2+vDc+KI/eBnsy9kit5aj23AgGu3pa4t9AgwbnXWqS+iOY+2aA==",
631
+ "dev": true,
632
+ "license": "MIT",
633
+ "engines": {
634
+ "node": ">= 6"
635
+ }
636
+ },
637
+ "node_modules/caniuse-lite": {
638
+ "version": "1.0.30001790",
639
+ "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001790.tgz",
640
+ "integrity": "sha512-bOoxfJPyYo+ds6W0YfptaCWbFnJYjh2Y1Eow5lRv+vI2u8ganPZqNm1JwNh0t2ELQCqIWg4B3dWEusgAmsoyOw==",
641
+ "dev": true,
642
+ "funding": [
643
+ {
644
+ "type": "opencollective",
645
+ "url": "https://opencollective.com/browserslist"
646
+ },
647
+ {
648
+ "type": "tidelift",
649
+ "url": "https://tidelift.com/funding/github/npm/caniuse-lite"
650
+ },
651
+ {
652
+ "type": "github",
653
+ "url": "https://github.com/sponsors/ai"
654
+ }
655
+ ],
656
+ "license": "CC-BY-4.0"
657
+ },
658
+ "node_modules/chokidar": {
659
+ "version": "3.6.0",
660
+ "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz",
661
+ "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==",
662
+ "dev": true,
663
+ "license": "MIT",
664
+ "dependencies": {
665
+ "anymatch": "~3.1.2",
666
+ "braces": "~3.0.2",
667
+ "glob-parent": "~5.1.2",
668
+ "is-binary-path": "~2.1.0",
669
+ "is-glob": "~4.0.1",
670
+ "normalize-path": "~3.0.0",
671
+ "readdirp": "~3.6.0"
672
+ },
673
+ "engines": {
674
+ "node": ">= 8.10.0"
675
+ },
676
+ "funding": {
677
+ "url": "https://paulmillr.com/funding/"
678
+ },
679
+ "optionalDependencies": {
680
+ "fsevents": "~2.3.2"
681
+ }
682
+ },
683
+ "node_modules/chokidar/node_modules/glob-parent": {
684
+ "version": "5.1.2",
685
+ "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz",
686
+ "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==",
687
+ "dev": true,
688
+ "license": "ISC",
689
+ "dependencies": {
690
+ "is-glob": "^4.0.1"
691
+ },
692
+ "engines": {
693
+ "node": ">= 6"
694
+ }
695
+ },
696
+ "node_modules/commander": {
697
+ "version": "4.1.1",
698
+ "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz",
699
+ "integrity": "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==",
700
+ "dev": true,
701
+ "license": "MIT",
702
+ "engines": {
703
+ "node": ">= 6"
704
+ }
705
+ },
706
+ "node_modules/cssesc": {
707
+ "version": "3.0.0",
708
+ "resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz",
709
+ "integrity": "sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==",
710
+ "dev": true,
711
+ "license": "MIT",
712
+ "bin": {
713
+ "cssesc": "bin/cssesc"
714
+ },
715
+ "engines": {
716
+ "node": ">=4"
717
+ }
718
+ },
719
+ "node_modules/detect-libc": {
720
+ "version": "2.1.2",
721
+ "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz",
722
+ "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==",
723
+ "dev": true,
724
+ "license": "Apache-2.0",
725
+ "engines": {
726
+ "node": ">=8"
727
+ }
728
+ },
729
+ "node_modules/didyoumean": {
730
+ "version": "1.2.2",
731
+ "resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz",
732
+ "integrity": "sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw==",
733
+ "dev": true,
734
+ "license": "Apache-2.0"
735
+ },
736
+ "node_modules/dlv": {
737
+ "version": "1.1.3",
738
+ "resolved": "https://registry.npmjs.org/dlv/-/dlv-1.1.3.tgz",
739
+ "integrity": "sha512-+HlytyjlPKnIG8XuRG8WvmBP8xs8P71y+SKKS6ZXWoEgLuePxtDoUEiH7WkdePWrQ5JBpE6aoVqfZfJUQkjXwA==",
740
+ "dev": true,
741
+ "license": "MIT"
742
+ },
743
+ "node_modules/electron-to-chromium": {
744
+ "version": "1.5.344",
745
+ "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.344.tgz",
746
+ "integrity": "sha512-4MxfbmNDm+KPh066EZy+eUnkcDPcZ35wNmOWzFuh/ijvHsve6kbLTLURy88uCNK5FbpN+yk2nQY6BYh1GEt+wg==",
747
+ "dev": true,
748
+ "license": "ISC"
749
+ },
750
+ "node_modules/es-errors": {
751
+ "version": "1.3.0",
752
+ "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
753
+ "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
754
+ "dev": true,
755
+ "license": "MIT",
756
+ "engines": {
757
+ "node": ">= 0.4"
758
+ }
759
+ },
760
+ "node_modules/escalade": {
761
+ "version": "3.2.0",
762
+ "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
763
+ "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
764
+ "dev": true,
765
+ "license": "MIT",
766
+ "engines": {
767
+ "node": ">=6"
768
+ }
769
+ },
770
+ "node_modules/fast-glob": {
771
+ "version": "3.3.3",
772
+ "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz",
773
+ "integrity": "sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==",
774
+ "dev": true,
775
+ "license": "MIT",
776
+ "dependencies": {
777
+ "@nodelib/fs.stat": "^2.0.2",
778
+ "@nodelib/fs.walk": "^1.2.3",
779
+ "glob-parent": "^5.1.2",
780
+ "merge2": "^1.3.0",
781
+ "micromatch": "^4.0.8"
782
+ },
783
+ "engines": {
784
+ "node": ">=8.6.0"
785
+ }
786
+ },
787
+ "node_modules/fast-glob/node_modules/glob-parent": {
788
+ "version": "5.1.2",
789
+ "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz",
790
+ "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==",
791
+ "dev": true,
792
+ "license": "ISC",
793
+ "dependencies": {
794
+ "is-glob": "^4.0.1"
795
+ },
796
+ "engines": {
797
+ "node": ">= 6"
798
+ }
799
+ },
800
+ "node_modules/fastq": {
801
+ "version": "1.20.1",
802
+ "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.20.1.tgz",
803
+ "integrity": "sha512-GGToxJ/w1x32s/D2EKND7kTil4n8OVk/9mycTc4VDza13lOvpUZTGX3mFSCtV9ksdGBVzvsyAVLM6mHFThxXxw==",
804
+ "dev": true,
805
+ "license": "ISC",
806
+ "dependencies": {
807
+ "reusify": "^1.0.4"
808
+ }
809
+ },
810
+ "node_modules/fdir": {
811
+ "version": "6.5.0",
812
+ "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz",
813
+ "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==",
814
+ "dev": true,
815
+ "license": "MIT",
816
+ "engines": {
817
+ "node": ">=12.0.0"
818
+ },
819
+ "peerDependencies": {
820
+ "picomatch": "^3 || ^4"
821
+ },
822
+ "peerDependenciesMeta": {
823
+ "picomatch": {
824
+ "optional": true
825
+ }
826
+ }
827
+ },
828
+ "node_modules/fill-range": {
829
+ "version": "7.1.1",
830
+ "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
831
+ "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==",
832
+ "dev": true,
833
+ "license": "MIT",
834
+ "dependencies": {
835
+ "to-regex-range": "^5.0.1"
836
+ },
837
+ "engines": {
838
+ "node": ">=8"
839
+ }
840
+ },
841
+ "node_modules/fraction.js": {
842
+ "version": "5.3.4",
843
+ "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-5.3.4.tgz",
844
+ "integrity": "sha512-1X1NTtiJphryn/uLQz3whtY6jK3fTqoE3ohKs0tT+Ujr1W59oopxmoEh7Lu5p6vBaPbgoM0bzveAW4Qi5RyWDQ==",
845
+ "dev": true,
846
+ "license": "MIT",
847
+ "engines": {
848
+ "node": "*"
849
+ },
850
+ "funding": {
851
+ "type": "github",
852
+ "url": "https://github.com/sponsors/rawify"
853
+ }
854
+ },
855
+ "node_modules/fsevents": {
856
+ "version": "2.3.3",
857
+ "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
858
+ "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
859
+ "dev": true,
860
+ "hasInstallScript": true,
861
+ "license": "MIT",
862
+ "optional": true,
863
+ "os": [
864
+ "darwin"
865
+ ],
866
+ "engines": {
867
+ "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
868
+ }
869
+ },
870
+ "node_modules/function-bind": {
871
+ "version": "1.1.2",
872
+ "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
873
+ "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
874
+ "dev": true,
875
+ "license": "MIT",
876
+ "funding": {
877
+ "url": "https://github.com/sponsors/ljharb"
878
+ }
879
+ },
880
+ "node_modules/glob-parent": {
881
+ "version": "6.0.2",
882
+ "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz",
883
+ "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==",
884
+ "dev": true,
885
+ "license": "ISC",
886
+ "dependencies": {
887
+ "is-glob": "^4.0.3"
888
+ },
889
+ "engines": {
890
+ "node": ">=10.13.0"
891
+ }
892
+ },
893
+ "node_modules/hasown": {
894
+ "version": "2.0.3",
895
+ "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.3.tgz",
896
+ "integrity": "sha512-ej4AhfhfL2Q2zpMmLo7U1Uv9+PyhIZpgQLGT1F9miIGmiCJIoCgSmczFdrc97mWT4kVY72KA+WnnhJ5pghSvSg==",
897
+ "dev": true,
898
+ "license": "MIT",
899
+ "dependencies": {
900
+ "function-bind": "^1.1.2"
901
+ },
902
+ "engines": {
903
+ "node": ">= 0.4"
904
+ }
905
+ },
906
+ "node_modules/is-binary-path": {
907
+ "version": "2.1.0",
908
+ "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz",
909
+ "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==",
910
+ "dev": true,
911
+ "license": "MIT",
912
+ "dependencies": {
913
+ "binary-extensions": "^2.0.0"
914
+ },
915
+ "engines": {
916
+ "node": ">=8"
917
+ }
918
+ },
919
+ "node_modules/is-core-module": {
920
+ "version": "2.16.1",
921
+ "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.16.1.tgz",
922
+ "integrity": "sha512-UfoeMA6fIJ8wTYFEUjelnaGI67v6+N7qXJEvQuIGa99l4xsCruSYOVSQ0uPANn4dAzm8lkYPaKLrrijLq7x23w==",
923
+ "dev": true,
924
+ "license": "MIT",
925
+ "dependencies": {
926
+ "hasown": "^2.0.2"
927
+ },
928
+ "engines": {
929
+ "node": ">= 0.4"
930
+ },
931
+ "funding": {
932
+ "url": "https://github.com/sponsors/ljharb"
933
+ }
934
+ },
935
+ "node_modules/is-extglob": {
936
+ "version": "2.1.1",
937
+ "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz",
938
+ "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==",
939
+ "dev": true,
940
+ "license": "MIT",
941
+ "engines": {
942
+ "node": ">=0.10.0"
943
+ }
944
+ },
945
+ "node_modules/is-glob": {
946
+ "version": "4.0.3",
947
+ "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz",
948
+ "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==",
949
+ "dev": true,
950
+ "license": "MIT",
951
+ "dependencies": {
952
+ "is-extglob": "^2.1.1"
953
+ },
954
+ "engines": {
955
+ "node": ">=0.10.0"
956
+ }
957
+ },
958
+ "node_modules/is-number": {
959
+ "version": "7.0.0",
960
+ "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz",
961
+ "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==",
962
+ "dev": true,
963
+ "license": "MIT",
964
+ "engines": {
965
+ "node": ">=0.12.0"
966
+ }
967
+ },
968
+ "node_modules/jiti": {
969
+ "version": "1.21.7",
970
+ "resolved": "https://registry.npmjs.org/jiti/-/jiti-1.21.7.tgz",
971
+ "integrity": "sha512-/imKNG4EbWNrVjoNC/1H5/9GFy+tqjGBHCaSsN+P2RnPqjsLmv6UD3Ej+Kj8nBWaRAwyk7kK5ZUc+OEatnTR3A==",
972
+ "dev": true,
973
+ "license": "MIT",
974
+ "bin": {
975
+ "jiti": "bin/jiti.js"
976
+ }
977
+ },
978
+ "node_modules/js-tokens": {
979
+ "version": "4.0.0",
980
+ "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
981
+ "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==",
982
+ "license": "MIT"
983
+ },
984
+ "node_modules/lightningcss": {
985
+ "version": "1.32.0",
986
+ "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.32.0.tgz",
987
+ "integrity": "sha512-NXYBzinNrblfraPGyrbPoD19C1h9lfI/1mzgWYvXUTe414Gz/X1FD2XBZSZM7rRTrMA8JL3OtAaGifrIKhQ5yQ==",
988
+ "dev": true,
989
+ "license": "MPL-2.0",
990
+ "dependencies": {
991
+ "detect-libc": "^2.0.3"
992
+ },
993
+ "engines": {
994
+ "node": ">= 12.0.0"
995
+ },
996
+ "funding": {
997
+ "type": "opencollective",
998
+ "url": "https://opencollective.com/parcel"
999
+ },
1000
+ "optionalDependencies": {
1001
+ "lightningcss-android-arm64": "1.32.0",
1002
+ "lightningcss-darwin-arm64": "1.32.0",
1003
+ "lightningcss-darwin-x64": "1.32.0",
1004
+ "lightningcss-freebsd-x64": "1.32.0",
1005
+ "lightningcss-linux-arm-gnueabihf": "1.32.0",
1006
+ "lightningcss-linux-arm64-gnu": "1.32.0",
1007
+ "lightningcss-linux-arm64-musl": "1.32.0",
1008
+ "lightningcss-linux-x64-gnu": "1.32.0",
1009
+ "lightningcss-linux-x64-musl": "1.32.0",
1010
+ "lightningcss-win32-arm64-msvc": "1.32.0",
1011
+ "lightningcss-win32-x64-msvc": "1.32.0"
1012
+ }
1013
+ },
1014
+ "node_modules/lightningcss-android-arm64": {
1015
+ "version": "1.32.0",
1016
+ "resolved": "https://registry.npmjs.org/lightningcss-android-arm64/-/lightningcss-android-arm64-1.32.0.tgz",
1017
+ "integrity": "sha512-YK7/ClTt4kAK0vo6w3X+Pnm0D2cf2vPHbhOXdoNti1Ga0al1P4TBZhwjATvjNwLEBCnKvjJc2jQgHXH0NEwlAg==",
1018
+ "cpu": [
1019
+ "arm64"
1020
+ ],
1021
+ "dev": true,
1022
+ "license": "MPL-2.0",
1023
+ "optional": true,
1024
+ "os": [
1025
+ "android"
1026
+ ],
1027
+ "engines": {
1028
+ "node": ">= 12.0.0"
1029
+ },
1030
+ "funding": {
1031
+ "type": "opencollective",
1032
+ "url": "https://opencollective.com/parcel"
1033
+ }
1034
+ },
1035
+ "node_modules/lightningcss-darwin-arm64": {
1036
+ "version": "1.32.0",
1037
+ "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.32.0.tgz",
1038
+ "integrity": "sha512-RzeG9Ju5bag2Bv1/lwlVJvBE3q6TtXskdZLLCyfg5pt+HLz9BqlICO7LZM7VHNTTn/5PRhHFBSjk5lc4cmscPQ==",
1039
+ "cpu": [
1040
+ "arm64"
1041
+ ],
1042
+ "dev": true,
1043
+ "license": "MPL-2.0",
1044
+ "optional": true,
1045
+ "os": [
1046
+ "darwin"
1047
+ ],
1048
+ "engines": {
1049
+ "node": ">= 12.0.0"
1050
+ },
1051
+ "funding": {
1052
+ "type": "opencollective",
1053
+ "url": "https://opencollective.com/parcel"
1054
+ }
1055
+ },
1056
+ "node_modules/lightningcss-darwin-x64": {
1057
+ "version": "1.32.0",
1058
+ "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.32.0.tgz",
1059
+ "integrity": "sha512-U+QsBp2m/s2wqpUYT/6wnlagdZbtZdndSmut/NJqlCcMLTWp5muCrID+K5UJ6jqD2BFshejCYXniPDbNh73V8w==",
1060
+ "cpu": [
1061
+ "x64"
1062
+ ],
1063
+ "dev": true,
1064
+ "license": "MPL-2.0",
1065
+ "optional": true,
1066
+ "os": [
1067
+ "darwin"
1068
+ ],
1069
+ "engines": {
1070
+ "node": ">= 12.0.0"
1071
+ },
1072
+ "funding": {
1073
+ "type": "opencollective",
1074
+ "url": "https://opencollective.com/parcel"
1075
+ }
1076
+ },
1077
+ "node_modules/lightningcss-freebsd-x64": {
1078
+ "version": "1.32.0",
1079
+ "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.32.0.tgz",
1080
+ "integrity": "sha512-JCTigedEksZk3tHTTthnMdVfGf61Fky8Ji2E4YjUTEQX14xiy/lTzXnu1vwiZe3bYe0q+SpsSH/CTeDXK6WHig==",
1081
+ "cpu": [
1082
+ "x64"
1083
+ ],
1084
+ "dev": true,
1085
+ "license": "MPL-2.0",
1086
+ "optional": true,
1087
+ "os": [
1088
+ "freebsd"
1089
+ ],
1090
+ "engines": {
1091
+ "node": ">= 12.0.0"
1092
+ },
1093
+ "funding": {
1094
+ "type": "opencollective",
1095
+ "url": "https://opencollective.com/parcel"
1096
+ }
1097
+ },
1098
+ "node_modules/lightningcss-linux-arm-gnueabihf": {
1099
+ "version": "1.32.0",
1100
+ "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.32.0.tgz",
1101
+ "integrity": "sha512-x6rnnpRa2GL0zQOkt6rts3YDPzduLpWvwAF6EMhXFVZXD4tPrBkEFqzGowzCsIWsPjqSK+tyNEODUBXeeVHSkw==",
1102
+ "cpu": [
1103
+ "arm"
1104
+ ],
1105
+ "dev": true,
1106
+ "license": "MPL-2.0",
1107
+ "optional": true,
1108
+ "os": [
1109
+ "linux"
1110
+ ],
1111
+ "engines": {
1112
+ "node": ">= 12.0.0"
1113
+ },
1114
+ "funding": {
1115
+ "type": "opencollective",
1116
+ "url": "https://opencollective.com/parcel"
1117
+ }
1118
+ },
1119
+ "node_modules/lightningcss-linux-arm64-gnu": {
1120
+ "version": "1.32.0",
1121
+ "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.32.0.tgz",
1122
+ "integrity": "sha512-0nnMyoyOLRJXfbMOilaSRcLH3Jw5z9HDNGfT/gwCPgaDjnx0i8w7vBzFLFR1f6CMLKF8gVbebmkUN3fa/kQJpQ==",
1123
+ "cpu": [
1124
+ "arm64"
1125
+ ],
1126
+ "dev": true,
1127
+ "license": "MPL-2.0",
1128
+ "optional": true,
1129
+ "os": [
1130
+ "linux"
1131
+ ],
1132
+ "engines": {
1133
+ "node": ">= 12.0.0"
1134
+ },
1135
+ "funding": {
1136
+ "type": "opencollective",
1137
+ "url": "https://opencollective.com/parcel"
1138
+ }
1139
+ },
1140
+ "node_modules/lightningcss-linux-arm64-musl": {
1141
+ "version": "1.32.0",
1142
+ "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.32.0.tgz",
1143
+ "integrity": "sha512-UpQkoenr4UJEzgVIYpI80lDFvRmPVg6oqboNHfoH4CQIfNA+HOrZ7Mo7KZP02dC6LjghPQJeBsvXhJod/wnIBg==",
1144
+ "cpu": [
1145
+ "arm64"
1146
+ ],
1147
+ "dev": true,
1148
+ "license": "MPL-2.0",
1149
+ "optional": true,
1150
+ "os": [
1151
+ "linux"
1152
+ ],
1153
+ "engines": {
1154
+ "node": ">= 12.0.0"
1155
+ },
1156
+ "funding": {
1157
+ "type": "opencollective",
1158
+ "url": "https://opencollective.com/parcel"
1159
+ }
1160
+ },
1161
+ "node_modules/lightningcss-linux-x64-gnu": {
1162
+ "version": "1.32.0",
1163
+ "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.32.0.tgz",
1164
+ "integrity": "sha512-V7Qr52IhZmdKPVr+Vtw8o+WLsQJYCTd8loIfpDaMRWGUZfBOYEJeyJIkqGIDMZPwPx24pUMfwSxxI8phr/MbOA==",
1165
+ "cpu": [
1166
+ "x64"
1167
+ ],
1168
+ "dev": true,
1169
+ "license": "MPL-2.0",
1170
+ "optional": true,
1171
+ "os": [
1172
+ "linux"
1173
+ ],
1174
+ "engines": {
1175
+ "node": ">= 12.0.0"
1176
+ },
1177
+ "funding": {
1178
+ "type": "opencollective",
1179
+ "url": "https://opencollective.com/parcel"
1180
+ }
1181
+ },
1182
+ "node_modules/lightningcss-linux-x64-musl": {
1183
+ "version": "1.32.0",
1184
+ "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.32.0.tgz",
1185
+ "integrity": "sha512-bYcLp+Vb0awsiXg/80uCRezCYHNg1/l3mt0gzHnWV9XP1W5sKa5/TCdGWaR/zBM2PeF/HbsQv/j2URNOiVuxWg==",
1186
+ "cpu": [
1187
+ "x64"
1188
+ ],
1189
+ "dev": true,
1190
+ "license": "MPL-2.0",
1191
+ "optional": true,
1192
+ "os": [
1193
+ "linux"
1194
+ ],
1195
+ "engines": {
1196
+ "node": ">= 12.0.0"
1197
+ },
1198
+ "funding": {
1199
+ "type": "opencollective",
1200
+ "url": "https://opencollective.com/parcel"
1201
+ }
1202
+ },
1203
+ "node_modules/lightningcss-win32-arm64-msvc": {
1204
+ "version": "1.32.0",
1205
+ "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.32.0.tgz",
1206
+ "integrity": "sha512-8SbC8BR40pS6baCM8sbtYDSwEVQd4JlFTOlaD3gWGHfThTcABnNDBda6eTZeqbofalIJhFx0qKzgHJmcPTnGdw==",
1207
+ "cpu": [
1208
+ "arm64"
1209
+ ],
1210
+ "dev": true,
1211
+ "license": "MPL-2.0",
1212
+ "optional": true,
1213
+ "os": [
1214
+ "win32"
1215
+ ],
1216
+ "engines": {
1217
+ "node": ">= 12.0.0"
1218
+ },
1219
+ "funding": {
1220
+ "type": "opencollective",
1221
+ "url": "https://opencollective.com/parcel"
1222
+ }
1223
+ },
1224
+ "node_modules/lightningcss-win32-x64-msvc": {
1225
+ "version": "1.32.0",
1226
+ "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.32.0.tgz",
1227
+ "integrity": "sha512-Amq9B/SoZYdDi1kFrojnoqPLxYhQ4Wo5XiL8EVJrVsB8ARoC1PWW6VGtT0WKCemjy8aC+louJnjS7U18x3b06Q==",
1228
+ "cpu": [
1229
+ "x64"
1230
+ ],
1231
+ "dev": true,
1232
+ "license": "MPL-2.0",
1233
+ "optional": true,
1234
+ "os": [
1235
+ "win32"
1236
+ ],
1237
+ "engines": {
1238
+ "node": ">= 12.0.0"
1239
+ },
1240
+ "funding": {
1241
+ "type": "opencollective",
1242
+ "url": "https://opencollective.com/parcel"
1243
+ }
1244
+ },
1245
+ "node_modules/lilconfig": {
1246
+ "version": "3.1.3",
1247
+ "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz",
1248
+ "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==",
1249
+ "dev": true,
1250
+ "license": "MIT",
1251
+ "engines": {
1252
+ "node": ">=14"
1253
+ },
1254
+ "funding": {
1255
+ "url": "https://github.com/sponsors/antonk52"
1256
+ }
1257
+ },
1258
+ "node_modules/lines-and-columns": {
1259
+ "version": "1.2.4",
1260
+ "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz",
1261
+ "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==",
1262
+ "dev": true,
1263
+ "license": "MIT"
1264
+ },
1265
+ "node_modules/loose-envify": {
1266
+ "version": "1.4.0",
1267
+ "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
1268
+ "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==",
1269
+ "license": "MIT",
1270
+ "dependencies": {
1271
+ "js-tokens": "^3.0.0 || ^4.0.0"
1272
+ },
1273
+ "bin": {
1274
+ "loose-envify": "cli.js"
1275
+ }
1276
+ },
1277
+ "node_modules/merge2": {
1278
+ "version": "1.4.1",
1279
+ "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz",
1280
+ "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==",
1281
+ "dev": true,
1282
+ "license": "MIT",
1283
+ "engines": {
1284
+ "node": ">= 8"
1285
+ }
1286
+ },
1287
+ "node_modules/micromatch": {
1288
+ "version": "4.0.8",
1289
+ "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz",
1290
+ "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==",
1291
+ "dev": true,
1292
+ "license": "MIT",
1293
+ "dependencies": {
1294
+ "braces": "^3.0.3",
1295
+ "picomatch": "^2.3.1"
1296
+ },
1297
+ "engines": {
1298
+ "node": ">=8.6"
1299
+ }
1300
+ },
1301
+ "node_modules/micromatch/node_modules/picomatch": {
1302
+ "version": "2.3.2",
1303
+ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
1304
+ "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
1305
+ "dev": true,
1306
+ "license": "MIT",
1307
+ "engines": {
1308
+ "node": ">=8.6"
1309
+ },
1310
+ "funding": {
1311
+ "url": "https://github.com/sponsors/jonschlinkert"
1312
+ }
1313
+ },
1314
+ "node_modules/mz": {
1315
+ "version": "2.7.0",
1316
+ "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz",
1317
+ "integrity": "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==",
1318
+ "dev": true,
1319
+ "license": "MIT",
1320
+ "dependencies": {
1321
+ "any-promise": "^1.0.0",
1322
+ "object-assign": "^4.0.1",
1323
+ "thenify-all": "^1.0.0"
1324
+ }
1325
+ },
1326
+ "node_modules/nanoid": {
1327
+ "version": "3.3.11",
1328
+ "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz",
1329
+ "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==",
1330
+ "dev": true,
1331
+ "funding": [
1332
+ {
1333
+ "type": "github",
1334
+ "url": "https://github.com/sponsors/ai"
1335
+ }
1336
+ ],
1337
+ "license": "MIT",
1338
+ "bin": {
1339
+ "nanoid": "bin/nanoid.cjs"
1340
+ },
1341
+ "engines": {
1342
+ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
1343
+ }
1344
+ },
1345
+ "node_modules/node-releases": {
1346
+ "version": "2.0.38",
1347
+ "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.38.tgz",
1348
+ "integrity": "sha512-3qT/88Y3FbH/Kx4szpQQ4HzUbVrHPKTLVpVocKiLfoYvw9XSGOX2FmD2d6DrXbVYyAQTF2HeF6My8jmzx7/CRw==",
1349
+ "dev": true,
1350
+ "license": "MIT"
1351
+ },
1352
+ "node_modules/normalize-path": {
1353
+ "version": "3.0.0",
1354
+ "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz",
1355
+ "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==",
1356
+ "dev": true,
1357
+ "license": "MIT",
1358
+ "engines": {
1359
+ "node": ">=0.10.0"
1360
+ }
1361
+ },
1362
+ "node_modules/object-assign": {
1363
+ "version": "4.1.1",
1364
+ "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
1365
+ "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==",
1366
+ "dev": true,
1367
+ "license": "MIT",
1368
+ "engines": {
1369
+ "node": ">=0.10.0"
1370
+ }
1371
+ },
1372
+ "node_modules/object-hash": {
1373
+ "version": "3.0.0",
1374
+ "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz",
1375
+ "integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==",
1376
+ "dev": true,
1377
+ "license": "MIT",
1378
+ "engines": {
1379
+ "node": ">= 6"
1380
+ }
1381
+ },
1382
+ "node_modules/path-parse": {
1383
+ "version": "1.0.7",
1384
+ "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz",
1385
+ "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==",
1386
+ "dev": true,
1387
+ "license": "MIT"
1388
+ },
1389
+ "node_modules/picocolors": {
1390
+ "version": "1.1.1",
1391
+ "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz",
1392
+ "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==",
1393
+ "dev": true,
1394
+ "license": "ISC"
1395
+ },
1396
+ "node_modules/picomatch": {
1397
+ "version": "4.0.4",
1398
+ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
1399
+ "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
1400
+ "dev": true,
1401
+ "license": "MIT",
1402
+ "engines": {
1403
+ "node": ">=12"
1404
+ },
1405
+ "funding": {
1406
+ "url": "https://github.com/sponsors/jonschlinkert"
1407
+ }
1408
+ },
1409
+ "node_modules/pify": {
1410
+ "version": "2.3.0",
1411
+ "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz",
1412
+ "integrity": "sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog==",
1413
+ "dev": true,
1414
+ "license": "MIT",
1415
+ "engines": {
1416
+ "node": ">=0.10.0"
1417
+ }
1418
+ },
1419
+ "node_modules/pirates": {
1420
+ "version": "4.0.7",
1421
+ "resolved": "https://registry.npmjs.org/pirates/-/pirates-4.0.7.tgz",
1422
+ "integrity": "sha512-TfySrs/5nm8fQJDcBDuUng3VOUKsd7S+zqvbOTiGXHfxX4wK31ard+hoNuvkicM/2YFzlpDgABOevKSsB4G/FA==",
1423
+ "dev": true,
1424
+ "license": "MIT",
1425
+ "engines": {
1426
+ "node": ">= 6"
1427
+ }
1428
+ },
1429
+ "node_modules/postcss": {
1430
+ "version": "8.5.10",
1431
+ "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.10.tgz",
1432
+ "integrity": "sha512-pMMHxBOZKFU6HgAZ4eyGnwXF/EvPGGqUr0MnZ5+99485wwW41kW91A4LOGxSHhgugZmSChL5AlElNdwlNgcnLQ==",
1433
+ "dev": true,
1434
+ "funding": [
1435
+ {
1436
+ "type": "opencollective",
1437
+ "url": "https://opencollective.com/postcss/"
1438
+ },
1439
+ {
1440
+ "type": "tidelift",
1441
+ "url": "https://tidelift.com/funding/github/npm/postcss"
1442
+ },
1443
+ {
1444
+ "type": "github",
1445
+ "url": "https://github.com/sponsors/ai"
1446
+ }
1447
+ ],
1448
+ "license": "MIT",
1449
+ "dependencies": {
1450
+ "nanoid": "^3.3.11",
1451
+ "picocolors": "^1.1.1",
1452
+ "source-map-js": "^1.2.1"
1453
+ },
1454
+ "engines": {
1455
+ "node": "^10 || ^12 || >=14"
1456
+ }
1457
+ },
1458
+ "node_modules/postcss-import": {
1459
+ "version": "15.1.0",
1460
+ "resolved": "https://registry.npmjs.org/postcss-import/-/postcss-import-15.1.0.tgz",
1461
+ "integrity": "sha512-hpr+J05B2FVYUAXHeK1YyI267J/dDDhMU6B6civm8hSY1jYJnBXxzKDKDswzJmtLHryrjhnDjqqp/49t8FALew==",
1462
+ "dev": true,
1463
+ "license": "MIT",
1464
+ "dependencies": {
1465
+ "postcss-value-parser": "^4.0.0",
1466
+ "read-cache": "^1.0.0",
1467
+ "resolve": "^1.1.7"
1468
+ },
1469
+ "engines": {
1470
+ "node": ">=14.0.0"
1471
+ },
1472
+ "peerDependencies": {
1473
+ "postcss": "^8.0.0"
1474
+ }
1475
+ },
1476
+ "node_modules/postcss-js": {
1477
+ "version": "4.1.0",
1478
+ "resolved": "https://registry.npmjs.org/postcss-js/-/postcss-js-4.1.0.tgz",
1479
+ "integrity": "sha512-oIAOTqgIo7q2EOwbhb8UalYePMvYoIeRY2YKntdpFQXNosSu3vLrniGgmH9OKs/qAkfoj5oB3le/7mINW1LCfw==",
1480
+ "dev": true,
1481
+ "funding": [
1482
+ {
1483
+ "type": "opencollective",
1484
+ "url": "https://opencollective.com/postcss/"
1485
+ },
1486
+ {
1487
+ "type": "github",
1488
+ "url": "https://github.com/sponsors/ai"
1489
+ }
1490
+ ],
1491
+ "license": "MIT",
1492
+ "dependencies": {
1493
+ "camelcase-css": "^2.0.1"
1494
+ },
1495
+ "engines": {
1496
+ "node": "^12 || ^14 || >= 16"
1497
+ },
1498
+ "peerDependencies": {
1499
+ "postcss": "^8.4.21"
1500
+ }
1501
+ },
1502
+ "node_modules/postcss-load-config": {
1503
+ "version": "6.0.1",
1504
+ "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-6.0.1.tgz",
1505
+ "integrity": "sha512-oPtTM4oerL+UXmx+93ytZVN82RrlY/wPUV8IeDxFrzIjXOLF1pN+EmKPLbubvKHT2HC20xXsCAH2Z+CKV6Oz/g==",
1506
+ "dev": true,
1507
+ "funding": [
1508
+ {
1509
+ "type": "opencollective",
1510
+ "url": "https://opencollective.com/postcss/"
1511
+ },
1512
+ {
1513
+ "type": "github",
1514
+ "url": "https://github.com/sponsors/ai"
1515
+ }
1516
+ ],
1517
+ "license": "MIT",
1518
+ "dependencies": {
1519
+ "lilconfig": "^3.1.1"
1520
+ },
1521
+ "engines": {
1522
+ "node": ">= 18"
1523
+ },
1524
+ "peerDependencies": {
1525
+ "jiti": ">=1.21.0",
1526
+ "postcss": ">=8.0.9",
1527
+ "tsx": "^4.8.1",
1528
+ "yaml": "^2.4.2"
1529
+ },
1530
+ "peerDependenciesMeta": {
1531
+ "jiti": {
1532
+ "optional": true
1533
+ },
1534
+ "postcss": {
1535
+ "optional": true
1536
+ },
1537
+ "tsx": {
1538
+ "optional": true
1539
+ },
1540
+ "yaml": {
1541
+ "optional": true
1542
+ }
1543
+ }
1544
+ },
1545
+ "node_modules/postcss-nested": {
1546
+ "version": "6.2.0",
1547
+ "resolved": "https://registry.npmjs.org/postcss-nested/-/postcss-nested-6.2.0.tgz",
1548
+ "integrity": "sha512-HQbt28KulC5AJzG+cZtj9kvKB93CFCdLvog1WFLf1D+xmMvPGlBstkpTEZfK5+AN9hfJocyBFCNiqyS48bpgzQ==",
1549
+ "dev": true,
1550
+ "funding": [
1551
+ {
1552
+ "type": "opencollective",
1553
+ "url": "https://opencollective.com/postcss/"
1554
+ },
1555
+ {
1556
+ "type": "github",
1557
+ "url": "https://github.com/sponsors/ai"
1558
+ }
1559
+ ],
1560
+ "license": "MIT",
1561
+ "dependencies": {
1562
+ "postcss-selector-parser": "^6.1.1"
1563
+ },
1564
+ "engines": {
1565
+ "node": ">=12.0"
1566
+ },
1567
+ "peerDependencies": {
1568
+ "postcss": "^8.2.14"
1569
+ }
1570
+ },
1571
+ "node_modules/postcss-selector-parser": {
1572
+ "version": "6.1.2",
1573
+ "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.1.2.tgz",
1574
+ "integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==",
1575
+ "dev": true,
1576
+ "license": "MIT",
1577
+ "dependencies": {
1578
+ "cssesc": "^3.0.0",
1579
+ "util-deprecate": "^1.0.2"
1580
+ },
1581
+ "engines": {
1582
+ "node": ">=4"
1583
+ }
1584
+ },
1585
+ "node_modules/postcss-value-parser": {
1586
+ "version": "4.2.0",
1587
+ "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz",
1588
+ "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==",
1589
+ "dev": true,
1590
+ "license": "MIT"
1591
+ },
1592
+ "node_modules/queue-microtask": {
1593
+ "version": "1.2.3",
1594
+ "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz",
1595
+ "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==",
1596
+ "dev": true,
1597
+ "funding": [
1598
+ {
1599
+ "type": "github",
1600
+ "url": "https://github.com/sponsors/feross"
1601
+ },
1602
+ {
1603
+ "type": "patreon",
1604
+ "url": "https://www.patreon.com/feross"
1605
+ },
1606
+ {
1607
+ "type": "consulting",
1608
+ "url": "https://feross.org/support"
1609
+ }
1610
+ ],
1611
+ "license": "MIT"
1612
+ },
1613
+ "node_modules/react": {
1614
+ "version": "18.3.1",
1615
+ "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz",
1616
+ "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==",
1617
+ "license": "MIT",
1618
+ "dependencies": {
1619
+ "loose-envify": "^1.1.0"
1620
+ },
1621
+ "engines": {
1622
+ "node": ">=0.10.0"
1623
+ }
1624
+ },
1625
+ "node_modules/react-dom": {
1626
+ "version": "18.3.1",
1627
+ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz",
1628
+ "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==",
1629
+ "license": "MIT",
1630
+ "dependencies": {
1631
+ "loose-envify": "^1.1.0",
1632
+ "scheduler": "^0.23.2"
1633
+ },
1634
+ "peerDependencies": {
1635
+ "react": "^18.3.1"
1636
+ }
1637
+ },
1638
+ "node_modules/read-cache": {
1639
+ "version": "1.0.0",
1640
+ "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz",
1641
+ "integrity": "sha512-Owdv/Ft7IjOgm/i0xvNDZ1LrRANRfew4b2prF3OWMQLxLfu3bS8FVhCsrSCMK4lR56Y9ya+AThoTpDCTxCmpRA==",
1642
+ "dev": true,
1643
+ "license": "MIT",
1644
+ "dependencies": {
1645
+ "pify": "^2.3.0"
1646
+ }
1647
+ },
1648
+ "node_modules/readdirp": {
1649
+ "version": "3.6.0",
1650
+ "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz",
1651
+ "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==",
1652
+ "dev": true,
1653
+ "license": "MIT",
1654
+ "dependencies": {
1655
+ "picomatch": "^2.2.1"
1656
+ },
1657
+ "engines": {
1658
+ "node": ">=8.10.0"
1659
+ }
1660
+ },
1661
+ "node_modules/readdirp/node_modules/picomatch": {
1662
+ "version": "2.3.2",
1663
+ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
1664
+ "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
1665
+ "dev": true,
1666
+ "license": "MIT",
1667
+ "engines": {
1668
+ "node": ">=8.6"
1669
+ },
1670
+ "funding": {
1671
+ "url": "https://github.com/sponsors/jonschlinkert"
1672
+ }
1673
+ },
1674
+ "node_modules/resolve": {
1675
+ "version": "1.22.12",
1676
+ "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.12.tgz",
1677
+ "integrity": "sha512-TyeJ1zif53BPfHootBGwPRYT1RUt6oGWsaQr8UyZW/eAm9bKoijtvruSDEmZHm92CwS9nj7/fWttqPCgzep8CA==",
1678
+ "dev": true,
1679
+ "license": "MIT",
1680
+ "dependencies": {
1681
+ "es-errors": "^1.3.0",
1682
+ "is-core-module": "^2.16.1",
1683
+ "path-parse": "^1.0.7",
1684
+ "supports-preserve-symlinks-flag": "^1.0.0"
1685
+ },
1686
+ "bin": {
1687
+ "resolve": "bin/resolve"
1688
+ },
1689
+ "engines": {
1690
+ "node": ">= 0.4"
1691
+ },
1692
+ "funding": {
1693
+ "url": "https://github.com/sponsors/ljharb"
1694
+ }
1695
+ },
1696
+ "node_modules/reusify": {
1697
+ "version": "1.1.0",
1698
+ "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.1.0.tgz",
1699
+ "integrity": "sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==",
1700
+ "dev": true,
1701
+ "license": "MIT",
1702
+ "engines": {
1703
+ "iojs": ">=1.0.0",
1704
+ "node": ">=0.10.0"
1705
+ }
1706
+ },
1707
+ "node_modules/rolldown": {
1708
+ "version": "1.0.0-rc.13",
1709
+ "resolved": "https://registry.npmjs.org/rolldown/-/rolldown-1.0.0-rc.13.tgz",
1710
+ "integrity": "sha512-bvVj8YJmf0rq4pSFmH7laLa6pYrhghv3PRzrCdRAr23g66zOKVJ4wkvFtgohtPLWmthgg8/rkaqRHrpUEh0Zbw==",
1711
+ "dev": true,
1712
+ "license": "MIT",
1713
+ "dependencies": {
1714
+ "@oxc-project/types": "=0.123.0",
1715
+ "@rolldown/pluginutils": "1.0.0-rc.13"
1716
+ },
1717
+ "bin": {
1718
+ "rolldown": "bin/cli.mjs"
1719
+ },
1720
+ "engines": {
1721
+ "node": "^20.19.0 || >=22.12.0"
1722
+ },
1723
+ "optionalDependencies": {
1724
+ "@rolldown/binding-android-arm64": "1.0.0-rc.13",
1725
+ "@rolldown/binding-darwin-arm64": "1.0.0-rc.13",
1726
+ "@rolldown/binding-darwin-x64": "1.0.0-rc.13",
1727
+ "@rolldown/binding-freebsd-x64": "1.0.0-rc.13",
1728
+ "@rolldown/binding-linux-arm-gnueabihf": "1.0.0-rc.13",
1729
+ "@rolldown/binding-linux-arm64-gnu": "1.0.0-rc.13",
1730
+ "@rolldown/binding-linux-arm64-musl": "1.0.0-rc.13",
1731
+ "@rolldown/binding-linux-ppc64-gnu": "1.0.0-rc.13",
1732
+ "@rolldown/binding-linux-s390x-gnu": "1.0.0-rc.13",
1733
+ "@rolldown/binding-linux-x64-gnu": "1.0.0-rc.13",
1734
+ "@rolldown/binding-linux-x64-musl": "1.0.0-rc.13",
1735
+ "@rolldown/binding-openharmony-arm64": "1.0.0-rc.13",
1736
+ "@rolldown/binding-wasm32-wasi": "1.0.0-rc.13",
1737
+ "@rolldown/binding-win32-arm64-msvc": "1.0.0-rc.13",
1738
+ "@rolldown/binding-win32-x64-msvc": "1.0.0-rc.13"
1739
+ }
1740
+ },
1741
+ "node_modules/rolldown/node_modules/@rolldown/pluginutils": {
1742
+ "version": "1.0.0-rc.13",
1743
+ "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.13.tgz",
1744
+ "integrity": "sha512-3ngTAv6F/Py35BsYbeeLeecvhMKdsKm4AoOETVhAA+Qc8nrA2I0kF7oa93mE9qnIurngOSpMnQ0x2nQY2FPviA==",
1745
+ "dev": true,
1746
+ "license": "MIT"
1747
+ },
1748
+ "node_modules/run-parallel": {
1749
+ "version": "1.2.0",
1750
+ "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz",
1751
+ "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==",
1752
+ "dev": true,
1753
+ "funding": [
1754
+ {
1755
+ "type": "github",
1756
+ "url": "https://github.com/sponsors/feross"
1757
+ },
1758
+ {
1759
+ "type": "patreon",
1760
+ "url": "https://www.patreon.com/feross"
1761
+ },
1762
+ {
1763
+ "type": "consulting",
1764
+ "url": "https://feross.org/support"
1765
+ }
1766
+ ],
1767
+ "license": "MIT",
1768
+ "dependencies": {
1769
+ "queue-microtask": "^1.2.2"
1770
+ }
1771
+ },
1772
+ "node_modules/scheduler": {
1773
+ "version": "0.23.2",
1774
+ "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz",
1775
+ "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==",
1776
+ "license": "MIT",
1777
+ "dependencies": {
1778
+ "loose-envify": "^1.1.0"
1779
+ }
1780
+ },
1781
+ "node_modules/source-map-js": {
1782
+ "version": "1.2.1",
1783
+ "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
1784
+ "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==",
1785
+ "dev": true,
1786
+ "license": "BSD-3-Clause",
1787
+ "engines": {
1788
+ "node": ">=0.10.0"
1789
+ }
1790
+ },
1791
+ "node_modules/sucrase": {
1792
+ "version": "3.35.1",
1793
+ "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.1.tgz",
1794
+ "integrity": "sha512-DhuTmvZWux4H1UOnWMB3sk0sbaCVOoQZjv8u1rDoTV0HTdGem9hkAZtl4JZy8P2z4Bg0nT+YMeOFyVr4zcG5Tw==",
1795
+ "dev": true,
1796
+ "license": "MIT",
1797
+ "dependencies": {
1798
+ "@jridgewell/gen-mapping": "^0.3.2",
1799
+ "commander": "^4.0.0",
1800
+ "lines-and-columns": "^1.1.6",
1801
+ "mz": "^2.7.0",
1802
+ "pirates": "^4.0.1",
1803
+ "tinyglobby": "^0.2.11",
1804
+ "ts-interface-checker": "^0.1.9"
1805
+ },
1806
+ "bin": {
1807
+ "sucrase": "bin/sucrase",
1808
+ "sucrase-node": "bin/sucrase-node"
1809
+ },
1810
+ "engines": {
1811
+ "node": ">=16 || 14 >=14.17"
1812
+ }
1813
+ },
1814
+ "node_modules/supports-preserve-symlinks-flag": {
1815
+ "version": "1.0.0",
1816
+ "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz",
1817
+ "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==",
1818
+ "dev": true,
1819
+ "license": "MIT",
1820
+ "engines": {
1821
+ "node": ">= 0.4"
1822
+ },
1823
+ "funding": {
1824
+ "url": "https://github.com/sponsors/ljharb"
1825
+ }
1826
+ },
1827
+ "node_modules/tailwindcss": {
1828
+ "version": "3.4.19",
1829
+ "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.19.tgz",
1830
+ "integrity": "sha512-3ofp+LL8E+pK/JuPLPggVAIaEuhvIz4qNcf3nA1Xn2o/7fb7s/TYpHhwGDv1ZU3PkBluUVaF8PyCHcm48cKLWQ==",
1831
+ "dev": true,
1832
+ "license": "MIT",
1833
+ "dependencies": {
1834
+ "@alloc/quick-lru": "^5.2.0",
1835
+ "arg": "^5.0.2",
1836
+ "chokidar": "^3.6.0",
1837
+ "didyoumean": "^1.2.2",
1838
+ "dlv": "^1.1.3",
1839
+ "fast-glob": "^3.3.2",
1840
+ "glob-parent": "^6.0.2",
1841
+ "is-glob": "^4.0.3",
1842
+ "jiti": "^1.21.7",
1843
+ "lilconfig": "^3.1.3",
1844
+ "micromatch": "^4.0.8",
1845
+ "normalize-path": "^3.0.0",
1846
+ "object-hash": "^3.0.0",
1847
+ "picocolors": "^1.1.1",
1848
+ "postcss": "^8.4.47",
1849
+ "postcss-import": "^15.1.0",
1850
+ "postcss-js": "^4.0.1",
1851
+ "postcss-load-config": "^4.0.2 || ^5.0 || ^6.0",
1852
+ "postcss-nested": "^6.2.0",
1853
+ "postcss-selector-parser": "^6.1.2",
1854
+ "resolve": "^1.22.8",
1855
+ "sucrase": "^3.35.0"
1856
+ },
1857
+ "bin": {
1858
+ "tailwind": "lib/cli.js",
1859
+ "tailwindcss": "lib/cli.js"
1860
+ },
1861
+ "engines": {
1862
+ "node": ">=14.0.0"
1863
+ }
1864
+ },
1865
+ "node_modules/thenify": {
1866
+ "version": "3.3.1",
1867
+ "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz",
1868
+ "integrity": "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==",
1869
+ "dev": true,
1870
+ "license": "MIT",
1871
+ "dependencies": {
1872
+ "any-promise": "^1.0.0"
1873
+ }
1874
+ },
1875
+ "node_modules/thenify-all": {
1876
+ "version": "1.6.0",
1877
+ "resolved": "https://registry.npmjs.org/thenify-all/-/thenify-all-1.6.0.tgz",
1878
+ "integrity": "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==",
1879
+ "dev": true,
1880
+ "license": "MIT",
1881
+ "dependencies": {
1882
+ "thenify": ">= 3.1.0 < 4"
1883
+ },
1884
+ "engines": {
1885
+ "node": ">=0.8"
1886
+ }
1887
+ },
1888
+ "node_modules/tinyglobby": {
1889
+ "version": "0.2.16",
1890
+ "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.16.tgz",
1891
+ "integrity": "sha512-pn99VhoACYR8nFHhxqix+uvsbXineAasWm5ojXoN8xEwK5Kd3/TrhNn1wByuD52UxWRLy8pu+kRMniEi6Eq9Zg==",
1892
+ "dev": true,
1893
+ "license": "MIT",
1894
+ "dependencies": {
1895
+ "fdir": "^6.5.0",
1896
+ "picomatch": "^4.0.4"
1897
+ },
1898
+ "engines": {
1899
+ "node": ">=12.0.0"
1900
+ },
1901
+ "funding": {
1902
+ "url": "https://github.com/sponsors/SuperchupuDev"
1903
+ }
1904
+ },
1905
+ "node_modules/to-regex-range": {
1906
+ "version": "5.0.1",
1907
+ "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz",
1908
+ "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==",
1909
+ "dev": true,
1910
+ "license": "MIT",
1911
+ "dependencies": {
1912
+ "is-number": "^7.0.0"
1913
+ },
1914
+ "engines": {
1915
+ "node": ">=8.0"
1916
+ }
1917
+ },
1918
+ "node_modules/ts-interface-checker": {
1919
+ "version": "0.1.13",
1920
+ "resolved": "https://registry.npmjs.org/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz",
1921
+ "integrity": "sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==",
1922
+ "dev": true,
1923
+ "license": "Apache-2.0"
1924
+ },
1925
+ "node_modules/tslib": {
1926
+ "version": "2.8.1",
1927
+ "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
1928
+ "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
1929
+ "dev": true,
1930
+ "license": "0BSD",
1931
+ "optional": true
1932
+ },
1933
+ "node_modules/update-browserslist-db": {
1934
+ "version": "1.2.3",
1935
+ "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz",
1936
+ "integrity": "sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==",
1937
+ "dev": true,
1938
+ "funding": [
1939
+ {
1940
+ "type": "opencollective",
1941
+ "url": "https://opencollective.com/browserslist"
1942
+ },
1943
+ {
1944
+ "type": "tidelift",
1945
+ "url": "https://tidelift.com/funding/github/npm/browserslist"
1946
+ },
1947
+ {
1948
+ "type": "github",
1949
+ "url": "https://github.com/sponsors/ai"
1950
+ }
1951
+ ],
1952
+ "license": "MIT",
1953
+ "dependencies": {
1954
+ "escalade": "^3.2.0",
1955
+ "picocolors": "^1.1.1"
1956
+ },
1957
+ "bin": {
1958
+ "update-browserslist-db": "cli.js"
1959
+ },
1960
+ "peerDependencies": {
1961
+ "browserslist": ">= 4.21.0"
1962
+ }
1963
+ },
1964
+ "node_modules/util-deprecate": {
1965
+ "version": "1.0.2",
1966
+ "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
1967
+ "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==",
1968
+ "dev": true,
1969
+ "license": "MIT"
1970
+ },
1971
+ "node_modules/vite": {
1972
+ "version": "8.0.7",
1973
+ "resolved": "https://registry.npmjs.org/vite/-/vite-8.0.7.tgz",
1974
+ "integrity": "sha512-P1PbweD+2/udplnThz3btF4cf6AgPky7kk23RtHUkJIU5BIxwPprhRGmOAHs6FTI7UiGbTNrgNP6jSYD6JaRnw==",
1975
+ "dev": true,
1976
+ "license": "MIT",
1977
+ "dependencies": {
1978
+ "lightningcss": "^1.32.0",
1979
+ "picomatch": "^4.0.4",
1980
+ "postcss": "^8.5.8",
1981
+ "rolldown": "1.0.0-rc.13",
1982
+ "tinyglobby": "^0.2.15"
1983
+ },
1984
+ "bin": {
1985
+ "vite": "bin/vite.js"
1986
+ },
1987
+ "engines": {
1988
+ "node": "^20.19.0 || >=22.12.0"
1989
+ },
1990
+ "funding": {
1991
+ "url": "https://github.com/vitejs/vite?sponsor=1"
1992
+ },
1993
+ "optionalDependencies": {
1994
+ "fsevents": "~2.3.3"
1995
+ },
1996
+ "peerDependencies": {
1997
+ "@types/node": "^20.19.0 || >=22.12.0",
1998
+ "@vitejs/devtools": "^0.1.0",
1999
+ "esbuild": "^0.27.0 || ^0.28.0",
2000
+ "jiti": ">=1.21.0",
2001
+ "less": "^4.0.0",
2002
+ "sass": "^1.70.0",
2003
+ "sass-embedded": "^1.70.0",
2004
+ "stylus": ">=0.54.8",
2005
+ "sugarss": "^5.0.0",
2006
+ "terser": "^5.16.0",
2007
+ "tsx": "^4.8.1",
2008
+ "yaml": "^2.4.2"
2009
+ },
2010
+ "peerDependenciesMeta": {
2011
+ "@types/node": {
2012
+ "optional": true
2013
+ },
2014
+ "@vitejs/devtools": {
2015
+ "optional": true
2016
+ },
2017
+ "esbuild": {
2018
+ "optional": true
2019
+ },
2020
+ "jiti": {
2021
+ "optional": true
2022
+ },
2023
+ "less": {
2024
+ "optional": true
2025
+ },
2026
+ "sass": {
2027
+ "optional": true
2028
+ },
2029
+ "sass-embedded": {
2030
+ "optional": true
2031
+ },
2032
+ "stylus": {
2033
+ "optional": true
2034
+ },
2035
+ "sugarss": {
2036
+ "optional": true
2037
+ },
2038
+ "terser": {
2039
+ "optional": true
2040
+ },
2041
+ "tsx": {
2042
+ "optional": true
2043
+ },
2044
+ "yaml": {
2045
+ "optional": true
2046
+ }
2047
+ }
2048
+ }
2049
+ }
2050
+ }
frontend/react/package.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "openenv-rl-frontend",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "type": "module",
6
+ "scripts": {
7
+ "dev": "vite --configLoader native || vite",
8
+ "build": "vite build --configLoader native || vite build",
9
+ "preview": "vite preview --configLoader native --host 0.0.0.0 --port 4173 || vite preview --host 0.0.0.0 --port 4173"
10
+ },
11
+ "dependencies": {
12
+ "react": "^18.3.1",
13
+ "react-dom": "^18.3.1"
14
+ },
15
+ "devDependencies": {
16
+ "@vitejs/plugin-react": "^6.0.1",
17
+ "autoprefixer": "^10.5.0",
18
+ "postcss": "^8.5.10",
19
+ "tailwindcss": "^3.4.19",
20
+ "vite": "^8.0.7"
21
+ }
22
+ }
frontend/react/postcss.config.js ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
/**
 * PostCSS configuration: run Tailwind CSS first, then vendor-prefix
 * the generated rules with Autoprefixer.
 */
const config = {
  plugins: {
    tailwindcss: {},
    autoprefixer: {},
  },
};

export default config;
frontend/react/src/App.jsx ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useEffect } from "react";
2
+ import { api } from "./api/client";
3
+ import { Dashboard } from "./components/story-ui/Dashboard";
4
+
5
+ export default function App() {
6
+ const [tasks, setTasks] = useState([]);
7
+
8
+ useEffect(() => {
9
+ const boot = async () => {
10
+ try {
11
+ const taskRes = await api("/tasks");
12
+ setTasks(taskRes.tasks || []);
13
+ } catch (err) {
14
+ console.error("Failed to load tasks", err);
15
+ }
16
+ };
17
+ boot();
18
+ }, []);
19
+
20
+ return <Dashboard tasks={tasks} />;
21
+ }
frontend/react/src/api/client.js ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Backend origin quoted in connection-failure error messages for local dev.
const DEFAULT_LOCAL_API = "http://127.0.0.1:7860";
// Backend ports/hosts probed directly when the UI runs on the Vite dev server.
const LOCAL_PORTS = ["7860"];
const LOCAL_HOSTS = ["127.0.0.1", "localhost"];
4
+
5
/**
 * Build the ordered, de-duplicated list of URLs to try for an API path.
 *
 * Ordering rules (preserved exactly):
 * - `/training/*` endpoints are mounted without any `/api` prefix, so they
 *   short-circuit to direct candidates only (avoids noisy 404 probing).
 * - In local dev on the Vite server (port 5173), direct backend URLs come
 *   first so a down backend does not spam the proxy with refused connections.
 * - Root-only paths (currently `/rl/models`) never get an `/api` prefix.
 * - "Compat" legacy paths are additionally reachable without `/api`.
 */
function candidates(path) {
  const urls = [];
  const isRootOnly = path === "/rl/models";
  const compatPrefixes = [
    "/simulation/",
    "/training/",
    "/rl/",
    "/openenv/",
    "/benchmark",
    "/history/",
  ];
  const isCompat = compatPrefixes.some((prefix) => path.startsWith(prefix));

  let onViteDevServer = false;
  if (typeof window !== "undefined") {
    const { hostname, port } = window.location;
    const localHost = hostname === "localhost" || hostname === "127.0.0.1";
    onViteDevServer = localHost && port === "5173";
  }

  // Training story endpoints live at /training/* only — never /api/training/*.
  if (path.startsWith("/training/")) {
    if (onViteDevServer) {
      for (const port of LOCAL_PORTS) {
        for (const host of LOCAL_HOSTS) {
          urls.push(`http://${host}:${port}${path}`);
        }
      }
    } else {
      urls.push(path);
    }
    return [...new Set(urls)];
  }

  if (onViteDevServer) {
    // Prefer direct backend URLs first in local dev.
    for (const port of LOCAL_PORTS) {
      for (const host of LOCAL_HOSTS) {
        if (isRootOnly) {
          urls.push(`http://${host}:${port}${path}`);
        } else {
          urls.push(`http://${host}:${port}/api${path}`);
          urls.push(`http://${host}:${port}/api/v1${path}`);
          if (isCompat) {
            urls.push(`http://${host}:${port}${path}`);
          }
        }
      }
    }
  }

  // Relative (same-origin / proxied) candidates.
  if (isRootOnly) {
    urls.push(path);
  } else {
    urls.push(`/api${path}`, `/api/v1${path}`);
    if (isCompat) {
      urls.push(path);
    }
  }

  // Keep the original trailing fallbacks in local dev; Set dedupe below
  // drops the ones already emitted above.
  if (onViteDevServer && !isRootOnly) {
    for (const port of LOCAL_PORTS) {
      for (const host of LOCAL_HOSTS) {
        urls.push(`http://${host}:${port}/api${path}`);
        urls.push(`http://${host}:${port}/api/v1${path}`);
      }
    }
  }

  return [...new Set(urls)];
}
77
+
78
/**
 * Fetch `path` from the backend, probing each candidate URL in order.
 *
 * - Non-GET/HEAD requests default to a JSON Content-Type header.
 * - The first candidate that returns an OK response wins; its parsed JSON
 *   body (or null for non-JSON bodies) is returned.
 * - On total failure, the first genuine API error (a reachable endpoint
 *   that returned non-OK) is rethrown; otherwise a connection-failure
 *   error naming the expected local backend is thrown.
 *
 * Fix: a FastAPI `detail` can be an object or array — stringify non-string
 * details instead of letting them render as "[object Object]".
 *
 * @param {string} path - API path beginning with "/".
 * @param {RequestInit} [options] - Standard fetch options.
 * @returns {Promise<any>} Parsed JSON payload (or null).
 * @throws {Error} When every candidate URL fails.
 */
export async function api(path, options = {}) {
  const method = String(options.method || "GET").toUpperCase();
  const headers = { ...(options.headers || {}) };
  if (method !== "GET" && method !== "HEAD" && !("Content-Type" in headers)) {
    headers["Content-Type"] = "application/json";
  }
  const requestOptions = {
    ...options,
    method,
    headers,
  };
  if (method === "GET" || method === "HEAD") {
    // fetch rejects GET/HEAD requests that carry a body.
    delete requestOptions.body;
  }

  const errors = [];
  for (const url of candidates(path)) {
    try {
      const res = await fetch(url, requestOptions);
      let payload = null;
      try {
        payload = await res.json();
      } catch {
        payload = null; // non-JSON body is tolerated
      }
      if (!res.ok) {
        const rawDetail = payload?.detail || `${res.status}`;
        const detail =
          typeof rawDetail === "string" ? rawDetail : JSON.stringify(rawDetail);
        throw new Error(`API ${path} failed on ${url}: ${detail}`);
      }
      return payload;
    } catch (err) {
      errors.push(err);
    }
  }

  // Prefer surfacing a real HTTP-level failure over a connection failure.
  const firstApiError = errors.find(
    (e) => e instanceof Error && e.message.startsWith(`API ${path} failed`)
  );
  if (firstApiError) {
    throw firstApiError;
  }
  const lastError = errors.length ? errors[errors.length - 1] : new Error("Unknown request failure.");

  throw new Error(
    `API ${path} connection failed. Start backend on ${DEFAULT_LOCAL_API}. Last error: ${
      lastError instanceof Error ? lastError.message : String(lastError)
    }`
  );
}
127
+
128
/**
 * Format a numeric value to a fixed number of decimals.
 * Returns "-" for null/undefined or anything that does not coerce to a number.
 */
export function fmt(value, digits = 2) {
  if (value == null) return "-";
  const num = Number(value);
  return Number.isNaN(num) ? "-" : num.toFixed(digits);
}
frontend/react/src/components/Charts.jsx ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useEffect, useRef } from "react";
2
+
3
/**
 * Paint the chart backdrop, grid, axes, and (when 0 lies within
 * [yMin, yMax]) a dashed zero baseline onto the 2D context.
 */
function drawGridAndAxes(ctx, w, h, pad, yMin, yMax) {
  const innerW = w - pad * 2;
  const innerH = h - pad * 2;
  ctx.clearRect(0, 0, w, h);

  // Dark vertical gradient backdrop across the whole canvas.
  const backdrop = ctx.createLinearGradient(0, 0, 0, h);
  backdrop.addColorStop(0, "#060b12");
  backdrop.addColorStop(1, "#03070d");
  ctx.fillStyle = backdrop;
  ctx.fillRect(0, 0, w, h);

  // Faint grid: 5 horizontal bands, 8 vertical bands.
  ctx.strokeStyle = "#13202f";
  ctx.lineWidth = 1;
  const rowCount = 5;
  for (let row = 0; row <= rowCount; row += 1) {
    const y = pad + (innerH * row) / rowCount;
    ctx.beginPath();
    ctx.moveTo(pad, y);
    ctx.lineTo(w - pad, y);
    ctx.stroke();
  }
  const colCount = 8;
  for (let col = 0; col <= colCount; col += 1) {
    const x = pad + (innerW * col) / colCount;
    ctx.beginPath();
    ctx.moveTo(x, pad);
    ctx.lineTo(x, h - pad);
    ctx.stroke();
  }

  // Left and bottom axis lines.
  ctx.strokeStyle = "#2a3e54";
  ctx.beginPath();
  ctx.moveTo(pad, pad);
  ctx.lineTo(pad, h - pad);
  ctx.lineTo(w - pad, h - pad);
  ctx.stroke();

  // Dashed horizontal line at y = 0 when it falls inside the value range.
  if (yMin <= 0 && yMax >= 0) {
    const range = Math.max(1e-9, yMax - yMin);
    const zeroY = pad + ((yMax - 0) / range) * innerH;
    ctx.strokeStyle = "#2d5f84";
    ctx.setLineDash([4, 4]);
    ctx.beginPath();
    ctx.moveTo(pad, zeroY);
    ctx.lineTo(w - pad, zeroY);
    ctx.stroke();
    ctx.setLineDash([]);
  }
}
54
+
55
+ export function LineChart({ seriesA, seriesB, labelA = "A", labelB = "B" }) {
56
+ const ref = useRef(null);
57
+
58
+ useEffect(() => {
59
+ const canvas = ref.current;
60
+ if (!canvas) return;
61
+ const ctx = canvas.getContext("2d");
62
+ const w = canvas.width;
63
+ const h = canvas.height;
64
+ const pad = 40;
65
+
66
+ const all = [...seriesA, ...seriesB];
67
+ if (!all.length) return;
68
+ const yMaxRaw = Math.max(...all);
69
+ const yMinRaw = Math.min(...all);
70
+ const margin = Math.max(1, (yMaxRaw - yMinRaw) * 0.12);
71
+ const yMax = yMaxRaw + margin;
72
+ const yMin = yMinRaw - margin;
73
+ const yRange = Math.max(1e-9, yMax - yMin);
74
+ const chartW = w - pad * 2;
75
+ const chartH = h - pad * 2;
76
+
77
+ drawGridAndAxes(ctx, w, h, pad, yMin, yMax);
78
+
79
+ const yPx = (value) => pad + ((yMax - value) / yRange) * chartH;
80
+
81
+ const draw = (arr, color, glowColor) => {
82
+ if (!arr.length) return;
83
+ ctx.shadowBlur = 8;
84
+ ctx.shadowColor = glowColor;
85
+ ctx.strokeStyle = color;
86
+ ctx.lineWidth = 2.25;
87
+ const stepX = chartW / Math.max(arr.length - 1, 1);
88
+ ctx.beginPath();
89
+ arr.forEach((v, i) => {
90
+ const x = pad + i * stepX;
91
+ const y = yPx(Number(v || 0));
92
+ if (i === 0) ctx.moveTo(x, y);
93
+ else ctx.lineTo(x, y);
94
+ });
95
+ ctx.stroke();
96
+ ctx.shadowBlur = 0;
97
+
98
+ // point markers
99
+ ctx.fillStyle = color;
100
+ arr.forEach((v, i) => {
101
+ const x = pad + i * stepX;
102
+ const y = yPx(Number(v || 0));
103
+ ctx.beginPath();
104
+ ctx.arc(x, y, 2.2, 0, Math.PI * 2);
105
+ ctx.fill();
106
+ });
107
+ };
108
+
109
+ draw(seriesA, "#4fd6ff", "rgba(79, 214, 255, 0.7)");
110
+ draw(seriesB, "#ff8b1a", "rgba(255, 139, 26, 0.6)");
111
+
112
+ ctx.fillStyle = "#9ec3dd";
113
+ ctx.font = "12px Segoe UI";
114
+ ctx.fillText(`${labelA} (cyan)`, pad, 18);
115
+ ctx.fillStyle = "#ffbb80";
116
+ ctx.fillText(`${labelB} (orange)`, pad + 170, 18);
117
+
118
+ ctx.fillStyle = "#6f90aa";
119
+ ctx.fillText(`max ${yMaxRaw.toFixed(2)}`, 6, pad + 2);
120
+ ctx.fillText(`min ${yMinRaw.toFixed(2)}`, 6, h - pad + 2);
121
+ ctx.fillText("steps", w - 44, h - 10);
122
+ }, [seriesA, seriesB, labelA, labelB]);
123
+
124
+ return <canvas className="chart-canvas" ref={ref} width={1000} height={280} />;
125
+ }
126
+
127
+ export function CompareBars({ rows }) {
128
+ const safeRows = Array.isArray(rows) ? rows : [];
129
+ return (
130
+ <div className="compare-bars">
131
+ {safeRows.map((row) => (
132
+ <div key={row.label} className="compare-row">
133
+ <div className="compare-label">{row.label}</div>
134
+ <div className="compare-track">
135
+ <div className="compare-fill" style={{ width: `${Math.max(0, Math.min(100, row.value * 100))}%` }} />
136
+ </div>
137
+ <div className="compare-value">{row.value.toFixed(3)}</div>
138
+ </div>
139
+ ))}
140
+ </div>
141
+ );
142
+ }
frontend/react/src/components/Layout.jsx ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Sidebar navigation entries; `id` doubles as the tab key handed to `onChange`.
const NAV_ITEMS = [
  ["overview", "Overview"],
  ["simulation", "Simulation Lab"],
  ["training", "Training Studio"],
  ["comparison", "Model Comparison"],
].map(([id, title]) => ({ id, title }));
7
+
8
/**
 * Application shell: fixed sidebar with tab navigation plus a main content pane.
 *
 * @param {object} props
 * @param {string} props.active - id of the currently selected tab (matches NAV_ITEMS ids).
 * @param {(id: string) => void} props.onChange - called with the clicked tab's id.
 * @param {React.ReactNode} props.status - content rendered in the status banner above the page body.
 * @param {React.ReactNode} props.children - the active tab's page content.
 */
export function Layout({ active, onChange, status, children }) {
  return (
    <div className="app-shell">
      <aside className="sidebar">
        <h1>OpenEnv RL Console</h1>
        <p className="sidebar-sub">Real-world government workflow simulation and RL training.</p>
        <nav>
          {/* One button per tab; the active one gets the "active" modifier class. */}
          {NAV_ITEMS.map((item) => (
            <button
              key={item.id}
              className={`nav-btn ${active === item.id ? "active" : ""}`}
              onClick={() => onChange(item.id)}
            >
              {item.title}
            </button>
          ))}
        </nav>
      </aside>
      <main className="content">
        <div className="status-banner">{status}</div>
        {children}
      </main>
    </div>
  );
}
33
+
frontend/react/src/components/story-ui/Dashboard.jsx ADDED
@@ -0,0 +1,1589 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState, useEffect } from "react";
2
+ import { api, fmt } from "../../api/client";
3
+ import { useStorySimulation } from "../../hooks/useStorySimulation";
4
+ import { TrainingTabV2 } from "./TrainingTabV2";
5
+
6
+ // --- Timeline Tab -------------------------------------------------------------
7
// Display metadata for each simulation phase shown in the story timeline.
// `color` feeds Tailwind class interpolation; `icon` is a Material Symbols name.
const phaseEntry = (label, color, icon, desc) => ({ label, color, icon, desc });

const PHASE_LABELS = {
  early: phaseEntry("Early Phase", "indigo", "flag", "Agent explores the environment and initial decisions are made."),
  middle: phaseEntry("Mid-Phase", "amber", "timeline", "Policy adapts as patterns emerge in the backlog."),
  late: phaseEntry("Final Phase", "violet", "sports_score", "Agent converges toward optimal resolution strategy."),
};
12
+
13
/**
 * Simulation Lab tab: scenario/agent controls, live progress, journey summary,
 * KPI cards, the phase-annotated story timeline, and live queue monitors.
 *
 * All live data (kpis, timeline, resources, journeyStats) comes from the
 * `useStorySimulation` hook; this component is purely presentational plus a
 * small amount of local input-buffer state.
 *
 * @param {{ tasks: string[] }} props - available scenario task ids; the first
 *   one becomes the hook's default task (falls back to "district_backlog_easy").
 */
function TimelineTab({ tasks }) {
  const {
    taskId, setTaskId, maxSteps, setMaxSteps,
    agentMode,
    policyName, setPolicyName,
    modelPath, setModelPath,
    modelType, setModelType,
    availablePolicies,
    availableModels,
    configError,
    running, starting, currentStep,
    kpis, timeline, resources, journeyStats,
    startSimulation, stopSimulation,
  } = useStorySimulation({ defaultTask: tasks[0] || "district_backlog_easy" });

  // Controls are editable only when no run is starting or in progress.
  const isIdle = !starting && !running;
  // trained_rl mode cannot start without a model checkpoint selected.
  const startBlocked = agentMode === "trained_rl" && !modelPath;
  const progressPct = maxSteps > 0 ? Math.min(100, Math.round((currentStep / maxSteps) * 100)) : 0;
  const fmt2 = (n) => new Intl.NumberFormat().format(n ?? 0);
  // Signed one-decimal delta, with an explicit "+" for positive values.
  const fmtDelta = (n) => { const v = Number(n ?? 0); return v > 0 ? `+${v.toFixed(1)}` : v.toFixed(1); };

  // Local string buffer so the user can freely type without the field snapping back
  const [stepsInput, setStepsInput] = useState(String(maxSteps));
  // Keep buffer in sync if maxSteps changes from outside
  React.useEffect(() => { setStepsInput(String(maxSteps)); }, [maxSteps]);

  // Build phase-annotated timeline: insert phase dividers between phase changes
  // NOTE(review): `timeline` appears to be newest-first (comments below refer to
  // the "previous (newer) phase") — confirm against useStorySimulation.
  const annotatedTimeline = [];
  let lastPhase = null;
  // Running totals for the phase currently being scanned:
  // `drop` accumulates backlogDelta, `keys` counts key moments.
  let phaseStats = { drop: 0, keys: 0 };

  for (let i = 0; i < timeline.length; i++) {
    const ev = timeline[i];
    const ph = ev.phase;

    if (ph && ph !== lastPhase) {
      if (lastPhase && PHASE_LABELS[lastPhase]) {
        // We reached the end of the previous (newer) phase in the chronological timeline,
        // so insert its summary before starting the older phase.
        annotatedTimeline.push({
          _summary: true,
          phase: lastPhase,
          stats: { ...phaseStats },
          key: `sum-${lastPhase}-${i}`,
        });
      }
      if (PHASE_LABELS[ph]) {
        annotatedTimeline.push({ _divider: true, phase: ph, key: `div-${ph}-${i}` });
      }
      lastPhase = ph;
      phaseStats = { drop: 0, keys: 0 };
    }

    if (ev.key) phaseStats.keys += 1;
    if (ev.backlogDelta) phaseStats.drop += ev.backlogDelta;

    annotatedTimeline.push(ev);
  }

  // Handle the very last (oldest) phase summary at the bottom of the list
  if (lastPhase && PHASE_LABELS[lastPhase] && timeline.length > 0) {
    annotatedTimeline.push({
      _summary: true,
      phase: lastPhase,
      stats: { ...phaseStats },
      key: `sum-${lastPhase}-end`,
    });
  }

  return (
    <div className="space-y-5">
      {/* --- Controls bar --- */}
      <div className="flex flex-wrap gap-3 items-center justify-between bg-slate-900/60 border border-white/5 rounded-xl px-5 py-3">
        <div className="flex flex-wrap items-center gap-4">
          <div className="flex items-center gap-2">
            <span className="text-slate-400 text-sm font-medium">Scenario</span>
            <select
              value={taskId}
              onChange={(e) => setTaskId(e.target.value)}
              disabled={!isIdle}
              className="appearance-none bg-slate-800 border border-white/10 text-sm font-medium px-3 py-1.5 rounded-lg text-indigo-300 focus:outline-none focus:border-indigo-500 cursor-pointer"
            >
              {tasks.length > 0
                ? tasks.map((t) => <option key={t} value={t} className="bg-slate-900">{t.replace(/_/g, " ").toUpperCase()}</option>)
                : <option>Loading...</option>}
            </select>
          </div>
          <div className="flex items-center gap-2">
            <span className="text-slate-400 text-sm font-medium">Steps</span>
            {/* Steps input: free typing into the string buffer; the value is
                clamped to [10, 100] (default 40) only on blur / Enter. */}
            <input
              type="number"
              min={10}
              max={100}
              step={10}
              value={stepsInput}
              disabled={!isIdle}
              onChange={(e) => setStepsInput(e.target.value)}
              onBlur={() => {
                const v = parseInt(stepsInput, 10);
                const clamped = isNaN(v) ? 40 : Math.min(100, Math.max(10, v));
                setMaxSteps(clamped);
                setStepsInput(String(clamped));
              }}
              onKeyDown={(e) => {
                if (e.key === "Enter") e.currentTarget.blur();
              }}
              className="w-20 bg-slate-800 border border-white/10 text-sm font-medium px-3 py-1.5 rounded-lg text-indigo-300 focus:outline-none focus:border-indigo-500 text-center"
            />
          </div>
          {/* Policy picker only applies to the baseline_policy agent mode. */}
          {agentMode === "baseline_policy" && (
            <div className="flex items-center gap-2">
              <span className="text-slate-400 text-sm font-medium">Policy</span>
              <select
                value={policyName}
                onChange={(e) => setPolicyName(e.target.value)}
                disabled={!isIdle}
                className="appearance-none bg-slate-800 border border-white/10 text-sm font-medium px-3 py-1.5 rounded-lg text-indigo-300 focus:outline-none focus:border-indigo-500 cursor-pointer"
              >
                {(availablePolicies.length > 0 ? availablePolicies : ["backlog_clearance"]).map((p) => (
                  <option key={p} value={p} className="bg-slate-900">{String(p).replace(/_/g, " ")}</option>
                ))}
              </select>
            </div>
          )}
          {/* Model + model-type pickers only apply to the trained_rl agent mode. */}
          {agentMode === "trained_rl" && (
            <>
              <div className="flex items-center gap-2">
                <span className="text-slate-400 text-sm font-medium">Model</span>
                <select
                  value={modelPath}
                  onChange={(e) => {
                    // Selecting a checkpoint also snaps the type selector to
                    // that checkpoint's model_type when it is known.
                    const selected = availableModels.find((m) => m.path === e.target.value);
                    setModelPath(e.target.value);
                    if (selected?.model_type) setModelType(selected.model_type);
                  }}
                  disabled={!isIdle}
                  className="max-w-[260px] appearance-none bg-slate-800 border border-white/10 text-sm font-medium px-3 py-1.5 rounded-lg text-indigo-300 focus:outline-none focus:border-indigo-500 cursor-pointer"
                >
                  {(availableModels.length > 0
                    ? availableModels
                    : [{ label: "No model found", path: "", model_type: "maskable" }]
                  ).map((m) => (
                    <option key={`${m.path}-${m.model_type}`} value={m.path} className="bg-slate-900">
                      {m.label || m.path || "Unknown model"}
                    </option>
                  ))}
                </select>
              </div>
              <div className="flex items-center gap-2">
                <span className="text-slate-400 text-sm font-medium">Type</span>
                <select
                  value={modelType}
                  onChange={(e) => setModelType(e.target.value)}
                  disabled={!isIdle}
                  className="appearance-none bg-slate-800 border border-white/10 text-sm font-medium px-3 py-1.5 rounded-lg text-indigo-300 focus:outline-none focus:border-indigo-500 cursor-pointer"
                >
                  <option value="maskable" className="bg-slate-900">Maskable PPO</option>
                  <option value="recurrent" className="bg-slate-900">Recurrent PPO</option>
                </select>
              </div>
            </>
          )}
        </div>
        {/* Start/Stop toggle; disabled while initializing or when trained_rl
            mode has no checkpoint selected. */}
        <button
          onClick={running ? stopSimulation : startSimulation}
          disabled={starting || (!running && startBlocked)}
          className={`text-white text-sm font-bold px-6 py-2 rounded-lg transition-all duration-300 ${
            running
              ? "bg-rose-500/80 hover:bg-rose-500 shadow-[0_0_15px_rgba(244,63,94,0.4)]"
              : "bg-gradient-to-r from-violet-600 to-indigo-500 shadow-[0_0_15px_rgba(99,102,241,0.4)] hover:shadow-[0_0_25px_rgba(99,102,241,0.7)]"
          }`}
        >
          {starting ? "Initializing..." : running ? "Stop Simulation" : "Start Auto-Resolution"}
        </button>
      </div>
      {configError && (
        <div className="bg-rose-500/10 border border-rose-500/30 rounded-xl px-4 py-3 text-xs font-semibold text-rose-300">
          {configError}
        </div>
      )}
      {startBlocked && !configError && (
        <div className="bg-amber-500/10 border border-amber-500/30 rounded-xl px-4 py-3 text-xs font-semibold text-amber-300">
          Select an available RL model checkpoint before starting `trained_rl` mode.
        </div>
      )}

      {/* --- Progress bar (only visible while running) --- */}
      {(running || currentStep > 0) && (
        <div className="bg-slate-900/60 border border-white/5 rounded-xl px-5 py-3">
          <div className="flex justify-between items-center mb-2">
            <span className="text-xs font-semibold text-slate-400 uppercase tracking-widest">
              {running ? "Simulation In Progress" : journeyStats ? "Episode Complete" : "Stopped"}
            </span>
            <span className="text-xs font-black text-white">
              Step {currentStep} / {maxSteps} - {progressPct}%
            </span>
          </div>
          <div className="w-full bg-slate-800 rounded-full h-2 overflow-hidden">
            <div
              className={`h-2 rounded-full transition-all duration-500 ${
                journeyStats ? "bg-emerald-500" : "bg-indigo-500"
              } ${running ? "animate-pulse" : ""}`}
              style={{ width: `${progressPct}%` }}
            />
          </div>
          {running && (
            <div className="flex items-center gap-1.5 mt-2">
              <div className="w-1.5 h-1.5 bg-indigo-400 rounded-full animate-bounce" style={{ animationDelay: "0ms" }} />
              <div className="w-1.5 h-1.5 bg-indigo-400 rounded-full animate-bounce" style={{ animationDelay: "150ms" }} />
              <div className="w-1.5 h-1.5 bg-indigo-400 rounded-full animate-bounce" style={{ animationDelay: "300ms" }} />
              <span className="text-xs text-slate-500 ml-1">Agent is making decisions...</span>
            </div>
          )}
        </div>
      )}

      {/* --- Journey Summary (Before -> After) - appears after episode completes --- */}
      {journeyStats && (
        <div className="bg-gradient-to-br from-slate-900 to-indigo-950/30 border border-indigo-500/20 rounded-xl p-5 shadow-[0_0_30px_rgba(99,102,241,0.08)]">
          <div className="flex items-center gap-2 mb-4">
            <span className="material-symbols-outlined text-indigo-400">auto_graph</span>
            <h3 className="text-base font-black text-white">Journey Summary - Start to End Transformation</h3>
          </div>
          <div className="grid grid-cols-2 md:grid-cols-4 gap-3">
            {/* Four summary cards; `singleValue` cards show only `after`,
                `isBaselineCmp` compares the final score against a 0% baseline. */}
            {[
              {
                label: "Backlog Change",
                before: journeyStats.initialBacklog,
                after: journeyStats.finalBacklog,
                suffix: " cases",
                goodWhenDown: true,
              },
              {
                label: "SLA Breaches",
                before: journeyStats.initialSla,
                after: journeyStats.finalSla,
                suffix: "",
                goodWhenDown: true,
              },
              {
                label: "Steps Taken",
                before: null,
                after: journeyStats.totalSteps,
                suffix: "",
                goodWhenDown: false,
                singleValue: true,
              },
              {
                label: "Final Score",
                before: journeyStats.finalScore != null ? "No Agent (0.0%)" : "N/A",
                after: journeyStats.finalScore != null ? `${(journeyStats.finalScore * 100).toFixed(1)}%` : "N/A",
                suffix: "",
                goodWhenDown: false,
                isScore: true,
                isBaselineCmp: true,
              },
            ].map((stat) => {
              // delta: null = no badge; for the baseline comparison the delta
              // is the score itself (vs. an implicit 0% no-agent baseline).
              const delta = stat.singleValue ? null : stat.isBaselineCmp ? (journeyStats.finalScore * 100) : stat.after - stat.before;
              const trend =
                delta === null
                  ? "none"
                  : delta === 0
                  ? "stable"
                  : stat.goodWhenDown
                  ? (delta < 0 ? "improving" : "worsening")
                  : (delta > 0 ? "improving" : "worsening");
              const direction =
                delta === null || delta === 0
                  ? "stable"
                  : stat.goodWhenDown
                  ? (delta < 0 ? "down" : "up")
                  : (delta > 0 ? "up" : "down");
              const directionIcon =
                direction === "up"
                  ? "north"
                  : direction === "down"
                  ? "south"
                  : "horizontal_rule";
              const trendClass =
                trend === "improving"
                  ? "text-emerald-400"
                  : trend === "worsening"
                  ? "text-rose-400"
                  : "text-slate-300";
              return (
                <div key={stat.label} className="bg-slate-800/60 border border-white/5 rounded-lg p-3">
                  <div className="text-xs font-semibold text-slate-400 mb-2 tracking-wide">{stat.label}</div>
                  {stat.singleValue ? (
                    <div className={`text-2xl font-black ${stat.isScore ? "text-emerald-400" : "text-white"}`}>{stat.after}{stat.suffix}</div>
                  ) : (
                    <div className="flex items-center gap-2">
                      <span className="text-slate-500 text-sm font-bold truncate">
                        {stat.isBaselineCmp ? "Baseline" : stat.before}{stat.suffix}
                      </span>
                      <span className="material-symbols-outlined text-slate-600 text-base">arrow_forward</span>
                      <span className={`text-xl font-black ${trendClass}`}>
                        {stat.after}{stat.suffix}
                      </span>
                    </div>
                  )}
                  {delta !== null && (
                    <div className={`text-xs font-bold mt-1 ${trendClass} inline-flex items-center gap-1`}>
                      <span className="material-symbols-outlined text-[14px] leading-none">{directionIcon}</span>
                      <span>{Number(Math.abs(delta).toFixed(2))} {trend === "stable" ? "no change" : trend}</span>
                    </div>
                  )}
                  {stat.label === "Backlog Change" && journeyStats.backlogImprovement !== 0 && (
                    <div className="text-[10px] text-slate-500 mt-0.5">
                      {journeyStats.backlogImprovement > 0 ? `${journeyStats.backlogImprovement}% cleared` : `${Math.abs(journeyStats.backlogImprovement)}% grew`}
                    </div>
                  )}
                </div>
              );
            })}
          </div>
        </div>
      )}

      {/* --- KPI Row --- */}
      <div className="grid grid-cols-1 md:grid-cols-3 gap-4">
        {/* Falling deltas render green (good) because all three KPIs are
            lower-is-better metrics. */}
        {[
          { label: "Total Backlog", value: fmt2(kpis.backlog), delta: kpis.backlogDelta, accent: "rose", icon: "inbox" },
          { label: "SLA Breaches", value: fmt2(kpis.slaBreaches), delta: kpis.slaDelta, accent: "amber", icon: "timer_off" },
          { label: "Fairness Gap", value: `${(Number(kpis.fairness) * 100).toFixed(1)}%`, delta: kpis.fairnessDelta, accent: "emerald", icon: "balance" },
        ].map((kpi) => {
          const delta = Number(kpi.delta ?? 0);
          const trend = delta < 0 ? "down" : delta > 0 ? "up" : "stable";
          const trendIcon = trend === "up" ? "north" : trend === "down" ? "south" : "horizontal_rule";
          const badgeClass =
            trend === "down"
              ? "bg-emerald-500/20 text-emerald-400"
              : trend === "up"
              ? "bg-rose-500/20 text-rose-400"
              : "bg-slate-500/20 text-slate-300";
          return (
            <div key={kpi.label} className="bg-slate-900/70 border border-white/5 backdrop-blur-md p-5 rounded-xl relative overflow-hidden group hover:border-white/10 transition-colors">
              <div className={`absolute -right-3 -top-3 w-20 h-20 bg-${kpi.accent}-500/10 rounded-full blur-2xl`} />
              <div className="flex justify-between items-start mb-2">
                <div className="flex items-center gap-1.5">
                  <span className={`material-symbols-outlined text-${kpi.accent}-400 text-base`}>{kpi.icon}</span>
                  <span className="text-xs font-semibold tracking-widest text-slate-400 uppercase">{kpi.label}</span>
                </div>
                <span className={`text-xs font-bold px-2 py-0.5 rounded-full ${badgeClass} inline-flex items-center gap-1`}>
                  <span className="material-symbols-outlined text-[14px] leading-none">{trendIcon}</span>
                  <span>{fmtDelta(delta)}</span>
                </span>
              </div>
              <div className="text-4xl font-black text-white">{kpi.value}</div>
              <div className="text-xs text-slate-500 mt-1">
                {trend === "down" ? "Trend improving" : trend === "stable" ? "Stable" : "Trend worsening"}
              </div>
            </div>
          );
        })}
      </div>

      {/* --- Story Timeline + Queue Monitors --- */}
      <div className="grid grid-cols-1 lg:grid-cols-12 gap-4">
        {/* Story Timeline */}
        <div className="lg:col-span-7 bg-slate-900/70 border border-white/5 backdrop-blur-md rounded-xl p-6 min-h-[420px]">
          <h2 className="text-lg font-bold text-white mb-5 flex items-center gap-2">
            <span className="material-symbols-outlined text-indigo-400">auto_stories</span> Story Timeline
            {timeline.length > 1 && (
              <span className="ml-auto text-xs text-slate-500">{timeline.filter(e => e.key).length} key moments</span>
            )}
          </h2>

          {timeline.length === 0 ? (
            <div className="flex flex-col items-center justify-center h-64 text-slate-500">
              <span className="material-symbols-outlined text-5xl mb-3 opacity-30">play_circle</span>
              <p className="text-center text-sm">
                Select a scenario, set the number of steps, and press{" "}
                <strong className="text-white">Start Auto-Resolution</strong> to begin.
              </p>
            </div>
          ) : (
            <div className="relative pl-8 space-y-4 before:absolute before:inset-0 before:ml-[1.125rem] before:-translate-x-px before:h-full before:w-0.5 before:bg-gradient-to-b before:from-indigo-500/60 before:to-transparent max-h-[520px] overflow-y-auto pr-1">
              {/* Three render shapes per entry: phase divider, phase summary,
                  or an ordinary timeline event card. */}
              {annotatedTimeline.map((ev, idx) => {
                // Phase divider
                if (ev._divider) {
                  const ph = PHASE_LABELS[ev.phase];
                  return (
                    <div key={ev.key} className="relative flex items-center gap-3 mt-6 mb-2">
                      <div className={`absolute left-[-2.2rem] w-9 h-9 bg-slate-900 rounded-full border border-${ph.color}-500/40 flex items-center justify-center z-10`}>
                        <span className={`material-symbols-outlined text-[14px] text-${ph.color}-400`}>{ph.icon}</span>
                      </div>
                      <div className={`ml-2 text-xs font-black text-${ph.color}-400 tracking-widest uppercase border-b border-${ph.color}-500/20 pb-1 flex-1`}>
                        {ph.label}
                        <span className="font-normal text-slate-500 normal-case tracking-normal ml-2">- {ph.desc}</span>
                      </div>
                    </div>
                  );
                }

                // Phase summary block
                if (ev._summary) {
                  const drop = Math.abs(ev.stats.drop || 0);
                  const isDrop = (ev.stats.drop || 0) < 0;
                  return (
                    <div key={ev.key} className="relative pl-12 py-2">
                      <div className="bg-slate-800/40 rounded-lg p-3 inline-flex items-center gap-6 border border-white/5">
                        <div>
                          <span className="text-[10px] text-slate-500 uppercase tracking-widest block mb-0.5">Phase Backlog Move</span>
                          <span className={`text-sm font-black ${isDrop ? "text-emerald-400" : ev.stats.drop > 0 ? "text-rose-400" : "text-slate-300"}`}>
                            {isDrop ? "down " : ev.stats.drop > 0 ? "up " : ""}{drop} cases
                          </span>
                        </div>
                        <div>
                          <span className="text-[10px] text-slate-500 uppercase tracking-widest block mb-0.5">Key Decisions</span>
                          <span className="text-sm font-black text-indigo-300">{ev.stats.keys}</span>
                        </div>
                      </div>
                    </div>
                  );
                }

                // Ordinary event card; accent color keyed off the event type.
                const color = ev.type === "error" ? "rose" : ev.type === "warning" ? "amber" : ev.type === "success" ? "emerald" : "indigo";
                return (
                  <div
                    key={`${ev.id}-${idx}`}
                    className="relative group"
                    style={{ animation: `fadeUp 0.25s ease-out ${Math.min(idx, 10) * 0.03}s both` }}
                  >
                    <div className={`absolute left-[-2.2rem] w-9 h-9 bg-slate-900 rounded-full border border-${color}-500/40 flex items-center justify-center z-10 group-hover:border-${color}-400 transition-colors ${ev.key ? `shadow-[0_0_10px_rgba(99,102,241,0.3)]` : ""}`}>
                      <span className={`material-symbols-outlined text-[16px] text-${color}-400`}>{ev.icon}</span>
                    </div>
                    <div className={`bg-slate-800/50 border rounded-lg p-3 hover:bg-white/5 transition-colors ${ev.key ? `border-${color}-500/30 shadow-[0_0_12px_rgba(99,102,241,0.08)]` : "border-white/5"}`}>
                      <div className="flex justify-between items-start gap-3">
                        <div className="flex-1 min-w-0">
                          <div className="flex items-center gap-2 mb-0.5">
                            <span className={`text-xs font-bold text-${color}-400`}>{ev.time}</span>
                            {ev.outcomeLabel && (
                              <span
                                className={`text-[10px] font-bold px-1.5 py-0.5 rounded ${
                                  ev.outcomeType === "success"
                                    ? "bg-emerald-500/20 text-emerald-300"
                                    : ev.outcomeType === "warning"
                                    ? "bg-amber-500/20 text-amber-300"
                                    : "bg-slate-600/20 text-slate-300"
                                }`}
                              >
                                {ev.outcomeLabel}
                              </span>
                            )}
                            {ev.key && (
                              <span className="text-[10px] font-black bg-indigo-500/20 text-indigo-300 px-1.5 py-0.5 rounded tracking-wider">
                                KEY MOMENT
                              </span>
                            )}
                            {/* _count > 1 marks coalesced repeated events — presumably
                                deduplicated by the hook; verify in useStorySimulation. */}
                            {ev._count > 1 && (
                              <span className="text-[10px] font-bold bg-slate-700 text-slate-400 px-1.5 py-0.5 rounded">
                                x{ev._count}
                              </span>
                            )}
                          </div>
                          <h4 className="font-bold text-white text-sm flex items-center gap-1.5">
                            {ev.title}
                            {ev.isHugeImpact && <span title="Massive Improvement" className="text-sm">High Impact</span>}
                            {ev.isHighReward && <span title="High Reward Action" className="text-sm">Hot</span>}
                          </h4>
                          <p className="text-xs text-slate-400 mt-1 leading-relaxed">{ev.desc}</p>
                          {ev.reason && (
                            <div className="mt-2 bg-indigo-500/10 border-l-2 border-indigo-500/30 pl-2 py-1 text-xs text-indigo-200/80">
                              <span className="font-semibold text-indigo-300">Agent Reasoning:</span> {ev.reason}
                            </div>
                          )}
                        </div>
                        {ev.impact !== 0 && (
                          <div className={`shrink-0 bg-${color}-500/10 border border-${color}-500/20 px-2 py-1 rounded text-xs font-bold text-${color}-400 whitespace-nowrap`}>
                            {Number(ev.impact) >= 0 ? "+" : ""}{Number(ev.impact).toFixed(2)}
                          </div>
                        )}
                      </div>
                    </div>
                  </div>
                );
              })}
            </div>
          )}
        </div>

        {/* Live Queue Monitors */}
        <div className="lg:col-span-5 bg-slate-900/70 border border-white/5 backdrop-blur-md rounded-xl p-6">
          <h2 className="text-lg font-bold text-white mb-5 flex items-center gap-2">
            <span className="material-symbols-outlined text-emerald-400">monitor_heart</span> Live Queue Monitors
          </h2>
          {resources.length === 0 ? (
            <div className="flex flex-col items-center justify-center h-48 text-slate-500">
              <span className="material-symbols-outlined text-4xl mb-2 opacity-30">sensors</span>
              <p className="text-sm">Awaiting live telemetry...</p>
            </div>
          ) : (
            <div className="space-y-5">
              {/* Utilization thresholds: >85% red (overloaded), >60% amber, else green. */}
              {resources.map((res, i) => {
                const color = res.percentage > 85 ? "rose" : res.percentage > 60 ? "amber" : "emerald";
                const tone = color === "rose"
                  ? {
                      text: "text-rose-400",
                      bar: "bg-rose-500",
                    }
                  : color === "amber"
                  ? {
                      text: "text-amber-400",
                      bar: "bg-amber-500",
                    }
                  : {
                      text: "text-emerald-400",
                      bar: "bg-emerald-500",
                    };
                return (
                  <div key={res.name || i}>
                    <div className="flex justify-between mb-1.5">
                      <span className="text-sm font-semibold text-white">{res.name}</span>
                      <div className="flex items-center gap-2">
                        <span className={`text-xs font-bold ${tone.text}`}>{res.activeCases} active</span>
                        {res.percentage > 85 && (
                          <span className="text-[10px] font-black text-rose-400 bg-rose-500/10 px-1.5 rounded">OVERLOADED</span>
                        )}
                      </div>
                    </div>
                    <div className="w-full bg-slate-800 rounded-full h-2.5 overflow-hidden">
                      <div
                        className={`${tone.bar} h-full rounded-full transition-all duration-700 ease-in-out`}
                        style={{ width: `${res.percentage}%` }}
                      />
                    </div>
                  </div>
                );
              })}
            </div>
          )}

          {/* Reward cumulative tracker - shown after first step */}
          {currentStep > 0 && (
            <div className="mt-6 pt-5 border-t border-white/5">
              <div className="text-xs font-semibold text-slate-400 mb-3 uppercase tracking-widest">Impact Summary</div>
              <div className="grid grid-cols-2 gap-3">
                <div className="bg-slate-800/60 rounded-lg p-3 text-center">
                  <div className="text-xs text-slate-400 mb-1">Steps Elapsed</div>
                  <div className="text-xl font-black text-white">{currentStep}</div>
                </div>
                <div className="bg-slate-800/60 rounded-lg p-3 text-center">
                  <div className="text-xs text-slate-400 mb-1">Key Moments</div>
                  <div className="text-xl font-black text-indigo-300">
                    {timeline.filter((e) => e.key).length}
                  </div>
                </div>
              </div>
            </div>
          )}
        </div>
      </div>
    </div>
  );
}
568
+
569
+
570
+ // --- Resources Tab ------------------------------------------------------------
571
+ function BenchmarkResults({ results }) {
572
+ const COLORS = { backlog_clearance: "#6366f1", urgent_first: "#10b981", oldest_first: "#f59e0b" };
573
+ const sorted = [...results.agent_results].sort((a, b) => b.average_score - a.average_score);
574
+ const winner = sorted[0];
575
+ const maxScore = Math.max(...results.agent_results.map((a) => a.average_score), 0.001);
576
+ const chartH = 140;
577
+
578
+ return (
579
+ <div className="space-y-5">
580
+ {/* Winner callout */}
581
+ <div className="bg-emerald-500/10 border border-emerald-500/30 rounded-xl p-5 flex flex-wrap items-center justify-between gap-4">
582
+ <div className="flex items-center gap-4">
583
+ <span className="material-symbols-outlined text-emerald-400 text-4xl">emoji_events</span>
584
+ <div>
585
+ <div className="text-xs font-black text-emerald-400 tracking-widest mb-1">BEST PERFORMING POLICY</div>
586
+ <div className="text-xl font-black text-white capitalize">{winner.agent_policy.replace(/_/g, " ")}</div>
587
+ <div className="text-sm text-slate-400 mt-0.5">
588
+ Avg score{" "}<span className="text-emerald-400 font-bold">{(winner.average_score * 100).toFixed(1)}%</span>
589
+ {" | "}Range {(winner.min_score * 100).toFixed(0)}%-{(winner.max_score * 100).toFixed(0)}%
590
+ </div>
591
+ </div>
592
+ </div>
593
+ <div className="bg-emerald-500/10 border border-emerald-500/20 px-3 py-2 rounded-lg max-w-sm hidden lg:block">
594
+ <div className="text-xs font-bold text-emerald-400 mb-1 flex items-center gap-1">
595
+ <span className="material-symbols-outlined text-[14px]">psychology</span> Agent Intelligence
596
+ </div>
597
+ <p className="text-[10px] text-emerald-200/80 leading-relaxed font-medium">
598
+ This policy performed best by maintaining fewer SLA breaches relative to its peers while securing steady backlog reduction across critical queues.
599
+ </p>
600
+ </div>
601
+ </div>
602
+
603
+ {/* Bar chart */}
604
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-6">
605
+ <h3 className="text-sm font-bold text-white mb-6">Average Grader Score by Policy</h3>
606
+ <div className="flex items-end justify-center gap-10">
607
+ {sorted.map((agent) => {
608
+ const pct = agent.average_score / maxScore;
609
+ const barH = Math.max(Math.round(pct * chartH), 6);
610
+ const color = COLORS[agent.agent_policy] || "#6366f1";
611
+ const isWinner = agent.agent_policy === winner.agent_policy;
612
+ return (
613
+ <div key={agent.agent_policy} className="flex flex-col items-center gap-2 w-28">
614
+ <div className="text-base font-black text-white">{(agent.average_score * 100).toFixed(1)}%</div>
615
+ <div className="relative w-full flex items-end justify-center" style={{ height: chartH }}>
616
+ {isWinner && <div className="absolute -top-5 left-1/2 -translate-x-1/2 text-lg text-emerald-400">Top</div>}
617
+ <div
618
+ className="w-full rounded-t-lg transition-all duration-700"
619
+ style={{
620
+ height: barH,
621
+ background: `linear-gradient(to top, ${color}88, ${color})`,
622
+ boxShadow: isWinner ? `0 0 24px ${color}60` : "none",
623
+ }}
624
+ />
625
+ </div>
626
+ <div className="text-xs font-semibold text-center leading-tight" style={{ color }}>
627
+ {agent.agent_policy.replace(/_/g, " ")}
628
+ </div>
629
+ <div className="text-xs text-slate-500">{agent.runs.length} runs</div>
630
+ </div>
631
+ );
632
+ })}
633
+ </div>
634
+ </div>
635
+
636
+ {/* Multi-metric comparison bars */}
637
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-6">
638
+ <h3 className="text-sm font-bold text-white mb-5">Metric Comparison</h3>
639
+ <div className="space-y-6">
640
+ {[
641
+ {
642
+ label: "Score (higher is better)",
643
+ vals: results.agent_results.map((a) => ({ key: a.agent_policy, v: a.average_score, display: `${(a.average_score * 100).toFixed(1)}%` })),
644
+ higherGood: true,
645
+ },
646
+ {
647
+ label: "Avg Completed Cases (higher is better)",
648
+ vals: results.agent_results.map((a) => {
649
+ const avg = a.runs.reduce((s, r) => s + (r.completed ?? 0), 0) / Math.max(a.runs.length, 1);
650
+ return { key: a.agent_policy, v: avg, display: avg.toFixed(1) };
651
+ }),
652
+ higherGood: true,
653
+ },
654
+ {
655
+ label: "Avg Remaining Backlog (lower is better)",
656
+ vals: results.agent_results.map((a) => {
657
+ const avg = a.runs.reduce((s, r) => s + (r.backlog ?? 0), 0) / Math.max(a.runs.length, 1);
658
+ return { key: a.agent_policy, v: avg, display: avg.toFixed(1) };
659
+ }),
660
+ higherGood: false,
661
+ },
662
+ ].map(({ label, vals, higherGood }) => {
663
+ const maxVal = Math.max(...vals.map((v) => v.v), 0.001);
664
+ const best = higherGood
665
+ ? vals.reduce((a, b) => (b.v > a.v ? b : a))
666
+ : vals.reduce((a, b) => (b.v < a.v ? b : a));
667
+ return (
668
+ <div key={label}>
669
+ <div className="text-xs font-bold text-slate-400 mb-3">{label}</div>
670
+ <div className="space-y-2">
671
+ {vals.map((v) => {
672
+ const pct = Math.round((v.v / maxVal) * 100);
673
+ const color = (COLORS)[v.key] || "#6366f1";
674
+ return (
675
+ <div key={v.key} className="flex items-center gap-3">
676
+ <div className="w-36 text-xs text-slate-300 capitalize shrink-0 flex items-center gap-1">
677
+ {v.key.replace(/_/g, " ")}
678
+ {v.key === best.key && <span className="text-[10px] font-black text-emerald-400">Top</span>}
679
+ </div>
680
+ <div className="flex-1 bg-slate-800 rounded-full h-2.5 overflow-hidden">
681
+ <div className="h-2.5 rounded-full transition-all duration-700" style={{ width: `${pct}%`, backgroundColor: color }} />
682
+ </div>
683
+ <div className="w-14 text-right text-xs font-bold text-white">{v.display}</div>
684
+ </div>
685
+ );
686
+ })}
687
+ </div>
688
+ </div>
689
+ );
690
+ })}
691
+ </div>
692
+ </div>
693
+
694
+ {/* Raw episode table */}
695
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-6">
696
+ <h3 className="text-sm font-bold text-white mb-4">All Episodes - Raw Data</h3>
697
+ <div className="overflow-x-auto">
698
+ <table className="w-full text-xs text-left">
699
+ <thead>
700
+ <tr className="text-slate-400 border-b border-white/5">
701
+ <th className="pb-2 pr-4">Policy</th>
702
+ <th className="pb-2 pr-4">Run #</th>
703
+ <th className="pb-2 pr-4">Score</th>
704
+ <th className="pb-2 pr-4">Reward</th>
705
+ <th className="pb-2 pr-4">Completed</th>
706
+ <th className="pb-2 pr-4">Backlog</th>
707
+ <th className="pb-2">Steps</th>
708
+ </tr>
709
+ </thead>
710
+ <tbody>
711
+ {results.agent_results.flatMap((agent) =>
712
+ agent.runs.map((run) => (
713
+ <tr key={`${agent.agent_policy}-${run.run_index}`} className="border-b border-white/5 hover:bg-white/5">
714
+ <td className="py-2 pr-4 font-medium" style={{ color: (COLORS)[agent.agent_policy] || "#6366f1" }}>
715
+ {agent.agent_policy.replace(/_/g, " ")}
716
+ </td>
717
+ <td className="py-2 pr-4 text-slate-400">#{run.run_index}</td>
718
+ <td className="py-2 pr-4 font-bold text-white">{(run.score * 100).toFixed(1)}%</td>
719
+ <td className="py-2 pr-4 text-amber-400">{run.reward_sum?.toFixed(2) ?? "-"}</td>
720
+ <td className="py-2 pr-4 text-emerald-400">{run.completed ?? "-"}</td>
721
+ <td className="py-2 pr-4 text-rose-400">{run.backlog ?? "-"}</td>
722
+ <td className="py-2 text-slate-400">{run.steps ?? "-"}</td>
723
+ </tr>
724
+ ))
725
+ )}
726
+ </tbody>
727
+ </table>
728
+ </div>
729
+ </div>
730
+ </div>
731
+ );
732
+ }
733
+
734
+ function ResourcesTab({ tasks }) {
735
+ const [benchTask, setBenchTask] = useState(tasks[0] || "district_backlog_easy");
736
+ const [loading, setLoading] = useState(false);
737
+ const [results, setResults] = useState(null);
738
+ const [error, setError] = useState("");
739
+
740
+ const runBenchmark = async () => {
741
+ setLoading(true);
742
+ setError("");
743
+ setResults(null);
744
+ try {
745
+ const data = await api("/benchmark", {
746
+ method: "POST",
747
+ body: JSON.stringify({
748
+ task_id: benchTask,
749
+ agent_policies: ["backlog_clearance", "urgent_first", "oldest_first"],
750
+ runs: 3,
751
+ max_steps: 60,
752
+ }),
753
+ });
754
+ setResults(data);
755
+ } catch (e) {
756
+ setError(e.message);
757
+ } finally {
758
+ setLoading(false);
759
+ }
760
+ };
761
+
762
+ return (
763
+ <div className="space-y-6">
764
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-6">
765
+ <h2 className="text-lg font-bold text-white mb-1 flex items-center gap-2">
766
+ <span className="material-symbols-outlined text-violet-400">leaderboard</span> Policy Benchmark Comparison
767
+ </h2>
768
+ <p className="text-sm text-slate-400 mb-5">
769
+ Run all three baseline policies on the same scenario and compare their grader scores,
770
+ completed cases, and remaining backlogs side-by-side with visual charts.
771
+ </p>
772
+ <div className="flex flex-wrap gap-3 items-center">
773
+ <select
774
+ value={benchTask}
775
+ onChange={(e) => setBenchTask(e.target.value)}
776
+ className="appearance-none bg-slate-800 border border-white/10 text-sm font-medium px-3 py-1.5 rounded-lg text-indigo-300 focus:outline-none focus:border-indigo-500"
777
+ >
778
+ {tasks.map((t) => (
779
+ <option key={t} value={t} className="bg-slate-900">
780
+ {t.replace(/_/g, " ").toUpperCase()}
781
+ </option>
782
+ ))}
783
+ </select>
784
+ <button
785
+ onClick={runBenchmark}
786
+ disabled={loading}
787
+ className="bg-violet-600 hover:bg-violet-500 text-white text-sm font-bold px-5 py-2 rounded-lg transition-all disabled:opacity-50"
788
+ >
789
+ {loading ? "Simulating 9 episodes..." : "Run Benchmark"}
790
+ </button>
791
+ </div>
792
+ </div>
793
+
794
+ {error && (
795
+ <div className="bg-rose-500/10 border border-rose-500/30 rounded-xl p-4 text-rose-400 text-sm">
796
+ {error}
797
+ </div>
798
+ )}
799
+
800
+ {loading && (
801
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-10 flex flex-col items-center gap-4">
802
+ <div className="w-10 h-10 border-4 border-indigo-500 border-t-transparent rounded-full animate-spin" />
803
+ <p className="text-slate-400 text-sm">Running 3 policies x 3 episodes each - takes ~20 seconds.</p>
804
+ </div>
805
+ )}
806
+
807
+ {results && <BenchmarkResults results={results} />}
808
+ </div>
809
+ );
810
+ }
811
+
812
+ // --- Library Tab --------------------------------------------------------------
813
/**
 * Library tab: reference center for the demo. Renders (1) a static guide to
 * every product tab and the endpoints that power it, (2) a scenario library
 * built from the `tasks` prop, (3) the OpenEnv compliance checklist, and
 * (4) the workflow-component availability matrix. (3) and (4) are fetched
 * once on mount.
 */
function LibraryTab({ tasks }) {
  // Backend-fetched payloads; null until the corresponding request resolves.
  const [compliance, setCompliance] = useState(null);
  const [workflows, setWorkflows] = useState(null);
  // Task id of the currently expanded scenario card (null = all collapsed).
  const [selected, setSelected] = useState(null);

  useEffect(() => {
    // Best-effort fetches: failures are deliberately swallowed so the tab
    // still renders its static sections when the backend is unreachable.
    api("/openenv_compliance").then(setCompliance).catch(() => {});
    api("/workflows/components").then(setWorkflows).catch(() => {});
  }, []);

  // Static metadata for the known scenario ids; tasks missing from this map
  // fall back to the "Custom scenario." placeholder in the render below.
  const taskDetails = {
    district_backlog_easy: { diff: "Easy", desc: "Single-service district queue focused on income certificate flow.", services: 1 },
    mixed_urgency_medium: { diff: "Medium", desc: "Income, land, passport, driving license, and Aadhaar workloads with mixed urgency.", services: 5 },
    cross_department_hard: { diff: "Hard", desc: "Five-service crisis mode with high arrivals, fairness pressure, and event shocks.", services: 5 },
  };

  // Hard-coded documentation copy for the "Complete System Overview" cards.
  // NOTE(review): the Resources entry lists /compare_agents, but ResourcesTab
  // in this file actually POSTs to /benchmark — confirm which endpoint is
  // current and align the two.
  const systemTabGuide = [
    {
      id: "timeline",
      title: "Simulation (Timeline Tab)",
      icon: "timeline",
      summary: "Runs live step-by-step environment simulation and shows queue movement, KPI changes, and decision timeline in real time.",
      userFlow: "Choose scenario, steps, and model/policy, then start auto-resolution.",
      outputs: "Live backlog, SLA, fairness, key moments, queue pressure bars, and impact summary.",
      endpoints: ["/simulation/live/start", "/simulation/live/step", "/simulation/live/{run_id}/stop", "/tasks", "/agents", "/rl_models", "/rl/models"],
    },
    {
      id: "training",
      title: "Training Tab",
      icon: "fitness_center",
      summary: "Controls RL training jobs and tracks how the policy improves over timesteps.",
      userFlow: "Start/stop a training job and monitor live checkpoints and job history.",
      outputs: "Active job state, progress, reward/score checkpoints, sequential narrative feed, and OpenEnv contract replay results.",
      endpoints: ["/training_jobs", "/training_jobs/list", "/training_jobs/{job_id}", "/training_jobs/{job_id}/stop", "/reset", "/step", "/state", "/grade"],
    },
    {
      id: "analytics",
      title: "Analytics Tab",
      icon: "analytics",
      summary: "Shows endpoint-fed system analytics from historical simulation, jobs, models, sessions, and compliance health.",
      userFlow: "Open the tab; metrics auto-refresh from backend every few seconds.",
      outputs: "Task distributions, mode splits, training status mix, endpoint health, model inventory, and run history tables.",
      endpoints: ["/history/simulations", "/history/comparisons", "/training_jobs", "/rl_models", "/rl/models", "/tasks", "/agents", "/sessions", "/actions/schema", "/openenv_compliance", "/workflows/components"],
    },
    {
      id: "resources",
      title: "Resources Tab (Benchmark)",
      icon: "leaderboard",
      summary: "Compares baseline policies on the same task to identify which strategy performs best.",
      userFlow: "Select a scenario and run benchmark.",
      outputs: "Winner policy card, score bars, metric comparison bars, and raw run-level benchmark table.",
      endpoints: ["/compare_agents"],
    },
    {
      id: "library",
      title: "Library Tab",
      icon: "menu_book",
      summary: "Acts as the complete system overview and reference center for tasks, compliance, and workflow availability.",
      userFlow: "Explore scenarios, inspect OpenEnv checks, and verify available workflow components.",
      outputs: "Task cards with difficulty/service counts, compliance checklist, and component readiness matrix.",
      endpoints: ["/tasks", "/openenv_compliance", "/workflows/components"],
    },
  ];

  return (
    <div className="space-y-6">
      {/* Section 1: static per-tab guide rendered from systemTabGuide */}
      <div className="bg-slate-900/70 border border-white/5 rounded-xl p-6">
        <h2 className="text-lg font-bold text-white mb-2 flex items-center gap-2">
          <span className="material-symbols-outlined text-violet-400">hub</span> Complete System Overview
        </h2>
        <p className="text-sm text-slate-400 mb-5">
          This section explains how each product tab works, what backend APIs power it, and what outputs users can expect.
          Use it as a quick guide for judges and reviewers.
        </p>
        <div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
          {systemTabGuide.map((tab) => (
            <div key={tab.id} className="bg-slate-800/50 border border-white/5 rounded-xl p-4">
              <div className="flex items-center gap-2 mb-2">
                <span className="material-symbols-outlined text-indigo-300">{tab.icon}</span>
                <h3 className="text-sm font-bold text-white">{tab.title}</h3>
              </div>
              <p className="text-xs text-slate-300 leading-relaxed mb-2">{tab.summary}</p>
              <div className="text-xs text-slate-400 mb-1">
                <span className="text-slate-300 font-semibold">User flow:</span> {tab.userFlow}
              </div>
              <div className="text-xs text-slate-400 mb-3">
                <span className="text-slate-300 font-semibold">Outputs:</span> {tab.outputs}
              </div>
              <div className="flex flex-wrap gap-1.5">
                {tab.endpoints.map((ep) => (
                  <code key={ep} className="text-[10px] text-cyan-300 bg-slate-900 px-1.5 py-0.5 rounded border border-cyan-500/20">
                    {ep}
                  </code>
                ))}
              </div>
            </div>
          ))}
        </div>
      </div>

      {/* Section 2: expandable scenario cards driven by the `tasks` prop */}
      <div>
        <h2 className="text-lg font-bold text-white mb-4 flex items-center gap-2">
          <span className="material-symbols-outlined text-amber-400">menu_book</span> Scenario Library
        </h2>
        <div className="grid grid-cols-1 md:grid-cols-3 gap-4">
          {tasks.map((t) => {
            const info = taskDetails[t] || { diff: "-", desc: "Custom scenario.", services: "-" };
            // NOTE(review): `text-${diffColor}-400` builds a Tailwind class at
            // runtime; Tailwind's JIT cannot statically detect it, so these
            // colors may be purged unless safelisted — confirm tailwind config.
            const diffColor = info.diff === "Easy" ? "emerald" : info.diff === "Medium" ? "amber" : "rose";
            const isSelected = selected === t;
            return (
              <button
                key={t}
                onClick={() => setSelected(isSelected ? null : t)}
                className={`text-left bg-slate-900/70 border rounded-xl p-5 transition-all hover:border-indigo-500/40 ${isSelected ? "border-indigo-500/60 shadow-[0_0_20px_rgba(99,102,241,0.15)]" : "border-white/5"}`}
              >
                <div className="flex justify-between items-start mb-3">
                  <div className={`text-xs font-black tracking-widest text-${diffColor}-400`}>{info.diff.toUpperCase()}</div>
                  <span className="material-symbols-outlined text-slate-500 text-lg">{isSelected ? "expand_less" : "expand_more"}</span>
                </div>
                <h3 className="font-bold text-white text-sm mb-2">{t.replace(/_/g, " ").replace(/\b\w/g, (c) => c.toUpperCase())}</h3>
                <p className="text-xs text-slate-400 leading-relaxed">{info.desc}</p>
                {isSelected && (
                  <div className="mt-4 pt-4 border-t border-white/5 space-y-2">
                    <div className="flex justify-between text-xs"><span className="text-slate-400">Services</span><span className="text-white font-bold">{info.services}</span></div>
                    <div className="flex justify-between text-xs"><span className="text-slate-400">Difficulty</span><span className="text-white font-bold">{info.diff}</span></div>
                    <div className="flex justify-between text-xs"><span className="text-slate-400">Task ID</span><span className="text-indigo-300 font-mono">{t}</span></div>
                  </div>
                )}
              </button>
            );
          })}
        </div>
      </div>

      {/* Section 3: compliance checklist — hidden until the fetch resolves */}
      {compliance && (
        <div className="bg-slate-900/70 border border-white/5 rounded-xl p-6">
          <h2 className="text-lg font-bold text-white mb-4 flex items-center gap-2">
            <span className="material-symbols-outlined text-indigo-400">verified</span> OpenEnv Compliance Status
          </h2>
          <div className="grid grid-cols-1 md:grid-cols-2 gap-3">
            {compliance.items?.map((item) => (
              <div key={item.key} className={`flex items-start gap-3 bg-slate-800/50 border rounded-lg p-3 ${item.status === "pass" ? "border-emerald-500/25" : "border-rose-500/25"}`}>
                {/* Tri-state icon: pass / fail / anything else rendered as unknown */}
                <span className={`material-symbols-outlined text-lg shrink-0 ${item.status === "pass" ? "text-emerald-400" : item.status === "fail" ? "text-rose-400" : "text-amber-400"}`}>
                  {item.status === "pass" ? "check_circle" : item.status === "fail" ? "cancel" : "help"}
                </span>
                <div>
                  <div className="text-sm font-semibold text-white">{item.label}</div>
                  <div className="text-xs text-slate-400 mt-0.5">{item.detail}</div>
                </div>
              </div>
            ))}
          </div>
        </div>
      )}

      {/* Section 4: workflow component availability — hidden until fetched */}
      {workflows && (
        <div className="bg-slate-900/70 border border-white/5 rounded-xl p-6">
          <h2 className="text-lg font-bold text-white mb-4 flex items-center gap-2">
            <span className="material-symbols-outlined text-cyan-400">account_tree</span> Workflow Components
          </h2>
          <div className="space-y-3">
            {workflows.components?.map((c) => (
              <div key={c.component} className={`flex items-center gap-4 bg-slate-800/50 border rounded-lg p-3 ${c.available ? "border-emerald-500/20" : "border-slate-700"}`}>
                <span className={`material-symbols-outlined text-lg ${c.available ? "text-emerald-400" : "text-slate-600"}`}>
                  {c.available ? "check_box" : "check_box_outline_blank"}
                </span>
                <div className="flex-1 min-w-0">
                  <div className="text-sm font-bold text-white">{c.component}</div>
                  <div className="text-xs text-slate-400 truncate">{c.description}</div>
                </div>
                {/* Optional launch command, shown on large screens only */}
                {c.command && (
                  <code className="text-xs text-indigo-300 bg-slate-900 px-2 py-1 rounded font-mono hidden lg:block max-w-xs truncate">{c.command}</code>
                )}
              </div>
            ))}
          </div>
        </div>
      )}
    </div>
  );
}
994
+
995
+ // --- Analytics Tab ------------------------------------------------------------
996
+ function AnalyticsTab() {
997
+ const [history, setHistory] = useState([]);
998
+ const [rlModels, setRlModels] = useState([]);
999
+ const [rlModelsV2, setRlModelsV2] = useState([]);
1000
+ const [trainingJobs, setTrainingJobs] = useState([]);
1001
+ const [tasksList, setTasksList] = useState([]);
1002
+ const [agentsList, setAgentsList] = useState([]);
1003
+ const [sessionsInfo, setSessionsInfo] = useState({ active_sessions: 0, session_ids: [] });
1004
+ const [actionsSchema, setActionsSchema] = useState({});
1005
+ const [complianceInfo, setComplianceInfo] = useState({ items: [] });
1006
+ const [workflowInfo, setWorkflowInfo] = useState({ components: [] });
1007
+ const [comparisonsInfo, setComparisonsInfo] = useState({ comparisons: [] });
1008
+ const [endpointHealth, setEndpointHealth] = useState([]);
1009
+ const [loadingHistory, setLoadingHistory] = useState(true);
1010
+ const [loadingAll, setLoadingAll] = useState(true);
1011
+
1012
+ useEffect(() => {
1013
+ let cancelled = false;
1014
+
1015
+ const load = async () => {
1016
+ setLoadingHistory(true);
1017
+ setLoadingAll(true);
1018
+ try {
1019
+ const [
1020
+ historyRes,
1021
+ rlRes,
1022
+ rlResV2,
1023
+ jobsRes,
1024
+ tasksRes,
1025
+ agentsRes,
1026
+ sessionsRes,
1027
+ actionsRes,
1028
+ complianceRes,
1029
+ workflowsRes,
1030
+ comparisonsRes,
1031
+ ] = await Promise.allSettled([
1032
+ api("/history/simulations?limit=80"),
1033
+ api("/rl_models"),
1034
+ api("/rl/models"),
1035
+ api("/training_jobs"),
1036
+ api("/tasks"),
1037
+ api("/agents"),
1038
+ api("/sessions"),
1039
+ api("/actions/schema"),
1040
+ api("/openenv_compliance"),
1041
+ api("/workflows/components"),
1042
+ api("/history/comparisons?limit=30"),
1043
+ ]);
1044
+
1045
+ if (cancelled) return;
1046
+
1047
+ const checks = [
1048
+ { key: "history", label: "History", ok: historyRes.status === "fulfilled" },
1049
+ { key: "rl_models", label: "RL Models", ok: rlRes.status === "fulfilled" },
1050
+ { key: "rl_models_v2", label: "RL Models V2", ok: rlResV2.status === "fulfilled" },
1051
+ { key: "training_jobs", label: "Training Jobs", ok: jobsRes.status === "fulfilled" },
1052
+ { key: "tasks", label: "Tasks", ok: tasksRes.status === "fulfilled" },
1053
+ { key: "agents", label: "Agents", ok: agentsRes.status === "fulfilled" },
1054
+ { key: "sessions", label: "Sessions", ok: sessionsRes.status === "fulfilled" },
1055
+ { key: "actions_schema", label: "Action Schema", ok: actionsRes.status === "fulfilled" },
1056
+ { key: "openenv_compliance", label: "Compliance", ok: complianceRes.status === "fulfilled" },
1057
+ { key: "workflow_components", label: "Workflow Components", ok: workflowsRes.status === "fulfilled" },
1058
+ { key: "comparison_history", label: "Comparison History", ok: comparisonsRes.status === "fulfilled" },
1059
+ ];
1060
+ setEndpointHealth(checks);
1061
+
1062
+ setHistory(historyRes.status === "fulfilled" ? (historyRes.value?.runs || []) : []);
1063
+ setRlModels(rlRes.status === "fulfilled" ? (rlRes.value?.models || []) : []);
1064
+ setRlModelsV2(rlResV2.status === "fulfilled" ? (Array.isArray(rlResV2.value) ? rlResV2.value : []) : []);
1065
+ setTrainingJobs(jobsRes.status === "fulfilled" ? (jobsRes.value?.jobs || []) : []);
1066
+ setTasksList(tasksRes.status === "fulfilled" ? (tasksRes.value?.tasks || []) : []);
1067
+ setAgentsList(agentsRes.status === "fulfilled" ? (Array.isArray(agentsRes.value) ? agentsRes.value : []) : []);
1068
+ setSessionsInfo(sessionsRes.status === "fulfilled" ? (sessionsRes.value || { active_sessions: 0, session_ids: [] }) : { active_sessions: 0, session_ids: [] });
1069
+ setActionsSchema(actionsRes.status === "fulfilled" ? (actionsRes.value || {}) : {});
1070
+ setComplianceInfo(complianceRes.status === "fulfilled" ? (complianceRes.value || { items: [] }) : { items: [] });
1071
+ setWorkflowInfo(workflowsRes.status === "fulfilled" ? (workflowsRes.value || { components: [] }) : { components: [] });
1072
+ setComparisonsInfo(comparisonsRes.status === "fulfilled" ? (comparisonsRes.value || { comparisons: [] }) : { comparisons: [] });
1073
+ } finally {
1074
+ if (!cancelled) {
1075
+ setLoadingHistory(false);
1076
+ setLoadingAll(false);
1077
+ }
1078
+ }
1079
+ };
1080
+
1081
+ load();
1082
+ const timer = setInterval(load, 8000);
1083
+ return () => {
1084
+ cancelled = true;
1085
+ clearInterval(timer);
1086
+ };
1087
+ }, []);
1088
+
1089
+ const byTask = history.reduce((acc, run) => {
1090
+ const t = run.task_id || "unknown";
1091
+ if (!acc[t]) acc[t] = [];
1092
+ acc[t].push(run);
1093
+ return acc;
1094
+ }, {});
1095
+
1096
+ const getRunScore = (run) => {
1097
+ const value = run?.score ?? run?.payload?.score;
1098
+ const num = Number(value);
1099
+ return Number.isFinite(num) ? num : null;
1100
+ };
1101
+
1102
+ const getRunReward = (run) => {
1103
+ const value = run?.total_reward ?? run?.payload?.total_reward;
1104
+ const num = Number(value);
1105
+ return Number.isFinite(num) ? num : null;
1106
+ };
1107
+
1108
+ const getJobProgress = (job) => {
1109
+ const p = Number(job?.progress);
1110
+ if (Number.isFinite(p)) return Math.max(0, Math.min(1, p));
1111
+ const ts = Number(job?.latest_metrics?.total_timesteps);
1112
+ const total = Number(job?.timesteps);
1113
+ if (Number.isFinite(ts) && Number.isFinite(total) && total > 0) {
1114
+ return Math.max(0, Math.min(1, ts / total));
1115
+ }
1116
+ return 0;
1117
+ };
1118
+
1119
+ const scoreData = history.map(getRunScore).filter((v) => v != null);
1120
+ const avgScore = scoreData.length ? scoreData.reduce((s, v) => s + v, 0) / scoreData.length : null;
1121
+ const runningJobs = trainingJobs.filter((j) => String(j?.status || "").toLowerCase() === "running").length;
1122
+ const endpointCoverage = endpointHealth.length
1123
+ ? endpointHealth.filter((x) => x.ok).length / endpointHealth.length
1124
+ : null;
1125
+
1126
+ const timelineTaskRows = Object.entries(byTask)
1127
+ .map(([label, runs]) => ({ label, value: runs.length }))
1128
+ .sort((a, b) => b.value - a.value);
1129
+
1130
+ const timelineModeRows = Object.entries(
1131
+ history.reduce((acc, run) => {
1132
+ const mode = String(run?.agent_mode || "unknown");
1133
+ acc[mode] = (acc[mode] || 0) + 1;
1134
+ return acc;
1135
+ }, {})
1136
+ ).map(([label, value]) => ({ label, value }));
1137
+
1138
+ const trainingStatusRows = Object.entries(
1139
+ trainingJobs.reduce((acc, job) => {
1140
+ const status = String(job?.status || "unknown").toLowerCase();
1141
+ acc[status] = (acc[status] || 0) + 1;
1142
+ return acc;
1143
+ }, {})
1144
+ ).map(([label, value]) => ({ label, value }));
1145
+
1146
+ const trainingPhaseRows = [1, 2].map((phase) => {
1147
+ const rows = trainingJobs.filter((job) => Number(job?.phase || 0) === phase);
1148
+ const avgProgress = rows.length
1149
+ ? rows.reduce((sum, job) => sum + getJobProgress(job), 0) / rows.length
1150
+ : 0;
1151
+ return {
1152
+ label: `Phase ${phase}`,
1153
+ value: Number((avgProgress * 100).toFixed(1)),
1154
+ jobs: rows.length,
1155
+ };
1156
+ });
1157
+
1158
+ const compliancePass = Array.isArray(complianceInfo?.items)
1159
+ ? complianceInfo.items.filter((x) => x?.status === "pass").length
1160
+ : 0;
1161
+ const complianceFail = Array.isArray(complianceInfo?.items)
1162
+ ? complianceInfo.items.filter((x) => x?.status === "fail").length
1163
+ : 0;
1164
+ const complianceUnknown = Array.isArray(complianceInfo?.items)
1165
+ ? complianceInfo.items.filter((x) => x?.status !== "pass" && x?.status !== "fail").length
1166
+ : 0;
1167
+
1168
+ const systemMetricRows = [
1169
+ { label: "Tasks", value: tasksList.length },
1170
+ { label: "Agents", value: agentsList.length },
1171
+ { label: "Action Types", value: Number(actionsSchema?.total_action_types || 0) },
1172
+ { label: "Active Sessions", value: Number(sessionsInfo?.active_sessions || 0) },
1173
+ { label: "RL Models V1", value: rlModels.filter((m) => m.exists).length },
1174
+ { label: "RL Models V2", value: rlModelsV2.filter((m) => m.exists).length },
1175
+ {
1176
+ label: "Workflow Components",
1177
+ value: Array.isArray(workflowInfo?.components)
1178
+ ? workflowInfo.components.filter((x) => x?.available).length
1179
+ : 0,
1180
+ },
1181
+ { label: "Comparisons", value: Array.isArray(comparisonsInfo?.comparisons) ? comparisonsInfo.comparisons.length : 0 },
1182
+ ];
1183
+
1184
+ const buildConicGradient = (rows, palette) => {
1185
+ const total = rows.reduce((sum, row) => sum + Number(row?.value || 0), 0);
1186
+ if (total <= 0) return null;
1187
+ let cursor = 0;
1188
+ const segments = [];
1189
+ rows.forEach((row, idx) => {
1190
+ const value = Number(row?.value || 0);
1191
+ if (value <= 0) return;
1192
+ const delta = (value / total) * 100;
1193
+ const start = cursor;
1194
+ const end = cursor + delta;
1195
+ segments.push(`${palette[idx % palette.length]} ${start.toFixed(2)}% ${end.toFixed(2)}%`);
1196
+ cursor = end;
1197
+ });
1198
+ if (cursor < 100) {
1199
+ segments.push(`#1e293b ${cursor.toFixed(2)}% 100%`);
1200
+ }
1201
+ return `conic-gradient(${segments.join(", ")})`;
1202
+ };
1203
+
1204
+ const timelineModeGradient = buildConicGradient(
1205
+ timelineModeRows,
1206
+ ["#22d3ee", "#a78bfa", "#f59e0b", "#34d399", "#f472b6"]
1207
+ );
1208
+ const trainingStatusGradient = buildConicGradient(
1209
+ trainingStatusRows,
1210
+ ["#22c55e", "#eab308", "#6366f1", "#ef4444", "#64748b"]
1211
+ );
1212
+ const complianceGradient = buildConicGradient(
1213
+ [
1214
+ { label: "pass", value: compliancePass },
1215
+ { label: "fail", value: complianceFail },
1216
+ { label: "unknown", value: complianceUnknown },
1217
+ ],
1218
+ ["#22c55e", "#ef4444", "#f59e0b"]
1219
+ );
1220
+
1221
+ const renderBars = (rows, color = "bg-indigo-500") => {
1222
+ const maxVal = Math.max(...rows.map((r) => Number(r?.value || 0)), 1);
1223
+ return (
1224
+ <div className="space-y-2">
1225
+ {rows.map((row) => {
1226
+ const widthPct = Math.max(0, Math.min(100, (Number(row.value || 0) / maxVal) * 100));
1227
+ return (
1228
+ <div key={row.label} className="space-y-1">
1229
+ <div className="flex justify-between text-xs">
1230
+ <span className="text-slate-300">{row.label.replace(/_/g, " ")}</span>
1231
+ <span className="text-white font-semibold">{Number(row.value || 0)}</span>
1232
+ </div>
1233
+ <div className="h-2 w-full rounded bg-slate-800 overflow-hidden">
1234
+ <div className={`h-full ${color}`} style={{ width: `${widthPct}%` }} />
1235
+ </div>
1236
+ </div>
1237
+ );
1238
+ })}
1239
+ </div>
1240
+ );
1241
+ };
1242
+
1243
+ return (
1244
+ <div className="space-y-6">
1245
+ <div className="grid grid-cols-2 md:grid-cols-4 gap-4">
1246
+ {[
1247
+ { label: "Total Runs", value: history.length, icon: "play_circle", color: "indigo" },
1248
+ { label: "Avg Score", value: avgScore != null ? `${(avgScore * 100).toFixed(1)}%` : "—", icon: "grade", color: "emerald" },
1249
+ { label: "Running Jobs", value: runningJobs, icon: "settings_slow_motion", color: "violet" },
1250
+ { label: "Endpoint Coverage", value: endpointCoverage != null ? `${(endpointCoverage * 100).toFixed(0)}%` : "—", icon: "hub", color: "amber" },
1251
+ ].map((s) => (
1252
+ <div key={s.label} className="bg-slate-900/70 border border-white/5 rounded-xl p-4">
1253
+ <div className="flex items-center gap-2 mb-2">
1254
+ <span className={`material-symbols-outlined text-${s.color}-400`}>{s.icon}</span>
1255
+ <span className="text-xs font-semibold text-slate-400 uppercase tracking-widest">{s.label}</span>
1256
+ </div>
1257
+ <div className="text-3xl font-black text-white">{s.value}</div>
1258
+ </div>
1259
+ ))}
1260
+ </div>
1261
+
1262
+ {!loadingHistory && (
1263
+ <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
1264
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-6">
1265
+ <h2 className="text-base font-bold text-white mb-4 flex items-center gap-2">
1266
+ <span className="material-symbols-outlined text-cyan-400">bar_chart</span> Timeline Metric: Runs by Task
1267
+ </h2>
1268
+ {timelineTaskRows.length === 0 ? (
1269
+ <div className="text-xs text-slate-500">No timeline history yet.</div>
1270
+ ) : renderBars(timelineTaskRows, "bg-cyan-500")}
1271
+ </div>
1272
+
1273
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-6 h-full flex flex-col">
1274
+ <h2 className="text-base font-bold text-white mb-4 flex items-center gap-2">
1275
+ <span className="material-symbols-outlined text-violet-400">pie_chart</span> Timeline Metric: Agent Mode Mix
1276
+ </h2>
1277
+ {timelineModeGradient ? (
1278
+ <div className="grid grid-cols-[120px,1fr] gap-4 items-center">
1279
+ <div className="relative w-[120px] h-[120px] rounded-full" style={{ background: timelineModeGradient }}>
1280
+ <div className="absolute inset-[18px] rounded-full bg-slate-950/95 border border-white/5" />
1281
+ </div>
1282
+ <div className="space-y-2">
1283
+ {timelineModeRows.map((row, idx) => (
1284
+ <div key={row.label} className="flex items-center justify-between text-xs">
1285
+ <div className="flex items-center gap-2 text-slate-300">
1286
+ <span
1287
+ className="inline-block w-2.5 h-2.5 rounded-full"
1288
+ style={{ backgroundColor: ["#22d3ee", "#a78bfa", "#f59e0b", "#34d399", "#f472b6"][idx % 5] }}
1289
+ />
1290
+ {row.label}
1291
+ </div>
1292
+ <span className="text-white font-semibold">{row.value}</span>
1293
+ </div>
1294
+ ))}
1295
+ </div>
1296
+ </div>
1297
+ ) : (
1298
+ <div className="text-xs text-slate-500">No timeline mode data yet.</div>
1299
+ )}
1300
+ </div>
1301
+ </div>
1302
+ )}
1303
+
1304
+ {!loadingAll && (
1305
+ <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
1306
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-6">
1307
+ <h2 className="text-base font-bold text-white mb-4 flex items-center gap-2">
1308
+ <span className="material-symbols-outlined text-emerald-400">stacked_bar_chart</span> Training Metric: Job Status Mix
1309
+ </h2>
1310
+ {trainingStatusGradient ? (
1311
+ <div className="space-y-3">
1312
+ <div className="h-4 rounded bg-slate-800 overflow-hidden">
1313
+ <div className="h-full" style={{ background: trainingStatusGradient }} />
1314
+ </div>
1315
+ <div className="grid grid-cols-2 gap-2">
1316
+ {trainingStatusRows.map((row, idx) => (
1317
+ <div key={row.label} className="flex items-center justify-between text-xs bg-slate-800/40 border border-white/5 rounded px-2 py-1">
1318
+ <div className="flex items-center gap-2 text-slate-300">
1319
+ <span
1320
+ className="inline-block w-2.5 h-2.5 rounded-full"
1321
+ style={{ backgroundColor: ["#22c55e", "#eab308", "#6366f1", "#ef4444", "#64748b"][idx % 5] }}
1322
+ />
1323
+ {row.label}
1324
+ </div>
1325
+ <span className="text-white font-semibold">{row.value}</span>
1326
+ </div>
1327
+ ))}
1328
+ </div>
1329
+ </div>
1330
+ ) : (
1331
+ <div className="text-xs text-slate-500">No training jobs available yet.</div>
1332
+ )}
1333
+ </div>
1334
+
1335
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-6">
1336
+ <h2 className="text-base font-bold text-white mb-4 flex items-center gap-2">
1337
+ <span className="material-symbols-outlined text-indigo-400">dataset</span> Training Metric: Phase Progress (%)
1338
+ </h2>
1339
+ <div className="space-y-3">
1340
+ {trainingPhaseRows.map((row) => (
1341
+ <div key={row.label} className="space-y-1">
1342
+ <div className="flex justify-between text-xs">
1343
+ <span className="text-slate-300">{row.label}</span>
1344
+ <span className="text-white font-semibold">{row.value.toFixed(1)}% · {row.jobs} jobs</span>
1345
+ </div>
1346
+ <div className="h-2 w-full rounded bg-slate-800 overflow-hidden">
1347
+ <div className="h-full bg-indigo-500" style={{ width: `${Math.max(0, Math.min(100, row.value))}%` }} />
1348
+ </div>
1349
+ </div>
1350
+ ))}
1351
+ </div>
1352
+ </div>
1353
+ </div>
1354
+ )}
1355
+
1356
+ {!loadingAll && (
1357
+ <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
1358
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-6 h-full flex flex-col">
1359
+ <h2 className="text-base font-bold text-white mb-4 flex items-center gap-2">
1360
+ <span className="material-symbols-outlined text-cyan-400">analytics</span> System Metric: Endpoint-fed Counts
1361
+ </h2>
1362
+ {renderBars(systemMetricRows, "bg-cyan-500")}
1363
+ </div>
1364
+
1365
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-6 h-full flex flex-col">
1366
+ <h2 className="text-base font-bold text-white mb-4 flex items-center gap-2">
1367
+ <span className="material-symbols-outlined text-violet-400">policy</span>
1368
+ System Metric: Compliance + Endpoint Health
1369
+ </h2>
1370
+ <div className="grid grid-cols-[120px,1fr] gap-4 items-center mb-4">
1371
+ <div className="relative w-[120px] h-[120px] rounded-full" style={{ background: complianceGradient || "#1e293b" }}>
1372
+ <div className="absolute inset-[18px] rounded-full bg-slate-950/95 border border-white/5" />
1373
+ </div>
1374
+ <div className="space-y-1 text-xs">
1375
+ <div className="flex justify-between"><span className="text-slate-300">Pass</span><span className="text-emerald-400 font-semibold">{compliancePass}</span></div>
1376
+ <div className="flex justify-between"><span className="text-slate-300">Fail</span><span className="text-rose-400 font-semibold">{complianceFail}</span></div>
1377
+ <div className="flex justify-between"><span className="text-slate-300">Unknown</span><span className="text-amber-300 font-semibold">{complianceUnknown}</span></div>
1378
+ </div>
1379
+ </div>
1380
+ <h3 className="text-xs font-semibold uppercase tracking-widest text-slate-400 mb-2">Endpoint Health</h3>
1381
+ <div className="grid grid-cols-2 gap-2">
1382
+ {endpointHealth.map((row) => (
1383
+ <div
1384
+ key={row.key}
1385
+ className={`text-xs border rounded px-2 py-1 ${row.ok ? "border-emerald-500/30 text-emerald-300 bg-emerald-500/10" : "border-rose-500/30 text-rose-300 bg-rose-500/10"}`}
1386
+ >
1387
+ {row.label}
1388
+ </div>
1389
+ ))}
1390
+ </div>
1391
+ </div>
1392
+ </div>
1393
+ )}
1394
+
1395
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-6">
1396
+ <h2 className="text-lg font-bold text-white mb-4 flex items-center gap-2">
1397
+ <span className="material-symbols-outlined text-indigo-400">history</span> Simulation Run History
1398
+ </h2>
1399
+ {loadingHistory ? (
1400
+ <div className="flex items-center gap-3 text-slate-400 text-sm p-6">
1401
+ <div className="w-5 h-5 border-2 border-indigo-500 border-t-transparent rounded-full animate-spin" />
1402
+ Loading history…
1403
+ </div>
1404
+ ) : history.length === 0 ? (
1405
+ <p className="text-slate-500 text-sm py-6 text-center">No simulation history yet. Run a simulation on the Timeline tab first.</p>
1406
+ ) : (
1407
+ <div className="overflow-x-auto">
1408
+ <table className="w-full text-xs text-left">
1409
+ <thead>
1410
+ <tr className="text-slate-400 border-b border-white/5">
1411
+ <th className="pb-2 pr-4">Run ID</th>
1412
+ <th className="pb-2 pr-4">Task</th>
1413
+ <th className="pb-2 pr-4">Agent Mode</th>
1414
+ <th className="pb-2 pr-4">Status</th>
1415
+ <th className="pb-2 pr-4">Score</th>
1416
+ <th className="pb-2">Reward</th>
1417
+ </tr>
1418
+ </thead>
1419
+ <tbody>
1420
+ {history.map((run) => {
1421
+ const score = getRunScore(run);
1422
+ const reward = getRunReward(run);
1423
+ const status = run.status || "completed";
1424
+ const statusColor = status === "completed" ? "emerald" : status === "running" ? "amber" : "slate";
1425
+ return (
1426
+ <tr key={run.run_id} className="border-b border-white/5 hover:bg-white/5">
1427
+ <td className="py-2 pr-4 font-mono text-indigo-300">{run.run_id?.slice(0, 8)}…</td>
1428
+ <td className="py-2 pr-4 text-white font-medium">{run.task_id?.replace(/_/g, " ")}</td>
1429
+ <td className="py-2 pr-4 text-slate-400">{run.agent_mode}</td>
1430
+ <td className="py-2 pr-4">
1431
+ <span className={`bg-${statusColor}-500/20 text-${statusColor}-400 text-xs font-bold px-2 py-0.5 rounded-full`}>{status}</span>
1432
+ </td>
1433
+ <td className="py-2 pr-4 font-bold text-white">{score != null ? `${(score * 100).toFixed(1)}%` : "—"}</td>
1434
+ <td className="py-2 text-amber-400">{reward != null ? reward.toFixed(2) : "—"}</td>
1435
+ </tr>
1436
+ );
1437
+ })}
1438
+ </tbody>
1439
+ </table>
1440
+ </div>
1441
+ )}
1442
+ </div>
1443
+
1444
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-6">
1445
+ <h2 className="text-lg font-bold text-white mb-4 flex items-center gap-2">
1446
+ <span className="material-symbols-outlined text-amber-400">model_training</span> Trained RL Model Checkpoints
1447
+ </h2>
1448
+ <div className="grid grid-cols-1 md:grid-cols-3 gap-4">
1449
+ {rlModels.length === 0 && rlModelsV2.length === 0 ? (
1450
+ <p className="text-slate-500 text-sm col-span-3">No trained models found. Train a model via the RL pipeline first.</p>
1451
+ ) : (
1452
+ [...rlModels, ...rlModelsV2.map((m) => ({
1453
+ label: m.model_path ? String(m.model_path).split(/[\\/]/).pop() : "unnamed",
1454
+ path: m.model_path ? `${m.model_path}.zip` : "",
1455
+ exists: Boolean(m.exists),
1456
+ model_type: Number(m.phase) === 2 ? "phase2" : "phase1",
1457
+ }))].map((m) => (
1458
+ <div key={`${m.path}-${m.label}`} className={`border rounded-xl p-4 ${m.exists ? "border-amber-500/30 bg-amber-500/5" : "border-white/5 bg-slate-800/40"}`}>
1459
+ <div className="flex items-center gap-2 mb-2">
1460
+ <span className={`material-symbols-outlined text-lg ${m.exists ? "text-amber-400" : "text-slate-600"}`}>
1461
+ {m.exists ? "check_circle" : "radio_button_unchecked"}
1462
+ </span>
1463
+ <span className="text-sm font-bold text-white">{m.label}</span>
1464
+ </div>
1465
+ <div className="text-xs text-slate-400 font-mono truncate">{m.path?.split("\\").pop() || m.path?.split("/").pop()}</div>
1466
+ <div className="text-xs text-slate-500 mt-1">Type: {m.model_type}</div>
1467
+ {!m.exists && <div className="text-xs text-slate-600 mt-2">Not yet trained</div>}
1468
+ </div>
1469
+ ))
1470
+ )}
1471
+ </div>
1472
+ </div>
1473
+
1474
+ {Object.keys(byTask).length > 0 && (
1475
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-6">
1476
+ <h2 className="text-lg font-bold text-white mb-4 flex items-center gap-2">
1477
+ <span className="material-symbols-outlined text-violet-400">bar_chart</span> Score by Scenario
1478
+ </h2>
1479
+ <div className="space-y-4">
1480
+ {Object.entries(byTask).map(([task, runs]) => {
1481
+ const scores = runs.map((r) => r.score ?? r.payload?.score).filter((s) => s != null);
1482
+ const avg = scores.length ? scores.reduce((a, b) => a + b, 0) / scores.length : null;
1483
+ const avgPct = avg != null ? Number((avg * 100).toFixed(1)) : 0;
1484
+ return (
1485
+ <div key={task} className="space-y-1">
1486
+ <div className="flex justify-between text-sm">
1487
+ <span className="font-semibold text-white">{task.replace(/_/g, " ")}</span>
1488
+ <span className="text-slate-400">{runs.length} runs · avg {avg != null ? `${avgPct}%` : "—"}</span>
1489
+ </div>
1490
+ <div className="h-2 w-full rounded bg-slate-800 overflow-hidden">
1491
+ <div className="h-full bg-violet-500" style={{ width: `${Math.max(0, Math.min(100, avgPct))}%` }} />
1492
+ </div>
1493
+ </div>
1494
+ );
1495
+ })}
1496
+ </div>
1497
+ </div>
1498
+ )}
1499
+ </div>
1500
+ );
1501
+ }
1502
+
1503
+ function TrainingTab({ tasks }) {
1504
+ return <TrainingTabV2 tasks={tasks} />;
1505
+ }
1506
+
1507
// Ordered descriptors for the top navigation tabs. `icon` is a Material
// Symbols glyph name; `id` keys the active-tab state in <Dashboard>.
const TABS = [
  { id: "timeline", label: "Timeline", icon: "timeline" },
  { id: "training", label: "Training", icon: "fitness_center" },
  { id: "resources", label: "Resources", icon: "leaderboard" },
  { id: "library", label: "Overview", icon: "menu_book" },
  { id: "analytics", label: "Analytics", icon: "analytics" },
];
1514
+
1515
+ export function Dashboard({ tasks = [] }) {
1516
+ const [activeTab, setActiveTab] = useState("library");
1517
+
1518
+ return (
1519
+ <div className="font-body-base min-h-screen flex flex-col pt-16 bg-[#0a0b14] text-white">
1520
+ <nav className="fixed top-0 left-0 w-full z-50 flex items-center justify-between px-6 h-16 bg-slate-950/80 backdrop-blur-xl border-b border-white/5 shadow-2xl shadow-indigo-950/50">
1521
+ <div className="flex items-center space-x-8">
1522
+ <span className="text-lg font-black tracking-tighter text-white uppercase">
1523
+ <span className="text-indigo-400">OPEN</span>ENV
1524
+ </span>
1525
+ <div className="hidden md:flex space-x-1">
1526
+ {TABS.map((tab) => (
1527
+ <button
1528
+ key={tab.id}
1529
+ onClick={() => setActiveTab(tab.id)}
1530
+ className={`flex items-center gap-1.5 px-4 py-2 rounded-lg text-sm font-semibold transition-all duration-200 ${
1531
+ activeTab === tab.id
1532
+ ? "bg-indigo-600/30 text-indigo-300 border border-indigo-500/30"
1533
+ : "text-slate-400 hover:text-white hover:bg-white/5"
1534
+ }`}
1535
+ >
1536
+ <span className="material-symbols-outlined text-[16px]">{tab.icon}</span>
1537
+ {tab.label}
1538
+ </button>
1539
+ ))}
1540
+ </div>
1541
+ </div>
1542
+ <div className="flex items-center gap-3">
1543
+ <div className="hidden md:flex items-center gap-1.5 bg-emerald-500/10 border border-emerald-500/20 px-3 py-1.5 rounded-full">
1544
+ <div className="w-2 h-2 bg-emerald-400 rounded-full animate-pulse" />
1545
+ <span className="text-xs font-bold text-emerald-400">LIVE</span>
1546
+ </div>
1547
+ <div className="text-xs text-slate-500 hidden md:block">Gov Workflow RL | OpenEnv v2.0</div>
1548
+ </div>
1549
+ </nav>
1550
+
1551
+ <main className="flex-1 max-w-7xl w-full mx-auto px-6 py-8">
1552
+ <div className="flex md:hidden mb-6 bg-slate-900 rounded-xl p-1 space-x-1">
1553
+ {TABS.map((tab) => (
1554
+ <button
1555
+ key={tab.id}
1556
+ onClick={() => setActiveTab(tab.id)}
1557
+ className={`flex-1 py-2 text-xs font-bold rounded-lg transition-all ${activeTab === tab.id ? "bg-indigo-600 text-white" : "text-slate-400"}`}
1558
+ >
1559
+ {tab.label}
1560
+ </button>
1561
+ ))}
1562
+ </div>
1563
+
1564
+ <div className="mb-6">
1565
+ {activeTab === "timeline" && <div><h1 className="text-2xl font-black text-white">Oversight Dashboard</h1><p className="text-sm text-slate-400 mt-1">Watch the AI agent resolve a government workflow backlog in real time - step by step, decision by decision.</p></div>}
1566
+ {activeTab === "training" && <div><h1 className="text-2xl font-black text-white">Reinforcement Learning</h1><p className="text-sm text-slate-400 mt-1">Visualize policy convergence and reward trends as the agent continuously improves.</p></div>}
1567
+ {activeTab === "resources" && <div><h1 className="text-2xl font-black text-white">Policy Benchmark</h1><p className="text-sm text-slate-400 mt-1">Compare all three baseline policies head-to-head on identical scenarios to see which strategy wins.</p></div>}
1568
+ {activeTab === "library" && <div><h1 className="text-2xl font-black text-white">Overview</h1><p className="text-sm text-slate-400 mt-1">Explore system behavior, task configurations, OpenEnv compliance status, and workflow architecture.</p></div>}
1569
+ {activeTab === "analytics" && <div><h1 className="text-2xl font-black text-white">Performance Analytics</h1><p className="text-sm text-slate-400 mt-1">Review historical simulation runs, trained model checkpoints, and reward improvement evidence.</p></div>}
1570
+ </div>
1571
+
1572
+ {activeTab === "timeline" && <TimelineTab tasks={tasks} />}
1573
+ {activeTab === "training" && <TrainingTab tasks={tasks} />}
1574
+ {activeTab === "resources" && <ResourcesTab tasks={tasks} />}
1575
+ {activeTab === "library" && <LibraryTab tasks={tasks} />}
1576
+ {activeTab === "analytics" && <AnalyticsTab />}
1577
+ </main>
1578
+
1579
+ <style>{`
1580
+ @keyframes fadeUp {
1581
+ from { opacity: 0; transform: translateY(8px); }
1582
+ to { opacity: 1; transform: translateY(0); }
1583
+ }
1584
+ `}</style>
1585
+ </div>
1586
+ );
1587
+ }
1588
+
1589
+
frontend/react/src/components/story-ui/TrainingTabV2.jsx ADDED
@@ -0,0 +1,1760 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useEffect, useMemo, useRef, useState } from "react";
2
+ import { api, fmt } from "../../api/client";
3
+
4
/**
 * Resolve the backend API origin.
 * During local Vite development (localhost on port 5173) the API runs
 * separately on port 7860; otherwise the page is served by the backend
 * itself, so the page origin is used. Falls back to a localhost default
 * when no `window` exists (SSR / tests).
 */
function backendBaseUrl() {
  if (typeof window === "undefined") return "http://127.0.0.1:7860";
  const { hostname, port, origin } = window.location;
  const isDevServer = (hostname === "127.0.0.1" || hostname === "localhost") && port === "5173";
  return isDevServer ? `http://${hostname}:7860` : origin;
}
13
+
14
// Canonicalize a filesystem path for comparison: backslashes become forward
// slashes and the whole string is lowercased. Falsy input yields "".
function normalizePath(path) {
  const text = String(path || "");
  return text.replaceAll("\\", "/").toLowerCase();
}
17
+
18
// Coerce a value to a finite number; return null for NaN/±Infinity
// (i.e. anything Number() cannot turn into a usable finite value).
function toNumberOrNull(value) {
  const parsed = Number(value);
  if (!Number.isFinite(parsed)) return null;
  return parsed;
}
22
+
23
// Convert a unix epoch in seconds to a Date.
// Non-numeric or non-positive input yields null.
function timestampToDate(value) {
  const seconds = Number(value);
  const valid = Number.isFinite(seconds) && seconds > 0;
  return valid ? new Date(seconds * 1000) : null;
}
28
+
29
// Parse one stable-baselines3 log-table row such as "| ep_rew_mean | 1.23 |".
// Returns { key, value } with the key snake_cased, or null when the line is
// not a numeric metric row.
function metricRowKV(line) {
  const match = /\|\s*([a-zA-Z0-9_ ]+?)\s*\|\s*([-]?\d+(?:\.\d+)?)\s*\|/.exec(String(line || ""));
  if (match === null) return null;
  const key = match[1].trim().toLowerCase().replace(/\s+/g, "_");
  return { key, value: parseFloat(match[2]) };
}
37
+
38
/**
 * Extract reward/score time-series and progress hints from raw trainer logs.
 *
 * Three sources of data are recognized, in this order per line:
 *  - a "done/total" ratio anywhere in the line (progress bars) → progress;
 *  - stable-baselines3 metric-table rows: ep_rew_mean / grader_score values
 *    are held pending and committed to the series only when the same table's
 *    total_timesteps row arrives (so each point gets a real x coordinate);
 *  - "Eval num_timesteps=..., episode_reward=..." checkpoints and
 *    "[Eval] Average / New best ... grader score: ..." summary lines.
 *
 * Returns { rewardPoints, scorePoints, logProgressRatio, lastLoggedTimesteps }
 * with each points array deduped by timestep (last wins) and sorted ascending.
 */
function parseLogMetrics(lines) {
  const rewardRows = [];
  const scoreRows = [];
  let pendingReward = null; // ep_rew_mean/mean_reward awaiting total_timesteps
  let pendingScore = null; // grader_score awaiting total_timesteps
  let progressRatio = null;
  let lastTimesteps = null;

  for (const line of lines || []) {
    if (!line) continue;

    // "done/total" anywhere (e.g. progress bars) → most recent progress ratio.
    const ratio = line.match(/(\d[\d,]*)\/(\d[\d,]*)/);
    if (ratio) {
      const done = parseInt(String(ratio[1]).replace(/,/g, ""), 10);
      const total = parseInt(String(ratio[2]).replace(/,/g, ""), 10);
      if (Number.isFinite(done) && Number.isFinite(total) && total > 0) {
        progressRatio = done / total;
      }
    }

    const metric = metricRowKV(line);
    if (metric) {
      if (metric.key === "ep_rew_mean" || metric.key === "mean_reward") {
        pendingReward = metric.value;
      }
      if (metric.key === "grader_score" || metric.key === "avg_grader_score") {
        pendingScore = metric.value;
      }
      if (metric.key === "total_timesteps") {
        const ts = parseInt(String(metric.value), 10);
        if (Number.isFinite(ts)) {
          lastTimesteps = ts;
          // Commit any pending values at this timestep, then clear them.
          if (Number.isFinite(pendingReward)) {
            rewardRows.push({ t: ts, value: Number(pendingReward) });
            pendingReward = null;
          }
          if (Number.isFinite(pendingScore)) {
            scoreRows.push({ t: ts, value: Number(pendingScore) });
            pendingScore = null;
          }
        }
      }
    }

    const evalReward = line.match(/Eval\s+num_timesteps=(\d[\d,]*),\s*episode_reward=([-]?\d+(?:\.\d+)?)/i);
    if (evalReward) {
      const ts = parseInt(String(evalReward[1]).replace(/,/g, ""), 10);
      const rew = parseFloat(evalReward[2]);
      if (Number.isFinite(ts) && Number.isFinite(rew)) {
        lastTimesteps = ts;
        rewardRows.push({ t: ts, value: rew });
      }
    }

    const evalScore = line.match(/\[Eval\]\s+Average grader score:\s+([0-9.]+)/i);
    if (evalScore) {
      const score = parseFloat(evalScore[1]);
      if (Number.isFinite(score)) {
        // No timestep on these lines: reuse the last seen one, or synthesize.
        const ts = lastTimesteps || (scoreRows.length > 0 ? scoreRows[scoreRows.length - 1].t + 1 : 1);
        scoreRows.push({ t: ts, value: score });
      }
    }

    const bestScore = line.match(/\[Eval\]\s+New best(?: recurrent)? grader score:\s+([0-9.]+)/i);
    if (bestScore) {
      const score = parseFloat(bestScore[1]);
      if (Number.isFinite(score)) {
        const ts = lastTimesteps || (scoreRows.length > 0 ? scoreRows[scoreRows.length - 1].t + 1 : 1);
        scoreRows.push({ t: ts, value: score });
      }
    }
  }

  // Dedupe by timestep (later entries win) and sort ascending.
  const dedupe = (rows) => {
    const byT = new Map();
    for (const row of rows) {
      if (Number.isFinite(row.t) && Number.isFinite(row.value)) {
        byT.set(row.t, row);
      }
    }
    return [...byT.values()].sort((a, b) => a.t - b.t);
  };

  return {
    rewardPoints: dedupe(rewardRows),
    scorePoints: dedupe(scoreRows),
    logProgressRatio: Number.isFinite(progressRatio) ? progressRatio : null,
    lastLoggedTimesteps: Number.isFinite(lastTimesteps) ? lastTimesteps : null,
  };
}
127
+
128
// Spread (max - min) of the finite `value` fields in a series of rows.
// Returns 0 for non-arrays, empty arrays, or arrays with no finite values.
function seriesSpread(rows) {
  if (!Array.isArray(rows)) return 0;
  const values = rows.map((row) => Number(row?.value)).filter(Number.isFinite);
  if (values.length === 0) return 0;
  return Math.max(...values) - Math.min(...values);
}
134
+
135
/**
 * Select a fixed set of interesting keys from an event payload for display.
 * Returns [key, formattedValue] pairs in a stable, predefined order; numbers
 * are rendered with 1 decimal when |v| >= 10, otherwise 3 decimals, and all
 * other values via String(). Null/undefined values and missing keys are
 * skipped; non-object payloads yield [].
 */
function payloadHighlights(payload) {
  const source = payload && typeof payload === "object" ? payload : {};
  const DISPLAY_KEYS = [
    "task_id",
    "step",
    "reward",
    "score",
    "done",
    "backlog",
    "completed",
    "total_backlog",
    "total_completed",
    "total_sla_breaches",
    "total_valid",
    "total_actions",
    "passed",
    "action_history_len",
  ];

  const pairs = [];
  for (const key of DISPLAY_KEYS) {
    if (!(key in source)) continue;
    const value = source[key];
    if (value == null) continue;
    if (typeof value !== "number") {
      pairs.push([key, String(value)]);
      continue;
    }
    const rendered = Number.isFinite(value)
      ? Number(value).toFixed(Math.abs(value) >= 10 ? 1 : 3)
      : String(value);
    pairs.push([key, rendered]);
  }
  return pairs;
}
166
+
167
/**
 * Build an SVG polyline `points` string from a series of {value} rows.
 * X coordinates are spaced evenly across `width` by index; Y coordinates
 * scale `value` into [minY, maxY] with the SVG axis inverted (0 at top).
 * Empty/missing input yields "".
 */
function toPolyline(points, { minY, maxY, width, height }) {
  if (!points || points.length === 0) return "";
  const span = maxY - minY || 1; // avoid divide-by-zero on flat ranges
  const lastIndex = Math.max(points.length - 1, 1);
  const coords = points.map((point, i) => {
    const x = (i / lastIndex) * width;
    const y = height - ((point.value - minY) / span) * height;
    return `${x},${y}`;
  });
  return coords.join(" ");
}
177
+
178
// Clean a time series: drop rows whose t/value are not finite numbers,
// dedupe by timestep (last occurrence wins), and sort ascending by t.
function normalizeSeries(points) {
  const byT = new Map();
  for (const row of points || []) {
    const t = Number(row?.t);
    const value = Number(row?.value);
    if (Number.isFinite(t) && Number.isFinite(value)) {
      byT.set(t, { t, value });
    }
  }
  return [...byT.values()].sort((a, b) => a.t - b.t);
}
188
+
189
/**
 * Like toPolyline, but X coordinates come from each point's timestep `t`
 * scaled into [minX, maxX] rather than being spaced evenly by index.
 * Empty/missing input yields "".
 */
function toPolylineByT(points, { minX, maxX, minY, maxY, width, height }) {
  if (!points || points.length === 0) return "";
  const spanX = maxX - minX || 1; // guard against zero-width ranges
  const spanY = maxY - minY || 1;
  const coords = [];
  for (const p of points) {
    const x = ((p.t - minX) / spanX) * width;
    const y = height - ((p.value - minY) / spanY) * height;
    coords.push(`${x},${y}`);
  }
  return coords.join(" ");
}
201
+
202
/**
 * Step-chart variant of toPolylineByT: each value is held horizontally until
 * the next timestep, with the first/last values extended to the [minX, maxX]
 * edges. Input points are first deduped and sorted via normalizeSeries.
 * Empty/missing input yields "".
 */
function toStairPolylineByT(points, { minX, maxX, minY, maxY, width, height }) {
  if (!points || points.length === 0) return "";
  const spanX = maxX - minX || 1;
  const spanY = maxY - minY || 1;
  const xOf = (t) => ((t - minX) / spanX) * width;
  const yOf = (v) => height - ((v - minY) / spanY) * height;

  const series = normalizeSeries(points);
  if (series.length === 0) return "";

  // Lead-in: extend the first value back to the left edge.
  const head = series[0];
  const coords = [`${xOf(minX)},${yOf(head.value)}`, `${xOf(head.t)},${yOf(head.value)}`];

  // For each transition, draw the horizontal run then the vertical step.
  for (let i = 1; i < series.length; i += 1) {
    const x = xOf(series[i].t);
    coords.push(`${x},${yOf(series[i - 1].value)}`);
    coords.push(`${x},${yOf(series[i].value)}`);
  }

  // Tail: extend the final value to the right edge.
  const tail = series[series.length - 1];
  coords.push(`${xOf(maxX)},${yOf(tail.value)}`);
  return coords.join(" ");
}
229
+
230
/**
 * Convert one raw trainer log line into a short, human-readable event card
 * of shape { title, text, tone }. Specific numeric patterns (eval checkpoints,
 * grader-score updates, SB3 metric-table rows) are matched first; otherwise
 * the line is classified by its source tag ([training_jobs], [Phase 1], ...).
 * The tone maps to a color via toneClasses.
 */
function summarizeLogLine(line) {
  const raw = String(line || "").trim();
  const make = (title, text, tone) => ({ title, text, tone });
  if (!raw) return make("Info", "Empty line", "slate");
  const lower = raw.toLowerCase();

  const evalReward = raw.match(/Eval\s+num_timesteps=(\d[\d,]*),\s*episode_reward=([-]?\d+(?:\.\d+)?)/i);
  if (evalReward) {
    const ts = Number(String(evalReward[1]).replace(/,/g, ""));
    const rew = Number(evalReward[2]);
    return make(
      "Eval Checkpoint",
      `Timesteps ${Number.isFinite(ts) ? ts.toLocaleString() : "-"} | Reward ${Number.isFinite(rew) ? rew.toFixed(2) : "-"}`,
      "emerald",
    );
  }

  const bestScore = raw.match(/\[Eval\]\s+New best(?: recurrent)? grader score:\s+([0-9.]+)/i);
  if (bestScore) {
    const score = Number(bestScore[1]);
    return make(
      "Best Score Improved",
      `Grader score improved to ${Number.isFinite(score) ? score.toFixed(4) : "-"}.`,
      "emerald",
    );
  }

  const avgScore = raw.match(/\[Eval\]\s+Average grader score:\s+([0-9.]+)/i);
  if (avgScore) {
    const score = Number(avgScore[1]);
    return make(
      "Evaluation Summary",
      `Average grader score ${Number.isFinite(score) ? score.toFixed(4) : "-"}.`,
      "emerald",
    );
  }

  const metric = metricRowKV(raw);
  if (metric) {
    const key = String(metric.key || "").replace(/_/g, " ");
    return make("Metric Update", `${key}: ${Number.isFinite(metric.value) ? metric.value : "-"}`, "indigo");
  }

  // Fallback classification by keyword / source tag.
  if (lower.includes("traceback") || lower.includes("exception") || lower.includes("error")) {
    return make("Error", "A runtime error was reported by the training process. Review backend logs for the exact stack trace.", "rose");
  }
  if (lower.includes("[eval]")) {
    return make("Evaluation", "Evaluation cycle completed and scores were updated.", "emerald");
  }
  if (lower.includes("[training_jobs]")) {
    if (lower.includes("started pid=")) {
      return make("Job Started", "Training worker started successfully and began consuming timesteps.", "cyan");
    }
    if (lower.includes("command:")) {
      return make("Runtime Config", "Training command was prepared with current phase and environment settings.", "cyan");
    }
    return make("System", "Background training service published a runtime status update.", "cyan");
  }
  if (lower.includes("[phase 1]")) {
    return make("Phase 1 Update", "Phase 1 PPO training is actively optimizing policy behavior.", "indigo");
  }
  if (lower.includes("[phase 2]")) {
    return make("Phase 2 Update", "Phase 2 curriculum training is active for harder scenario generalization.", "indigo");
  }
  if (lower.includes("[costmonitor]")) {
    return make("Constraint Monitor", "SLA/fairness penalty monitor updated policy constraint feedback.", "amber");
  }
  return make("Runtime Update", "The trainer reported a new runtime event and internal state progressed.", "amber");
}
302
+
303
/**
 * Render an environment lifecycle event ({ stage, payload }) as a one-line
 * narrative string for the activity feed. Missing payload fields are shown
 * as "-"; unknown stages fall back to "Task[...]: <stage>.".
 */
function summarizeEnvEvent(event) {
  const stage = String(event?.stage || "");
  const payload = event?.payload || {};
  const task = payload?.task_id ? ` [${payload.task_id}]` : "";

  switch (stage) {
    case "reset":
      return `Task${task}: session created. Day ${payload?.day ?? "-"}, starting backlog ${payload?.backlog ?? "-"}.`;
    case "state:initial":
      return `Task${task}: initial snapshot captured. Completed ${payload?.total_completed ?? "-"}, backlog ${payload?.total_backlog ?? "-"}.`;
    case "action-masks":
      return `Task${task}: step ${payload?.step ?? "-"} validated actions (${payload?.total_valid ?? "-"} valid of ${payload?.total_actions ?? "-"}).`;
    case "auto_step":
      return `Task${task}: step ${payload?.step ?? "-"} executed. Reward ${fmt(payload?.reward, 3)}, backlog ${payload?.backlog ?? "-"}, completed ${payload?.completed ?? "-"}.`;
    case "state:post_step":
      return `Task${task}: post-step state updated. Completed ${payload?.total_completed ?? "-"}, backlog ${payload?.total_backlog ?? "-"}, SLA breaches ${payload?.total_sla_breaches ?? "-"}.`;
    case "grade":
      return `Task${task}: grading finished. Score ${fmt(payload?.score, 3)}, pass ${String(payload?.passed)}.`;
    case "session:closed":
      return `Task${task}: session closed successfully.`;
    case "task:error":
      return `Task${task}: run failed - ${payload?.error || "unknown error"}.`;
    default:
      return `Task${task}: ${stage}.`;
  }
}
333
+
334
// Map an environment workflow stage id (case-insensitive) to its display
// label; unrecognized stages are returned unchanged.
function workflowStageLabel(stage) {
  const labels = {
    "reset": "Reset",
    "state:initial": "Initial State",
    "action-masks": "Action Validation",
    "auto_step": "Auto Step",
    "state:post_step": "Post-Step State",
    "grade": "Grade",
    "session:closed": "Session Closed",
    "task:error": "Task Error",
  };
  const key = String(stage || "").toLowerCase();
  return Object.hasOwn(labels, key) ? labels[key] : stage;
}
346
+
347
// Pretty-print a value as 2-space-indented JSON, falling back to String()
// when serialization throws (circular structures, BigInt, ...).
function jsonPretty(value) {
  try {
    return JSON.stringify(value, null, 2);
  } catch {
    return String(value);
  }
}
354
+
355
// Tailwind tint classes for a log-card tone; slate is the neutral fallback.
function toneClasses(tone) {
  const palette = {
    rose: "bg-rose-500/5 border-rose-500/20",
    emerald: "bg-emerald-500/5 border-emerald-500/20",
    indigo: "bg-indigo-500/5 border-indigo-500/20",
    cyan: "bg-cyan-500/5 border-cyan-500/20",
    amber: "bg-amber-500/5 border-amber-500/20",
  };
  return Object.hasOwn(palette, tone) ? palette[tone] : "bg-slate-700/10 border-slate-500/20";
}
363
+
364
// Tailwind badge classes for a training-job status (case-insensitive);
// unknown statuses get the neutral slate style.
function statusClasses(status) {
  const normalized = String(status || "").toLowerCase();
  const palette = {
    running: "text-emerald-300 bg-emerald-500/10 border-emerald-500/30",
    queued: "text-amber-300 bg-amber-500/10 border-amber-500/30",
    completed: "text-indigo-300 bg-indigo-500/10 border-indigo-500/30",
    failed: "text-rose-300 bg-rose-500/10 border-rose-500/30",
    stopped: "text-slate-300 bg-slate-600/20 border-slate-500/30",
  };
  return Object.hasOwn(palette, normalized)
    ? palette[normalized]
    : "text-slate-300 bg-slate-700/20 border-slate-500/30";
}
373
+
374
/**
 * Coerce a raw training-job record from the API into a UI-safe shape:
 * guaranteed id/status strings, numeric counters, a progress value clamped
 * to [0, 1] (preferring the explicit `progress` field, else derived from
 * latest_metrics.total_timesteps / timesteps), and array/object defaults.
 * `index` is only used to synthesize a fallback id.
 */
function normalizeJob(raw, index) {
  const jobId = String(raw?.job_id || raw?.id || `job-${index}`);
  const status = String(raw?.status || "unknown");
  const timesteps = Number(raw?.timesteps || 0);
  const latestMetrics = raw?.latest_metrics && typeof raw.latest_metrics === "object" ? raw.latest_metrics : {};

  const clamp01 = (v) => Math.max(0, Math.min(1, v));
  const explicitProgress = toNumberOrNull(raw?.progress);
  const loggedTimesteps = toNumberOrNull(latestMetrics.total_timesteps);

  let progress = 0;
  if (Number.isFinite(explicitProgress)) {
    // Trust the server-reported progress when present.
    progress = clamp01(Number(explicitProgress));
  } else if (Number.isFinite(loggedTimesteps) && Number.isFinite(timesteps) && timesteps > 0) {
    // Otherwise estimate it from the last logged timestep count.
    progress = clamp01(Number(loggedTimesteps) / Number(timesteps));
  }

  return {
    ...raw,
    job_id: jobId,
    status,
    timesteps: Number.isFinite(timesteps) ? timesteps : 0,
    phase: Number(raw?.phase || 0),
    n_envs: Number(raw?.n_envs || 0),
    progress,
    latest_metrics: latestMetrics,
    logs_tail: Array.isArray(raw?.logs_tail) ? raw.logs_tail : [],
    created_at: toNumberOrNull(raw?.created_at),
    updated_at: toNumberOrNull(raw?.updated_at),
  };
}
406
+
407
+ export function TrainingTabV2({ tasks = [] }) {
408
  // --- Endpoint connectivity panel ---
  const [endpointRows, setEndpointRows] = useState([]);
  const [endpointError, setEndpointError] = useState("");

  // --- Agent / model catalog ---
  const [agents, setAgents] = useState([]);
  const [modelRows, setModelRows] = useState([]);
  const [modelError, setModelError] = useState("");

  // --- Training-job list, selection, and polling control ---
  const [jobs, setJobs] = useState([]);
  const [jobsLoading, setJobsLoading] = useState(false);
  const [jobsError, setJobsError] = useState("");
  const [activeJobId, setActiveJobId] = useState("");
  const [activeJob, setActiveJob] = useState(null);
  const [deletingJobId, setDeletingJobId] = useState("");
  const [jobError, setJobError] = useState("");
  const [pollIntervalMs, setPollIntervalMs] = useState(1500);
  // Consecutive poll failures; drives the backoff in the polling effect.
  const pollFailuresRef = useRef(0);

  // --- Chart series and log-derived progress for the focused job ---
  const [rewardPoints, setRewardPoints] = useState([]);
  const [scorePoints, setScorePoints] = useState([]);
  // Which signal the "score" chart currently displays (may be a fallback
  // signal when grader_score is flat/absent — see parseAndSetPoints).
  const [scoreSignalMeta, setScoreSignalMeta] = useState({
    key: "grader_score",
    label: "Grader Score",
    fallback: false,
  });
  const [logLines, setLogLines] = useState([]);
  const [logProgressRatio, setLogProgressRatio] = useState(null);
  const [lastLoggedTimesteps, setLastLoggedTimesteps] = useState(null);

  // --- New-job form (seed left as "" means "unset") ---
  const [jobForm, setJobForm] = useState({
    phase: 1,
    timesteps: 80000,
    n_envs: 4,
    seed: "",
  });

  // --- Automated OpenEnv replay flow ---
  const [envTaskId, setEnvTaskId] = useState(tasks[0] || "district_backlog_easy");
  const [envSeed, setEnvSeed] = useState("");
  const [envPolicyName, setEnvPolicyName] = useState("backlog_clearance");
  const [envMaxSteps, setEnvMaxSteps] = useState(6);
  const [envBusy, setEnvBusy] = useState(false);
  const [envError, setEnvError] = useState("");
  const [envFlowEvents, setEnvFlowEvents] = useState([]);
  const [envFlowSummary, setEnvFlowSummary] = useState(null);
  const [envFlowRuns, setEnvFlowRuns] = useState([]);
  // Monotonic sequence number for replay events (survives re-renders).
  const envEventSeqRef = useRef(0);
453
+
454
+ useEffect(() => {
455
+ if (tasks.length > 0 && !tasks.includes(envTaskId)) {
456
+ setEnvTaskId(tasks[0]);
457
+ }
458
+ }, [tasks, envTaskId]);
459
+
460
+ useEffect(() => {
461
+ if (agents.length > 0 && !agents.includes(envPolicyName)) {
462
+ setEnvPolicyName(agents[0]);
463
+ }
464
+ }, [agents, envPolicyName]);
465
+
466
+ const refreshEndpointHealth = async () => {
467
+ setEndpointError("");
468
+
469
+ const directGet = async (path) => {
470
+ const res = await fetch(`${backendBaseUrl()}${path}`, { method: "GET" });
471
+ if (!res.ok) {
472
+ throw new Error(`${path} -> ${res.status}`);
473
+ }
474
+ try {
475
+ return await res.json();
476
+ } catch (_err) {
477
+ return { ok: true };
478
+ }
479
+ };
480
+
481
+ const checks = [
482
+ { key: "health", label: "Health", fn: () => api("/health") },
483
+ { key: "tasks", label: "Tasks", fn: () => api("/tasks") },
484
+ { key: "agents", label: "Agents", fn: () => api("/agents") },
485
+ { key: "training_jobs", label: "Training Jobs", fn: () => api("/training_jobs") },
486
+ { key: "actions_schema", label: "Action Schema", fn: () => api("/actions/schema") },
487
+ { key: "rl_models", label: "RL Models", fn: () => api("/rl_models") },
488
+ { key: "rl_models_v2", label: "RL Models V2", fn: () => api("/rl/models") },
489
+ { key: "v1_agents", label: "V1 Agents", fn: () => directGet("/api/v1/agents") },
490
+ { key: "v1_rl_models", label: "V1 RL Models", fn: () => directGet("/api/v1/rl_models") },
491
+ ];
492
+
493
+ const settled = await Promise.allSettled(
494
+ checks.map(async (chk) => {
495
+ const start = Date.now();
496
+ await chk.fn();
497
+ return { key: chk.key, label: chk.label, ok: true, ms: Date.now() - start };
498
+ })
499
+ );
500
+
501
+ const rows = settled.map((res, idx) => {
502
+ const meta = checks[idx];
503
+ if (res.status === "fulfilled") return res.value;
504
+ return {
505
+ key: meta.key,
506
+ label: meta.label,
507
+ ok: false,
508
+ ms: null,
509
+ error: res.reason?.message || String(res.reason),
510
+ };
511
+ });
512
+
513
+ setEndpointRows(rows);
514
+ if (rows.some((r) => !r.ok)) {
515
+ setEndpointError("Some endpoints are down. Retries remain active.");
516
+ }
517
+ };
518
+
519
  // Load the agent list and merge the two model-registry endpoints
  // (/rl_models and /rl/models) into one deduplicated, sorted table.
  const refreshCatalog = async () => {
    setModelError("");
    try {
      // Fetch all three catalogs concurrently; each may fail independently.
      const [agentRes, rlV1Res, rlV2Res] = await Promise.allSettled([
        api("/agents"),
        api("/rl_models"),
        api("/rl/models"),
      ]);

      if (agentRes.status === "fulfilled") {
        setAgents(Array.isArray(agentRes.value) ? agentRes.value : []);
      }

      // Normalize both registry shapes into a common row format.
      const unified = [];
      if (rlV1Res.status === "fulfilled") {
        const rows = Array.isArray(rlV1Res.value?.models) ? rlV1Res.value.models : [];
        for (const row of rows) {
          unified.push({
            source: "api/rl_models",
            label: row.label || row.path || "unnamed",
            path: row.path || "",
            exists: Boolean(row.exists),
            // Infer the training phase from the normalized path segment.
            phase: normalizePath(row.path).includes("/phase2/") ? 2 : normalizePath(row.path).includes("/phase1/") ? 1 : 0,
          });
        }
      }
      if (rlV2Res.status === "fulfilled") {
        const rows = Array.isArray(rlV2Res.value) ? rlV2Res.value : [];
        for (const row of rows) {
          // V2 model paths may omit the .zip extension; append it if missing.
          const path = row.model_path
            ? (String(row.model_path).toLowerCase().endsWith(".zip") ? row.model_path : `${row.model_path}.zip`)
            : "";
          unified.push({
            source: "api/rl/models",
            // Last path segment (either separator) as display label.
            label: path.split(/[\\/]/).pop() || row.model_path || "unnamed",
            path,
            exists: Boolean(row.exists),
            phase: Number(row.phase || 0),
          });
        }
      }

      // Dedupe by normalized path; first occurrence (V1 before V2) wins.
      const dedupe = new Map();
      for (const row of unified) {
        const key = normalizePath(row.path);
        if (!key) continue;
        if (!dedupe.has(key)) dedupe.set(key, row);
      }
      // Sort: higher phase first, then alphabetically by label.
      const rows = Array.from(dedupe.values()).sort((a, b) => {
        if (a.phase !== b.phase) return b.phase - a.phase;
        return String(a.label).localeCompare(String(b.label));
      });
      setModelRows(rows);
      if (rows.length === 0) {
        setModelError("No models discovered from dynamic model endpoints.");
      }
    } catch (err) {
      setModelError(err?.message || "Failed to load model registry.");
    }
  };
579
+
580
+ const refreshJobs = async () => {
581
+ setJobsLoading(true);
582
+ try {
583
+ const data = await api("/training_jobs");
584
+ const rowsRaw = Array.isArray(data?.jobs) ? data.jobs : [];
585
+ const rows = rowsRaw.map(normalizeJob).sort((a, b) => Number(b.created_at || 0) - Number(a.created_at || 0));
586
+ setJobs(rows);
587
+ setJobsError("");
588
+
589
+ const running = rows.find((j) => j.status === "running" || j.status === "queued");
590
+ const current = rows.find((j) => j.job_id === activeJobId);
591
+
592
+ if (running?.job_id) {
593
+ if (!current || (current.status !== "running" && current.status !== "queued")) {
594
+ setActiveJobId(running.job_id);
595
+ }
596
+ } else if (!activeJobId && rows[0]?.job_id) {
597
+ setActiveJobId(rows[0].job_id);
598
+ }
599
+ } catch (err) {
600
+ setJobsError(err?.message || "Failed to load training jobs.");
601
+ } finally {
602
+ setJobsLoading(false);
603
+ }
604
+ };
605
+
606
  // Rebuild the reward/score chart series for one job snapshot, merging
  // three sources in order: metric_history rows, values parsed from the
  // log tail, and latest_metrics. If the grader-score series is flat or
  // too short, fall back to an alternative signal for the score chart.
  const parseAndSetPoints = (jobSnapshot) => {
    const lines = Array.isArray(jobSnapshot?.logs_tail) ? jobSnapshot.logs_tail : [];
    setLogLines(lines);

    // Log-derived progress hints used by the progressC memo.
    const parsed = parseLogMetrics(lines);
    setLogProgressRatio(parsed.logProgressRatio);
    setLastLoggedTimesteps(parsed.lastLoggedTimesteps);

    const nextRewards = [];
    const nextScores = [];
    // Candidate fallback signals for the score chart.
    const nextSignals = {
      explained_variance: [],
      ep_len_mean: [],
      approx_kl: [],
    };

    // Source 1: structured metric history rows (t/total_timesteps keyed).
    const history = Array.isArray(jobSnapshot?.metric_history) ? jobSnapshot.metric_history : [];
    for (const row of history) {
      const t = Number(row?.t ?? row?.total_timesteps ?? NaN);
      if (!Number.isFinite(t)) continue;
      const rew = Number(row?.ep_rew_mean ?? row?.mean_reward ?? NaN);
      const score = Number(row?.grader_score ?? row?.avg_grader_score ?? NaN);
      if (Number.isFinite(rew)) nextRewards.push({ t, value: rew });
      if (Number.isFinite(score)) nextScores.push({ t, value: score });
      for (const key of Object.keys(nextSignals)) {
        const vv = Number(row?.[key] ?? NaN);
        if (Number.isFinite(vv)) nextSignals[key].push({ t, value: vv });
      }
    }
    // Source 2: points recovered from the raw log tail.
    nextRewards.push(...parsed.rewardPoints);
    nextScores.push(...parsed.scorePoints);

    // Source 3: the single most recent metrics object.
    const lm = jobSnapshot?.latest_metrics || {};
    const metricTs = Number(lm.total_timesteps ?? NaN);
    const metricReward = Number(lm.ep_rew_mean ?? lm.mean_reward ?? NaN);
    const metricScore = Number(lm.grader_score ?? lm.avg_grader_score ?? NaN);

    if (Number.isFinite(metricTs) && Number.isFinite(metricReward)) {
      nextRewards.push({ t: metricTs, value: metricReward });
    }
    if (Number.isFinite(metricTs) && Number.isFinite(metricScore)) {
      nextScores.push({ t: metricTs, value: metricScore });
    }
    for (const key of Object.keys(nextSignals)) {
      const vv = Number(lm[key] ?? NaN);
      if (Number.isFinite(metricTs) && Number.isFinite(vv)) {
        nextSignals[key].push({ t: metricTs, value: vv });
      }
    }

    // Keep one point per timestep (later sources overwrite earlier ones),
    // sorted by t ascending.
    const dedupe = (rows) => {
      const map = new Map();
      for (const row of rows) {
        if (!Number.isFinite(row.t) || !Number.isFinite(row.value)) continue;
        map.set(row.t, row);
      }
      return Array.from(map.values()).sort((a, b) => a.t - b.t);
    };

    const dedupedRewards = dedupe(nextRewards);
    const dedupedScores = dedupe(nextScores);
    const dedupedSignals = Object.fromEntries(
      Object.entries(nextSignals).map(([key, rows]) => [key, dedupe(rows)])
    );

    // Prefer the true grader score; fall back to the first alternative
    // signal with at least two points and non-trivial spread.
    let chosenScores = dedupedScores;
    let chosenMeta = { key: "grader_score", label: "Grader Score", fallback: false };

    if (dedupedScores.length < 2 || seriesSpread(dedupedScores) < 1e-6) {
      const fallbackCandidates = [
        { key: "explained_variance", label: "Explained Variance" },
        { key: "ep_len_mean", label: "Episode Length Mean" },
        { key: "approx_kl", label: "Approx KL" },
      ];
      for (const candidate of fallbackCandidates) {
        const rows = dedupedSignals[candidate.key] || [];
        if (rows.length >= 2 && seriesSpread(rows) >= 1e-6) {
          chosenScores = rows;
          chosenMeta = { key: candidate.key, label: candidate.label, fallback: true };
          break;
        }
      }
    }

    setRewardPoints(dedupedRewards);
    setScorePoints(chosenScores);
    setScoreSignalMeta(chosenMeta);
  };
694
+
695
+ const startTrainingJob = async () => {
696
+ setJobError("");
697
+ try {
698
+ const payload = {
699
+ phase: Number(jobForm.phase) || 1,
700
+ timesteps: Number(jobForm.timesteps) || 80000,
701
+ n_envs: Number(jobForm.n_envs) || 4,
702
+ };
703
+ const seedNum = Number(jobForm.seed);
704
+ if (jobForm.seed !== "" && Number.isFinite(seedNum)) payload.seed = seedNum;
705
+
706
+ const res = await api("/training_jobs", {
707
+ method: "POST",
708
+ body: JSON.stringify(payload),
709
+ });
710
+ if (res?.job_id) {
711
+ setActiveJobId(res.job_id);
712
+ const norm = normalizeJob(res, 0);
713
+ setActiveJob(norm);
714
+ parseAndSetPoints(norm);
715
+ }
716
+ await refreshJobs();
717
+ } catch (err) {
718
+ setJobError(err?.message || "Failed to start training job.");
719
+ }
720
+ };
721
+
722
+ const stopTrainingJob = async () => {
723
+ if (!activeJobId) return;
724
+ setJobError("");
725
+ try {
726
+ await api(`/training_jobs/${activeJobId}/stop`, { method: "POST" });
727
+ await refreshJobs();
728
+ const stopped = await api(`/training_jobs/${activeJobId}`);
729
+ const norm = normalizeJob(stopped, 0);
730
+ setActiveJob(norm);
731
+ parseAndSetPoints(norm);
732
+ } catch (err) {
733
+ setJobError(err?.message || "Failed to stop training job.");
734
+ }
735
+ };
736
+
737
+ const clearTrainingHistory = async () => {
738
+ setJobError("");
739
+ try {
740
+ await api("/training_jobs?clear_artifacts=false", { method: "DELETE" });
741
+ setJobs([]);
742
+ setActiveJob(null);
743
+ setActiveJobId("");
744
+ setRewardPoints([]);
745
+ setScorePoints([]);
746
+ setScoreSignalMeta({ key: "grader_score", label: "Grader Score", fallback: false });
747
+ setLogLines([]);
748
+ setLogProgressRatio(null);
749
+ setLastLoggedTimesteps(null);
750
+ } catch (err) {
751
+ setJobError(err?.message || "Failed to clear training history.");
752
+ }
753
+ };
754
+
755
+ const deleteTrainingJob = async (jobId) => {
756
+ if (!jobId) return;
757
+ setJobError("");
758
+ setDeletingJobId(jobId);
759
+ try {
760
+ await api(`/training_jobs/${jobId}?clear_artifacts=false`, { method: "DELETE" });
761
+ if (activeJobId === jobId) {
762
+ setActiveJobId("");
763
+ setActiveJob(null);
764
+ setRewardPoints([]);
765
+ setScorePoints([]);
766
+ setScoreSignalMeta({ key: "grader_score", label: "Grader Score", fallback: false });
767
+ setLogLines([]);
768
+ }
769
+ await refreshJobs();
770
+ } catch (err) {
771
+ setJobError(err?.message || "Failed to delete training job.");
772
+ } finally {
773
+ setDeletingJobId("");
774
+ }
775
+ };
776
+
777
+ const pushEnvEvent = (stage, payload, tone = "indigo") => {
778
+ const seq = envEventSeqRef.current + 1;
779
+ envEventSeqRef.current = seq;
780
+ setEnvFlowEvents((prev) => [
781
+ ...prev,
782
+ { id: `${Date.now()}-${Math.random()}`, seq, ts: Date.now(), stage, payload, tone },
783
+ ].slice(-400));
784
+ };
785
+
786
  // Drive the full OpenEnv protocol for every available task:
  // reset -> initial state -> (action-masks -> auto_step -> state)* ->
  // grade -> close session; each stage is pushed to the replay feed and
  // per-task results are aggregated into a summary.
  const runAutomatedOpenEnvFlow = async () => {
    setEnvBusy(true);
    setEnvError("");
    setEnvFlowSummary(null);
    setEnvFlowEvents([]);
    setEnvFlowRuns([]);
    envEventSeqRef.current = 0;

    try {
      const seedNum = Number(envSeed);
      // Run against all known tasks, or just the selected one if none listed.
      const taskScope = Array.isArray(tasks) && tasks.length > 0 ? tasks : [envTaskId];
      const runTaskIds = Array.from(new Set(taskScope.filter(Boolean)));
      const maxSteps = Math.max(1, Number(envMaxSteps) || 6);
      const taskResults = [];

      for (const taskId of runTaskIds) {
        let sessionId = "";
        let stepsExecuted = 0;
        let finalState = null;
        try {
          // --- reset ---
          const resetPayload = { task_id: taskId };
          if (envSeed !== "" && Number.isFinite(seedNum)) {
            resetPayload.seed = seedNum;
          }

          const resetRes = await api("/reset", {
            method: "POST",
            body: JSON.stringify(resetPayload),
          });
          sessionId = String(resetRes?.session_id || "");
          if (!sessionId) throw new Error(`reset() did not return session_id for task ${taskId}`);

          pushEnvEvent(
            "reset",
            {
              task_id: taskId,
              day: resetRes?.observation?.day,
              backlog: resetRes?.observation?.total_backlog,
              completed: resetRes?.observation?.total_completed,
            },
            "emerald"
          );

          // --- initial state snapshot (without action history) ---
          const initialState = await api("/state", {
            method: "POST",
            body: JSON.stringify({ session_id: sessionId, include_action_history: false }),
          });
          pushEnvEvent(
            "state:initial",
            {
              task_id: taskId,
              total_completed: initialState?.state?.total_completed,
              total_backlog: initialState?.state?.total_backlog,
              fairness_gap: initialState?.state?.fairness_gap,
            },
            "cyan"
          );

          // --- step loop: stops early when the env reports done ---
          let done = false;
          for (let idx = 0; idx < maxSteps; idx += 1) {
            if (done) break;

            const masks = await api("/action-masks", {
              method: "POST",
              body: JSON.stringify({ session_id: sessionId }),
            });
            pushEnvEvent(
              "action-masks",
              {
                task_id: taskId,
                step: idx + 1,
                total_valid: masks?.total_valid,
                total_actions: masks?.total_actions,
              },
              "amber"
            );

            // Let the backend pick the action via the selected policy.
            const stepRes = await api("/auto_step", {
              method: "POST",
              body: JSON.stringify({
                session_id: sessionId,
                agent_policy: envPolicyName || "backlog_clearance",
              }),
            });
            done = Boolean(stepRes?.done);
            stepsExecuted += 1;
            pushEnvEvent(
              "auto_step",
              {
                task_id: taskId,
                step: idx + 1,
                reward: stepRes?.reward,
                done: stepRes?.done,
                day: stepRes?.observation?.day,
                backlog: stepRes?.observation?.total_backlog,
                completed: stepRes?.observation?.total_completed,
              },
              "indigo"
            );

            // Post-step state (with action history) becomes the final state.
            const stateRes = await api("/state", {
              method: "POST",
              body: JSON.stringify({ session_id: sessionId, include_action_history: true }),
            });
            finalState = stateRes;
            pushEnvEvent(
              "state:post_step",
              {
                task_id: taskId,
                step: idx + 1,
                total_completed: stateRes?.state?.total_completed,
                total_backlog: stateRes?.state?.total_backlog,
                total_sla_breaches: stateRes?.state?.total_sla_breaches,
                action_history_len: Array.isArray(stateRes?.state?.action_history) ? stateRes.state.action_history.length : 0,
              },
              "cyan"
            );
          }

          // --- grading: if the API omits `passed`, derive it from the score ---
          const gradeRes = await api("/grade", {
            method: "POST",
            body: JSON.stringify({ session_id: sessionId }),
          });
          const scoreValue = Number(gradeRes?.score);
          const dynamicPassed =
            typeof gradeRes?.passed === "boolean"
              ? gradeRes.passed
              : (Number.isFinite(scoreValue) ? scoreValue >= 0.5 : null);
          pushEnvEvent(
            "grade",
            {
              task_id: taskId,
              score: gradeRes?.score,
              passed: dynamicPassed,
            },
            "emerald"
          );

          taskResults.push({
            task_id: taskId,
            steps_executed: stepsExecuted,
            score: gradeRes?.score ?? null,
            passed: dynamicPassed,
            final_completed: finalState?.state?.total_completed ?? null,
            final_backlog: finalState?.state?.total_backlog ?? null,
            final_sla_breaches: finalState?.state?.total_sla_breaches ?? null,
          });
        } catch (taskErr) {
          // A failed task is recorded but does not abort the remaining tasks.
          const msg = taskErr?.message || String(taskErr);
          pushEnvEvent("task:error", { task_id: taskId, error: msg }, "rose");
          taskResults.push({
            task_id: taskId,
            steps_executed: stepsExecuted,
            score: null,
            passed: null,
            error: msg,
          });
        } finally {
          // Best-effort session cleanup; deletion failures are ignored.
          if (sessionId) {
            try {
              await api(`/sessions/${sessionId}`, { method: "DELETE" });
              pushEnvEvent("session:closed", { task_id: taskId }, "slate");
            } catch (_err) {
              // no-op
            }
          }
        }
      }

      // --- aggregate summary across all tasks ---
      setEnvFlowRuns(taskResults);
      const validScores = taskResults
        .map((row) => Number(row.score))
        .filter((v) => Number.isFinite(v));
      const passedCount = taskResults.filter((row) => row.passed === true).length;
      setEnvFlowSummary({
        tasks_executed: taskResults.length,
        total_steps_executed: taskResults.reduce((acc, row) => acc + Number(row.steps_executed || 0), 0),
        avg_score:
          validScores.length > 0
            ? validScores.reduce((acc, score) => acc + Number(score), 0) / validScores.length
            : null,
        passed_tasks: passedCount,
      });
    } catch (err) {
      setEnvError(err?.message || "Automated OpenEnv workflow failed.");
    } finally {
      setEnvBusy(false);
    }
  };
975
+
976
+ useEffect(() => {
977
+ refreshEndpointHealth();
978
+ refreshCatalog();
979
+ refreshJobs();
980
+ // eslint-disable-next-line react-hooks/exhaustive-deps
981
+ }, []);
982
+
983
+ useEffect(() => {
984
+ const t = setInterval(() => {
985
+ refreshJobs();
986
+ }, 5000);
987
+ return () => clearInterval(t);
988
+ // eslint-disable-next-line react-hooks/exhaustive-deps
989
+ }, []);
990
+
991
+ useEffect(() => {
992
+ const t = setInterval(() => {
993
+ refreshEndpointHealth();
994
+ }, 15000);
995
+ return () => clearInterval(t);
996
+ // eslint-disable-next-line react-hooks/exhaustive-deps
997
+ }, []);
998
+
999
  // Poll the focused job's snapshot. On success the interval stays (or
  // resets to) 1.5s; after 3 consecutive failures it backs off to 4s.
  // Changing pollIntervalMs re-creates the interval via the dependency array.
  useEffect(() => {
    if (!activeJobId) return undefined;
    // Guards against setState after unmount / job switch mid-request.
    let cancelled = false;

    const t = setInterval(async () => {
      if (cancelled) return;
      try {
        const snapshotRaw = await api(`/training_jobs/${activeJobId}`);
        if (cancelled) return;
        const snapshot = normalizeJob(snapshotRaw, 0);
        setActiveJob(snapshot);
        parseAndSetPoints(snapshot);
        setJobError("");
        // Success resets the failure counter and restores fast polling.
        pollFailuresRef.current = 0;
        if (pollIntervalMs !== 1500) setPollIntervalMs(1500);
      } catch (err) {
        pollFailuresRef.current += 1;
        if (pollFailuresRef.current >= 3) {
          setPollIntervalMs(4000);
          setJobError(err?.message || "Polling failed repeatedly, switched to fallback polling.");
        }
      }
    }, pollIntervalMs);

    return () => {
      cancelled = true;
      clearInterval(t);
    };
  }, [activeJobId, pollIntervalMs]);
1028
+
1029
+ useEffect(() => {
1030
+ if (!activeJobId) return;
1031
+ const row = jobs.find((j) => j.job_id === activeJobId);
1032
+ if (!row) return;
1033
+ setActiveJob(row);
1034
+ parseAndSetPoints(row);
1035
+ // eslint-disable-next-line react-hooks/exhaustive-deps
1036
+ }, [activeJobId, jobs]);
1037
+
1038
  // Three independent progress estimates; effectiveProgress takes the max.
  // A: the job's own reported progress field, clamped to [0, 1].
  const progressA = useMemo(() => {
    if (!activeJob) return null;
    const p = toNumberOrNull(activeJob.progress);
    return Number.isFinite(p) ? Math.max(0, Math.min(1, Number(p))) : null;
  }, [activeJob]);

  // B: timesteps seen (latest_metrics, else last metric_history row)
  // divided by the job's total timesteps.
  const progressB = useMemo(() => {
    if (!activeJob) return null;
    const history = Array.isArray(activeJob?.metric_history) ? activeJob.metric_history : [];
    const historyTs = history.length > 0 ? toNumberOrNull(history[history.length - 1]?.t ?? history[history.length - 1]?.total_timesteps) : null;
    const ts = toNumberOrNull(activeJob?.latest_metrics?.total_timesteps) ?? historyTs;
    const total = toNumberOrNull(activeJob?.timesteps);
    if (!Number.isFinite(ts) || !Number.isFinite(total) || total <= 0) return null;
    return Math.max(0, Math.min(1, Number(ts) / Number(total)));
  }, [activeJob]);

  // C: log-derived progress — combines the ratio parsed directly from
  // logs with the last logged timestep count (max of whichever exist).
  const progressC = useMemo(() => {
    if (!activeJob) return null;
    const total = toNumberOrNull(activeJob?.timesteps);
    if (!Number.isFinite(total) || total <= 0) {
      return Number.isFinite(logProgressRatio) ? Number(logProgressRatio) : null;
    }

    const fromLogTs =
      Number.isFinite(lastLoggedTimesteps) && Number(lastLoggedTimesteps) > 0
        ? Math.max(0, Math.min(1, Number(lastLoggedTimesteps) / Number(total)))
        : null;
    if (Number.isFinite(fromLogTs) && Number.isFinite(logProgressRatio)) {
      return Math.max(Number(fromLogTs), Number(logProgressRatio));
    }
    if (Number.isFinite(fromLogTs)) return Number(fromLogTs);
    if (Number.isFinite(logProgressRatio)) return Number(logProgressRatio);
    return null;
  }, [activeJob, lastLoggedTimesteps, logProgressRatio]);

  // The optimistic (maximum) of all available estimates; null when none.
  const effectiveProgress = useMemo(() => {
    const values = [progressA, progressB, progressC].filter((v) => Number.isFinite(v));
    return values.length > 0 ? Math.max(...values) : null;
  }, [progressA, progressB, progressC]);

  // Latest/best values for the headline stat cards (null when no data).
  const rewardLatest = rewardPoints.length ? rewardPoints[rewardPoints.length - 1].value : null;
  const rewardBest = rewardPoints.length ? Math.max(...rewardPoints.map((p) => p.value)) : null;
  const scoreLatest = scorePoints.length ? scorePoints[scorePoints.length - 1].value : null;
  const scoreBest = scorePoints.length ? Math.max(...scorePoints.map((p) => p.value)) : null;
1082
+
1083
  // Chart-ready series derived from the raw point lists.
  const rewardSeries = useMemo(() => normalizeSeries(rewardPoints), [rewardPoints]);
  const scoreSeries = useMemo(() => normalizeSeries(scorePoints), [scorePoints]);

  // Shared X domain across both charts (min/max timestep over both series).
  const graphXMin = useMemo(() => {
    const allTs = [...rewardSeries, ...scoreSeries].map((p) => Number(p.t)).filter(Number.isFinite);
    if (allTs.length === 0) return 0;
    return Math.min(...allTs);
  }, [rewardSeries, scoreSeries]);
  const graphXMax = useMemo(() => {
    const allTs = [...rewardSeries, ...scoreSeries].map((p) => Number(p.t)).filter(Number.isFinite);
    if (allTs.length === 0) return 1;
    const mx = Math.max(...allTs);
    // Guarantee a non-degenerate (non-zero-width) X range.
    return mx > graphXMin ? mx : graphXMin + 1;
  }, [rewardSeries, scoreSeries, graphXMin]);

  // Y domains: always include at least [-10, 10] for reward and [0, 1]
  // for score so sparse data doesn't collapse the axis.
  const rewardMin = rewardPoints.length ? Math.min(...rewardPoints.map((p) => p.value), -10) : -10;
  const rewardMax = rewardPoints.length ? Math.max(...rewardPoints.map((p) => p.value), 10) : 10;
  const scoreMin = scorePoints.length ? Math.min(...scorePoints.map((p) => p.value), 0) : 0;
  const scoreMax = scorePoints.length ? Math.max(...scorePoints.map((p) => p.value), 1) : 1;

  // SVG polyline point strings for the two charts (700x260 viewBox).
  const rewardPolyline = useMemo(
    () =>
      toPolylineByT(rewardSeries, {
        minX: graphXMin,
        maxX: graphXMax,
        minY: rewardMin,
        maxY: rewardMax,
        width: 700,
        height: 260,
      }),
    [rewardSeries, graphXMin, graphXMax, rewardMin, rewardMax]
  );
  // Score uses a stair-step rendering (values hold until the next sample).
  const scoreStairPolyline = useMemo(
    () =>
      toStairPolylineByT(scoreSeries, {
        minX: graphXMin,
        maxX: graphXMax,
        minY: scoreMin,
        maxY: scoreMax,
        width: 700,
        height: 260,
      }),
    [scoreSeries, graphXMin, graphXMax, scoreMin, scoreMax]
  );
1127
+
1128
  // Build the narrated "story" feed: context + trend cards for the active
  // job, recent log lines, evaluation rows, and OpenEnv replay events.
  // Capped to the most recent 32 cards.
  const llmStoryCards = useMemo(() => {
    const cards = [];
    let seq = 1;

    if (activeJob) {
      // Card 1: current job context.
      cards.push({
        id: `story-${seq}`,
        seq: seq++,
        title: "Training Context",
        text: `Phase ${activeJob?.phase || "-"} job ${String(activeJob?.job_id || "").slice(0, 8)} is ${activeJob?.status || "unknown"} at ${fmt((Number(activeJob?.progress || 0) * 100), 1)}%.`,
        tone: "cyan",
      });
      // Card 2: first -> last trend for reward and the chosen score signal.
      if (rewardSeries.length >= 2 || scoreSeries.length >= 2) {
        const rewardStart = rewardSeries.length > 0 ? rewardSeries[0].value : null;
        const rewardEnd = rewardSeries.length > 0 ? rewardSeries[rewardSeries.length - 1].value : null;
        const scoreStart = scoreSeries.length > 0 ? scoreSeries[0].value : null;
        const scoreEnd = scoreSeries.length > 0 ? scoreSeries[scoreSeries.length - 1].value : null;
        cards.push({
          id: `story-${seq}`,
          seq: seq++,
          title: "Learning Trend",
          text: `Reward ${rewardStart != null ? fmt(rewardStart, 2) : "-"} -> ${rewardEnd != null ? fmt(rewardEnd, 2) : "-"}; ${scoreSignalMeta.label.toLowerCase()} ${scoreStart != null ? fmt(scoreStart, 3) : "-"} -> ${scoreEnd != null ? fmt(scoreEnd, 3) : "-"}.`,
          tone: "indigo",
        });
      }
    }

    // One card per recent log line (last 14), summarized for display.
    for (const line of (logLines || []).slice(-14)) {
      const row = summarizeLogLine(line);
      cards.push({
        id: `log-${seq}-${line.slice(0, 8)}`,
        seq: seq++,
        title: row.title,
        text: row.text,
        tone: row.tone,
      });
    }

    // Per-task evaluation replay cards, then the average-score summary.
    const evalRows = Array.isArray(activeJob?.evaluation_rows) ? activeJob.evaluation_rows : [];
    for (const row of evalRows) {
      cards.push({
        id: `eval-${seq}-${row.task_id}`,
        seq: seq++,
        title: "Evaluation Replay",
        text: `${row.task_id}: score ${fmt(row.grader_score, 3)}, reward ${fmt(row.total_reward, 2)}, completed ${row.total_completed}, breaches ${row.total_sla_breaches}.`,
        tone: "emerald",
      });
    }
    if (toNumberOrNull(activeJob?.evaluation_avg_score) != null) {
      cards.push({
        id: `eval-avg-${seq}`,
        seq: seq++,
        title: "Evaluation Summary",
        text: `Average grader score ${fmt(activeJob.evaluation_avg_score, 3)} across evaluated tasks.`,
        tone: "emerald",
      });
    }

    // Recent OpenEnv replay events (last 10).
    for (const event of (envFlowEvents || []).slice(-10)) {
      cards.push({
        id: `replay-${seq}-${event.id}`,
        seq: seq++,
        title: "OpenEnv Replay",
        text: summarizeEnvEvent(event),
        tone: event?.tone || "cyan",
      });
    }

    return cards.slice(-32);
  }, [activeJob, rewardSeries, scoreSeries, logLines, envFlowEvents, scoreSignalMeta.label]);
1198
+
1199
  // Format a [0, 1] ratio as a percentage string, or "-" when absent.
  const progressText = (v) => (Number.isFinite(v) ? `${fmt(Number(v) * 100, 1)}%` : "-");
  // Current timestep count: latest_metrics first, then the last
  // metric_history row, then the value parsed from logs.
  const currentTs = useMemo(() => {
    const history = Array.isArray(activeJob?.metric_history) ? activeJob.metric_history : [];
    const histTs = history.length > 0 ? toNumberOrNull(history[history.length - 1]?.t ?? history[history.length - 1]?.total_timesteps) : null;
    return toNumberOrNull(activeJob?.latest_metrics?.total_timesteps) ?? histTs ?? lastLoggedTimesteps;
  }, [activeJob, lastLoggedTimesteps]);
  // Current mean reward with the same source-preference order.
  const currentReward = useMemo(() => {
    const history = Array.isArray(activeJob?.metric_history) ? activeJob.metric_history : [];
    const histReward = history.length > 0 ? toNumberOrNull(history[history.length - 1]?.ep_rew_mean ?? history[history.length - 1]?.mean_reward) : null;
    return toNumberOrNull(activeJob?.latest_metrics?.ep_rew_mean)
      ?? toNumberOrNull(activeJob?.latest_metrics?.mean_reward)
      ?? histReward;
  }, [activeJob]);
  // The score headline simply mirrors the latest chart point.
  const currentScore = scoreLatest;
1213
+
1214
+ return (
1215
+ <div className="space-y-6">
1216
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-5">
1217
+ <div className="flex items-center justify-between gap-3 mb-3">
1218
+ <h2 className="text-lg font-black text-white flex items-center gap-2">
1219
+ <span className="material-symbols-outlined text-indigo-400">hub</span>
1220
+ Endpoint Connectivity Matrix
1221
+ </h2>
1222
+ <button
1223
+ onClick={refreshEndpointHealth}
1224
+ className="text-xs font-bold px-3 py-1.5 rounded-lg bg-indigo-600/70 hover:bg-indigo-500 text-white"
1225
+ >
1226
+ Refresh Endpoints
1227
+ </button>
1228
+ </div>
1229
+ {endpointError && (
1230
+ <div className="mb-3 text-xs font-semibold text-amber-300 bg-amber-500/10 border border-amber-500/20 rounded p-2">
1231
+ {endpointError}
1232
+ </div>
1233
+ )}
1234
+ <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-3">
1235
+ {endpointRows.map((row) => (
1236
+ <div
1237
+ key={row.key}
1238
+ className={`border rounded-lg p-3 ${
1239
+ row.ok ? "border-emerald-500/25 bg-emerald-500/5" : "border-rose-500/25 bg-rose-500/5"
1240
+ }`}
1241
+ >
1242
+ <div className="flex items-center justify-between">
1243
+ <div className="text-sm font-bold text-white">{row.label}</div>
1244
+ <span className={`text-[10px] font-black ${row.ok ? "text-emerald-400" : "text-rose-400"}`}>
1245
+ {row.ok ? "UP" : "DOWN"}
1246
+ </span>
1247
+ </div>
1248
+ <div className="text-xs text-slate-400 mt-1">
1249
+ {row.ok ? `${row.ms} ms` : row.error || "unreachable"}
1250
+ </div>
1251
+ </div>
1252
+ ))}
1253
+ </div>
1254
+ </div>
1255
+
1256
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-5">
1257
+ <div className="flex flex-wrap items-center justify-between gap-3 mb-4">
1258
+ <h2 className="text-lg font-black text-white flex items-center gap-2">
1259
+ <span className="material-symbols-outlined text-violet-400">tune</span>
1260
+ Live Training Control
1261
+ </h2>
1262
+ <div className="flex items-center gap-2">
1263
+ <button
1264
+ onClick={startTrainingJob}
1265
+ className="text-sm font-bold px-4 py-2 rounded-lg bg-violet-600 hover:bg-violet-500 text-white"
1266
+ >
1267
+ Start Training Job
1268
+ </button>
1269
+ <button
1270
+ onClick={stopTrainingJob}
1271
+ disabled={!activeJobId}
1272
+ className="text-sm font-bold px-4 py-2 rounded-lg bg-rose-600 hover:bg-rose-500 text-white disabled:opacity-50"
1273
+ >
1274
+ Stop Active Job
1275
+ </button>
1276
+ <button
1277
+ onClick={clearTrainingHistory}
1278
+ className="text-sm font-bold px-4 py-2 rounded-lg bg-slate-700 hover:bg-slate-600 text-white"
1279
+ >
1280
+ Clear Job History
1281
+ </button>
1282
+ </div>
1283
+ </div>
1284
+
1285
+ {jobError && (
1286
+ <div className="mb-3 text-xs font-semibold text-rose-300 bg-rose-500/10 border border-rose-500/20 rounded p-2">
1287
+ {jobError}
1288
+ </div>
1289
+ )}
1290
+
1291
+ <div className="grid grid-cols-1 md:grid-cols-4 gap-3 mb-3">
1292
+ <label className="text-xs text-slate-300">
1293
+ Phase
1294
+ <select
1295
+ value={jobForm.phase}
1296
+ onChange={(e) => setJobForm((prev) => ({ ...prev, phase: Number(e.target.value) }))}
1297
+ className="mt-1 w-full bg-slate-800 border border-white/10 rounded px-2 py-2 text-sm text-white"
1298
+ >
1299
+ <option value={1}>Phase 1</option>
1300
+ <option value={2}>Phase 2</option>
1301
+ </select>
1302
+ </label>
1303
+ <label className="text-xs text-slate-300">
1304
+ Timesteps
1305
+ <input
1306
+ value={jobForm.timesteps}
1307
+ onChange={(e) => setJobForm((prev) => ({ ...prev, timesteps: e.target.value }))}
1308
+ className="mt-1 w-full bg-slate-800 border border-white/10 rounded px-2 py-2 text-sm text-white"
1309
+ />
1310
+ </label>
1311
+ <label className="text-xs text-slate-300">
1312
+ N Envs
1313
+ <input
1314
+ value={jobForm.n_envs}
1315
+ onChange={(e) => setJobForm((prev) => ({ ...prev, n_envs: e.target.value }))}
1316
+ className="mt-1 w-full bg-slate-800 border border-white/10 rounded px-2 py-2 text-sm text-white"
1317
+ />
1318
+ </label>
1319
+ <label className="text-xs text-slate-300">
1320
+ Seed (optional)
1321
+ <input
1322
+ value={jobForm.seed}
1323
+ onChange={(e) => setJobForm((prev) => ({ ...prev, seed: e.target.value }))}
1324
+ className="mt-1 w-full bg-slate-800 border border-white/10 rounded px-2 py-2 text-sm text-white"
1325
+ />
1326
+ </label>
1327
+ </div>
1328
+
1329
+ <div className="flex flex-wrap gap-2">
1330
+ <button
1331
+ onClick={() => setJobForm((prev) => ({ ...prev, timesteps: 30000, n_envs: Math.max(4, Number(prev.n_envs || 4)) }))}
1332
+ className="text-xs font-bold px-3 py-1.5 rounded bg-indigo-600/70 hover:bg-indigo-500 text-white"
1333
+ >
1334
+ Quick Demo Preset
1335
+ </button>
1336
+ <button
1337
+ onClick={() => setJobForm((prev) => ({ ...prev, timesteps: 120000, n_envs: 4 }))}
1338
+ className="text-xs font-bold px-3 py-1.5 rounded bg-slate-700 hover:bg-slate-600 text-white"
1339
+ >
1340
+ Default Preset
1341
+ </button>
1342
+ </div>
1343
+ </div>
1344
+
1345
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-5">
1346
+ <h2 className="text-lg font-black text-white flex items-center gap-2 mb-4">
1347
+ <span className="material-symbols-outlined text-indigo-400">monitoring</span>
1348
+ Live Metrics and Storytelling Timeline
1349
+ </h2>
1350
+
1351
+ <div className="grid grid-cols-1 lg:grid-cols-4 gap-3 mb-4">
1352
+ <div className="bg-slate-950/50 border border-white/5 rounded p-3">
1353
+ <div className="text-[11px] uppercase text-slate-400">Active Job Status</div>
1354
+ <div className={`mt-2 inline-flex px-2 py-1 rounded border text-xs font-bold ${statusClasses(activeJob?.status)}`}>
1355
+ {activeJob?.status || "idle"}
1356
+ </div>
1357
+ </div>
1358
+ <div className="bg-slate-950/50 border border-white/5 rounded p-3">
1359
+ <div className="text-[11px] uppercase text-slate-400">Current Timesteps</div>
1360
+ <div className="mt-2 text-xl font-black text-indigo-300">{currentTs != null ? Number(currentTs).toLocaleString() : "-"}</div>
1361
+ </div>
1362
+ <div className="bg-slate-950/50 border border-white/5 rounded p-3">
1363
+ <div className="text-[11px] uppercase text-slate-400">Current Reward</div>
1364
+ <div className="mt-2 text-xl font-black text-amber-300">{currentReward != null ? fmt(currentReward, 3) : "-"}</div>
1365
+ </div>
1366
+ <div className="bg-slate-950/50 border border-white/5 rounded p-3">
1367
+ <div className="text-[11px] uppercase text-slate-400">Current {scoreSignalMeta.label}</div>
1368
+ <div className="mt-2 text-xl font-black text-emerald-300">{currentScore != null ? fmt(currentScore, 3) : "-"}</div>
1369
+ </div>
1370
+ </div>
1371
+
1372
+ <div className="mb-4 flex flex-wrap items-center gap-3">
1373
+ <label className="text-xs text-slate-300">
1374
+ Story Job (active + history)
1375
+ <select
1376
+ value={activeJobId}
1377
+ onChange={(e) => setActiveJobId(e.target.value)}
1378
+ className="mt-1 min-w-[260px] bg-slate-800 border border-white/10 rounded px-2 py-2 text-sm text-white"
1379
+ >
1380
+ {jobs.map((job) => (
1381
+ <option key={job.job_id} value={job.job_id}>
1382
+ {String(job.job_id).slice(0, 8)} | phase {job.phase || "-"} | {job.status}
1383
+ </option>
1384
+ ))}
1385
+ </select>
1386
+ </label>
1387
+ <div className="text-[11px] text-slate-400">
1388
+ Reward line (left axis) + {scoreSignalMeta.label} stair-step line (right axis), updated from live backend metrics.
1389
+ </div>
1390
+ </div>
1391
+
1392
+ <div className="bg-slate-950/50 border border-white/5 rounded p-3 mb-4">
1393
+ <div className="flex items-center justify-between mb-2">
1394
+ <div className="text-xs uppercase tracking-widest text-slate-400">Combined Reward and Score (Dual Axis)</div>
1395
+ <div className="text-[11px] text-slate-500">
1396
+ timesteps {Number.isFinite(graphXMin) ? Number(graphXMin).toLocaleString() : "-"} - {Number.isFinite(graphXMax) ? Number(graphXMax).toLocaleString() : "-"}
1397
+ </div>
1398
+ </div>
1399
+ {rewardSeries.length === 0 && scoreSeries.length === 0 ? (
1400
+ <div className="h-[260px] flex items-center justify-center text-slate-500 text-sm">
1401
+ Waiting for live metric history from training logs...
1402
+ </div>
1403
+ ) : (
1404
+ <div className="relative">
1405
+ <svg viewBox="0 0 700 260" className="w-full h-[260px]">
1406
+ {[0, 1, 2, 3, 4].map((i) => (
1407
+ <line
1408
+ key={`grid-${i}`}
1409
+ x1="0"
1410
+ x2="700"
1411
+ y1={String((260 / 4) * i)}
1412
+ y2={String((260 / 4) * i)}
1413
+ stroke="#334155"
1414
+ strokeOpacity="0.35"
1415
+ strokeWidth="1"
1416
+ />
1417
+ ))}
1418
+ {rewardPolyline ? (
1419
+ <polyline
1420
+ points={rewardPolyline}
1421
+ fill="none"
1422
+ stroke="#818cf8"
1423
+ strokeWidth="2.2"
1424
+ strokeLinejoin="round"
1425
+ strokeLinecap="round"
1426
+ />
1427
+ ) : null}
1428
+ {scoreStairPolyline ? (
1429
+ <polyline
1430
+ points={scoreStairPolyline}
1431
+ fill="none"
1432
+ stroke="#34d399"
1433
+ strokeWidth="2.2"
1434
+ strokeLinejoin="round"
1435
+ strokeLinecap="round"
1436
+ />
1437
+ ) : null}
1438
+ </svg>
1439
+ <div className="absolute top-1 left-2 text-[10px] text-indigo-300">
1440
+ Reward min {rewardMin.toFixed(2)} | max {rewardMax.toFixed(2)}
1441
+ </div>
1442
+ <div className="absolute top-1 right-2 text-[10px] text-emerald-300">
1443
+ {scoreSignalMeta.label} min {scoreMin.toFixed(3)} | max {scoreMax.toFixed(3)}
1444
+ </div>
1445
+ </div>
1446
+ )}
1447
+ <div className="mt-2 text-xs text-slate-300">
1448
+ reward current: {rewardLatest != null ? rewardLatest.toFixed(3) : "-"} | reward best: {rewardBest != null ? rewardBest.toFixed(3) : "-"} | {scoreSignalMeta.label.toLowerCase()} current: {scoreLatest != null ? scoreLatest.toFixed(3) : "-"} | {scoreSignalMeta.label.toLowerCase()} best: {scoreBest != null ? scoreBest.toFixed(3) : "-"}
1449
+ </div>
1450
+ <div className="mt-1 text-[11px] text-slate-500">
1451
+ Legend: <span className="text-indigo-300">Reward (line)</span> - <span className="text-emerald-300">{scoreSignalMeta.label} (stair-step hold-last-value)</span>{scoreSignalMeta.fallback ? " - fallback metric used because grader score has no live movement yet." : ""}
1452
+ </div>
1453
+ </div>
1454
+
1455
+ <div className="bg-slate-950/50 border border-white/5 rounded p-3">
1456
+ <div className="flex items-center justify-between mb-3">
1457
+ <div className="text-xs uppercase tracking-widest text-slate-400">LLM Story Feed (logs + replay + evaluation)</div>
1458
+ <div className="text-[11px] text-slate-500">Sequential order - {llmStoryCards.length} cards</div>
1459
+ </div>
1460
+ {llmStoryCards.length === 0 ? (
1461
+ <div className="text-slate-500 text-sm">No storyline events yet.</div>
1462
+ ) : (
1463
+ <div className="space-y-2 max-h-[340px] overflow-auto pr-1">
1464
+ {llmStoryCards.map((card) => (
1465
+ <div key={card.id} className={`border rounded p-2.5 ${toneClasses(card.tone)}`}>
1466
+ <div className="flex items-center justify-between mb-1">
1467
+ <div className="text-[11px] font-bold text-white">{card.title}</div>
1468
+ <div className="text-[10px] text-slate-400">#{card.seq}</div>
1469
+ </div>
1470
+ <div className="text-[11px] text-slate-300 font-mono leading-relaxed break-words">{card.text}</div>
1471
+ </div>
1472
+ ))}
1473
+ </div>
1474
+ )}
1475
+ </div>
1476
+ </div>
1477
+
1478
+ <div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
1479
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-5">
1480
+ <div className="flex items-center justify-between mb-3">
1481
+ <h2 className="text-lg font-black text-white flex items-center gap-2">
1482
+ <span className="material-symbols-outlined text-amber-400">history</span>
1483
+ Training Job History
1484
+ </h2>
1485
+ <div className="flex items-center gap-2">
1486
+ <button
1487
+ onClick={() => deleteTrainingJob(activeJobId)}
1488
+ disabled={!activeJobId || !!deletingJobId}
1489
+ className="text-xs font-bold px-3 py-1.5 rounded bg-rose-600/70 hover:bg-rose-500 text-white disabled:opacity-50"
1490
+ >
1491
+ {deletingJobId && deletingJobId === activeJobId ? "Deleting..." : "Delete Selected"}
1492
+ </button>
1493
+ <button
1494
+ onClick={refreshJobs}
1495
+ className="text-xs font-bold px-3 py-1.5 rounded bg-amber-600/70 hover:bg-amber-500 text-white"
1496
+ >
1497
+ Refresh Jobs
1498
+ </button>
1499
+ </div>
1500
+ </div>
1501
+ {jobsError && <div className="text-xs text-rose-300 mb-2">{jobsError}</div>}
1502
+ {jobsLoading ? (
1503
+ <div className="text-sm text-slate-400">Loading jobs...</div>
1504
+ ) : (
1505
+ <div className="max-h-80 overflow-auto border border-white/5 rounded">
1506
+ <table className="w-full text-xs">
1507
+ <thead className="bg-slate-800/70 text-slate-300 sticky top-0">
1508
+ <tr>
1509
+ <th className="px-2 py-2 text-left">Job</th>
1510
+ <th className="px-2 py-2 text-left">Status</th>
1511
+ <th className="px-2 py-2 text-left">Phase</th>
1512
+ <th className="px-2 py-2 text-left">Progress</th>
1513
+ <th className="px-2 py-2 text-left">Updated</th>
1514
+ <th className="px-2 py-2 text-left">Action</th>
1515
+ </tr>
1516
+ </thead>
1517
+ <tbody>
1518
+ {jobs.map((job) => {
1519
+ const updated = timestampToDate(job.updated_at);
1520
+ return (
1521
+ <tr
1522
+ key={job.job_id}
1523
+ className={`border-t border-white/5 cursor-pointer hover:bg-white/5 ${
1524
+ activeJobId === job.job_id ? "bg-indigo-500/10" : ""
1525
+ }`}
1526
+ onClick={() => setActiveJobId(job.job_id)}
1527
+ >
1528
+ <td className="px-2 py-2 text-indigo-300 font-mono">{String(job.job_id || "").slice(0, 8)}</td>
1529
+ <td className="px-2 py-2">
1530
+ <span className={`px-2 py-0.5 rounded border text-[11px] font-bold ${statusClasses(job.status)}`}>
1531
+ {job.status}
1532
+ </span>
1533
+ </td>
1534
+ <td className="px-2 py-2 text-slate-300">{job.phase || "-"}</td>
1535
+ <td className="px-2 py-2 text-slate-300">{fmt((Number(job.progress || 0) * 100), 1)}%</td>
1536
+ <td className="px-2 py-2 text-slate-400">{updated ? updated.toLocaleTimeString() : "-"}</td>
1537
+ <td className="px-2 py-2">
1538
+ <button
1539
+ onClick={(e) => {
1540
+ e.stopPropagation();
1541
+ deleteTrainingJob(job.job_id);
1542
+ }}
1543
+ disabled={!!deletingJobId}
1544
+ className="text-[11px] font-bold px-2 py-1 rounded bg-rose-600/70 hover:bg-rose-500 text-white disabled:opacity-50"
1545
+ >
1546
+ {deletingJobId === job.job_id ? "Deleting..." : "Delete"}
1547
+ </button>
1548
+ </td>
1549
+ </tr>
1550
+ );
1551
+ })}
1552
+ {jobs.length === 0 && (
1553
+ <tr>
1554
+ <td className="px-2 py-3 text-slate-500" colSpan={6}>
1555
+ No training jobs found.
1556
+ </td>
1557
+ </tr>
1558
+ )}
1559
+ </tbody>
1560
+ </table>
1561
+ </div>
1562
+ )}
1563
+ </div>
1564
+
1565
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-5">
1566
+ <div className="flex items-center justify-between mb-3">
1567
+ <h2 className="text-lg font-black text-white flex items-center gap-2">
1568
+ <span className="material-symbols-outlined text-emerald-400">database</span>
1569
+ Model Registry (Dynamic)
1570
+ </h2>
1571
+ <button
1572
+ onClick={refreshCatalog}
1573
+ className="text-xs font-bold px-3 py-1.5 rounded bg-emerald-600/70 hover:bg-emerald-500 text-white"
1574
+ >
1575
+ Refresh Models
1576
+ </button>
1577
+ </div>
1578
+ {modelError && <div className="text-xs text-amber-300 mb-2">{modelError}</div>}
1579
+ <div className="max-h-80 overflow-auto border border-white/5 rounded">
1580
+ <table className="w-full text-xs">
1581
+ <thead className="bg-slate-800/70 text-slate-300 sticky top-0">
1582
+ <tr>
1583
+ <th className="px-2 py-2 text-left">Label</th>
1584
+ <th className="px-2 py-2 text-left">Phase</th>
1585
+ <th className="px-2 py-2 text-left">Source</th>
1586
+ <th className="px-2 py-2 text-left">Exists</th>
1587
+ </tr>
1588
+ </thead>
1589
+ <tbody>
1590
+ {modelRows.map((m) => (
1591
+ <tr key={`${m.path}-${m.source}`} className="border-t border-white/5">
1592
+ <td className="px-2 py-2 text-slate-200">
1593
+ <div>{m.label}</div>
1594
+ <div className="text-[11px] text-slate-500 truncate max-w-[280px]">{m.path || "-"}</div>
1595
+ </td>
1596
+ <td className="px-2 py-2 text-slate-300">{m.phase || "-"}</td>
1597
+ <td className="px-2 py-2 text-slate-300">{m.source || "-"}</td>
1598
+ <td className={`px-2 py-2 ${m.exists ? "text-emerald-300" : "text-rose-300"}`}>
1599
+ {m.exists ? "yes" : "no"}
1600
+ </td>
1601
+ </tr>
1602
+ ))}
1603
+ {modelRows.length === 0 && (
1604
+ <tr>
1605
+ <td className="px-2 py-3 text-slate-500" colSpan={4}>
1606
+ No models discovered.
1607
+ </td>
1608
+ </tr>
1609
+ )}
1610
+ </tbody>
1611
+ </table>
1612
+ </div>
1613
+ </div>
1614
+ </div>
1615
+
1616
+ <div className="bg-slate-900/70 border border-white/5 rounded-xl p-5">
1617
+ <h2 className="text-lg font-black text-white flex items-center gap-2 mb-4">
1618
+ <span className="material-symbols-outlined text-fuchsia-400">api</span>
1619
+ Automated OpenEnv Workflow (`reset`, `step`, `state`, `grade`)
1620
+ </h2>
1621
+ <div className="text-xs text-slate-400 mb-3">
1622
+ Runs sequentially across all available tasks and records each stage in chronological order.
1623
+ </div>
1624
+
1625
+ {envError && (
1626
+ <div className="mb-3 text-xs font-semibold text-rose-300 bg-rose-500/10 border border-rose-500/20 rounded p-2">
1627
+ {envError}
1628
+ </div>
1629
+ )}
1630
+
1631
+ <div className="grid grid-cols-1 md:grid-cols-4 gap-3 mb-3">
1632
+ <label className="text-xs text-slate-300">
1633
+ Task Scope
1634
+ <input
1635
+ value={`${(Array.isArray(tasks) && tasks.length > 0 ? tasks.length : 1)} task(s) automatic`}
1636
+ readOnly
1637
+ className="mt-1 w-full bg-slate-800 border border-white/10 rounded px-2 py-2 text-sm text-white"
1638
+ />
1639
+ </label>
1640
+ <label className="text-xs text-slate-300">
1641
+ Seed (optional)
1642
+ <input
1643
+ value={envSeed}
1644
+ onChange={(e) => setEnvSeed(e.target.value)}
1645
+ className="mt-1 w-full bg-slate-800 border border-white/10 rounded px-2 py-2 text-sm text-white"
1646
+ />
1647
+ </label>
1648
+ <label className="text-xs text-slate-300">
1649
+ Auto-Step Policy
1650
+ <select
1651
+ value={envPolicyName}
1652
+ onChange={(e) => setEnvPolicyName(e.target.value)}
1653
+ className="mt-1 w-full bg-slate-800 border border-white/10 rounded px-2 py-2 text-sm text-white"
1654
+ >
1655
+ {(agents.length > 0 ? agents : ["backlog_clearance"]).map((p) => (
1656
+ <option key={p} value={p}>
1657
+ {p}
1658
+ </option>
1659
+ ))}
1660
+ </select>
1661
+ </label>
1662
+ <label className="text-xs text-slate-300">
1663
+ Max Automated Steps
1664
+ <input
1665
+ value={envMaxSteps}
1666
+ onChange={(e) => setEnvMaxSteps(e.target.value)}
1667
+ className="mt-1 w-full bg-slate-800 border border-white/10 rounded px-2 py-2 text-sm text-white"
1668
+ />
1669
+ </label>
1670
+ </div>
1671
+
1672
+ <div className="flex gap-2 mb-4">
1673
+ <button
1674
+ onClick={runAutomatedOpenEnvFlow}
1675
+ disabled={envBusy}
1676
+ className="text-sm font-bold px-4 py-2 rounded-lg bg-fuchsia-600 hover:bg-fuchsia-500 text-white disabled:opacity-50"
1677
+ >
1678
+ {envBusy ? "Running Workflow..." : "Proceed"}
1679
+ </button>
1680
+ </div>
1681
+
1682
+ {envFlowSummary && (
1683
+ <div className="mb-3 bg-slate-950/50 border border-white/5 rounded p-3 text-xs">
1684
+ <div className="text-slate-300">Tasks Executed: <span className="font-bold text-white">{envFlowSummary.tasks_executed}</span></div>
1685
+ <div className="text-slate-300">Total Steps Executed: <span className="font-bold text-white">{envFlowSummary.total_steps_executed}</span></div>
1686
+ <div className="text-slate-300">Average Score: <span className="font-bold text-emerald-300">{envFlowSummary.avg_score != null ? fmt(envFlowSummary.avg_score, 3) : "-"}</span></div>
1687
+ <div className="text-slate-300">Passed Tasks: <span className="font-bold text-cyan-300">{envFlowSummary.passed_tasks}</span></div>
1688
+ </div>
1689
+ )}
1690
+
1691
+ {envFlowRuns.length > 0 && (
1692
+ <div className="mb-3 border border-white/5 rounded overflow-auto">
1693
+ <table className="w-full text-xs">
1694
+ <thead className="bg-slate-800/70 text-slate-300">
1695
+ <tr>
1696
+ <th className="px-2 py-2 text-left">Task</th>
1697
+ <th className="px-2 py-2 text-left">Steps</th>
1698
+ <th className="px-2 py-2 text-left">Score</th>
1699
+ <th className="px-2 py-2 text-left">Completed</th>
1700
+ <th className="px-2 py-2 text-left">Backlog</th>
1701
+ <th className="px-2 py-2 text-left">SLA Breaches</th>
1702
+ <th className="px-2 py-2 text-left">Passed</th>
1703
+ </tr>
1704
+ </thead>
1705
+ <tbody>
1706
+ {envFlowRuns.map((row) => (
1707
+ <tr key={`run-${row.task_id}`} className="border-t border-white/5">
1708
+ <td className="px-2 py-2 text-slate-200">{row.task_id}</td>
1709
+ <td className="px-2 py-2 text-slate-300">{row.steps_executed}</td>
1710
+ <td className="px-2 py-2 text-emerald-300">{row.score != null ? fmt(row.score, 3) : "-"}</td>
1711
+ <td className="px-2 py-2 text-slate-300">{row.final_completed ?? "-"}</td>
1712
+ <td className="px-2 py-2 text-slate-300">{row.final_backlog ?? "-"}</td>
1713
+ <td className="px-2 py-2 text-slate-300">{row.final_sla_breaches ?? "-"}</td>
1714
+ <td className={`px-2 py-2 ${row.passed === true ? "text-emerald-300" : row.passed === false ? "text-rose-300" : "text-slate-400"}`}>
1715
+ {row.passed === true ? "true" : row.passed === false ? "false" : "-"}
1716
+ </td>
1717
+ </tr>
1718
+ ))}
1719
+ </tbody>
1720
+ </table>
1721
+ </div>
1722
+ )}
1723
+
1724
+ <div className="space-y-2 max-h-[380px] overflow-auto pr-1">
1725
+ {envFlowEvents.length === 0 ? (
1726
+ <div className="text-slate-500 text-sm">No automated workflow events yet.</div>
1727
+ ) : (
1728
+ envFlowEvents.map((event) => (
1729
+ <div key={event.id} className={`border rounded p-3 ${toneClasses(event.tone)}`}>
1730
+ <div className="flex items-center justify-between mb-1">
1731
+ <div className="text-xs uppercase tracking-widest text-slate-400">{workflowStageLabel(event.stage)}</div>
1732
+ <div className="text-[10px] text-slate-400">
1733
+ #{event.seq} | {new Date(event.ts).toLocaleTimeString()}
1734
+ </div>
1735
+ </div>
1736
+ <div className="text-xs text-slate-200 leading-relaxed">
1737
+ {summarizeEnvEvent(event)}
1738
+ </div>
1739
+ {payloadHighlights(event.payload).length > 0 && (
1740
+ <div className="mt-2 flex flex-wrap gap-1">
1741
+ {payloadHighlights(event.payload).map(([k, v]) => (
1742
+ <span
1743
+ key={`${event.id}-${k}`}
1744
+ className="text-[10px] bg-slate-800/70 border border-white/10 rounded px-1.5 py-0.5 text-slate-300"
1745
+ >
1746
+ {k}: {v}
1747
+ </span>
1748
+ ))}
1749
+ </div>
1750
+ )}
1751
+ </div>
1752
+ ))
1753
+ )}
1754
+ </div>
1755
+ </div>
1756
+ </div>
1757
+ );
1758
+ }
1759
+
1760
+
frontend/react/src/hooks/useStorySimulation.js ADDED
@@ -0,0 +1,474 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useRef, useCallback, useEffect } from "react";
2
+ import { api } from "../api/client";
3
+
4
+ // ─────────────────────────────────────────────────────────────────────────────
5
+ // Narrative translator: maps raw action → human-readable cause→effect story
6
+ // ─────────────────────────────────────────────────────────────────────────────
7
/**
 * Translate one raw environment action into a human-readable cause→effect
 * story card for the timeline UI.
 *
 * @param {string} actionType - Raw action identifier from the backend step.
 * @param {Object} payload - Parsed action payload (may be empty).
 * @param {number} reward - Numeric reward for this step.
 * @param {number} backlogDelta - Change in backlog since the previous step.
 * @param {number} slaDelta - Change in SLA breach count since the previous step.
 * @param {number} fairnessDelta - Change in fairness gap since the previous step.
 * @returns {{title: string, desc: string, reason: string, icon: string,
 *           type: string, isHighReward: boolean, isHugeImpact: boolean}}
 */
function mapActionToStory(actionType, payload, reward, backlogDelta, slaDelta, fairnessDelta) {
  // Build the observable-effects clause (backlog, SLA, fairness movements).
  const effects = [];
  if (backlogDelta < 0) {
    effects.push(`backlog improved by ${Math.abs(backlogDelta)} case(s)`);
  } else if (backlogDelta > 0) {
    effects.push(`backlog increased by ${backlogDelta} case(s)`);
  } else {
    effects.push("backlog stayed stable");
  }

  if (slaDelta > 0) {
    effects.push(`${slaDelta} new SLA breach(es) occurred`);
  } else if (slaDelta < 0) {
    effects.push(`${Math.abs(slaDelta)} SLA breach(es) recovered`);
  }

  const fairnessShift = Number(fairnessDelta);
  if (Number.isFinite(fairnessShift) && fairnessShift !== 0) {
    effects.push(`fairness gap ${fairnessShift > 0 ? "worsened" : "improved"} by ${Math.abs(fairnessShift).toFixed(3)}`);
  }

  const effectClause = `${effects.join(", ")}.`;

  // Start from the routine-cycle defaults; tone is success on positive reward.
  const story = {
    title: "Standard Processing Cycle",
    desc: "The system advanced one cycle and continued normal queue processing.",
    reason: "No override was required, so routine processing continued.",
    icon: "schedule",
    type: reward > 0 ? "success" : "info",
  };
  // Any fresh SLA breach immediately escalates the tone to error.
  if (slaDelta > 0) story.type = "error";

  if (actionType === "assign_capacity") {
    Object.assign(story, {
      title: "Capacity Assigned",
      desc: `Officers were assigned to '${payload.service_target ?? payload.service ?? "target queue"}'; ${effectClause}`,
      reason: "The agent detected staffing pressure and increased capacity where it could reduce delay.",
      icon: "group_add",
    });
  } else if (actionType === "reallocate_officers") {
    Object.assign(story, {
      title: "Staff Reallocated",
      desc: `Officers were reallocated toward higher-pressure services; ${effectClause}`,
      reason: `The agent shifted staffing to reduce bottlenecks in '${payload.service_target ?? "priority"}' services.`,
      icon: "compare_arrows",
    });
  } else if (actionType === "request_missing_documents") {
    Object.assign(story, {
      title: "Documents Requested",
      desc: `Missing documents were requested to unblock pending files; ${effectClause}`,
      reason: "The agent prioritized document blockers to avoid queue stagnation.",
      icon: "rule_folder",
    });
    // Upgrade tone to success unless an SLA breach already made it an error.
    if (story.type !== "error") story.type = "success";
  } else if (actionType === "escalate_service") {
    Object.assign(story, {
      title: "Service Escalated",
      desc: `At-risk services were escalated for faster handling; ${effectClause}`,
      reason: "Escalation was used to protect SLA-critical cases.",
      icon: "warning",
      type: "warning",
    });
  } else if (actionType === "set_priority_mode") {
    Object.assign(story, {
      title: "Priority Mode Updated",
      desc: `Priority mode switched to '${payload.priority_mode ?? "balanced"}'; ${effectClause}`,
      reason: "The agent changed queue strategy to better match current workload pressure.",
      icon: "model_training",
    });
  } else {
    // Unknown/implicit actions read as routine processing with the effect clause.
    story.desc = `Routine processing executed; ${effectClause}`;
  }

  // A negative reward on an otherwise-neutral step reads as a warning.
  if (reward < 0 && story.type === "info") story.type = "warning";

  return {
    title: story.title,
    desc: story.desc,
    reason: story.reason,
    icon: story.icon,
    type: story.type,
    isHighReward: reward >= 1.0,
    isHugeImpact: backlogDelta <= -5,
  };
}
74
+
75
+ // Determines the simulation phase label from step index and total
76
/**
 * Classify progress through a run as "early", "middle", or "late" based on
 * the fraction of steps completed (first third, middle third, final third).
 *
 * @param {number} step - Current step index.
 * @param {number} maxSteps - Total planned steps (guarded against 0).
 * @returns {"early"|"middle"|"late"}
 */
function getPhase(step, maxSteps) {
  // Math.max(..., 1) prevents division by zero when maxSteps is 0.
  const progress = step / Math.max(maxSteps, 1);
  return progress < 0.33 ? "early" : progress < 0.67 ? "middle" : "late";
}
82
+
83
+ // Detect if a step is a "key decision" turning point
84
/**
 * Decide whether a simulation step counts as a "key decision" turning point
 * worth highlighting on the timeline.
 *
 * @param {Object} s - Raw step record (reads `reward` and `invalid_action`).
 * @param {number} backlogDelta - Backlog change produced by this step.
 * @returns {boolean} true when the step is notable.
 */
function isKeyDecision(s, backlogDelta) {
  // High reward magnitude in either direction is notable.
  const strongReward = Math.abs(Number(s.reward)) >= 1.0;
  // A large backlog swing (5+ cases) is a turning point.
  const bigBacklogSwing = backlogDelta !== 0 && Math.abs(backlogDelta) >= 5;
  // A blocked/invalid action is always worth surfacing.
  const blockedAction = Boolean(s.invalid_action);
  return strongReward || bigBacklogSwing || blockedAction;
}
91
+
92
+ // ─────────────────────────────────────────────────────────────────────────────
93
+ // Hook
94
+ // ─────────────────────────────────────────────────────────────────────────────
95
+ export function useStorySimulation({ defaultTask }) {
96
+ const [taskId, setTaskId] = useState(defaultTask || "district_backlog_easy");
97
+ const [maxSteps, setMaxSteps] = useState(40);
98
+ const [agentMode, setAgentMode] = useState("trained_rl");
99
+ const [policyName, setPolicyName] = useState("backlog_clearance");
100
+ const [modelPath, setModelPath] = useState("");
101
+ const [modelType, setModelType] = useState("maskable");
102
+ const [availablePolicies, setAvailablePolicies] = useState([]);
103
+ const [availableModels, setAvailableModels] = useState([]);
104
+ const [configError, setConfigError] = useState("");
105
+ const [running, setRunning] = useState(false);
106
+ const [starting, setStarting] = useState(false);
107
+ const [runId, setRunId] = useState("");
108
+
109
+ const [kpis, setKpis] = useState({
110
+ backlog: 0, backlogDelta: 0,
111
+ slaBreaches: 0, slaDelta: 0,
112
+ fairness: 0, fairnessDelta: 0,
113
+ });
114
+
115
+ const [timeline, setTimeline] = useState([]);
116
+ const [resources, setResources] = useState([]);
117
+
118
+ // Progress tracking
119
+ const [currentStep, setCurrentStep] = useState(0);
120
+
121
+ // Before vs after journey stats
122
+ const [journeyStats, setJourneyStats] = useState(null); // null = not yet done
123
+
124
+ // Internal refs
125
+ const lastState = useRef({ backlog: 0, sla: 0, fairness: 0 });
126
+ const initialSnapshot = useRef(null); // captured on first real step
127
+ const stepCount = useRef(0);
128
+ const maxStepsRef = useRef(40);
129
+
130
+ useEffect(() => {
131
+ let mounted = true;
132
+ (async () => {
133
+ try {
134
+ const [policiesRes, modelsV1Res, modelsV2Res] = await Promise.allSettled([
135
+ api("/agents"),
136
+ api("/rl_models"),
137
+ api("/rl/models"),
138
+ ]);
139
+ if (!mounted) return;
140
+
141
+ const policyRows = policiesRes.status === "fulfilled" && Array.isArray(policiesRes.value) ? policiesRes.value : [];
142
+ setAvailablePolicies(policyRows);
143
+ if (policyRows.length > 0 && !policyRows.includes(policyName)) {
144
+ setPolicyName(policyRows[0]);
145
+ }
146
+
147
+ const modelRowsV1 = modelsV1Res.status === "fulfilled" && Array.isArray(modelsV1Res.value?.models)
148
+ ? modelsV1Res.value.models
149
+ : [];
150
+ const modelRowsV2 = modelsV2Res.status === "fulfilled" && Array.isArray(modelsV2Res.value)
151
+ ? modelsV2Res.value.map((row) => ({
152
+ label: row?.model_path ? String(row.model_path).split(/[\\/]/).pop() : "model",
153
+ path: row?.model_path ? (String(row.model_path).toLowerCase().endsWith(".zip") ? row.model_path : `${row.model_path}.zip`) : "",
154
+ exists: Boolean(row?.exists),
155
+ model_type: "maskable",
156
+ }))
157
+ : [];
158
+
159
+ const dedupe = new Map();
160
+ for (const m of [...modelRowsV1, ...modelRowsV2]) {
161
+ const key = String(m?.path || "").replace(/\\/g, "/").toLowerCase();
162
+ if (!key || dedupe.has(key)) continue;
163
+ dedupe.set(key, m);
164
+ }
165
+ const existingModels = Array.from(dedupe.values()).filter((m) => Boolean(m?.exists));
166
+ setAvailableModels(existingModels);
167
+ const preferred =
168
+ existingModels.find((m) => String(m.path || "").toLowerCase().includes("phase2_final")) ||
169
+ existingModels[0];
170
+ if (preferred?.path) {
171
+ setModelPath(preferred.path);
172
+ setModelType(preferred.model_type || "maskable");
173
+ setAgentMode((prev) => (prev === "baseline_policy" ? "trained_rl" : prev));
174
+ }
175
+ } catch (err) {
176
+ if (!mounted) return;
177
+ setConfigError(err?.message || "Failed to load simulation options.");
178
+ }
179
+ })();
180
+ return () => {
181
+ mounted = false;
182
+ };
183
+ }, []);
184
+
185
+ const startSimulation = async () => {
186
+ setStarting(true);
187
+ setConfigError("");
188
+ setJourneyStats(null);
189
+ setCurrentStep(0);
190
+ initialSnapshot.current = null;
191
+ stepCount.current = 0;
192
+ maxStepsRef.current = maxSteps;
193
+ try {
194
+ const payload = {
195
+ task_id: taskId,
196
+ agent_mode: agentMode,
197
+ max_steps: maxSteps,
198
+ policy_name: policyName,
199
+ model_path: modelPath || null,
200
+ model_type: modelType,
201
+ };
202
+
203
+ const started = await api("/simulation/live/start", {
204
+ method: "POST",
205
+ body: JSON.stringify(payload),
206
+ });
207
+
208
+ setRunId(started.run_id);
209
+ setTimeline([{
210
+ id: "start",
211
+ time: "Step 0",
212
+ title: "Simulation Initialized",
213
+ desc: `Scenario locked: ${taskId.replace(/_/g, " ")}. Agent mode '${agentMode}' engaged — agent begins resolving backlog.`,
214
+ impact: 0,
215
+ type: "info",
216
+ icon: "rocket_launch",
217
+ phase: "early",
218
+ key: false,
219
+ }]);
220
+ setResources([]);
221
+ lastState.current = { backlog: 0, sla: 0, fairness: 0 };
222
+ setRunning(true);
223
+ } catch (err) {
224
+ console.error("Start failed:", err);
225
+ setTimeline([{
226
+ id: "error",
227
+ time: "—",
228
+ title: "Initialization Failed",
229
+ desc: `Backend error: ${err.message || "Cannot start simulation."}`,
230
+ impact: 0,
231
+ type: "error",
232
+ icon: "error",
233
+ phase: "early",
234
+ key: false,
235
+ }]);
236
+ setConfigError(err?.message || "Cannot start simulation.");
237
+ } finally {
238
+ setStarting(false);
239
+ }
240
+ };
241
+
242
+ const stopSimulation = async () => {
243
+ if (!runId) return;
244
+ try {
245
+ await api(`/simulation/live/${runId}/stop`, { method: "POST" });
246
+ } catch (err) {
247
+ console.error(err);
248
+ } finally {
249
+ setRunning(false);
250
+ }
251
+ };
252
+
253
// Polling loop — runs while running=true.
//
// Each tick POSTs /simulation/live/step for run `rid`, then:
//   * recomputes KPI deltas against the previous tick (lastState ref),
//   * maps the raw action into a narrative timeline event, collapsing
//     consecutive non-key events with the same title/phase into one entry,
//   * refreshes the per-queue load monitors from `queue_rows`,
//   * on episode completion, derives journey stats and prepends a final event.
// `cancelled` is a shared { v: boolean } token flipped by the controlling
// effect; it is checked before and after every await so a stale loop never
// touches state after stop/unmount.
const runLoop = useCallback(async (rid, cancelled) => {
  if (cancelled.v) return;
  try {
    const res = await api("/simulation/live/step", {
      method: "POST",
      body: JSON.stringify({ run_id: rid }),
    });

    if (cancelled.v) return;

    if (res.step) {
      const s = res.step;
      stepCount.current += 1;
      const stepNum = Number(s.step ?? stepCount.current);
      setCurrentStep(stepNum);

      const currentBacklog = Number(s.backlog ?? 0);
      const currentSla = Number(s.sla_breaches ?? 0);
      const currentFairness = Number(s.fairness_gap ?? 0);

      // Capture initial snapshot from step 1 (used for journey stats at the end).
      if (initialSnapshot.current === null) {
        initialSnapshot.current = {
          backlog: currentBacklog,
          sla: currentSla,
          fairness: currentFairness,
        };
      }

      const backlogDelta = currentBacklog - lastState.current.backlog;
      const slaDelta = currentSla - lastState.current.sla;
      const fairnessDelta = currentFairness - lastState.current.fairness;

      setKpis({
        backlog: currentBacklog,
        backlogDelta,
        slaBreaches: currentSla,
        slaDelta,
        fairness: currentFairness,
        fairnessDelta,
      });

      lastState.current = { backlog: currentBacklog, sla: currentSla, fairness: currentFairness };

      // action_payload may arrive as a JSON string or an object; tolerate both.
      const payload = typeof s.action_payload === "string"
        ? (() => { try { return JSON.parse(s.action_payload); } catch { return {}; } })()
        : (s.action_payload || {});

      // FIX: s.reward may be absent on some steps; Number(undefined) is NaN and
      // would poison the story mapper, the event `impact`, and the merge
      // accumulator below. Default missing rewards to 0.
      const reward = Number(s.reward ?? 0);

      const story = mapActionToStory(
        s.action_type || "advance_time",
        payload,
        reward,
        backlogDelta,
        slaDelta,
        fairnessDelta
      );

      const phase = getPhase(stepNum, maxStepsRef.current);
      const key = isKeyDecision(s, backlogDelta);
      const improvesBacklog = backlogDelta < 0;
      const worsensBacklog = backlogDelta > 0;
      const worsensSla = slaDelta > 0;
      const improvesSla = slaDelta < 0;
      const outcomeLabel = improvesBacklog || improvesSla
        ? "Improvement"
        : worsensBacklog || worsensSla
        ? "Degradation"
        : "Stable";
      const outcomeType = outcomeLabel === "Improvement" ? "success" : outcomeLabel === "Degradation" ? "warning" : "info";

      const newEvent = {
        id: `step-${stepNum}`,
        time: `Step ${stepNum}`,
        title: s.invalid_action ? "Action Blocked" : story.title,
        desc: s.invalid_action
          ? "This action was blocked by environment constraints; the agent adapts on the next step."
          : story.desc,
        reason: s.invalid_action ? "The attempted operation violated environment constraints (e.g. over-assignment)." : story.reason,
        impact: reward,
        type: s.invalid_action ? "error" : story.type,
        icon: s.invalid_action ? "block" : story.icon,
        isHighReward: story.isHighReward && !s.invalid_action,
        isHugeImpact: story.isHugeImpact && !s.invalid_action,
        phase,
        key,
        outcomeLabel,
        outcomeType,
        backlogDelta, // Used for phase summary
      };

      // Collapse consecutive identical titles (deduplication for repeated events).
      setTimeline((prev) => {
        const [top, ...rest] = prev;
        if (
          top &&
          top.title === newEvent.title &&
          top.phase === newEvent.phase &&
          !top.key &&
          !newEvent.key
        ) {
          // Merge: bump count, accumulate reward and backlog diff.
          const merged = {
            ...top,
            id: newEvent.id,
            time: `${top.time?.split("–")[0]?.trim()}–${newEvent.time}`,
            desc: top.desc,
            impact: Number(top.impact) + Number(newEvent.impact),
            backlogDelta: (top.backlogDelta || 0) + backlogDelta,
            _count: (top._count || 1) + 1,
          };
          return [merged, ...rest].slice(0, 30);
        }
        return [newEvent, ...prev].slice(0, 30);
      });

      // Update queue monitors; bars are scaled to the busiest queue (min 1 to
      // avoid division by zero).
      if (Array.isArray(s.queue_rows) && s.queue_rows.length > 0) {
        const maxCases = Math.max(...s.queue_rows.map((q) => q.active_cases ?? 0), 1);
        setResources(s.queue_rows.map((q) => ({
          name: (q.service ?? q.service_type ?? "unknown").replace(/_/g, " ").toUpperCase(),
          activeCases: q.active_cases ?? 0,
          percentage: Math.min(100, Math.floor(((q.active_cases ?? 0) / maxCases) * 100)),
        })));
      }
    }

    // Episode done — summarize the journey and stop polling.
    if (res.done || res.step?.done) {
      const finalBacklog = lastState.current.backlog;
      const initSnap = initialSnapshot.current ?? { backlog: finalBacklog, sla: 0, fairness: 0 };

      const backlogImprovement = initSnap.backlog > 0
        ? Math.round(((initSnap.backlog - finalBacklog) / initSnap.backlog) * 100)
        : 0;

      setJourneyStats({
        initialBacklog: initSnap.backlog,
        finalBacklog,
        backlogImprovement,
        initialSla: initSnap.sla,
        finalSla: lastState.current.sla,
        totalSteps: stepCount.current,
        finalScore: res.score ?? null,
        totalReward: res.total_reward ?? null,
      });

      setTimeline((prev) => [{
        id: "end",
        time: "Final",
        title: "Episode Complete",
        desc: `Resolution finished in ${stepCount.current} steps. Final score: ${res.score != null ? (res.score * 100).toFixed(1) + "%" : "N/A"}. Backlog ${finalBacklog < initSnap.backlog ? "reduced" : "unchanged"} — SLAs verified.`,
        impact: res.total_reward ?? 0,
        type: "success",
        icon: "verified",
        phase: "late",
        key: true,
      }, ...prev]);

      setRunning(false);
      return;
    }

    setTimeout(() => runLoop(rid, cancelled), 1000);
  } catch (err) {
    if (!cancelled.v) {
      setRunning(false);
      setTimeline((prev) => [{
        id: `error-${Date.now()}`,
        time: "Halted",
        title: "System Error Detected",
        // FIX: err.message can be undefined (non-Error throws); avoid rendering
        // the literal string "undefined" in the timeline.
        desc: `Backend synchronization failed: ${err?.message || "unknown error"}`,
        impact: 0,
        type: "error",
        icon: "warning",
        phase: "late",
        key: false,
      }, ...prev]);
    }
  }
}, []);
434
+
435
// Reactive driver for the polling loop: arm it when a run goes live, and tear
// it down (via a shared cancellation token) on stop, run change, or unmount.
const cancelRef = useRef({ v: false });
useEffect(() => {
  const inactive = !running || !runId;
  if (inactive) {
    // Flag any in-flight loop as cancelled; nothing to clean up.
    cancelRef.current.v = true;
    return undefined;
  }
  // Fresh token per activation so a previously-cancelled loop cannot race us.
  const token = { v: false };
  cancelRef.current = token;
  // Small boot delay lets the start request's state settle before polling.
  const bootTimer = setTimeout(() => {
    if (!token.v) {
      runLoop(runId, token);
    }
  }, 100);
  return () => {
    clearTimeout(bootTimer);
    token.v = true;
  };
}, [running, runId, runLoop]);
453
+
454
+ return {
455
+ taskId, setTaskId,
456
+ maxSteps, setMaxSteps,
457
+ agentMode, setAgentMode,
458
+ policyName, setPolicyName,
459
+ modelPath, setModelPath,
460
+ modelType, setModelType,
461
+ availablePolicies,
462
+ availableModels,
463
+ configError,
464
+ running, starting,
465
+ currentStep,
466
+ kpis, timeline, resources,
467
+ journeyStats,
468
+ startSimulation, stopSimulation,
469
+ };
470
+ }
471
+
472
+
473
+
474
+
frontend/react/src/main.jsx ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from "react";
2
+ import { createRoot } from "react-dom/client";
3
+ import App from "./App";
4
+ import "./styles.css";
5
+
6
+ const rootEl = document.getElementById("app-root");
7
+ if (!rootEl) {
8
+ throw new Error("Missing #app-root mount node");
9
+ }
10
+
11
+ createRoot(rootEl).render(
12
+ <React.StrictMode>
13
+ <App />
14
+ </React.StrictMode>,
15
+ );
frontend/react/src/styles.css ADDED
@@ -0,0 +1,525 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @tailwind base;
2
+ @tailwind components;
3
+ @tailwind utilities;
4
+
5
+ :root {
6
+ --bg: #030303;
7
+ --panel: #0d0d0d;
8
+ --line: #272727;
9
+ --text: #f5f5f5;
10
+ --muted: #a7a7a7;
11
+ --accent: #ffffff;
12
+ }
13
+
14
+ * {
15
+ box-sizing: border-box;
16
+ }
17
+
18
/* Base canvas: full-height dark radial gradient.
   FIX: main.jsx mounts the app into #app-root, so the rule must include it —
   previously only #root (which does not exist in index.html's mount id) was
   styled, leaving the actual mount node without min-height. #root is kept
   for backward compatibility. */
html,
body,
#root,
#app-root {
  margin: 0;
  min-height: 100%;
  background: radial-gradient(circle at 5% 5%, #1a1a1a 0%, #050505 45%, #000 100%);
  color: var(--text);
  font-family: "Segoe UI", Tahoma, Geneva, Verdana, sans-serif;
}
27
+
28
+ .app-shell {
29
+ display: grid;
30
+ grid-template-columns: 280px 1fr;
31
+ min-height: 100vh;
32
+ }
33
+
34
+ .sidebar {
35
+ border-right: 1px solid var(--line);
36
+ background: linear-gradient(180deg, #0a0a0a, #050505);
37
+ padding: 18px;
38
+ }
39
+
40
+ .sidebar h1 {
41
+ margin: 0;
42
+ font-size: 24px;
43
+ }
44
+
45
+ .sidebar-sub {
46
+ color: var(--muted);
47
+ font-size: 13px;
48
+ margin: 10px 0 14px;
49
+ }
50
+
51
+ .nav-btn {
52
+ width: 100%;
53
+ text-align: left;
54
+ border: 1px solid #3b3b3b;
55
+ color: #d8d8d8;
56
+ background: transparent;
57
+ border-radius: 10px;
58
+ padding: 10px 12px;
59
+ margin-bottom: 8px;
60
+ cursor: pointer;
61
+ }
62
+
63
+ .nav-btn.active {
64
+ background: #fff;
65
+ color: #000;
66
+ border-color: #fff;
67
+ font-weight: 700;
68
+ }
69
+
70
+ .content {
71
+ padding: 20px;
72
+ }
73
+
74
+ .status-banner {
75
+ border: 1px solid var(--line);
76
+ background: #0a0a0a;
77
+ border-radius: 10px;
78
+ padding: 10px 12px;
79
+ color: var(--muted);
80
+ font-size: 12px;
81
+ margin-bottom: 12px;
82
+ }
83
+
84
+ .module-grid {
85
+ display: grid;
86
+ grid-template-columns: 1fr;
87
+ gap: 12px;
88
+ }
89
+
90
+ .panel {
91
+ border: 1px solid var(--line);
92
+ border-radius: 12px;
93
+ background: var(--panel);
94
+ padding: 14px;
95
+ }
96
+
97
+ .hero-panel {
98
+ background: linear-gradient(120deg, #fff 0%, #d7d7d7 40%, #8c8c8c 100%);
99
+ color: #000;
100
+ }
101
+
102
+ .hero-panel code {
103
+ background: rgba(0, 0, 0, 0.12);
104
+ padding: 2px 6px;
105
+ border-radius: 8px;
106
+ }
107
+
108
+ h2,
109
+ h3 {
110
+ margin: 0 0 10px;
111
+ }
112
+
113
+ .control-grid {
114
+ display: grid;
115
+ grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
116
+ gap: 10px;
117
+ }
118
+
119
+ label {
120
+ display: grid;
121
+ gap: 6px;
122
+ color: var(--muted);
123
+ font-size: 12px;
124
+ }
125
+
126
+ input,
127
+ select,
128
+ button {
129
+ border: 1px solid #3a3a3a;
130
+ border-radius: 8px;
131
+ padding: 8px 10px;
132
+ font-size: 13px;
133
+ color: var(--text);
134
+ background: #111;
135
+ }
136
+
137
+ button {
138
+ background: var(--accent);
139
+ color: #000;
140
+ border: none;
141
+ font-weight: 700;
142
+ cursor: pointer;
143
+ }
144
+
145
+ button.ghost {
146
+ border: 1px solid #505050;
147
+ background: transparent;
148
+ color: var(--text);
149
+ }
150
+
151
+ button:disabled {
152
+ opacity: 0.6;
153
+ cursor: wait;
154
+ }
155
+
156
+ .row {
157
+ display: flex;
158
+ flex-wrap: wrap;
159
+ gap: 8px;
160
+ margin-top: 10px;
161
+ }
162
+
163
+ .loading-inline {
164
+ margin-top: 10px;
165
+ display: inline-flex;
166
+ align-items: center;
167
+ gap: 8px;
168
+ border: 1px solid #2a2a2a;
169
+ background: #090909;
170
+ border-radius: 999px;
171
+ padding: 6px 10px;
172
+ color: #cdcdcd;
173
+ font-size: 12px;
174
+ }
175
+
176
+ .spinner-dot {
177
+ width: 10px;
178
+ height: 10px;
179
+ border-radius: 999px;
180
+ background: #fff;
181
+ display: inline-block;
182
+ animation: pulse 1s ease-in-out infinite;
183
+ }
184
+
185
+ @keyframes pulse {
186
+ 0% { opacity: 0.25; transform: scale(0.8); }
187
+ 50% { opacity: 1; transform: scale(1); }
188
+ 100% { opacity: 0.25; transform: scale(0.8); }
189
+ }
190
+
191
+ .metric-grid {
192
+ display: grid;
193
+ grid-template-columns: repeat(auto-fit, minmax(145px, 1fr));
194
+ gap: 10px;
195
+ }
196
+
197
+ .metric-card {
198
+ border: 1px solid var(--line);
199
+ border-radius: 10px;
200
+ background: #0a0a0a;
201
+ padding: 10px;
202
+ display: grid;
203
+ gap: 4px;
204
+ }
205
+
206
+ .metric-card span {
207
+ color: var(--muted);
208
+ font-size: 12px;
209
+ }
210
+
211
+ .metric-card strong {
212
+ font-size: 20px;
213
+ }
214
+
215
+ .flow-list {
216
+ margin: 0;
217
+ padding-left: 20px;
218
+ color: #d8d8d8;
219
+ line-height: 1.5;
220
+ }
221
+
222
+ .tag-wrap {
223
+ display: flex;
224
+ flex-wrap: wrap;
225
+ gap: 8px;
226
+ }
227
+
228
+ .tag {
229
+ border: 1px solid #444;
230
+ border-radius: 999px;
231
+ padding: 4px 10px;
232
+ font-size: 12px;
233
+ }
234
+
235
+ .chart-canvas {
236
+ width: 100%;
237
+ border: 1px solid #1d2f42;
238
+ border-radius: 10px;
239
+ background: #03070d;
240
+ }
241
+
242
+ .step-card {
243
+ margin-top: 10px;
244
+ border: 1px solid #2a2a2a;
245
+ border-radius: 10px;
246
+ padding: 12px;
247
+ background: #090909;
248
+ }
249
+
250
+ .animate-in {
251
+ animation: rise 0.35s ease-out;
252
+ }
253
+
254
+ @keyframes rise {
255
+ from {
256
+ transform: translateY(8px);
257
+ opacity: 0;
258
+ }
259
+ to {
260
+ transform: translateY(0);
261
+ opacity: 1;
262
+ }
263
+ }
264
+
265
+ .step-head {
266
+ display: flex;
267
+ justify-content: space-between;
268
+ margin-bottom: 8px;
269
+ }
270
+
271
+ .step-meta {
272
+ display: flex;
273
+ flex-wrap: wrap;
274
+ gap: 10px;
275
+ color: #c5c5c5;
276
+ font-size: 12px;
277
+ }
278
+
279
+ .queue-list {
280
+ margin-top: 10px;
281
+ display: grid;
282
+ gap: 7px;
283
+ }
284
+
285
+ .queue-row {
286
+ display: grid;
287
+ grid-template-columns: 150px 1fr 40px;
288
+ gap: 8px;
289
+ align-items: center;
290
+ }
291
+
292
+ .queue-label {
293
+ font-size: 12px;
294
+ color: #cfcfcf;
295
+ }
296
+
297
+ .queue-bar-wrap {
298
+ background: #121212;
299
+ border: 1px solid #2b2b2b;
300
+ border-radius: 999px;
301
+ overflow: hidden;
302
+ height: 10px;
303
+ }
304
+
305
+ .queue-bar {
306
+ height: 100%;
307
+ background: linear-gradient(90deg, #fff, #8f8f8f);
308
+ transition: width 0.5s ease;
309
+ }
310
+
311
+ .queue-val {
312
+ text-align: right;
313
+ font-size: 12px;
314
+ color: #ddd;
315
+ }
316
+
317
+ .jobs-list {
318
+ display: grid;
319
+ gap: 8px;
320
+ }
321
+
322
+ .job-item {
323
+ display: flex;
324
+ justify-content: space-between;
325
+ align-items: center;
326
+ text-align: left;
327
+ border: 1px solid #3b3b3b;
328
+ border-radius: 10px;
329
+ background: #0b0b0b;
330
+ color: #ededed;
331
+ }
332
+
333
+ .job-item.active {
334
+ border-color: #fff;
335
+ }
336
+
337
+ .job-status {
338
+ text-transform: uppercase;
339
+ font-size: 11px;
340
+ letter-spacing: 0.05em;
341
+ color: #ccc;
342
+ }
343
+
344
+ .job-status.running {
345
+ color: #fff;
346
+ }
347
+
348
+ .job-status.completed {
349
+ color: #bfbfbf;
350
+ }
351
+
352
+ .job-status.failed {
353
+ color: #8f8f8f;
354
+ }
355
+
356
+ .progress-track {
357
+ margin-top: 10px;
358
+ height: 10px;
359
+ border-radius: 999px;
360
+ background: #111;
361
+ border: 1px solid #2a2a2a;
362
+ overflow: hidden;
363
+ }
364
+
365
+ .progress-fill {
366
+ height: 100%;
367
+ background: linear-gradient(90deg, #fff, #888);
368
+ transition: width 0.5s ease;
369
+ }
370
+
371
+ .compare-bars {
372
+ display: grid;
373
+ gap: 8px;
374
+ }
375
+
376
+ .compare-row {
377
+ display: grid;
378
+ grid-template-columns: 180px 1fr 60px;
379
+ gap: 10px;
380
+ align-items: center;
381
+ }
382
+
383
+ .compare-label,
384
+ .compare-value {
385
+ font-size: 12px;
386
+ }
387
+
388
+ .compare-track {
389
+ height: 12px;
390
+ border: 1px solid #2f2f2f;
391
+ background: #0f0f0f;
392
+ border-radius: 999px;
393
+ overflow: hidden;
394
+ }
395
+
396
+ .compare-fill {
397
+ height: 100%;
398
+ background: linear-gradient(90deg, #fff, #8d8d8d);
399
+ transition: width 0.6s ease;
400
+ }
401
+
402
+ .table-wrap {
403
+ margin-top: 10px;
404
+ border: 1px solid #252525;
405
+ border-radius: 10px;
406
+ overflow: auto;
407
+ }
408
+
409
+ table {
410
+ width: 100%;
411
+ border-collapse: collapse;
412
+ font-size: 12px;
413
+ }
414
+
415
+ th,
416
+ td {
417
+ border-bottom: 1px solid #1d1d1d;
418
+ text-align: left;
419
+ padding: 8px;
420
+ white-space: nowrap;
421
+ }
422
+
423
+ th {
424
+ background: #0b0b0b;
425
+ }
426
+
427
+ .muted {
428
+ color: var(--muted);
429
+ font-size: 12px;
430
+ }
431
+
432
+ .mono {
433
+ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
434
+ font-size: 12px;
435
+ }
436
+
437
+ .compliance-card {
438
+ border-width: 1px;
439
+ }
440
+
441
+ .compliance-card.status-pass {
442
+ border-color: #4f4f4f;
443
+ box-shadow: inset 0 0 0 1px #2d2d2d;
444
+ }
445
+
446
+ .compliance-card.status-fail {
447
+ border-color: #7a7a7a;
448
+ box-shadow: inset 0 0 0 1px #545454;
449
+ }
450
+
451
+ .compliance-card.status-unknown {
452
+ border-color: #3a3a3a;
453
+ }
454
+
455
+ .log-grid {
456
+ display: grid;
457
+ gap: 8px;
458
+ max-height: 320px;
459
+ overflow: auto;
460
+ margin-top: 8px;
461
+ padding-right: 2px;
462
+ }
463
+
464
+ .log-card {
465
+ border: 1px solid #2a2a2a;
466
+ border-radius: 10px;
467
+ background: #090909;
468
+ padding: 10px;
469
+ display: grid;
470
+ gap: 4px;
471
+ }
472
+
473
+ .log-title {
474
+ font-weight: 700;
475
+ letter-spacing: 0.04em;
476
+ font-size: 12px;
477
+ }
478
+
479
+ .log-row {
480
+ font-size: 12px;
481
+ color: #d4d4d4;
482
+ line-height: 1.4;
483
+ }
484
+
485
+ .log-start {
486
+ border-left: 3px solid #c8c8c8;
487
+ }
488
+
489
+ .log-step {
490
+ border-left: 3px solid #8f8f8f;
491
+ }
492
+
493
+ .log-end {
494
+ border-left: 3px solid #ffffff;
495
+ }
496
+
497
+ .log-info {
498
+ border-left: 3px solid #5b5b5b;
499
+ }
500
+
501
+ .terminal-log {
502
+ max-height: 280px;
503
+ overflow: auto;
504
+ border: 1px solid #262626;
505
+ border-radius: 10px;
506
+ background: #070707;
507
+ padding: 10px;
508
+ margin: 0;
509
+ font-size: 12px;
510
+ }
511
+
512
+ @media (max-width: 980px) {
513
+ .app-shell {
514
+ grid-template-columns: 1fr;
515
+ }
516
+
517
+ .sidebar {
518
+ border-right: none;
519
+ border-bottom: 1px solid var(--line);
520
+ }
521
+
522
+ .queue-row {
523
+ grid-template-columns: 120px 1fr 30px;
524
+ }
525
+ }
frontend/react/tailwind.config.js ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/** @type {import('tailwindcss').Config} */
// Tailwind configuration for the React frontend.
// Dark mode is class-driven (a `dark` class on an ancestor), and `content`
// covers index.html plus all JS/TS source so unused utilities are purged.
export default {
  darkMode: "class",
  content: [
    "./index.html",
    "./src/**/*.{js,ts,jsx,tsx}",
  ],
  theme: {
    extend: {
      // Design-token palette: Material-3-style surface/primary/secondary/
      // tertiary roles in a dark scheme, plus a few bespoke accents
      // (coral-warning, emerald-positive, violet-action, …).
      // NOTE(review): values look generated from a theme builder — keep exact.
      "colors": {
        "on-error": "#690005",
        "surface-container-high": "#292932",
        "on-primary-fixed-variant": "#2f2ebe",
        "tertiary-fixed-dim": "#ffb783",
        "on-secondary-fixed": "#002113",
        "inverse-surface": "#e4e1ed",
        "inverse-on-surface": "#303038",
        "coral-warning": "#fb7185",
        "surface-container": "#1f1f27",
        "inverse-primary": "#494bd6",
        "on-tertiary": "#4f2500",
        "on-error-container": "#ffdad6",
        "secondary-fixed-dim": "#4edea3",
        "outline": "#908fa0",
        "on-surface-variant": "#c7c4d7",
        "error": "#ffb4ab",
        "on-secondary-container": "#00311f",
        "tertiary-container": "#d97721",
        "surface-dim": "#13131b",
        "primary": "#c0c1ff",
        "surface-variant": "#34343d",
        "surface-container-low": "#1b1b23",
        "error-container": "#93000a",
        "surface-bright": "#393841",
        "on-tertiary-container": "#452000",
        "secondary-container": "#00a572",
        "on-tertiary-fixed-variant": "#703700",
        "indigo-primary": "#6366f1",
        "primary-fixed-dim": "#c0c1ff",
        "on-primary-container": "#0d0096",
        "on-tertiary-fixed": "#301400",
        "tertiary": "#ffb783",
        "on-primary-fixed": "#07006c",
        "background": "#13131b",
        "primary-fixed": "#e1e0ff",
        "secondary-fixed": "#6ffbbe",
        "primary-container": "#8083ff",
        "emerald-positive": "#10b981",
        "on-surface": "#e4e1ed",
        "on-background": "#e4e1ed",
        "surface-tint": "#c0c1ff",
        "on-secondary-fixed-variant": "#005236",
        "outline-variant": "#464554",
        "on-primary": "#1000a9",
        "on-secondary": "#003824",
        "secondary": "#4edea3",
        "violet-action": "#8b5cf6",
        "rose-alert": "#f43f5e",
        "amber-soft": "#f59e0b",
        "surface": "#13131b",
        "surface-container-lowest": "#0d0d15",
        "surface-container-highest": "#34343d",
        "surface-glass": "rgba(30, 41, 59, 0.7)",
        "tertiary-fixed": "#ffdcc5",
        "background-deep": "#0f172a"
      },
      // Corner radii used across panels, pills, and cards.
      "borderRadius": {
        "DEFAULT": "0.25rem",
        "lg": "0.5rem",
        "xl": "0.75rem",
        "full": "9999px"
      },
      // Semantic layout spacings (usable as p-card-padding, gap-grid-gutter, …).
      "spacing": {
        "container-padding": "2rem",
        "card-padding": "1.25rem",
        "section-gap": "1.5rem",
        "grid-gutter": "1rem"
      },
      // Role-named font families: Manrope for display/headlines, Inter for body.
      // NOTE(review): no generic fallbacks listed — confirm fonts are loaded.
      "fontFamily": {
        "display-metric": ["Manrope"],
        "delta-pill": ["Inter"],
        "label-caps": ["Inter"],
        "headline-md": ["Manrope"],
        "headline-lg": ["Manrope"],
        "body-sm": ["Inter"],
        "body-base": ["Inter"]
      },
      // Matching role-named type scale: [size, { lineHeight, letterSpacing, fontWeight }].
      "fontSize": {
        "display-metric": ["48px", { "lineHeight": "1.1", "letterSpacing": "-0.02em", "fontWeight": "700" }],
        "delta-pill": ["12px", { "lineHeight": "12px", "fontWeight": "700" }],
        "label-caps": ["12px", { "lineHeight": "16px", "letterSpacing": "0.05em", "fontWeight": "600" }],
        "headline-md": ["18px", { "lineHeight": "24px", "fontWeight": "600" }],
        "headline-lg": ["24px", { "lineHeight": "32px", "fontWeight": "600" }],
        "body-sm": ["14px", { "lineHeight": "20px", "fontWeight": "400" }],
        "body-base": ["16px", { "lineHeight": "24px", "fontWeight": "400" }]
      }
    },
  },
  plugins: [],
}
frontend/react/vite.config.js ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import { defineConfig } from "vite";
import react from "@vitejs/plugin-react";

// Backend the dev server proxies API calls to; override via VITE_DEV_API_TARGET
// when the FastAPI service runs elsewhere.
const devApiTarget = process.env.VITE_DEV_API_TARGET || "http://127.0.0.1:7860";

// Dev-server settings: bind to all interfaces on a fixed port (strictPort
// fails fast instead of silently moving), and forward /api to the backend.
const server = {
  host: "0.0.0.0",
  port: 5173,
  strictPort: true,
  proxy: {
    "/api": {
      target: devApiTarget,
      changeOrigin: true,
    },
  },
};

export default defineConfig({
  plugins: [react()],
  // The SPA is served under /ui/ in production, so assets must resolve there.
  base: "/ui/",
  server,
});