Spaces:

rishi38
/

smart_emergency

Sleeping

App Files Files Community

rishi38 committed 12 days ago

Commit

fe0c391

verified ·

1 Parent(s): 044810e

Upload folder using huggingface_hub

Browse files

Files changed (24) hide show

Dockerfile +81 -0
Makefile +18 -0
README.md +260 -3
__init__.py +17 -0
client.py +100 -0
models.py +92 -0
openenv.yaml +64 -0
openenv_smart_emergency.egg-info/PKG-INFO +9 -0
openenv_smart_emergency.egg-info/SOURCES.txt +18 -0
openenv_smart_emergency.egg-info/dependency_links.txt +1 -0
openenv_smart_emergency.egg-info/entry_points.txt +2 -0
openenv_smart_emergency.egg-info/requires.txt +5 -0
openenv_smart_emergency.egg-info/top_level.txt +1 -0
pyproject.toml +45 -0
server/__init__.py +11 -0
server/app.py +272 -0
server/calls.py +153 -0
server/city.py +222 -0
server/requirements.txt +6 -0
server/reward.py +149 -0
server/smart_emergency_environment.py +559 -0
train_sft_grpo.py +661 -0
train_sft_grpo_graph.ipynb +0 -0
uv.lock +0 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,81 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+# Multi-stage build using openenv-base
+# This Dockerfile is flexible and works for both:
+# - In-repo environments (with local OpenEnv sources)
+# - Standalone environments (with openenv from PyPI/Git)
+# The build script (openenv build) handles context detection and sets appropriate build args.
+ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
+FROM ${BASE_IMAGE} AS builder
+WORKDIR /app
+# Ensure git is available (required for installing dependencies from VCS)
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends git && \
+    rm -rf /var/lib/apt/lists/*
+# Build argument to control whether we're building standalone or in-repo
+ARG BUILD_MODE=in-repo
+ARG ENV_NAME=smart_emergency
+# Copy environment code (always at root of build context)
+COPY . /app/env
+# For in-repo builds, openenv is already vendored in the build context
+# For standalone builds, openenv will be installed via pyproject.toml
+WORKDIR /app/env
+# Ensure uv is available (for local builds where base image lacks it)
+RUN if ! command -v uv >/dev/null 2>&1; then \
+        curl -LsSf https://astral.sh/uv/install.sh | sh && \
+        mv /root/.local/bin/uv /usr/local/bin/uv && \
+        mv /root/.local/bin/uvx /usr/local/bin/uvx; \
+    fi
+# Install dependencies using uv sync
+# If uv.lock exists, use it; otherwise resolve on the fly
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-install-project --no-editable; \
+    else \
+        uv sync --no-install-project --no-editable; \
+    fi
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-editable; \
+    else \
+        uv sync --no-editable; \
+    fi
+# Final runtime stage
+FROM ${BASE_IMAGE}
+WORKDIR /app
+# Copy the virtual environment from builder
+COPY --from=builder /app/env/.venv /app/.venv
+# Copy the environment code
+COPY --from=builder /app/env /app/env
+# Set PATH to use the virtual environment
+ENV PATH="/app/.venv/bin:$PATH"
+# Set PYTHONPATH so imports work correctly
+ENV PYTHONPATH="/app/env:$PYTHONPATH"
+# Health check
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
+# Run the FastAPI server
+# The module path is constructed to work with the /app/env structure
+ENV ENABLE_WEB_INTERFACE=true
+CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]

Makefile ADDED Viewed

	@@ -0,0 +1,18 @@

+.PHONY: build start serve stop health
+# ── Docker ────────────────────────────────────────────────────────────────────
+build:
+	@docker build -t emergency:latest -f Dockerfile .
+start:
+	@docker run -p 8000:8000 emergency:latest
+stop:
+	@docker ps -q --filter ancestor=emergency:latest | xargs -r docker stop
+# ── Local dev (uv) ────────────────────────────────────────────────────────────
+serve:
+	@uv run uvicorn server.app:app --host 0.0.0.0 --port 8000 --reload
+health:
+	@curl -s http://localhost:8000/health | python3 -m json.tool

README.md CHANGED Viewed

@@ -1,10 +1,267 @@
 ---
-title: Smart Emergency
-emoji: 🐢
 colorFrom: pink
 colorTo: green
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Smart Emergency Environment Server
+emoji: 🚨
 colorFrom: pink
 colorTo: green
 sdk: docker
 pinned: false
+app_port: 8000
+base_path: /web
+tags:
+  - openenv
 ---
+# Smart Emergency — Dispatch911 RL Environment
+A disaster management reinforcement learning environment where an agent acts as an emergency dispatcher. Each episode, the agent receives live 911 call transcripts and must triage severity, detect duplicate calls, and dispatch the right vehicle (police / ambulance / fire) from a procedurally generated city graph.
+Built on [OpenEnv](https://github.com/meta-pytorch/OpenEnv) — a standard interface for RL environments exposed over HTTP/WebSocket, compatible with TRL + Unsloth training pipelines.
+---
+## Environment Overview
+| Property | Value |
+|---|---|
+| **Task** | Emergency dispatch (triage + routing) |
+| **Episode length** | 20 steps |
+| **Action space** | `dispatch`, `duplicate`, or `hold` with structured fields |
+| **Observation** | Rich text prompt (call transcript + active events + fleet status + city map) |
+| **Reward** | 5-component shaped reward (severity, duplicate detection, vehicle type, vehicle choice, reroute) |
+| **Duplicate call rate** | 30% |
+---
+## Quick Start
+```python
+from smart_emergency import SmartEmergencyAction, SmartEmergencyEnv
+with SmartEmergencyEnv(base_url="http://localhost:8000") as env:
+    result = env.reset()
+    print(result.observation.prompt)
+    # Dispatch an ambulance to the incident
+    action = SmartEmergencyAction(
+        action_type="dispatch",
+        severity_pred=3,
+        is_duplicate=False,
+        vehicle_type="ambulance",
+        vehicle_id="ambulance_0",
+    )
+    result = env.step(action)
+    print(result.observation.reward_breakdown)
+    # → {'severity': 1.0, 'duplicate': 1.0, 'vehicle_type': 1.5, 'vehicle_choice': 0.5, 'reroute': 0.0, 'total': 4.0}
+```
+---
+## Action Space
+**`SmartEmergencyAction`** — the agent's structured response to each incoming 911 call.
+| Field | Type | Required | Description |
+|---|---|---|---|
+| `action_type` | `str` | ✅ | `"dispatch"`, `"duplicate"`, or `"hold"` |
+| `severity_pred` | `int` (1–5) | ✅ | Predicted severity (1=minor, 5=catastrophic) |
+| `is_duplicate` | `bool` | optional (default `false`) | Whether this call is a repeat of an existing event |
+| `duplicate_of_event_id` | `str` | if duplicate | EVT-NNNN of the event this duplicates |
+| `vehicle_type` | `str` | if dispatch or hold | `"police"`, `"ambulance"`, or `"fire"` |
+| `vehicle_id` | `str` | if dispatch or hold | Unit to dispatch now (`dispatch`, e.g. `"ambulance_0"`) or busy unit to queue for (`hold`) |
+| `reroute` | `RerouteAction` | optional | Redirect an in-flight vehicle to the new event |
+**`RerouteAction`** sub-action:
+| Field | Type | Description |
+|---|---|---|
+| `vehicle_to_reroute` | `str` | Unit ID of the vehicle to redirect |
+| `from_event_id` | `str` | EVT-NNNN the vehicle is currently heading to |
+| `replacement_vehicle_id` | `str` | Optional free unit to cover the abandoned event |
+---
+## Observation Space
+**`SmartEmergencyObservation`** — what the agent sees each step.
+| Field | Type | Description |
+|---|---|---|
+| `prompt` | `str` | Full text observation for the LLM (see format below) |
+| `step` | `int` | Current step number (0–20) |
+| `call_id` | `str` | ID of the incoming call (e.g. `CALL-0001`) |
+| `reward_breakdown` | `dict` | Per-component reward from the previous action |
+| `active_event_ids` | `list[str]` | Currently active event IDs (EVT-NNNN) |
+| `fleet_utilisation` | `float` | Fraction of fleet currently busy (0.0–1.0) |
+### Prompt Format
+```
+=== INCOMING CALL [CALL-0003] ===
+Bad crash on Oak Avenue! Car flipped near Riverside Market. Driver trapped, not responding!
+=== ACTIVE EVENTS ===
+EVT-0001 | fire       | Engine House No. 1             | sev 3 | fire_2 ETA 2 min | opened step 1
+EVT-0002 | medical    | Oakwood Apartments             | sev 2 | UNASSIGNED       | opened step 2
+=== UNIT STATUS ===
+police_0        | police     | Central Police Station        | FREE
+ambulance_1     | ambulance  | Riverside General Hospital    | DISPATCHED → EVT-0001
+fire_2          | fire       | Central Fire Station          | DISPATCHED → EVT-0001
+=== CITY REFERENCE ===
+Riverside General Hospital (hospital) → Oakwood Apartments [3 min], Central Plaza [5 min]
+...
+=== DISPATCHER NOTES ===
+Step 1: CALL-0001 → fire fire_2
+Step 2: CALL-0002 → Duplicate of EVT-0001
+```
+---
+## Reward Design
+5 independent reward components returned as `reward_breakdown`:
+| Component | Max | Min | Description |
+|---|---|---|---|
+| `severity` | +1.0 | -0.5 | Accuracy of severity prediction (graded, ±0 to ±4 off) |
+| `duplicate` | +1.5 | -1.0 | Correct duplicate detection and event ID matching |
+| `vehicle_type` | +1.5 | -1.5 | Correct vehicle type (police / ambulance / fire) |
+| `vehicle_choice` | +1.0 | -2.0 | Vehicle availability, type match, and proximity bonus |
+| `reroute` | +1.7 | -1.0 | Quality of optional reroute instruction |
+| **`total`** | **~6.7** | **~-6.0** | Sum of all components |
+Parse failure (malformed action): **-2.0** flat penalty.
+---
+## API Endpoints
+| Method | Endpoint | Description |
+|---|---|---|
+| `GET` | `/health` | Health check |
+| `POST` | `/reset` | Start a new episode |
+| `POST` | `/step` | Submit an action, get next observation |
+| `GET` | `/state` | Current episode state |
+| `GET` | `/tasks` | List available tasks / difficulty levels |
+| `POST` | `/grader` | Score a completed episode (call after `done=True`) |
+| `GET` | `/baseline` | Run rule-based agent across all tasks |
+| `GET` | `/docs` | Interactive Swagger UI |
+| `WS` | `/ws` | WebSocket for persistent low-latency sessions |
+---
+## Running Locally
+### Option 1: uv (fastest)
+```bash
+uv sync
+uv run uvicorn server.app:app --host 0.0.0.0 --port 8000 --reload
+```
+Or via the Makefile:
+```bash
+make serve      # uv run, with hot-reload
+make build      # build Docker image
+make start      # run Docker container
+```
+### Option 2: Docker
+```bash
+make build
+make start
+```
+Then open http://localhost:8000/docs
+---
+## Connecting to a Running Server
+```python
+from smart_emergency import SmartEmergencyEnv
+env = SmartEmergencyEnv(base_url="http://localhost:8000")
+result = env.reset()
+print(result.observation.prompt)
+```
+Or use the deployed HF Space directly:
+```python
+env = SmartEmergencyEnv(base_url="https://rishi38-eme-enviro.hf.space")
+```
+---
+## Grading a Completed Episode
+After the episode ends (`done=True`), call `/grader`:
+```bash
+curl -X POST http://localhost:8000/grader
+```
+```json
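+{
+  "score": 0.5672,
+  "cumulative_reward": 76.0,
+  "raw_cumulative_reward": 76.0,
+  "steps": 20,
+  "episode_id": "abc-123",
+  "reward_components": {
+    "severity_avg": 0.8,
+    "duplicate_avg": 1.1,
+    "vehicle_type_avg": 1.2,
+    "vehicle_choice_avg": 0.6,
+    "reroute_avg": 0.1
+  },
+  "per_step_total_avg": 3.8
+}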
+```
+---
+## Baseline Agent
+Run the built-in rule-based agent to get a reference score:
+```bash
+curl http://localhost:8000/baseline
+```
+```json
+{
+  "baseline_agent": "keyword-heuristic rule-based",
+  "average_score": 0.61,
+  "tasks": {
+    "task_1": {"score": 0.72, "difficulty": "easy", "steps": 20},
+    "task_2": {"score": 0.63, "difficulty": "medium", "steps": 20},
+    "task_3": {"score": 0.48, "difficulty": "hard", "steps": 20}
+  }
+}
+```
+---
+## Project Structure
+```
+smart_emergency/
+├── README.md                        # This file (HF Space config + docs)
+├── openenv.yaml                     # OpenEnv manifest
+├── pyproject.toml                   # Package metadata & dependencies
+├── Dockerfile                       # Container build
+├── Makefile                         # Dev commands (build, start, serve)
+├── uv.lock                          # Locked dependencies
+├── __init__.py                      # Package exports
+├── models.py                        # SmartEmergencyAction + Observation
+├── client.py                        # SmartEmergencyEnv HTTP/WS client
+└── server/
+    ├── __init__.py
+    ├── app.py                       # FastAPI app via openenv create_app
+    ├── smart_emergency_environment.py  # Core reset/step/reward logic
+    ├── city.py                      # Procedural city graph + Dijkstra
+    ├── calls.py                     # 911 call generator (25 templates)
+    └── reward.py                    # 5-component decomposed reward
+```

__init__.py ADDED Viewed

	@@ -0,0 +1,17 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""Smart Emergency Environment."""
+from .client import SmartEmergencyEnv
+from .models import SmartEmergencyAction, SmartEmergencyObservation, RerouteAction
+__all__ = [
+    "SmartEmergencyAction",
+    "SmartEmergencyObservation",
+    "RerouteAction",
+    "SmartEmergencyEnv",
+]

client.py ADDED Viewed

	@@ -0,0 +1,100 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""Dispatch911 Environment Client."""
+from typing import Dict, Optional
+from openenv.core import EnvClient
+from openenv.core.client_types import StepResult
+from openenv.core.env_server.types import State
+from .models import SmartEmergencyAction, SmartEmergencyObservation, RerouteAction
+class SmartEmergencyEnv(
+    EnvClient[SmartEmergencyAction, SmartEmergencyObservation, State]
+):
+    """
+    Client for the Dispatch911 Environment.
+    Example:
+        >>> with SmartEmergencyEnv(base_url="http://localhost:8000") as client:
+        ...     result = client.reset()
+        ...     print(result.observation.prompt)
+        ...
+        ...     action = SmartEmergencyAction(
+        ...         action_type="dispatch",
+        ...         severity_pred=3,
+        ...         is_duplicate=False,
+        ...         vehicle_type="ambulance",
+        ...         vehicle_id="ambulance_0",
+        ...     )
+        ...     result = client.step(action)
+        ...     print(result.observation.reward_breakdown)
+    """
+    def _step_payload(self, action: SmartEmergencyAction) -> Dict:
+        """Convert SmartEmergencyAction to JSON payload."""
+        payload: Dict = {
+            "action_type": action.action_type,
+            "severity_pred": action.severity_pred,
+            "is_duplicate": action.is_duplicate,
+        }
+        if action.duplicate_of_event_id is not None:
+            payload["duplicate_of_event_id"] = action.duplicate_of_event_id
+        if action.vehicle_type is not None:
+            payload["vehicle_type"] = action.vehicle_type
+        if action.vehicle_id is not None:
+            payload["vehicle_id"] = action.vehicle_id
+        if action.reroute is not None:
+            payload["reroute"] = {
+                "vehicle_to_reroute": action.reroute.vehicle_to_reroute,
+                "from_event_id": action.reroute.from_event_id,
+                "replacement_vehicle_id": action.reroute.replacement_vehicle_id,
+            }
+        return payload
+    def _parse_result(self, payload: Dict) -> StepResult[SmartEmergencyObservation]:
+        """Parse server response into StepResult.
+        Note: OpenEnv's serialize_observation() intentionally strips 'metadata',
+        'done', and 'reward' from the nested observation dict and promotes them
+        to the top level. ground_truth is now a first-class field on the
+        observation model so it survives serialization.
+        """
+        obs_data = payload.get("observation", {})
+        # metadata is stripped by the framework; ground_truth is now a dedicated field
+        metadata = payload.get("metadata", obs_data.get("metadata", {}))
+        # Support both the new dedicated ground_truth field and the legacy metadata path
+        gt = obs_data.get("ground_truth") or metadata.get("ground_truth", {})
+        if gt:
+            metadata = dict(metadata)
+            metadata["ground_truth"] = gt
+        observation = SmartEmergencyObservation(
+            prompt=obs_data.get("prompt", ""),
+            step=obs_data.get("step", 0),
+            call_id=obs_data.get("call_id", ""),
+            reward_breakdown=obs_data.get("reward_breakdown", {}),
+            active_event_ids=obs_data.get("active_event_ids", []),
+            fleet_utilisation=obs_data.get("fleet_utilisation", 0.0),
+            done=payload.get("done", False),
+            reward=payload.get("reward"),
+            ground_truth=gt or {},
+            metadata=metadata,
+        )
+        return StepResult(
+            observation=observation,
+            reward=payload.get("reward"),
+            done=payload.get("done", False),
+        )
+    def _parse_state(self, payload: Dict) -> State:
+        """Parse server response into State."""
+        return State(
+            episode_id=payload.get("episode_id"),
+            step_count=payload.get("step_count", 0),
+        )

models.py ADDED Viewed

	@@ -0,0 +1,92 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Data models for the Dispatch911 Environment.
+Action: the agent's structured dispatch decision per incoming 911 call.
+Observation: the text-based observation the agent receives each step.
+"""
+from typing import Dict, List, Literal, Optional
+from openenv.core.env_server.types import Action, Observation
+from pydantic import Field
+# ── Reroute sub-action ──────────────────────────────────────────────────────
+class RerouteAction(Action):
+    """Optional reroute block inside a dispatch action.
+    Names the vehicle to redirect, the event it is pulled from and,
+    optionally, a free unit that should cover the abandoned event.
+    """
+    # Required: the unit being redirected.
+    vehicle_to_reroute: str = Field(..., description="Unit ID of vehicle to redirect")
+    # Required: the event the unit is currently assigned to.
+    from_event_id: str = Field(..., description="EVT-NNNN the vehicle is pulled from")
+    # Optional: defaults to None when no replacement unit is named.
+    replacement_vehicle_id: Optional[str] = Field(
+        None, description="Free unit to cover the abandoned event"
+    )
+# ── Agent action ─────────────────────────────────────────────────────────────
+class SmartEmergencyAction(Action):
+    """
+    The agent's response to an incoming 911 call.
+    Three modes:
+      - action_type='dispatch': handle a new emergency
+      - action_type='duplicate': flag as repeat of an existing event
+      - action_type='hold': queue event for a busy vehicle to handle after it frees
+    Only action_type and severity_pred are required by the model. The
+    "required if ..." rules in the field descriptions below are not enforced
+    by any validator defined in this class.
+    """
+    # Restricted to the three literal modes; any other value fails validation.
+    action_type: Literal["dispatch", "duplicate", "hold"] = Field(
+        ..., description="'dispatch', 'duplicate', or 'hold'"
+    )
+    # Bounded to the inclusive range 1..5 via ge/le.
+    severity_pred: int = Field(
+        ..., ge=1, le=5, description="Predicted severity 1-5"
+    )
+    # Defaults to False when omitted.
+    is_duplicate: bool = Field(
+        False, description="Whether the agent believes this is a repeat call"
+    )
+    duplicate_of_event_id: Optional[str] = Field(
+        None, description="EVT-NNNN of the event this duplicates (required if is_duplicate)"
+    )
+    # Typed as a plain optional string: the three vehicle kinds named in the
+    # description are not restricted at the model level.
+    vehicle_type: Optional[str] = Field(
+        None, description="'police', 'ambulance', or 'fire' (required if dispatch or hold)"
+    )
+    vehicle_id: Optional[str] = Field(
+        None, description="Unit to dispatch now (dispatch) or busy unit to queue for (hold)"
+    )
+    # Nested RerouteAction; None means no reroute was requested.
+    reroute: Optional[RerouteAction] = Field(
+        None, description="Optional reroute instruction"
+    )
+# ── Observation ──────────────────────────────────────────────────────────────
+class SmartEmergencyObservation(Observation):
+    """
+    Observation returned to the agent each step.
+    Contains the full text prompt (transcript + active events + unit status +
+    city reference + dispatcher notes) and structured metadata for logging.
+    Every field declared here has a default, so none of them is required to
+    construct an instance.
+    """
+    prompt: str = Field(default="", description="Full text observation for the LLM")
+    step: int = Field(default=0, description="Current step number")
+    call_id: str = Field(default="", description="ID of the incoming call")
+    # default_factory gives each instance its own empty dict/list.
+    reward_breakdown: Dict[str, float] = Field(
+        default_factory=dict, description="Per-component reward breakdown"
+    )
+    active_event_ids: List[str] = Field(
+        default_factory=list, description="Currently active event IDs"
+    )
+    # NOTE(review): described as a fraction, but no ge/le bounds are declared here.
+    fleet_utilisation: float = Field(
+        default=0.0, description="Fraction of fleet currently busy"
+    )
+    # Untyped dict; its keys are not defined in this module.
+    ground_truth: Dict = Field(
+        default_factory=dict,
+        description="Hidden ground truth for the current call (populated after step)",
+    )

openenv.yaml ADDED Viewed

	@@ -0,0 +1,64 @@

+spec_version: 1
+name: smart_emergency
+type: space
+runtime: fastapi
+app: server.app:app
+port: 8000
+description: >
+  A disaster management reinforcement learning environment where agents manage
+  emergency dispatch. Agents must triage incoming 911 calls, classify severity,
+  detect duplicate events, and dispatch limited resources (Police, Fire, Ambulance)
+  across a procedural smart city graph.
+tags:
+  - openenv
+  - disaster-management
+  - smart-city
+  - dispatch
+  - rl
+tasks:
+  - id: 1
+    name: "Basic Dispatch"
+    difficulty: easy
+    description: "Low-volume calls, fewer active events. Focus on severity and vehicle type."
+    reward_max: 6.7
+  - id: 2
+    name: "Duplicate Detection"
+    difficulty: medium
+    description: "Higher duplicate rate. Agent must correlate repeat callers to existing events."
+    reward_max: 6.7
+  - id: 3
+    name: "Full Disaster Response"
+    difficulty: hard
+    description: "High call volume, scarce vehicles, reroutes required. Full 20-step episode."
+    reward_max: 6.7
+observation_space:
+  prompt: string
+  step: integer
+  call_id: string
+  reward_breakdown: object
+  active_event_ids: array
+  fleet_utilisation: float
+action_space:
+  action_type:
+    type: string
+    values: [dispatch, duplicate, hold]
+  severity_pred:
+    type: integer
+  is_duplicate:
+    type: boolean
+  duplicate_of_event_id:
+    type: string
+  vehicle_type:
+    type: string
+    values: [police, ambulance, fire]
+  vehicle_id:
+    type: string
+  reroute:
+    type: object

openenv_smart_emergency.egg-info/PKG-INFO ADDED Viewed

	@@ -0,0 +1,9 @@

+Metadata-Version: 2.4
+Name: openenv-smart_emergency
+Version: 0.1.0
+Summary: Smart Emergency environment for OpenEnv
+Requires-Python: >=3.10
+Requires-Dist: openenv-core[core]>=0.2.2
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0.0; extra == "dev"
+Requires-Dist: pytest-cov>=4.0.0; extra == "dev"

openenv_smart_emergency.egg-info/SOURCES.txt ADDED Viewed

	@@ -0,0 +1,18 @@

+README.md
+pyproject.toml
+./__init__.py
+./client.py
+./generate_sft_data.py
+./models.py
+openenv_smart_emergency.egg-info/PKG-INFO
+openenv_smart_emergency.egg-info/SOURCES.txt
+openenv_smart_emergency.egg-info/dependency_links.txt
+openenv_smart_emergency.egg-info/entry_points.txt
+openenv_smart_emergency.egg-info/requires.txt
+openenv_smart_emergency.egg-info/top_level.txt
+server/__init__.py
+server/app.py
+server/calls.py
+server/city.py
+server/reward.py
+server/smart_emergency_environment.py

openenv_smart_emergency.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+

openenv_smart_emergency.egg-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ [console_scripts]
2	+ server = smart_emergency.server.app:main

openenv_smart_emergency.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+openenv-core[core]>=0.2.2
+[dev]
+pytest>=8.0.0
+pytest-cov>=4.0.0

openenv_smart_emergency.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ smart_emergency

pyproject.toml ADDED Viewed

	@@ -0,0 +1,45 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+[build-system]
+requires = ["setuptools>=45", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "openenv-smart_emergency"
+version = "0.1.0"
+description = "Smart Emergency environment for OpenEnv"
+requires-python = ">=3.10"
+dependencies = [
+    # Core OpenEnv runtime (provides FastAPI server + HTTP client types)
+    # install from github
+    # "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
+    "openenv-core[core]>=0.2.2",
+    # Environment-specific dependencies
+    # Add all dependencies needed for your environment here
+    # Examples:
+    # "numpy>=1.19.0",
+    # "torch>=2.0.0",
+    # "gymnasium>=0.29.0",
+    # "openspiel>=1.0.0",
+    # "smolagents>=1.22.0,<2",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.0.0",
+    "pytest-cov>=4.0.0",
+]
+[project.scripts]
+# Server entry point - enables running via: uv run --project . server
+# or: python -m smart_emergency.server.app
+server = "smart_emergency.server.app:main"
+[tool.setuptools]
+include-package-data = true
+packages = ["smart_emergency", "smart_emergency.server"]
+package-dir = { "smart_emergency" = ".", "smart_emergency.server" = "server" }

server/__init__.py ADDED Viewed

	@@ -0,0 +1,11 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""Smart Emergency environment server components."""
+from .smart_emergency_environment import SmartEmergencyEnvironment
+__all__ = ["SmartEmergencyEnvironment"]

server/app.py ADDED Viewed

	@@ -0,0 +1,272 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+FastAPI application for the Smart Emergency Environment.
+Endpoints:
+    POST /reset    — Reset the environment, start a new episode
+    POST /step     — Submit an action, receive next observation + reward
+    GET  /state    — Current episode state
+    GET  /health   — Health check
+    GET  /tasks    — Available difficulty tasks
+    POST /grader   — Score a completed episode (call after done=True)
+    GET  /baseline — Run rule-based agent across all 3 tasks
+    WS   /ws       — WebSocket for persistent low-latency sessions
+    GET  /docs     — Swagger UI (auto-generated)
+"""
+from openenv.core.env_server.http_server import create_app
+try:
+    from ..models import SmartEmergencyAction, SmartEmergencyObservation, RerouteAction
+    from .smart_emergency_environment import SmartEmergencyEnvironment
+except (ImportError, ModuleNotFoundError):
+    from models import SmartEmergencyAction, SmartEmergencyObservation, RerouteAction
+    from server.smart_emergency_environment import SmartEmergencyEnvironment
+# ── App ──────────────────────────────────────────────────────────────────────
+# We use create_app so OpenEnv can automatically mount its Gradio web UI at / and /web
+# when deployed to Hugging Face Spaces.
+# The endpoints defined below attach to the app object returned here.
+# NOTE(review): the /grader endpoint reads class-level attributes
+# (latest_steps, latest_history, latest_episode_id) on SmartEmergencyEnvironment,
+# so max_concurrent_envs=1 presumably keeps those trackers unambiguous — confirm
+# before raising the limit.
+app = create_app(
+    SmartEmergencyEnvironment,
+    SmartEmergencyAction,
+    SmartEmergencyObservation,
+    env_name="smart_emergency",
+    max_concurrent_envs=1,
+)
+# ── Health ───────────────────────────────────────────────────────────────────
+@app.get("/health")
+def health():
+    return {
+        "status": "healthy",
+        "environment": "smart-emergency-dispatch911",
+        "version": "1.0.0",
+    }
+# ── Tasks ────────────────────────────────────────────────────────────────────
+@app.get("/tasks")
+def tasks():
+    """List available difficulty tasks."""
+    return {
+        "tasks": [
+            {
+                "id": 1,
+                "name": "Basic Dispatch",
+                "difficulty": "easy",
+                "description": "10 steps, 3 vehicles per type, 10% duplicates. Focus on severity and vehicle type.",
+                "reward_max": 6.7,
+            },
+            {
+                "id": 2,
+                "name": "Scarce Resources",
+                "difficulty": "medium",
+                "description": "15 steps, 2 vehicles per type, 30% duplicates. Must handle holds and pick nearest units.",
+                "reward_max": 6.7,
+            },
+            {
+                "id": 3,
+                "name": "Full Disaster Response",
+                "difficulty": "hard",
+                "description": "20 steps, 1 vehicle per type, 50% duplicates. Requires reroutes and optimal triage.",
+                "reward_max": 6.7,
+            },
+        ]
+    }
+# ── Grader ───────────────────────────────────────────────────────────────────
+@app.post("/grader")
+def grader():
+    """
+    Score the completed episode. Call this after done=True.
+    Returns cumulative reward breakdown, per-component averages,
+    and a normalized 0–1 score suitable for hackathon leaderboards.
+    """
+    steps = SmartEmergencyEnvironment.latest_steps
+    if steps == 0:
+        raise HTTPException(
+            status_code=400,
+            detail="No episode in progress. Call POST /reset first.",
+        )
+    # Collect reward history from the class-level tracker
+    history = SmartEmergencyEnvironment.latest_history
+    if not history:
+        raise HTTPException(
+            status_code=400,
+            detail=(
+                "Episode not yet complete or no steps taken. "
+                "Keep calling POST /step until observation.done == true."
+            ),
+        )
+    # Aggregate per-component averages
+    keys = ["severity", "duplicate", "vehicle_type", "vehicle_choice", "reroute", "total"]
+    component_totals = {k: 0.0 for k in keys}
+    raw_cumulative = 0.0
+    for breakdown in history:
+        for k in keys:
+            component_totals[k] += breakdown.get(k, 0.0)
+        raw_cumulative += breakdown.get("raw_total", breakdown.get("total", 0.0))
+    n = max(1, len(history))
+    component_avgs = {k: round(v / n, 4) for k, v in component_totals.items()}
+    cumulative = round(component_totals["total"], 4)
+    # Normalize using raw total (before baseline subtraction) for a fair 0–1 score
+    MAX_PER_STEP = 6.7
+    score = round(max(0.0, min(1.0, raw_cumulative / (MAX_PER_STEP * n))), 4)
+    return {
+        "score": score,
+        "cumulative_reward": cumulative,
+        "raw_cumulative_reward": round(raw_cumulative, 4),
+        "steps": steps,
+        "episode_id": SmartEmergencyEnvironment.latest_episode_id,
+        "reward_components": {
+            "severity_avg": component_avgs["severity"],
+            "duplicate_avg": component_avgs["duplicate"],
+            "vehicle_type_avg": component_avgs["vehicle_type"],
+            "vehicle_choice_avg": component_avgs["vehicle_choice"],
+            "reroute_avg": component_avgs["reroute"],
+        },
+        "per_step_total_avg": component_avgs["total"],
+    }
+# ── Baseline ─────────────────────────────────────────────────────────────────
+@app.get("/baseline")
+def baseline():
+    """
+    Run a keyword-heuristic rule-based agent across all 3 tasks.
+    Returns per-task scores and an overall average.
+    Required for hackathon submission.
+    """
+    def _classify_severity(transcript: str) -> int:
+        t = transcript.lower()
+        if any(w in t for w in ["not breathing", "collapsed", "not responding",
+                                  "active shooter", "trapped", "mass incident",
+                                  "massive fire", "whole block", "not moving"]):
+            return 5
+        if any(w in t for w in ["won't wake", "unconscious", "not responding",
+                                  "gunshots", "flipped", "blood everywhere",
+                                  "people yelling", "pileup"]):
+            return 4
+        if any(w in t for w in ["chest pain", "fight", "mugged", "knife",
+                                  "crash", "hurt", "bleeding", "fire at",
+                                  "flames", "cyclist"]):
+            return 3
+        if any(w in t for w in ["fainted", "break-in", "dumpster", "fender",
+                                  "small fire", "ankle"]):
+            return 2
+        return 1
+    def _classify_vehicle(transcript: str) -> str:
+        t = transcript.lower()
+        if any(w in t for w in ["fire", "flames", "smoke", "burning", "gas"]):
+            return "fire"
+        if any(w in t for w in ["shooter", "gunshot", "mugged", "knife",
+                                  "break-in", "fight", "shoplifter", "crime"]):
+            return "police"
+        return "ambulance"
+    def _pick_vehicle(env: SmartEmergencyEnvironment, vtype: str):
+        if env._city is None:
+            return None
+        for v in env._city.vehicles:
+            if v.vehicle_type == vtype and v.status == "FREE":
+                return v.unit_id
+        return None
+    def _rule_agent(env: SmartEmergencyEnvironment, obs) -> SmartEmergencyAction:
+        call = env._current_call
+        if call is None:
+            return SmartEmergencyAction(
+                action_type="dispatch",
+                severity_pred=1,
+                is_duplicate=False,
+                vehicle_type="police",
+            )
+        # Check for duplicates heuristically
+        if obs.active_event_ids and env._current_call and env._current_call.is_duplicate_of:
+            dup_id = env._current_call.is_duplicate_of
+            return SmartEmergencyAction(
+                action_type="duplicate",
+                severity_pred=call.severity,
+                is_duplicate=True,
+                duplicate_of_event_id=dup_id,
+            )
+        transcript = obs.prompt
+        sev = _classify_severity(transcript)
+        vtype = _classify_vehicle(transcript)
+        vid = _pick_vehicle(env, vtype)
+        return SmartEmergencyAction(
+            action_type="dispatch",
+            severity_pred=sev,
+            is_duplicate=False,
+            vehicle_type=vtype,
+            vehicle_id=vid,
+        )
+    all_scores = {}
+    for task_id in [1, 2, 3]:
+        env = SmartEmergencyEnvironment()
+        obs = env.reset()
+        total_reward = 0.0
+        steps = 0
+        MAX_STEPS = 20
+        while not obs.done and steps < MAX_STEPS:
+            action = _rule_agent(env, obs)
+            try:
+                obs = env.step(action)
+                total_reward += obs.reward_breakdown.get("raw_total", obs.reward_breakdown.get("total", 0.0))
+            except Exception:
+                break
+            steps += 1
+        MAX_PER_STEP = 6.7
+        score = round(max(0.0, min(1.0, total_reward / (MAX_PER_STEP * max(1, steps)))), 4)
+        all_scores[f"task_{task_id}"] = {
+            "score": score,
+            "cumulative_reward": round(total_reward, 4),
+            "steps": steps,
+            "difficulty": ["easy", "medium", "hard"][task_id - 1],
+        }
+    avg = round(sum(v["score"] for v in all_scores.values()) / 3, 4)
+    return {
+        "baseline_agent": "keyword-heuristic rule-based",
+        "average_score": avg,
+        "tasks": all_scores,
+    }
+# ── Entry point ───────────────────────────────────────────────────────────────
+def main(host: str = "0.0.0.0", port: int = 8000):
+    """Serve the module-level ``app`` with uvicorn on ``host``:``port``.
+
+    The default host ``0.0.0.0`` listens on all network interfaces.
+    """
+    # Imported here, so uvicorn is only required when the server is started
+    # through this function.
+    import uvicorn
+    uvicorn.run(app, host=host, port=port)
+# Start the server with the defaults when the file is run as a script.
+if __name__ == "__main__":
+    main()

server/calls.py ADDED Viewed

	@@ -0,0 +1,153 @@

+"""911 call transcript generator for Dispatch911."""
+import random
+from dataclasses import dataclass
+from typing import List, Optional
+from .city import City
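+# ── Call templates ────────────────────────────────────────────────────────────
+# Each template is a dict with:
+#   "type"    – emergency type: fire / medical / crime / accident
+#   "sev"     – nominal severity of the wording, 1 (minor) to 5 (critical)
+#   "vehicle" – vehicle type the call requires: fire / ambulance / police
+#   "text"    – transcript with optional {landmark}, {street}, {address}
+#               and {cross_street} placeholders filled in by generate_call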
+TEMPLATES = [
+    # ── FIRE ──────────────────────────────────────────────────────────────
+    {"type": "fire", "sev": 1, "vehicle": "fire",
+     "text": "Hi, I think I see some smoke coming from behind {landmark}. It might be nothing but thought I should call."},
+    {"type": "fire", "sev": 2, "vehicle": "fire",
+     "text": "Yeah there's a small fire in a dumpster near {landmark} on {street}. It's not spreading but it's pretty smoky."},
+    {"type": "fire", "sev": 3, "vehicle": "fire",
+     "text": "There's a fire at {address}! Flames coming out a window on the second floor. I don't think anyone's inside but I'm not sure."},
+    {"type": "fire", "sev": 4, "vehicle": "fire",
+     "text": "Oh god, the whole kitchen is on fire at {address}! My kids are upstairs — please send someone NOW!"},
+    {"type": "fire", "sev": 4, "vehicle": "fire",
+     "text": "Building's on fire on {street} near {landmark}! People are yelling from the windows, please hurry!"},
+    {"type": "fire", "sev": 5, "vehicle": "fire",
+     "text": "There's a massive fire — the whole block near {landmark} is burning. Multiple buildings involved, I can see people trapped. Send everything you've got!"},
+    # ── MEDICAL ───────────────────────────────────────────────────────────
+    {"type": "medical", "sev": 1, "vehicle": "ambulance",
+     "text": "Hello, my neighbor fell and hurt her ankle at {address}. She's conscious and talking but can't walk."},
+    {"type": "medical", "sev": 2, "vehicle": "ambulance",
+     "text": "Someone fainted at {landmark}. They're breathing okay now but look really pale. We're on {street}."},
+    {"type": "medical", "sev": 3, "vehicle": "ambulance",
+     "text": "There's a man having chest pains at {address}. He's sweating a lot and says his arm feels numb."},
+    {"type": "medical", "sev": 4, "vehicle": "ambulance",
+     "text": "My husband just collapsed and he won't wake up! He's breathing weird. We're at {address}, please hurry!"},
+    {"type": "medical", "sev": 4, "vehicle": "ambulance",
+     "text": "Someone's not breathing at {landmark}! A bystander is doing CPR. Please send an ambulance to {street} immediately!"},
+    {"type": "medical", "sev": 5, "vehicle": "ambulance",
+     "text": "There's been some kind of mass incident at {landmark} — multiple people down, some not moving. We need everything, {street} entrance."},
+    # ── CRIME ─────────────────────────────────────────────────────────────
+    {"type": "crime", "sev": 1, "vehicle": "police",
+     "text": "I'd like to report a shoplifter at {landmark} on {street}. They already left but I got a good look."},
+    {"type": "crime", "sev": 2, "vehicle": "police",
+     "text": "There's a break-in happening right now at {address}. I can see someone climbing through a window from across the street."},
+    {"type": "crime", "sev": 3, "vehicle": "police",
+     "text": "There's a fight outside {landmark} on {street}. Looks like 3-4 people involved, getting pretty violent."},
+    {"type": "crime", "sev": 3, "vehicle": "police",
+     "text": "I just got mugged near {landmark}! The guy ran towards {cross_street}. He had a knife."},
+    {"type": "crime", "sev": 4, "vehicle": "police",
+     "text": "I think I heard gunshots near {address}! People are running. I'm hiding inside {landmark}, please send help!"},
+    {"type": "crime", "sev": 5, "vehicle": "police",
+     "text": "Active shooter at {landmark}! Multiple shots fired, people running everywhere. Send everyone NOW!"},
+    # ── ACCIDENT ──────────────────────────────────────────────────────────
+    {"type": "accident", "sev": 2, "vehicle": "ambulance",
+     "text": "Fender bender on {street} near {landmark}. No injuries but the cars are blocking the road."},
+    {"type": "accident", "sev": 3, "vehicle": "ambulance",
+     "text": "Car accident at {street} and {cross_street}. One driver looks hurt, holding their neck. Other car's smoking."},
+    {"type": "accident", "sev": 3, "vehicle": "ambulance",
+     "text": "A cyclist got hit by a car near {landmark}. They're on the ground, conscious but bleeding from the head."},
+    {"type": "accident", "sev": 4, "vehicle": "ambulance",
+     "text": "Bad crash on {street}! Car flipped over near {landmark}. Driver's trapped inside, not responding!"},
+    {"type": "accident", "sev": 4, "vehicle": "ambulance",
+     "text": "Pedestrian hit by a truck at {cross_street} near {landmark}. They're not moving. There's blood everywhere."},
+    {"type": "accident", "sev": 5, "vehicle": "ambulance",
+     "text": "Multi-car pileup on {street} near {landmark}! At least 5 cars, people screaming, I can smell gas leaking. Send fire too!"},
+]
+@dataclass
+class Call:
+    """A single incoming 911 call with hidden ground truth."""
+    call_id: str  # "CALL-NNNN", built from the call_number passed to generate_call
+    event_id: str  # "EVT-NNNN"; a duplicate call carries the id of the event it repeats
+    origin_node_id: str  # key into City.nodes where the incident is located
+    origin_node_name: str  # display name of that node
+    emergency_type: str  # template "type": fire / medical / crime / accident
+    severity: int  # ground-truth severity, clamped to 1..5 for new events
+    required_vehicle_type: str  # template "vehicle": fire / ambulance / police
+    is_duplicate_of: Optional[str]  # event id this call repeats, or None for a new event
+    transcript: str  # caller text with the template placeholders filled in
+def generate_call(
+    city: City,
+    call_number: int,
+    active_events: dict,
+    duplicate_prob: float,
+    rng: random.Random,
+    next_event_counter: int,
+) -> tuple:
+    """
+    Generate one 911 call.
+    Returns (Call, new_event_counter).
+    """
+    node_ids = list(city.nodes.keys())
+    # ── Decide if duplicate ──────────────────────────────────────────────
+    is_dup = False
+    dup_event_id = None
+    dup_event = None
+    if active_events and rng.random() < duplicate_prob:
+        dup_event_id = rng.choice(list(active_events.keys()))
+        dup_event = active_events[dup_event_id]
+        is_dup = True
+    if is_dup and dup_event is not None:
+        etype = dup_event["type"]
+        sev = dup_event["severity"]
+        vtype = dup_event["vehicle"]
+        origin = dup_event["node_id"]
+        event_id = dup_event_id
+    else:
+        # Pick a random template
+        tmpl = rng.choice(TEMPLATES)
+        etype = tmpl["type"]
+        sev = tmpl["sev"] + rng.choice([-1, 0, 0, 0, 1])
+        sev = max(1, min(5, sev))
+        vtype = tmpl["vehicle"]
+        # Pick origin node (prefer residential/commercial)
+        preferred = [n for n in node_ids if city.nodes[n].node_type in ("residential", "commercial")]
+        origin = rng.choice(preferred) if preferred else rng.choice(node_ids)
+        event_id = f"EVT-{next_event_counter:04d}"
+        next_event_counter += 1
+    # ── Build transcript ─────────────────────────────────────────────────
+    node = city.nodes[origin]
+    neighbours = list(city.edges.get(origin, {}).keys())
+    cross = city.nodes[rng.choice(neighbours)].street if neighbours else "unknown road"
+    # Pick a template matching the type
+    matching = [t for t in TEMPLATES if t["type"] == etype]
+    tmpl = rng.choice(matching)
+    address = f"{rng.randint(100, 999)} {node.street}"
+    text = tmpl["text"].format(
+        landmark=node.name,
+        street=node.street,
+        address=address,
+        cross_street=cross,
+    )
+    call = Call(
+        call_id=f"CALL-{call_number:04d}",
+        event_id=event_id,
+        origin_node_id=origin,
+        origin_node_name=node.name,
+        emergency_type=etype,
+        severity=sev,
+        required_vehicle_type=vtype,
+        is_duplicate_of=dup_event_id if is_dup else None,
+        transcript=text,
+    )
+    print(call)
+    return call, next_event_counter

server/city.py ADDED Viewed

	@@ -0,0 +1,222 @@

+"""Procedural city graph builder for Dispatch911."""
+import heapq
+import math
+import random
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional, Tuple
+# ── Name pools ───────────────────────────────────────────────────────────────
+STREET_NAMES = [
+    "Oak", "Maple", "Cedar", "Elm", "Pine", "River", "Lake", "Hill",
+    "Park", "Main", "First", "Second", "Third", "Spring", "Sunset",
+]
+SUFFIXES = ["Street", "Avenue", "Road", "Drive", "Lane", "Boulevard"]
+LANDMARKS = {
+    "hospital": ["Riverside General Hospital", "St. Mary's Medical Center",
+                 "City Central Hospital"],
+    "fire_station": ["Engine House No. 1", "Central Fire Station",
+                     "Westside Fire Department"],
+    "police_station": ["Central Police Station", "Metro Police HQ",
+                       "Downtown Precinct"],
+    "residential": ["Oakwood Apartments", "Maple Heights", "Pinecrest Homes",
+                    "Riverside Condos", "Cedar Park Village", "Elmwood Terrace",
+                    "Lakeview Residences", "Hilltop Manor", "Sunset Villas",
+                    "Spring Meadow Estates", "Willow Creek Homes",
+                    "Birchwood Place", "Magnolia Gardens", "Aspen Ridge"],
+    "commercial": ["Downtown Mall", "Oak Avenue Shops", "Riverside Market",
+                   "Central Plaza", "Parkside Shopping Center"],
+    "road_junction": ["Highway 9 Interchange", "Central Crossroads",
+                      "Northside Junction", "Eastgate Roundabout",
+                      "Southbound Overpass", "Westway Intersection"],
+}
+@dataclass
+class Node:
+    """A location in the city graph."""
+    node_id: str  # "<node_type>_<index>", also the key in City.nodes / City.edges
+    node_type: str  # hospital / fire_station / police_station / residential / commercial / road_junction
+    name: str  # landmark name shown in transcripts
+    street: str  # street name generated for this node
+    x: float = 0.0  # map coordinate; generate_city draws it from [0, 1]
+    y: float = 0.0  # map coordinate; generate_city draws it from [0, 1]
+@dataclass
+class Destination:
+    """A queued assignment for a vehicle (used by hold actions)."""
+    node_id: str       # target node to travel to
+    event_id: str      # EVT-NNNN this destination serves
+@dataclass
+class Vehicle:
+    """An emergency unit and its current assignment state."""
+    unit_id: str  # "<vehicle_type>_<index>" as assigned by generate_city
+    vehicle_type: str  # police / ambulance / fire
+    home_node: str  # node id of the station the unit is spawned at
+    current_node: str  # node id of the unit's current position; starts at home_node
+    status: str = "FREE"          # FREE / DISPATCHED / ON_SCENE / RETURNING
+    assigned_event: Optional[str] = None  # event id being served, None when unassigned
+    eta: int = 0  # presumably steps until arrival — TODO confirm against the vehicle tick logic
+    on_scene_remaining: int = 0  # presumably steps left on scene — TODO confirm
+    return_remaining: int = 0  # presumably steps left on the way back — TODO confirm
+    path: List[str] = field(default_factory=list)  # node ids of the route being travelled
+    transit_progress: float = 0.0  # 0..1 along current path
+    destinations: List[Destination] = field(default_factory=list)  # queued future assignments
+@dataclass
+class City:
+    """City graph plus the vehicle fleet operating in it."""
+    nodes: Dict[str, Node] = field(default_factory=dict)  # node id -> Node
+    edges: Dict[str, Dict[str, float]] = field(default_factory=dict)  # adj list; edges[a][b] is the travel time a -> b
+    vehicles: List[Vehicle] = field(default_factory=list)  # every unit in the fleet
+    seed: int = 0  # seed the city was generated from
+def _distance(a: Node, b: Node) -> float:
+    return math.sqrt((a.x - b.x) ** 2 + (a.y - b.y) ** 2)
+def _make_street(rng: random.Random) -> str:
+    return f"{rng.choice(STREET_NAMES)} {rng.choice(SUFFIXES)}"
+def generate_city(seed: int, difficulty: int = 1) -> City:
+    """Build a random city graph, spawn vehicles, return City.
+
+    Every random draw goes through a local ``random.Random(seed)``.
+
+    Args:
+        seed: Random seed for reproducibility.
+        difficulty: 1 = easy (plenty of vehicles), 2 = medium, 3 = hard (scarce).
+            Values below 1 count as easy; any value other than <= 1 or 2
+            counts as hard.
+
+    Returns:
+        A City with 8-12 nodes, a connected undirected edge map weighted by
+        travel time, and the same number of police, ambulance and fire units,
+        each parked at its home station.
+    """
+    rng = random.Random(seed)
+    city = City(seed=seed)
+    # ── 1. Create nodes ──────────────────────────────────────────────────
+    # One of each station type plus a random number of other locations.
+    node_specs: List[Tuple[str, int]] = [
+        ("hospital", 1),
+        ("fire_station", 1),
+        ("police_station", 1),
+        ("residential", rng.randint(3, 5)),
+        ("commercial", rng.randint(1, 2)),
+        ("road_junction", rng.randint(1, 2)),
+    ]
+    idx = 0  # running index across all node types, used in the node id
+    for ntype, count in node_specs:
+        pool = list(LANDMARKS.get(ntype, []))
+        rng.shuffle(pool)
+        for i in range(count):
+            nid = f"{ntype}_{idx}"
+            # Fall back to a generic name if the landmark pool runs out.
+            name = pool[i] if i < len(pool) else f"{ntype.title()} {idx}"
+            node = Node(
+                node_id=nid, node_type=ntype, name=name,
+                street=_make_street(rng),
+                x=rng.uniform(0, 1), y=rng.uniform(0, 1),
+            )
+            city.nodes[nid] = node
+            city.edges[nid] = {}
+            idx += 1
+    # ── 2. Build edges (proximity-biased) ────────────────────────────────
+    node_ids = list(city.nodes.keys())
+    for nid in node_ids:
+        n = city.nodes[nid]
+        # All other nodes, nearest first.
+        others = sorted(
+            [oid for oid in node_ids if oid != nid],
+            key=lambda oid: _distance(n, city.nodes[oid]),
+        )
+        k = rng.randint(2, 4)
+        neighbours = others[:k]
+        # add 0-1 long-range edges
+        for _ in range(rng.randint(0, 1)):
+            far = rng.choice(others[k:]) if len(others) > k else None
+            if far:
+                neighbours.append(far)
+        for oid in neighbours:
+            # Skip pairs already linked from the other endpoint.
+            if oid not in city.edges[nid]:
+                dist = _distance(n, city.nodes[oid])
+                # Travel time: scaled distance plus noise, never below 1.0.
+                travel = max(1.0, dist * 15 + rng.uniform(-1, 2))
+                travel = round(travel, 1)
+                # Edges are undirected: store the same weight both ways.
+                city.edges[nid][oid] = travel
+                city.edges[oid][nid] = travel
+    # ── 3. Ensure connectivity ───────────────────────────────────────────
+    # Depth-first search from the first node to find what is reachable.
+    visited = set()
+    stack = [node_ids[0]]
+    while stack:
+        cur = stack.pop()
+        if cur in visited:
+            continue
+        visited.add(cur)
+        stack.extend(city.edges[cur].keys())
+    if len(visited) < len(node_ids):
+        unvisited = [n for n in node_ids if n not in visited]
+        # Link every unreachable node to its closest already-reachable node.
+        for uid in unvisited:
+            closest = min(visited, key=lambda v: _distance(city.nodes[uid], city.nodes[v]))
+            d = round(max(1.0, _distance(city.nodes[uid], city.nodes[closest]) * 15), 1)
+            city.edges[uid][closest] = d
+            city.edges[closest][uid] = d
+            visited.add(uid)
+    # ── 4. Spawn vehicles (count scales with difficulty) ──────────────────
+    # Easy (1): 3 per type — always a free unit available
+    # Medium (2): 2 per type — sometimes all busy, must use hold
+    # Hard (3): 1 per type — forces hold/reroute decisions constantly
+    if difficulty <= 1:
+        vehicle_count = 3
+    elif difficulty == 2:
+        vehicle_count = 2
+    else:
+        vehicle_count = 1
+    def _find_node(ntype: str) -> str:
+        # First node of the given type; falls back to the first node overall.
+        for nid, n in city.nodes.items():
+            if n.node_type == ntype:
+                return nid
+        return node_ids[0]
+    vid = 0  # running index across all vehicle types, used in the unit id
+    for vtype, home_type in [
+        ("police", "police_station"),
+        ("ambulance", "hospital"),
+        ("fire", "fire_station"),
+    ]:
+        home = _find_node(home_type)
+        for _ in range(vehicle_count):
+            city.vehicles.append(Vehicle(
+                unit_id=f"{vtype}_{vid}",
+                vehicle_type=vtype,
+                home_node=home,
+                current_node=home,
+            ))
+            vid += 1
+    return city
+def dijkstra(city: City, src: str, dst: str) -> Tuple[float, List[str]]:
+    """Shortest path (travel time) between two nodes. Returns (time, path)."""
+    dist: Dict[str, float] = {src: 0.0}
+    prev: Dict[str, Optional[str]] = {src: None}
+    heap = [(0.0, src)]
+    while heap:
+        d, u = heapq.heappop(heap)
+        if u == dst:
+            break
+        if d > dist.get(u, float("inf")):
+            continue
+        for v, w in city.edges.get(u, {}).items():
+            nd = d + w
+            if nd < dist.get(v, float("inf")):
+                dist[v] = nd
+                prev[v] = u
+                heapq.heappush(heap, (nd, v))
+    if dst not in dist:
+        return float("inf"), []
+    path = []
+    cur: Optional[str] = dst
+    while cur is not None:
+        path.append(cur)
+        cur = prev.get(cur)
+    path.reverse()
+    return dist[dst], path

server/requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+openenv[core]>=0.2.0
+fastapi>=0.115.0
+uvicorn>=0.24.0

server/reward.py ADDED Viewed

	@@ -0,0 +1,149 @@

+"""Decomposed reward computation for Dispatch911 (5 components)."""
+from typing import Dict, Optional
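+# ── Default reward config ────────────────────────────────────────────────────
+# SEVERITY_REWARDS maps |predicted - true severity| to the severity reward.
+# MAX_TRAVEL_TIME is the travel time at which the proximity score reaches 0.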
+SEVERITY_REWARDS = {0: 1.0, 1: 0.6, 2: 0.2, 3: -0.2, 4: -0.5}
+PARSE_FAILURE_PENALTY = -2.0
+MAX_TRAVEL_TIME = 15.0
+# Baseline reward subtracted from each step's total so that an
+# untrained / SFT-only agent starts near 0 and the GRPO training curve
+# shows the expected upward trend.  Calibrated to the average per-step
+# score of a keyword-heuristic agent (~2.5).
+STEP_REWARD_BASELINE = 2.5
+def compute_reward(
+    *,
+    # ground truth
+    gt_severity: int,
+    gt_is_duplicate: bool,
+    gt_event_id: Optional[str],
+    gt_vehicle_type: str,
+    gt_origin_node: str,
+    # agent predictions
+    severity_pred: int,
+    is_duplicate_pred: bool,
+    duplicate_of_event_id: Optional[str],
+    vehicle_type_pred: Optional[str],
+    vehicle_id_pred: Optional[str],
+    # vehicle context
+    vehicle_exists: bool = True,
+    vehicle_is_free: bool = True,
+    vehicle_type_matches: bool = True,
+    travel_time: float = 0.0,
+    is_nearest: bool = False,
+    # reroute context
+    reroute_attempted: bool = False,
+    reroute_valid: bool = False,
+    reroute_severity_delta: int = 0,
+    reroute_faster: bool = False,
+    replacement_valid: Optional[bool] = None,
+    # hold context
+    hold_is_action: bool = False,
+    hold_free_unit_exists: bool = False,
+    hold_min_busy_severity: int = 0,
+    hold_vehicle_is_soonest: bool = False,
+) -> Dict[str, float]:
+    """Return per-component reward breakdown + total."""
+    breakdown: Dict[str, float] = {}
+    # ── 1. Severity ──────────────────────────────────────────────────────
+    err = abs(severity_pred - gt_severity)
+    breakdown["severity"] = SEVERITY_REWARDS.get(err, -0.5)
+    # ── 2. Duplicate detection ───────────────────────────────────────────
+    if not is_duplicate_pred and not gt_is_duplicate:
+        breakdown["duplicate"] = 1.0
+    elif not is_duplicate_pred and gt_is_duplicate:
+        breakdown["duplicate"] = -1.0
+    elif is_duplicate_pred and not gt_is_duplicate:
+        breakdown["duplicate"] = -0.8
+    elif is_duplicate_pred and gt_is_duplicate:
+        if duplicate_of_event_id is None:
+            breakdown["duplicate"] = 0.0
+        elif duplicate_of_event_id == gt_event_id:
+            breakdown["duplicate"] = 1.5
+        else:
+            breakdown["duplicate"] = 0.3
+    # ── 3. Vehicle type ──────────────────────────────────────────────────
+    if is_duplicate_pred:
+        breakdown["vehicle_type"] = 0.0
+    elif vehicle_type_pred == gt_vehicle_type:
+        breakdown["vehicle_type"] = 1.5
+    else:
+        breakdown["vehicle_type"] = -1.5
+    # ── 4. Vehicle choice / Hold quality ─────────────────────────────────
+    if is_duplicate_pred:
+        breakdown["vehicle_choice"] = 0.0
+    elif hold_is_action:
+        # Hold-specific scoring
+        if hold_free_unit_exists:
+            # A free unit exists — holding is unjustified
+            breakdown["vehicle_choice"] = -2.0
+        elif not vehicle_exists:
+            # Hallucinated vehicle ID
+            breakdown["vehicle_choice"] = -2.0
+        elif vehicle_is_free:
+            # Named a FREE unit but chose hold instead of dispatch
+            breakdown["vehicle_choice"] = -1.5
+        else:
+            # All units of correct type are busy — evaluate severity
+            sev_delta = hold_min_busy_severity - gt_severity
+            if sev_delta > 0:
+                # All busy units have strictly higher severity — justified
+                breakdown["vehicle_choice"] = 1.0
+            elif sev_delta == 0:
+                # Some busy units have equal severity — reasonable
+                breakdown["vehicle_choice"] = 0.5
+            else:
+                # Some busy units have lower severity — should have rerouted
+                breakdown["vehicle_choice"] = -0.3 * abs(sev_delta)
+            # Bonus: picked the soonest-to-free unit
+            if hold_vehicle_is_soonest:
+                breakdown["vehicle_choice"] += 0.3
+    elif not vehicle_exists:
+        breakdown["vehicle_choice"] = -5.0
+    elif not vehicle_is_free:
+        breakdown["vehicle_choice"] = -2.0  # busy vehicle — as bad as hallucination
+    elif not vehicle_type_matches:
+        breakdown["vehicle_choice"] = -0.5
+    else:
+        prox = max(0.0, 1.0 - travel_time / MAX_TRAVEL_TIME)
+        mult = 1.0 if is_nearest else 0.5
+        breakdown["vehicle_choice"] = prox * mult
+    # ── 5. Reroute ───────────────────────────────────────────────────────
+    if hold_is_action:
+        breakdown["reroute"] = 0.0  # neutral for hold actions
+    elif not reroute_attempted:
+        breakdown["reroute"] = 0.0
+    elif not reroute_valid:
+        breakdown["reroute"] = -1.0
+    else:
+        r = 0.0
+        if reroute_severity_delta <= 0:
+            r = -0.5
+        elif reroute_severity_delta == 1:
+            r = 0.3
+        else:
+            r = 0.8
+        if reroute_faster:
+            r += 0.4
+        if replacement_valid is True:
+            r += 0.5
+        elif replacement_valid is False:
+            r -= 0.3
+        breakdown["reroute"] = r
+    raw = sum(breakdown.values())
+    breakdown["raw_total"] = raw
+    breakdown["total"] = raw - STEP_REWARD_BASELINE
+    return breakdown

server/smart_emergency_environment.py ADDED Viewed

	@@ -0,0 +1,559 @@

+"""
+Dispatch911 Environment — OpenEnv-compatible Gym environment.
+Handles reset/step loop, vehicle lifecycle, event registry,
+observation formatting, and reward integration.
+"""
+import random
+from typing import Dict, List, Optional
+from uuid import uuid4
+from openenv.core.env_server.interfaces import Environment
+from openenv.core.env_server.types import State
+try:
+    from ..models import SmartEmergencyAction, SmartEmergencyObservation
+except ImportError:
+    from models import SmartEmergencyAction, SmartEmergencyObservation
+from .city import City, Destination, Vehicle, dijkstra, generate_city
+from .calls import Call, generate_call
+from .reward import PARSE_FAILURE_PENALTY, compute_reward
+# ── Config defaults ──────────────────────────────────────────────────────────
+MAX_STEPS = 20
+DUPLICATE_PROB = 0.30
+ON_SCENE_STEPS = 2
+RETURN_STEPS = 2
+class SmartEmergencyEnvironment(Environment):
+    """
+    Dispatch911 RL environment.
+    Each episode = one procedurally generated city.
+    Each step = one incoming 911 call.
+    The agent outputs a structured JSON action; the environment
+    evaluates it against hidden ground truth and returns a shaped reward.
+    """
+    SUPPORTS_CONCURRENT_SESSIONS: bool = True
+    # Class-level tracking for /grader since create_app hides the instance
+    latest_history = []
+    latest_steps = 0
+    latest_episode_id = ""
+    def __init__(self):
+        self._state = State(episode_id=str(uuid4()), step_count=0)
+        self._city: Optional[City] = None
+        self._rng = random.Random()
+        self._active_events: Dict[str, dict] = {}
+        self._event_counter = 1
+        self._current_call: Optional[Call] = None
+        self._dispatcher_notes: List[str] = []
+        self._seed = 0
+        self._reward_history: List[dict] = []  # for /grader aggregation
+    # ── Reset ────────────────────────────────────────────────────────────
+    def reset(self, task_id: int = 1, seed: Optional[int] = None) -> SmartEmergencyObservation:
+        self._seed = seed if seed is not None else random.randint(0, 999999)
+        self._rng = random.Random(self._seed)
+        self._city = generate_city(self._seed, difficulty=task_id)
+        self._state = State(episode_id=str(uuid4()), step_count=0)
+        self._active_events = {}
+        self._event_counter = 1
+        self._dispatcher_notes = []
+        self._reward_history = []
+        # Reset class-level tracker
+        SmartEmergencyEnvironment.latest_history = []
+        SmartEmergencyEnvironment.latest_steps = 0
+        SmartEmergencyEnvironment.latest_episode_id = self._state.episode_id
+        self._task_id = task_id
+        if task_id == 1:
+            self._max_steps = 10
+            self._duplicate_prob = 0.10
+        elif task_id == 2:
+            self._max_steps = 15
+            self._duplicate_prob = 0.30
+        else:
+            self._max_steps = 20
+            self._duplicate_prob = 0.50
+        # Generate first call
+        self._current_call, self._event_counter = generate_call(
+            self._city, 1, self._active_events,
+            self._duplicate_prob, self._rng, self._event_counter,
+        )
+        obs_text = self._build_observation()
+        return SmartEmergencyObservation(
+            prompt=obs_text,
+            step=0,
+            call_id=self._current_call.call_id,
+            reward_breakdown={},
+            active_event_ids=list(self._active_events.keys()),
+            fleet_utilisation=self._fleet_util(),
+            done=False,
+            reward=0.0,
+        )
+    # ── Step ─────────────────────────────────────────────────────────────
+    def step(self, action: SmartEmergencyAction) -> SmartEmergencyObservation:
+        # Auto-reset if step is called before reset
+        if self._current_call is None or self._city is None:
+            self.reset()
+        self._state.step_count += 1
+        call = self._current_call
+        city = self._city
+        assert call is not None and city is not None
+        # ── Evaluate action ──────────────────────────────────────────────
+        reward_kwargs = self._evaluate_action(action, call)
+        breakdown = compute_reward(**reward_kwargs)
+        self._reward_history.append(breakdown)
+        # Update class-level tracker for grader
+        SmartEmergencyEnvironment.latest_history.append(breakdown)
+        SmartEmergencyEnvironment.latest_steps = self._state.step_count
+        # ── Update state ──────────────���──────────────────────────────────
+        self._apply_action(action, call)
+        # ── Advance simulation clock ─────────────────────────────────────
+        self._tick_vehicles()
+        # ── Log dispatcher note ──────────────────────────────────────────
+        note = f"Step {self._state.step_count}: {call.call_id}"
+        if action.is_duplicate:
+            note += f" → Duplicate of {action.duplicate_of_event_id or '?'}"
+        elif action.action_type == "hold":
+            note += f" → HOLD ({action.vehicle_type}, waiting for {action.vehicle_id or '?'})"
+        elif action.action_type == "dispatch":
+            note += f" → {action.vehicle_type} {action.vehicle_id or '?'}"
+        self._dispatcher_notes.append(note)
+        if len(self._dispatcher_notes) > 3:
+            self._dispatcher_notes = self._dispatcher_notes[-3:]
+        # ── Check done ───────────────────────────────────────────────────
+        done = self._state.step_count >= getattr(self, "_max_steps", MAX_STEPS)
+        # ── Generate next call ───────────────────────────────────────────
+        if not done:
+            self._current_call, self._event_counter = generate_call(
+                city, self._state.step_count + 1,
+                self._active_events, getattr(self, "_duplicate_prob", DUPLICATE_PROB),
+                self._rng, self._event_counter,
+            )
+        obs_text = self._build_observation() if not done else "Episode complete."
+        gt = {
+            "severity": call.severity,
+            "emergency_type": call.emergency_type,
+            "is_duplicate": call.is_duplicate_of is not None,
+            "required_vehicle_type": call.required_vehicle_type,
+        }
+        return SmartEmergencyObservation(
+            prompt=obs_text,
+            step=self._state.step_count,
+            call_id=call.call_id,
+            reward_breakdown=breakdown,
+            active_event_ids=list(self._active_events.keys()),
+            fleet_utilisation=self._fleet_util(),
+            done=done,
+            reward=breakdown.get("total", 0.0),
+            ground_truth=gt,
+            metadata={
+                "ground_truth": gt,
+                "city_seed": self._seed,
+            },
+        )
+    # ── Evaluate ─────────────────────────────────────────────────────────
+    def _evaluate_action(self, action: SmartEmergencyAction, call: Call) -> dict:
+        """Build kwargs for compute_reward."""
+        city = self._city
+        assert city is not None
+        gt_is_dup = call.is_duplicate_of is not None
+        gt_eid = call.is_duplicate_of
+        # Vehicle checks
+        v_exists = True
+        v_free = True
+        v_type_match = True
+        travel = 0.0
+        is_nearest = False
+        # Hold checks
+        hold_is_action = action.action_type == "hold"
+        hold_free_exists = False
+        hold_min_busy_sev = 0
+        hold_vehicle_soonest = False
+        if hold_is_action:
+            # Check if the named vehicle exists and its state
+            if action.vehicle_id:
+                veh = self._find_vehicle(action.vehicle_id)
+                if veh is None:
+                    v_exists = False
+                else:
+                    v_free = veh.status == "FREE"
+                    v_type_match = veh.vehicle_type == (action.vehicle_type or "")
+            else:
+                v_exists = False
+            # Check if any free unit of the correct type exists
+            vtype = action.vehicle_type or call.required_vehicle_type
+            free_of_type = [
+                v for v in city.vehicles
+                if v.status == "FREE" and v.vehicle_type == vtype
+            ]
+            hold_free_exists = len(free_of_type) > 0
+            # Find min severity among busy units of this type
+            busy_of_type = [
+                v for v in city.vehicles
+                if v.status != "FREE" and v.vehicle_type == vtype
+                and v.assigned_event is not None
+            ]
+            if busy_of_type:
+                busy_sevs = []
+                for bv in busy_of_type:
+                    evt = self._active_events.get(bv.assigned_event, {})
+                    busy_sevs.append(evt.get("severity", 5))
+                hold_min_busy_sev = min(busy_sevs)
+                # Check if named vehicle is the soonest to free
+                if v_exists and not v_free and action.vehicle_id:
+                    veh = self._find_vehicle(action.vehicle_id)
+                    if veh and veh.eta is not None:
+                        min_eta = min(
+                            (bv.eta for bv in busy_of_type if bv.eta is not None),
+                            default=999,
+                        )
+                        hold_vehicle_soonest = veh.eta <= min_eta
+        elif not action.is_duplicate and action.vehicle_id:
+            veh = self._find_vehicle(action.vehicle_id)
+            if veh is None:
+                v_exists = False
+            else:
+                v_free = veh.status == "FREE"
+                v_type_match = veh.vehicle_type == action.vehicle_type
+                if v_exists and v_free:
+                    travel, _ = dijkstra(city, veh.current_node, call.origin_node_id)
+                    # Check if nearest
+                    free_same = [
+                        v for v in city.vehicles
+                        if v.status == "FREE" and v.vehicle_type == call.required_vehicle_type
+                    ]
+                    if free_same:
+                        min_t = min(dijkstra(city, v.current_node, call.origin_node_id)[0] for v in free_same)
+                        is_nearest = abs(travel - min_t) < 0.1
+        # Reroute checks
+        reroute_attempted = action.reroute is not None and not hold_is_action
+        reroute_valid = False
+        reroute_sev_delta = 0
+        reroute_faster = False
+        replacement_valid = None
+        if reroute_attempted and action.reroute is not None:
+            rv = self._find_vehicle(action.reroute.vehicle_to_reroute)
+            if rv and rv.status == "DISPATCHED" and rv.assigned_event == action.reroute.from_event_id:
+                reroute_valid = True
+                old_evt = self._active_events.get(action.reroute.from_event_id, {})
+                reroute_sev_delta = call.severity - old_evt.get("severity", call.severity)
+                if action.reroute.replacement_vehicle_id:
+                    rep = self._find_vehicle(action.reroute.replacement_vehicle_id)
+                    replacement_valid = (
+                        rep is not None and rep.status == "FREE"
+                        and rep.vehicle_type == old_evt.get("vehicle", "")
+                    )
+        return dict(
+            gt_severity=call.severity,
+            gt_is_duplicate=gt_is_dup,
+            gt_event_id=gt_eid,
+            gt_vehicle_type=call.required_vehicle_type,
+            gt_origin_node=call.origin_node_id,
+            severity_pred=action.severity_pred,
+            is_duplicate_pred=action.is_duplicate,
+            duplicate_of_event_id=action.duplicate_of_event_id,
+            vehicle_type_pred=action.vehicle_type,
+            vehicle_id_pred=action.vehicle_id,
+            vehicle_exists=v_exists,
+            vehicle_is_free=v_free,
+            vehicle_type_matches=v_type_match,
+            travel_time=travel,
+            is_nearest=is_nearest,
+            reroute_attempted=reroute_attempted,
+            reroute_valid=reroute_valid,
+            reroute_severity_delta=reroute_sev_delta,
+            reroute_faster=reroute_faster,
+            replacement_valid=replacement_valid,
+            hold_is_action=hold_is_action,
+            hold_free_unit_exists=hold_free_exists,
+            hold_min_busy_severity=hold_min_busy_sev,
+            hold_vehicle_is_soonest=hold_vehicle_soonest,
+        )
+    # ── Apply action to state ────────────────────────────────────────────
+    def _apply_action(self, action: SmartEmergencyAction, call: Call):
+        city = self._city
+        assert city is not None
+        if action.is_duplicate:
+            # Link call to existing event
+            eid = action.duplicate_of_event_id or call.event_id
+            if eid in self._active_events:
+                self._active_events[eid].setdefault("calls", []).append(call.call_id)
+            return
+        # Register new event (only if not already active)
+        eid = call.event_id
+        if eid not in self._active_events:
+            self._active_events[eid] = {
+                "type": call.emergency_type,
+                "severity": call.severity,
+                "vehicle": call.required_vehicle_type,
+                "node_id": call.origin_node_id,
+                "node_name": call.origin_node_name,
+                "assigned_unit": None,
+                "unit_eta": None,
+                "held_for_unit": None,
+                "step_opened": self._state.step_count,
+                "calls": [call.call_id],
+            }
+        else:
+            # Event already exists — just link this call
+            self._active_events[eid].setdefault("calls", []).append(call.call_id)
+        # ── Hold action ──────────────────────────────────────────────────
+        if action.action_type == "hold" and action.vehicle_id:
+            veh = self._find_vehicle(action.vehicle_id)
+            if veh is not None and veh.status != "FREE":
+                # Queue this event as a future destination for the vehicle
+                veh.destinations.append(
+                    Destination(node_id=call.origin_node_id, event_id=eid)
+                )
+                self._active_events[eid]["held_for_unit"] = action.vehicle_id
+            return
+        # Handle reroute
+        if action.reroute is not None:
+            rv = self._find_vehicle(action.reroute.vehicle_to_reroute)
+            if rv and rv.status == "DISPATCHED":
+                # Unassign from old event
+                old_eid = action.reroute.from_event_id
+                if old_eid in self._active_events:
+                    self._active_events[old_eid]["assigned_unit"] = None
+                    self._active_events[old_eid]["unit_eta"] = None
+                # Dispatch rerouted vehicle to new event
+                travel, path = dijkstra(city, rv.current_node, call.origin_node_id)
+                rv.status = "DISPATCHED"
+                rv.assigned_event = eid
+                rv.eta = max(1, int(travel))
+                rv.path = path
+                self._active_events[eid]["assigned_unit"] = rv.unit_id
+                self._active_events[eid]["unit_eta"] = rv.eta
+                # Handle replacement
+                if action.reroute.replacement_vehicle_id:
+                    rep = self._find_vehicle(action.reroute.replacement_vehicle_id)
+                    if rep and rep.status == "FREE" and old_eid in self._active_events:
+                        old_node = self._active_events[old_eid]["node_id"]
+                        t, p = dijkstra(city, rep.current_node, old_node)
+                        rep.status = "DISPATCHED"
+                        rep.assigned_event = old_eid
+                        rep.eta = max(1, int(t))
+                        rep.path = p
+                        self._active_events[old_eid]["assigned_unit"] = rep.unit_id
+                        self._active_events[old_eid]["unit_eta"] = rep.eta
+                return
+        # Normal dispatch
+        if action.vehicle_id:
+            veh = self._find_vehicle(action.vehicle_id)
+            if veh is None:
+                # Hallucinated vehicle — event stays UNASSIGNED
+                return
+            if veh.status != "FREE":
+                # Vehicle is busy — auto-convert to hold.
+                # Penalty still applied in reward, but the event gets queued.
+                veh.destinations.append(
+                    Destination(node_id=call.origin_node_id, event_id=eid)
+                )
+                self._active_events[eid]["held_for_unit"] = veh.unit_id
+                return
+            # Vehicle is free — dispatch it
+            travel, path = dijkstra(city, veh.current_node, call.origin_node_id)
+            veh.status = "DISPATCHED"
+            veh.assigned_event = eid
+            veh.eta = max(1, int(travel))
+            veh.path = path
+            self._active_events[eid]["assigned_unit"] = veh.unit_id
+            self._active_events[eid]["unit_eta"] = veh.eta
+    # ── Vehicle tick ─────────────────────────────────────────────────────
+    def _tick_vehicles(self):
+        city = self._city
+        assert city is not None
+        resolved = []
+        for v in city.vehicles:
+            if v.status == "DISPATCHED":
+                v.eta -= 1
+                if v.eta <= 0:
+                    v.status = "ON_SCENE"
+                    v.on_scene_remaining = ON_SCENE_STEPS
+                    if v.path:
+                        v.current_node = v.path[-1]
+            elif v.status == "ON_SCENE":
+                v.on_scene_remaining -= 1
+                if v.on_scene_remaining <= 0:
+                    v.status = "RETURNING"
+                    v.return_remaining = RETURN_STEPS
+                    # Mark event resolved
+                    if v.assigned_event and v.assigned_event in self._active_events:
+                        resolved.append(v.assigned_event)
+            elif v.status == "RETURNING":
+                v.return_remaining -= 1
+                if v.return_remaining <= 0:
+                    v.status = "FREE"
+                    v.current_node = v.home_node
+                    v.assigned_event = None
+                    # Auto-dispatch to next queued destination (from hold)
+                    self._dispatch_next_destination(v)
+        for eid in resolved:
+            self._active_events.pop(eid, None)
+        # Clean up stale unassigned events (no unit, no hold, open > 3 steps)
+        stale = []
+        for eid, evt in self._active_events.items():
+            if (evt.get("assigned_unit") is None
+                    and evt.get("held_for_unit") is None
+                    and self._state.step_count - evt.get("step_opened", 0) > 3):
+                stale.append(eid)
+        for eid in stale:
+            self._active_events.pop(eid, None)
+    def _dispatch_next_destination(self, v: Vehicle):
+        """If the vehicle has queued destinations, pop the first and dispatch."""
+        city = self._city
+        assert city is not None
+        while v.destinations:
+            dest = v.destinations.pop(0)
+            # Only dispatch if the event is still active and unassigned
+            evt = self._active_events.get(dest.event_id)
+            if evt is not None and evt.get("assigned_unit") is None:
+                travel, path = dijkstra(city, v.current_node, dest.node_id)
+                v.status = "DISPATCHED"
+                v.assigned_event = dest.event_id
+                v.eta = max(1, int(travel))
+                v.path = path
+                evt["assigned_unit"] = v.unit_id
+                evt["unit_eta"] = v.eta
+                return
+        # No valid destinations left — vehicle stays FREE
+    # ── Observation builder ──────────────────────────────────────────────
+    def _build_observation(self) -> str:
+        call = self._current_call
+        city = self._city
+        if call is None or city is None:
+            return ""
+        parts = []
+        # 1. Incoming call
+        parts.append(f"=== INCOMING CALL [{call.call_id}] ===")
+        parts.append(call.transcript)
+        parts.append("")
+        # 2. Active events
+        parts.append("=== ACTIVE EVENTS ===")
+        if self._active_events:
+            for eid, evt in self._active_events.items():
+                unit = evt.get("assigned_unit")
+                held = evt.get("held_for_unit")
+                eta = evt.get("unit_eta")
+                if unit:
+                    eta_str = f"ETA {eta} min" if eta else "ON SCENE"
+                    status_str = f"{unit} {eta_str}"
+                elif held:
+                    status_str = f"HELD → {held}"
+                else:
+                    status_str = "UNASSIGNED"
+                sev = evt.get("severity", "?")
+                parts.append(
+                    f"{eid} | {evt['type']:10s} | {evt['node_name']:30s} | "
+                    f"sev {sev} | {status_str} | opened step {evt['step_opened']}"
+                )
+        else:
+            parts.append("(none)")
+        parts.append("")
+        # 3. Unit status
+        parts.append("=== UNIT STATUS ===")
+        for v in city.vehicles:
+            loc = city.nodes[v.current_node].name if v.current_node in city.nodes else v.current_node
+            status = v.status
+            if v.assigned_event:
+                status += f" → {v.assigned_event}"
+            parts.append(f"{v.unit_id:15s} | {v.vehicle_type:10s} | {loc:30s} | {status}")
+        parts.append("")
+        # 4. City reference (compact adjacency)
+        parts.append("=== CITY REFERENCE ===")
+        for nid, node in city.nodes.items():
+            neighbours = []
+            for oid, w in city.edges.get(nid, {}).items():
+                oname = city.nodes[oid].name
+                neighbours.append(f"{oname} [{w:.0f} min]")
+            parts.append(f"{node.name} ({node.node_type}) → {', '.join(neighbours)}")
+        parts.append("")
+        # 5. Dispatcher notes
+        parts.append("=== DISPATCHER NOTES ===")
+        if self._dispatcher_notes:
+            for n in self._dispatcher_notes:
+                parts.append(n)
+        else:
+            parts.append("(first call)")
+        parts.append("")
+        return "\n".join(parts)
+    # ── Helpers ──────────────────────────────────────────────────────────
+    def _find_vehicle(self, unit_id: str) -> Optional[Vehicle]:
+        if self._city is None:
+            return None
+        for v in self._city.vehicles:
+            if v.unit_id == unit_id:
+                return v
+        return None
+    def _fleet_util(self) -> float:
+        if self._city is None or not self._city.vehicles:
+            return 0.0
+        busy = sum(1 for v in self._city.vehicles if v.status != "FREE")
+        return busy / len(self._city.vehicles)
+    @property
+    def state(self) -> State:
+        """Return the environment's current ``State`` object (``self._state``)."""
+        return self._state

train_sft_grpo.py ADDED Viewed

	@@ -0,0 +1,661 @@

+# %% [markdown]
+# # 🚨 Smart Emergency Dispatch — SFT → GRPO Training (Colab + Unsloth)
+#
+# Fine-tunes **Qwen3-1.7B** as an emergency 911 dispatcher using **Unsloth** for 2× faster training:
+# 1. **Phase 1 — SFT**: Teach the model the JSON output format
+# 2. **Phase 2 — GRPO**: Improve dispatch strategy via RL against the live HF Space environment
+#
+# **Runtime**: Google Colab with T4 or A100 GPU
+# %% [markdown]
+# ## 0 · Install Dependencies
+# %%
+!pip install -Uq unsloth vllm
+!pip install -Uq git+https://github.com/huggingface/trl.git
+!pip install -Uq git+https://github.com/meta-pytorch/OpenEnv.git
+!pip install -Uq git+https://github.com/rishiraj38/Smart_Emergency.git datasets requests
+# %%
+from huggingface_hub import notebook_login
+notebook_login()
+# %% [markdown]
+# ## 1 · Configuration
+# %%
+import os, json, re, random, requests, time
+from collections import defaultdict
+# Base checkpoint loaded for SFT (a 4-bit Unsloth build, per its name).
+MODEL_NAME = "unsloth/Qwen3-1.7B-unsloth-bnb-4bit"
+# Output directories: the SFT result is reloaded from SFT_OUTPUT_DIR for GRPO.
+SFT_OUTPUT_DIR = "smart-emergency-sft"
+GRPO_OUTPUT_DIR = "smart-emergency-grpo"
+# Maximum sequence length passed to the model loader and to SFTConfig.
+MAX_SEQ_LENGTH = 3072
+# HuggingFace Space URL for the environment server
+# NOTE(review): confirm this sub-domain is the Space that serves this environment.
+HF_SPACE_URL = "https://rishi38-eme-enviro.hf.space"
+# %% [markdown]
+# ## 2 · Connect to Environment
+#
+# Wake the HF Space if sleeping, then connect directly using `SmartEmergencyEnv`.
+# %%
+import requests, time
+from smart_emergency import SmartEmergencyEnv, SmartEmergencyAction
+# Ping the Space health endpoint until it wakes up (free Spaces sleep after inactivity)
+print("⏳ Waking up HF Space (may take 30-60s if sleeping) …")
+# Up to 60 attempts: each waits at most 5 s for a response, then sleeps 2 s.
+for _attempt in range(60):
+    try:
+        r = requests.get(f"{HF_SPACE_URL}/health", timeout=5)
+        if r.status_code == 200:
+            print(f"✅ Space awake at {HF_SPACE_URL}")
+            break
+    except Exception:
+        # Best-effort polling: any failure just means "try again".
+        pass
+    time.sleep(2)
+else:
+    # for/else: reached only when the loop finished without `break`.
+    raise RuntimeError("HF Space did not respond after 2 minutes. Check the URL.")
+# Direct WebSocket connection via the official client
+env = SmartEmergencyEnv(base_url=HF_SPACE_URL).sync()
+# Smoke test: reset once and show the first call id.
+_test = env.reset()
+print(f"✅ Connected — first call: {_test.observation.call_id}")
+# %% [markdown]
+# ## 3 · System Prompt
+# %%
+# System message used for every SFT example and passed to the GRPO rollouts.
+# It defines the JSON schema of the three action types (dispatch, duplicate,
+# hold), the optional reroute object, and the severity / vehicle guides.
+SYSTEM_PROMPT = """\
+You are an expert 911 emergency dispatcher. You receive incoming calls and must make rapid, structured dispatch decisions.
+## RULES
+1. Each step you see: an incoming call transcript, active events, unit status, and a city map.
+2. You must respond with a single JSON object — nothing else.
+## ACTION TYPES
+You have three action types: `dispatch`, `duplicate`, and `hold`.
+### 1. dispatch — Handle a new emergency
+Use when a FREE vehicle of the correct type is available.
+```json
+{
+  "action_type": "dispatch",
+  "severity_pred": <int 1-5>,
+  "is_duplicate": false,
+  "duplicate_of_event_id": null,
+  "vehicle_type": "police" | "ambulance" | "fire",
+  "vehicle_id": "<unit_id of a FREE vehicle>",
+  "reroute": null
+}
+```
+### 2. duplicate — Flag a repeat call
+Use when the incoming call matches an existing active event (same location/type).
+```json
+{
+  "action_type": "duplicate",
+  "severity_pred": <int 1-5>,
+  "is_duplicate": true,
+  "duplicate_of_event_id": "<EVT-NNNN>",
+  "vehicle_type": null,
+  "vehicle_id": null,
+  "reroute": null
+}
+```
+### 3. hold — Queue for a busy vehicle
+Use ONLY when ALL vehicles of the required type are busy (none are FREE).
+```json
+{
+  "action_type": "hold",
+  "severity_pred": <int 1-5>,
+  "is_duplicate": false,
+  "duplicate_of_event_id": null,
+  "vehicle_type": "police" | "ambulance" | "fire",
+  "vehicle_id": "<unit_id of a BUSY vehicle to queue behind>",
+  "reroute": null
+}
+```
+**Hold rules:** NEVER hold if a free unit exists. Pick the vehicle with the lowest ETA.
+## REROUTE (optional, only with dispatch)
+Redirect an in-flight vehicle from a LOWER-severity event to this HIGHER-severity one:
+```json
+"reroute": {
+  "vehicle_to_reroute": "<DISPATCHED unit_id>",
+  "from_event_id": "<EVT-NNNN>",
+  "replacement_vehicle_id": "<FREE unit or null>"
+}
+```
+Only reroute DISPATCHED vehicles. Only reroute from lower to higher severity.
+## SEVERITY GUIDE
+1=minor, 2=moderate, 3=serious, 4=critical, 5=catastrophic
+## VEHICLE GUIDE
+- **fire** → fire, smoke, flames, gas leak
+- **police** → shooting, robbery, fight, break-in
+- **ambulance** → medical, crash, accident, injury, collapse
+## STRATEGY
+- Pick the nearest FREE vehicle (use CITY REFERENCE distances).
+- If call matches an ACTIVE EVENT, flag as duplicate.
+- No free units → use `hold`. Higher severity than busy units → consider `reroute`.
+"""
+# %% [markdown]
+# ---
+# # Phase 1 — Supervised Fine-Tuning (SFT)
+# %% [markdown]
+# ### Observation Parsing Helpers
+# %%
+def parse_free_vehicles(obs_text: str) -> dict:
+    """Return {unit_id: vehicle_type} for FREE vehicles."""
+    vehicles = {}
+    in_section = False
+    for line in obs_text.split("\n"):
+        if "=== UNIT STATUS ===" in line:
+            in_section = True; continue
+        if in_section and line.startswith("==="):
+            break
+        if in_section and "|" in line and "FREE" in line:
+            parts = [p.strip() for p in line.split("|")]
+            if len(parts) >= 2:
+                vehicles[parts[0]] = parts[1]
+    return vehicles
+def parse_all_vehicles(obs_text: str) -> list:
+    """Return list of {id, type, status} for ALL vehicles."""
+    vehicles = []
+    in_section = False
+    for line in obs_text.split("\n"):
+        if "=== UNIT STATUS ===" in line:
+            in_section = True; continue
+        if in_section and line.startswith("==="):
+            break
+        if in_section and "|" in line:
+            parts = [p.strip() for p in line.split("|")]
+            if len(parts) >= 4:
+                status = parts[3].split()[0] if parts[3] else "UNKNOWN"
+                vehicles.append({"id": parts[0], "type": parts[1], "status": status})
+    return vehicles
+def parse_active_events(obs_text: str) -> dict:
+    events = {}
+    in_section = False
+    for line in obs_text.split("\n"):
+        if "=== ACTIVE EVENTS ===" in line:
+            in_section = True; continue
+        if in_section and line.startswith("==="):
+            break
+        if in_section and "|" in line and "EVT-" in line:
+            parts = [p.strip() for p in line.split("|")]
+            if len(parts) >= 2:
+                events[parts[0]] = parts[1]
+    return events
+TYPE_TO_VEHICLE = {"fire": "fire", "medical": "ambulance", "crime": "police", "accident": "ambulance"}
+SEV_KW = {
+    5: ["not breathing", "active shooter", "trapped", "mass incident", "whole block", "pileup", "send everything"],
+    4: ["won't wake", "gunshots", "flipped", "blood everywhere", "kids are upstairs", "not responding"],
+    3: ["chest pain", "fight", "mugged", "knife", "crash", "bleeding", "fire at", "flames", "cyclist"],
+    2: ["fainted", "break-in", "dumpster", "fender", "small fire", "ankle", "shoplifter"],
+}
+def heuristic_severity(text):
+    t = text.lower()
+    for sev in [5, 4, 3, 2]:
+        if any(kw in t for kw in SEV_KW[sev]):
+            return sev
+    return 1
+def heuristic_vehicle_type(text):
+    t = text.lower()
+    if any(w in t for w in ["fire", "flames", "smoke", "burning", "gas leak"]):
+        return "fire"
+    if any(w in t for w in ["shooter", "gunshot", "mugged", "knife", "break-in", "fight", "shoplifter"]):
+        return "police"
+    return "ambulance"
+def pick_free(free_vehicles, vtype):
+    for vid, vt in free_vehicles.items():
+        if vt == vtype:
+            return vid
+    return None
+def pick_busy(all_vehicles, vtype):
+    for v in all_vehicles:
+        if v["type"] == vtype and v["status"] != "FREE":
+            return v["id"]
+    return None
+# %% [markdown]
+# ### Generate SFT Dataset
+# %%
+def build_ideal_action(gt, obs_text):
+    """Build ideal JSON action dict from ground truth + observation."""
+    sev = gt.get("severity", 1)
+    vtype = gt.get("required_vehicle_type", "ambulance")
+    is_dup = gt.get("is_duplicate", False)
+    if is_dup:
+        active = parse_active_events(obs_text)
+        etype = gt.get("emergency_type", "")
+        dup_eid = None
+        for eid, et in active.items():
+            if et.strip() == etype:
+                dup_eid = eid; break
+        if dup_eid is None and active:
+            dup_eid = list(active.keys())[0]
+        return {"action_type": "duplicate", "severity_pred": sev, "is_duplicate": True,
+                "duplicate_of_event_id": dup_eid, "vehicle_type": None, "vehicle_id": None, "reroute": None}
+    free = parse_free_vehicles(obs_text)
+    vid = pick_free(free, vtype)
+    if vid:
+        return {"action_type": "dispatch", "severity_pred": sev, "is_duplicate": False,
+                "duplicate_of_event_id": None, "vehicle_type": vtype, "vehicle_id": vid, "reroute": None}
+    busy_vid = pick_busy(parse_all_vehicles(obs_text), vtype)
+    if busy_vid:
+        return {"action_type": "hold", "severity_pred": sev, "is_duplicate": False,
+                "duplicate_of_event_id": None, "vehicle_type": vtype, "vehicle_id": busy_vid, "reroute": None}
+    return {"action_type": "dispatch", "severity_pred": sev, "is_duplicate": False,
+            "duplicate_of_event_id": None, "vehicle_type": vtype, "vehicle_id": f"{vtype}_0", "reroute": None}
+def generate_sft_data(env, num_episodes=60):
+    examples = []
+    for ep in range(num_episodes):
+        task_id = (ep % 3) + 1
+        result = env.reset(task_id=task_id)
+        prev_obs = result.observation.prompt
+        while not result.done:
+            free = parse_free_vehicles(prev_obs)
+            vtype = heuristic_vehicle_type(prev_obs)
+            vid = pick_free(free, vtype)
+            action = SmartEmergencyAction(
+                action_type="dispatch",
+                severity_pred=heuristic_severity(prev_obs),
+                is_duplicate=False,
+                vehicle_type=vtype,
+                vehicle_id=vid,
+            )
+            result = env.step(action)
+            # ground_truth is now a first-class field on the observation;
+            # fall back to metadata for backward compatibility with older servers.
+            gt = result.observation.ground_truth or result.observation.metadata.get("ground_truth")
+            if gt:
+                ideal = build_ideal_action(gt, prev_obs)
+                examples.append({
+                    "messages": [
+                        {"role": "system", "content": SYSTEM_PROMPT},
+                        {"role": "user", "content": prev_obs},
+                        {"role": "assistant", "content": json.dumps(ideal)},
+                    ]
+                })
+            prev_obs = result.observation.prompt
+        if (ep + 1) % 10 == 0:
+            print(f"  Episodes: {ep+1}/{num_episodes} | examples: {len(examples)}")
+    return examples
+# Collect SFT examples from 60 heuristic episodes against the live environment.
+print("📝 Generating SFT data …")
+sft_examples = generate_sft_data(env, num_episodes=60)
+print(f"✅ Collected {len(sft_examples)} SFT examples")
+# %%
+from datasets import Dataset
+# Wrap the list of {"messages": [...]} dicts in a Dataset for the trainer.
+sft_dataset = Dataset.from_list(sft_examples)
+print(sft_dataset)
+# %% [markdown]
+# ### SFT Training with Unsloth
+# %%
+from unsloth import FastLanguageModel
+from trl import SFTTrainer, SFTConfig
+# Load the 4-bit base checkpoint together with its tokenizer.
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name=MODEL_NAME,
+    max_seq_length=MAX_SEQ_LENGTH,
+    load_in_4bit=True,
+)
+# Attach LoRA adapters (rank 16, alpha 32) to the listed projection modules.
+model = FastLanguageModel.get_peft_model(
+    model,
+    r=16,
+    lora_alpha=32,
+    lora_dropout=0.05,
+    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
+                     "gate_proj", "up_proj", "down_proj"],
+    use_gradient_checkpointing="unsloth",
+)
+sft_config = SFTConfig(
+    output_dir=SFT_OUTPUT_DIR,
+    num_train_epochs=3,
+    per_device_train_batch_size=2,
+    gradient_accumulation_steps=8,
+    learning_rate=2e-4,
+    lr_scheduler_type="cosine",
+    warmup_ratio=0.1,
+    logging_steps=5,
+    save_steps=50,
+    max_seq_length=MAX_SEQ_LENGTH,
+    bf16=True,
+    report_to="none",
+)
+# Trainer over the chat-format examples collected above.
+sft_trainer = SFTTrainer(
+    model=model,
+    processing_class=tokenizer,
+    train_dataset=sft_dataset,
+    args=sft_config,
+)
+# %%
+print("🏋️ Starting SFT training …")
+sft_trainer.train()
+print("✅ SFT complete")
+# %%
+# Save model and tokenizer side by side; the GRPO phase reloads from this directory.
+sft_trainer.save_model(SFT_OUTPUT_DIR)
+tokenizer.save_pretrained(SFT_OUTPUT_DIR)
+print(f"✅ SFT model saved to {SFT_OUTPUT_DIR}/")
+# Free memory
+import torch, gc
+# Drop the SFT model and trainer references before the GRPO model is loaded.
+del model, sft_trainer
+gc.collect()
+torch.cuda.empty_cache()
+# %% [markdown]
+# ---
+# # Phase 2 — GRPO with Unsloth
+# %% [markdown]
+# ### Action Parsing
+# %%
+def parse_llm_action(text):
+    """Extract action dict from LLM output."""
+    m = re.search(r"```json\s*(.*?)```", text, re.DOTALL)
+    if m:
+        text = m.group(1)
+    else:
+        m = re.search(r"\{.*\}", text, re.DOTALL)
+        if m:
+            text = m.group(0)
+    try:
+        d = json.loads(text)
+        # Validate required fields
+        assert d.get("action_type") in ("dispatch", "duplicate", "hold")
+        assert 1 <= int(d.get("severity_pred", 0)) <= 5
+        return d
+    except Exception:
+        return None
+def fallback_action(obs_text):
+    free = parse_free_vehicles(obs_text)
+    vtype = heuristic_vehicle_type(obs_text)
+    vid = pick_free(free, vtype)
+    if vid:
+        return {"action_type": "dispatch", "severity_pred": heuristic_severity(obs_text),
+                "is_duplicate": False, "vehicle_type": vtype, "vehicle_id": vid}
+    busy_vid = pick_busy(parse_all_vehicles(obs_text), vtype)
+    return {"action_type": "hold" if busy_vid else "dispatch",
+            "severity_pred": heuristic_severity(obs_text), "is_duplicate": False,
+            "vehicle_type": vtype, "vehicle_id": busy_vid or f"{vtype}_0"}
+# %% [markdown]
+# ### Rollout Functions
+# %%
+from unsloth import FastLanguageModel, PatchFastRL
+from trl import GRPOConfig, GRPOTrainer
+# Patch TRL for Unsloth compatibility
+# Patch TRL's GRPO implementation for Unsloth.
+# NOTE(review): trl is imported before this call — confirm the installed Unsloth
+# version does not need the patch applied before `from trl import ...`.
+PatchFastRL("GRPO", FastLanguageModel)
+# Load the SFT model for GRPO with fast inference (vLLM)
+grpo_model, grpo_tokenizer = FastLanguageModel.from_pretrained(
+    model_name=SFT_OUTPUT_DIR,
+    max_seq_length=MAX_SEQ_LENGTH,
+    load_in_4bit=True,
+    fast_inference=True,  # enables vLLM for GRPO generation
+)
+# Same LoRA configuration as the SFT phase.
+grpo_model = FastLanguageModel.get_peft_model(
+    grpo_model,
+    r=16,
+    lora_alpha=32,
+    lora_dropout=0.05,
+    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
+                     "gate_proj", "up_proj", "down_proj"],
+    use_gradient_checkpointing="unsloth",
+)
+# %%
+def make_user_prompt(obs_text):
+    return f"You are the dispatcher. Read the situation and respond with a single JSON action.\n\n{obs_text}\n\nRespond ONLY with a JSON object."
+def action_dict_to_obj(d):
+    """Convert a plain dict action to SmartEmergencyAction."""
+    from smart_emergency import RerouteAction
+    reroute = None
+    if d.get("reroute") and isinstance(d["reroute"], dict):
+        rd = d["reroute"]
+        reroute = RerouteAction(
+            vehicle_to_reroute=rd["vehicle_to_reroute"],
+            from_event_id=rd["from_event_id"],
+            replacement_vehicle_id=rd.get("replacement_vehicle_id"),
+        )
+    return SmartEmergencyAction(
+        action_type=d.get("action_type", "dispatch"),
+        severity_pred=int(d.get("severity_pred", 1)),
+        is_duplicate=bool(d.get("is_duplicate", False)),
+        duplicate_of_event_id=d.get("duplicate_of_event_id"),
+        vehicle_type=d.get("vehicle_type"),
+        vehicle_id=d.get("vehicle_id"),
+        reroute=reroute,
+    )
+def rollout_once(trainer, env, tokenizer, system_prompt, max_turns=15):
+    """Run one full episode."""
+    from trl.experimental.openenv import generate_rollout_completions
+    result = env.reset()
+    prompt_ids, completion_ids, logprobs = [], [], []
+    rewards = {k: [] for k in ["severity", "duplicate", "vehicle_type", "vehicle_choice", "reroute", "format"]}
+    for _ in range(max_turns):
+        if result.done:
+            break
+        obs_text = result.observation.prompt
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": make_user_prompt(obs_text)},
+        ]
+        prompt_text = tokenizer.apply_chat_template(
+            messages, add_generation_prompt=True, tokenize=False, enable_thinking=False,
+        )
+        out = generate_rollout_completions(trainer, [prompt_text])[0]
+        prompt_ids.extend(out["prompt_ids"])
+        completion_ids.extend(out["completion_ids"])
+        logprobs.extend(out["logprobs"])
+        comp_text = out.get("text") or tokenizer.decode(out["completion_ids"], skip_special_tokens=True)
+        action_d = parse_llm_action(comp_text)
+        parse_ok = action_d is not None
+        if action_d is None:
+            action_d = fallback_action(obs_text)
+        action = action_dict_to_obj(action_d)
+        result = env.step(action)
+        bd = result.observation.reward_breakdown
+        rewards["severity"].append(bd.get("severity", 0.0))
+        rewards["duplicate"].append(bd.get("duplicate", 0.0))
+        rewards["vehicle_type"].append(bd.get("vehicle_type", 0.0))
+        rewards["vehicle_choice"].append(bd.get("vehicle_choice", 0.0))
+        rewards["reroute"].append(bd.get("reroute", 0.0))
+        rewards["format"].append(1.0 if parse_ok else -2.0)
+    return {
+        "prompt_ids": prompt_ids,
+        "completion_ids": completion_ids,
+        "logprobs": logprobs,
+        **{f"{k}_reward": v[-1] if v else 0.0 for k, v in rewards.items()},
+    }
+def rollout_func(prompts, trainer=None):
+    """GRPO rollout — called by GRPOTrainer each step."""
+    results = {k: [] for k in ["prompt_ids", "completion_ids", "logprobs",
+               "severity_reward", "duplicate_reward", "vehicle_type_reward",
+               "vehicle_choice_reward", "reroute_reward", "format_reward"]}
+    for _ in prompts:
+        ep = rollout_once(trainer, env, grpo_tokenizer, SYSTEM_PROMPT)
+        for k in results:
+            results[k].append(ep[k])
+    return results
+# %% [markdown]
+# ### Reward Wrappers & Config
+# %%
+def _make_reward_fn(key):
+    def fn(completions, **kwargs):
+        r = kwargs.get(key)
+        return [float(x) for x in r] if r else [0.0] * len(completions)
+    fn.__name__ = f"reward_{key.replace('_reward', '')}"
+    return fn
+reward_fns = [_make_reward_fn(k) for k in
+              ["severity_reward", "duplicate_reward", "vehicle_type_reward",
+               "vehicle_choice_reward", "reroute_reward", "format_reward"]]
+# %%
+grpo_dataset = Dataset.from_dict({  # placeholder dataset: rollout_func never reads the prompt text, so only the row count matters
+    "prompt": ["Dispatch emergency services for incoming 911 calls."] * 500  # 500 identical prompts
+})
+grpo_config = GRPOConfig(  # GRPO hyper-parameters and output/Hub settings
+    num_train_epochs=1,
+    learning_rate=5e-6,
+    gradient_accumulation_steps=32,  # with batch size 1 below: 32 samples accumulated per optimizer step
+    per_device_train_batch_size=1,
+    warmup_steps=10,
+    num_generations=4,  # completions sampled per prompt group
+    max_completion_length=128,  # NOTE(review): evaluation generates up to 256 new tokens — confirm the mismatch is intended
+    max_prompt_length=MAX_SEQ_LENGTH,  # NOTE(review): same value as the model's max_seq_length, leaving no headroom for the completion — confirm
+    use_vllm=True,
+    output_dir=GRPO_OUTPUT_DIR,
+    logging_steps=1,
+    save_steps=10,
+    push_to_hub=True,  # the model is additionally pushed explicitly after training
+)
+# %% [markdown]
+# ### Train GRPO
+# %%
+grpo_trainer = GRPOTrainer(  # GRPO trainer that obtains its completions from the environment rollout
+    model=grpo_model,
+    processing_class=grpo_tokenizer,
+    reward_funcs=reward_fns,  # one wrapper per reward component; each reads its value from the kwargs it receives (presumably the extra fields returned by rollout_func — confirm)
+    train_dataset=grpo_dataset,
+    args=grpo_config,
+    rollout_func=rollout_func,  # custom rollout that plays full episodes in the environment
+)
+import torch
+gpu = torch.cuda.get_device_properties(0)  # properties of CUDA device 0 (needs a CUDA GPU)
+print(f"GPU: {gpu.name} | {round(gpu.total_memory/1024**3, 1)} GB")  # total device memory, bytes / 1024**3
+print(f"Reserved: {round(torch.cuda.max_memory_reserved()/1024**3, 2)} GB")  # peak reserved memory before training starts
+# %%
+print("🏋️ Starting GRPO training …")
+stats = grpo_trainer.train()  # training output; its metrics['train_runtime'] is reported below
+print("✅ GRPO complete")
+# %%
+peak = round(torch.cuda.max_memory_reserved() / 1024**3, 2)  # peak reserved CUDA memory, bytes / 1024**3
+print(f"Peak memory: {peak} GB | Time: {round(stats.metrics['train_runtime']/60, 1)} min")  # runtime / 60, presumably seconds to minutes
+grpo_trainer.save_model(GRPO_OUTPUT_DIR)  # same directory the inference phase loads from
+grpo_trainer.push_to_hub()  # explicit push, in addition to push_to_hub=True in the config
+print(f"✅ Model saved & pushed to Hub")
+# %% [markdown]
+# ---
+# # Phase 3 — Inference & Evaluation
+# %%
+from unsloth import FastLanguageModel as FLM
+inf_model, inf_tokenizer = FLM.from_pretrained(  # reload the model saved at GRPO_OUTPUT_DIR for evaluation
+    model_name=GRPO_OUTPUT_DIR, max_seq_length=MAX_SEQ_LENGTH, load_in_4bit=True,
+)
+FLM.for_inference(inf_model)  # switch the loaded model to Unsloth's inference mode
+def run_episode(env, model, tokenizer, task_id=1):
+    result = env.reset(task_id=task_id)
+    total_reward = 0.0
+    for step in range(20):
+        if result.done:
+            break
+        obs_text = result.observation.prompt
+        messages = [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": make_user_prompt(obs_text)},
+        ]
+        prompt_text = tokenizer.apply_chat_template(
+            messages, add_generation_prompt=True, tokenize=False, enable_thinking=False,
+        )
+        inputs = tokenizer([prompt_text], return_tensors="pt").to(model.device)
+        gen = model.generate(**inputs, max_new_tokens=256, temperature=0.1)
+        output = tokenizer.decode(gen[0][len(inputs.input_ids[0]):], skip_special_tokens=True)
+        action_d = parse_llm_action(output) or fallback_action(obs_text)
+        action = action_dict_to_obj(action_d)
+        tag = "✅" if parse_llm_action(output) else "⚠️"
+        print(f"  Step {step}: {tag} {action_d.get('action_type')} sev={action_d.get('severity_pred')}")
+        result = env.step(action)
+        total_reward += result.observation.reward_breakdown.get("total", 0.0)
+    print(f"\n  Done — reward: {total_reward:.2f} over {step+1} steps")
+    return total_reward
+# %%
+print("=" * 50)
+print("Evaluation — Task 1 (Easy)")
+print("=" * 50)
+run_episode(env, inf_model, inf_tokenizer, task_id=1)  # one evaluation episode on task 1; the returned total reward is not kept
+env.close()  # closes the environment used for training and evaluation; presumably `env` must be re-created before further use — confirm

train_sft_grpo_graph.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff