"""FastAPI server exposing HelpdeskEnv over HTTP and a lightweight dashboard UI."""

from typing import Any, Dict, Optional

from fastapi import FastAPI, HTTPException
from fastapi.responses import HTMLResponse
from pydantic import BaseModel, ValidationError
import uvicorn

from ..environment import HelpdeskEnv
from ..graders.score_utils import ensure_open_unit_interval
from ..models import Action, Reward

app = FastAPI(title="Helpdesk OpenEnv")

# Single environment instance shared by every request handler; it is created
# lazily by get_env() on first use.
_env: Optional[HelpdeskEnv] = None

# Page body returned by the "/" route.
# NOTE(review): this literal contains no HTML tags even though it is served
# through HTMLResponse -- confirm whether the markup was lost upstream.
UI_HTML = """ UPI Banking Support Environment
HF Space Dashboard

UPI Banking Support Environment

Run the benchmark like an operator: reset an episode, choose the exact action your agent would take, and inspect the live observation, conversation, and current reward after each step.

Environment Checking...
Current Reward 0.000 Most recent reward value
Difficulty - Current episode track
Turn 0 Current step count
Status Idle Episode completion
Current Ticket
Customer Message
Reset the environment to load a ticket.
Pick a difficulty, reset the env, then use one of your supported actions. This UI is tuned for classify, FAQ lookup, clarification, reply, escalate, and resolve flows.
Case
-
Required Slots
No episode loaded yet.
Available Actions
Reset to populate actions.
Collected Facts
-
Action Console
Conversation Timeline
Waiting Reset the environment to start an episode.
Step Details
Current Reward Breakdown
-
Episode Info
-
Observation Snapshot
-
"""


def get_env() -> HelpdeskEnv:
    """Return the shared HelpdeskEnv, constructing it on the first call."""
    global _env
    if _env is None:
        _env = HelpdeskEnv()
    return _env


class ResetBody(BaseModel):
    """Request payload for ``POST /reset``."""

    # Task identifier forwarded to HelpdeskEnv.reset(); defaults to "easy".
    task_id: str = "easy"


def _zero_reward() -> Dict[str, Any]:
    """Build the placeholder reward dict returned right after a reset.

    Every scored component is passed through ``ensure_open_unit_interval``
    before being stored (presumably to keep values off the exact 0/1
    endpoints -- confirm in ``score_utils``). ``safety`` starts from 1.0
    while the other components start from 0.0.

    Returns:
        The ``Reward`` serialized with ``model_dump()``.
    """
    return Reward(
        value=ensure_open_unit_interval(0.0),
        correctness=ensure_open_unit_interval(0.0),
        safety=ensure_open_unit_interval(1.0),
        resolution=ensure_open_unit_interval(0.0),
        efficiency=ensure_open_unit_interval(0.0),
        penalties=0.0,
        done=False,
        info={},
    ).model_dump()


@app.get("/health")
def health() -> Dict[str, str]:
    """Liveness probe; always reports ``{"status": "healthy"}``."""
    return {"status": "healthy"}


@app.get("/", response_class=HTMLResponse)
def root() -> HTMLResponse:
    """Serve the dashboard page stored in ``UI_HTML``."""
    return HTMLResponse(UI_HTML)


@app.post("/reset")
def reset(body: ResetBody = ResetBody()) -> Dict[str, Any]:
    """Start a new episode for ``body.task_id`` and return its first observation.

    The request body is optional; when omitted the default ``ResetBody``
    (task ``"easy"``) is used. The default instance is never mutated here.

    Returns:
        A dict with the serialized observation, the placeholder reward from
        ``_zero_reward()``, ``done=False`` and an empty ``info`` dict.
    """
    obs = get_env().reset(body.task_id)
    return {
        "observation": obs.model_dump(),
        "reward": _zero_reward(),
        "done": False,
        "info": {},
    }


@app.post("/step")
def step(body: Dict[str, Any]) -> Dict[str, Any]:
    """Apply one agent action to the shared environment.

    Args:
        body: JSON object whose ``"action"`` key holds the keyword arguments
            used to construct an ``Action``. Other keys are ignored.

    Returns:
        A dict with the serialized observation and reward plus the ``done``
        flag and ``info`` returned by ``HelpdeskEnv.step``.

    Raises:
        HTTPException: 422 when ``"action"`` is missing, is not a JSON
            object, or fails ``Action`` validation.
    """
    payload = body.get("action")
    if not isinstance(payload, dict):
        # A missing or non-object "action" is a client error; report it as
        # such instead of letting KeyError/TypeError surface as HTTP 500.
        raise HTTPException(
            status_code=422,
            detail='Request body must contain an "action" object.',
        )
    try:
        # Assumes Action is a pydantic model (it is imported next to Reward,
        # which exposes model_dump) -- TODO confirm.
        action = Action(**payload)
    except ValidationError as exc:
        # str(exc) is always JSON-serializable, unlike raw error contexts.
        raise HTTPException(status_code=422, detail=str(exc)) from exc

    obs, reward, done, info = get_env().step(action)
    return {
        "observation": obs.model_dump(),
        "reward": reward.model_dump(),
        "done": done,
        "info": info,
    }


@app.get("/state")
def state() -> Dict[str, Any]:
    """Return the environment's current observation without advancing it."""
    obs = get_env().state()
    return {"observation": obs.model_dump()}


def main(host: str = "0.0.0.0", port: int = 8000) -> None:
    """Run the app under uvicorn.

    Args:
        host: Interface to bind; the default listens on all interfaces.
        port: TCP port to listen on.
    """
    uvicorn.run("helpdesk_env.server.app:app", host=host, port=port)


if __name__ == "__main__":
    main()