"""FastAPI server exposing HelpdeskEnv over HTTP and a lightweight dashboard UI.""" from typing import Any, Dict, Optional from fastapi import FastAPI from fastapi.responses import HTMLResponse from pydantic import BaseModel import uvicorn from ..environment import HelpdeskEnv from ..graders.score_utils import ensure_open_unit_interval from ..models import Action, Reward app = FastAPI(title="Helpdesk OpenEnv") _env: Optional[HelpdeskEnv] = None UI_HTML = """ UPI Banking Support Environment

HF Space Dashboard

UPI Banking Support Environment

Run the benchmark like an operator: reset an episode, choose the exact action your agent would take, and inspect the live observation, conversation, and current reward after each step.

Health API Docs Raw State

Environment Checking...

Current Reward 0.000 Most recent reward value

Difficulty - Current episode track

Turn 0 Current step count

Status Idle Episode completion

Current Ticket

Customer Message

Reset the environment to load a ticket.

Pick a difficulty, reset the env, then use one of your supported actions. This UI is tuned for classify, FAQ lookup, clarification, reply, escalate, and resolve flows.

Case

-

Required Slots

No episode loaded yet.

Available Actions

Reset to populate actions.

Collected Facts

-

Action Console

Difficulty

Action Type

Category

FAQ

Message

Conversation Timeline

Waiting Reset the environment to start an episode.

Step Details

Current Reward Breakdown

-

Episode Info

-

Observation Snapshot

-

""" def get_env() -> HelpdeskEnv: global _env if _env is None: _env = HelpdeskEnv() return _env class ResetBody(BaseModel): task_id: str = "easy" def _zero_reward() -> Dict[str, Any]: return Reward( value=ensure_open_unit_interval(0.0), correctness=ensure_open_unit_interval(0.0), safety=ensure_open_unit_interval(1.0), resolution=ensure_open_unit_interval(0.0), efficiency=ensure_open_unit_interval(0.0), penalties=0.0, done=False, info={}, ).model_dump() @app.get("/health") def health() -> Dict[str, str]: return {"status": "healthy"} @app.get("/", response_class=HTMLResponse) def root() -> HTMLResponse: return HTMLResponse(UI_HTML) @app.post("/reset") def reset(body: ResetBody = ResetBody()) -> Dict[str, Any]: obs = get_env().reset(body.task_id) return { "observation": obs.model_dump(), "reward": _zero_reward(), "done": False, "info": {}, } @app.post("/step") def step(body: Dict[str, Any]) -> Dict[str, Any]: action = Action(**body["action"]) obs, reward, done, info = get_env().step(action) return { "observation": obs.model_dump(), "reward": reward.model_dump(), "done": done, "info": info, } @app.get("/state") def state() -> Dict[str, Any]: obs = get_env().state() return {"observation": obs.model_dump()} def main() -> None: uvicorn.run("helpdesk_env.server.app:app", host="0.0.0.0", port=8000) if __name__ == "__main__": main()