"""Smoke tests for the SupportDesk environment."""

import importlib

import pytest
import yaml

try:
    from fastapi.testclient import TestClient
except (ImportError, RuntimeError):
    # RuntimeError was the case originally guarded against (the skip reason
    # below attributes it to a missing httpx); ImportError additionally covers
    # fastapi itself being absent, so the non-HTTP tests can still be collected.
    TestClient = None  # type: ignore[assignment]

from graders import grade_case
from models import SupportCaseProgress, SupportDeskAction
from server.supportdesk_environment import SupportDeskEnvironment
from tasks import get_task, list_task_ids

# Shared marker for tests that drive the FastAPI app through TestClient.
_requires_test_client = pytest.mark.skipif(
    TestClient is None,
    reason="httpx is not installed for FastAPI TestClient",
)


def test_all_tasks_are_registered():
    """The task registry lists exactly the four expected task ids, in order."""
    assert list_task_ids() == [
        "billing_refund_easy",
        "account_takeover_medium",
        "api_incident_hard",
        "regulated_export_exception_hard",
    ]


def test_environment_reset_and_state():
    """A fresh reset exposes the intake-stage observation and initial state."""
    env = SupportDeskEnvironment(task_id="billing_refund_easy")
    observation = env.reset()

    assert observation.task_id == "billing_refund_easy"
    assert observation.workflow_stage == "intake"
    assert "classify" in observation.required_next_actions
    assert observation.current_sla_minutes_remaining == 240
    assert env.state.step_count == 0
    # Scores are floats; compare with a tolerance rather than bit-for-bit.
    assert env.state.current_score == pytest.approx(0.15)


def test_perfect_solution_grades_full_score():
    """Replaying the gold answers for the easy task yields the top score."""
    task = get_task("billing_refund_easy")
    env = SupportDeskEnvironment(task_id=task.task_id)
    env.reset()

    env.step(
        SupportDeskAction(
            operation="classify",
            queue=task.gold_queue,
            priority=task.gold_priority,
            issue_type=task.gold_issue_type,
        )
    )
    env.step(
        SupportDeskAction(
            operation="draft_reply",
            reply="Refund approved for the duplicate charge and it should arrive within 5-7 business days.",
        )
    )
    env.step(
        SupportDeskAction(
            operation="add_internal_note",
            internal_note="Duplicate charge verified and refund approved.",
        )
    )
    env.step(
        SupportDeskAction(
            operation="submit",
            status=task.gold_status,
            resolution_code=task.gold_resolution_code,
        )
    )

    breakdown = grade_case(task, env.state.case)
    assert breakdown.total_score == pytest.approx(0.99)


def test_max_steps_ends_episode():
    """After six steps the episode reports done and the step count is six."""
    env = SupportDeskEnvironment(task_id="billing_refund_easy")
    observation = env.reset()

    for _ in range(6):
        observation = env.step(SupportDeskAction(operation="classify"))

    assert observation.done is True
    assert env.state.step_count == 6


def test_grade_is_bounded_between_zero_and_one():
    """An untouched case grades strictly inside the open interval (0, 1)."""
    task = get_task("regulated_export_exception_hard")
    env = SupportDeskEnvironment(task_id=task.task_id)
    env.reset()

    breakdown = grade_case(task, env.state.case)
    assert 0.0 < breakdown.total_score < 1.0


def test_task_specific_graders_are_importable_and_clamped():
    """Each per-task grader imports and grades an empty case to its floor."""
    # Imported locally so an import failure is reported by this test alone.
    from graders import (
        AccountTakeoverMediumGrader,
        ApiIncidentHardGrader,
        BillingRefundEasyGrader,
        RegulatedExportExceptionHardGrader,
    )

    case = SupportCaseProgress()
    scores = [
        BillingRefundEasyGrader().grade(case),
        AccountTakeoverMediumGrader().grade(case),
        ApiIncidentHardGrader().grade(case),
        RegulatedExportExceptionHardGrader().grade(case),
    ]
    assert scores == pytest.approx([0.15, 0.01, 0.01, 0.01])


def test_openenv_manifest_graders_are_importable():
    """Every task in openenv.yaml declares an ``llm`` grader with a prompt."""
    # Use a context manager so the manifest file handle is always closed.
    with open("openenv.yaml", encoding="utf-8") as manifest_file:
        manifest = yaml.safe_load(manifest_file)

    assert "tasks" in manifest
    assert len(manifest["tasks"]) >= 4
    for task in manifest["tasks"]:
        grader_block = task["grader"]
        assert isinstance(grader_block, dict)
        assert grader_block.get("type") == "llm"
        assert isinstance(grader_block.get("prompt_template"), str)


def test_state_includes_episode_id_after_reset():
    """An explicit episode id passed to reset is reflected in the state."""
    env = SupportDeskEnvironment(task_id="billing_refund_easy")
    env.reset(episode_id="episode-123")

    assert env.state.episode_id == "episode-123"
    assert env.state.workflow_stage == "intake"
    assert "finance_close_risk" in env.state.risk_flags


def test_premature_submit_gets_penalized():
    """Submitting as the very first action ends the episode with a penalty."""
    env = SupportDeskEnvironment(task_id="api_incident_hard")
    env.reset()

    observation = env.step(
        SupportDeskAction(
            operation="submit",
            status="resolved",
            resolution_code="incident_opened",
        )
    )

    assert observation.reward < 0
    assert observation.done is True


def test_follow_up_arrives_after_wait():
    """The customer follow-up goes from pending to partial after a wait."""
    env = SupportDeskEnvironment(task_id="account_takeover_medium")
    env.reset()

    env.step(
        SupportDeskAction(
            operation="classify",
            queue="trust_and_safety",
            priority="urgent",
            issue_type="account_compromise",
        )
    )
    observation = env.step(
        SupportDeskAction(
            operation="request_info",
            requested_fields=["workspace_id", "last_successful_login", "billing_email"],
        )
    )
    assert observation.case.customer_follow_up.status == "pending"

    observation = env.step(SupportDeskAction(operation="wait"))
    assert observation.case.customer_follow_up.status == "partial"
    assert "customer_reply_incomplete" in observation.risk_flags


@_requires_test_client
def test_http_reset_step_state_are_session_consistent():
    """/reset, /step and /state all operate on the same implicit episode."""
    from server.app import app

    client = TestClient(app)

    reset_response = client.post("/reset", json={"episode_id": "http-episode"})
    assert reset_response.status_code == 200
    reset_payload = reset_response.json()
    assert "score" in reset_payload
    assert 0.0 < reset_payload["score"] < 1.0

    step_response = client.post(
        "/step",
        json={
            "action": {
                "operation": "classify",
                "queue": "billing_ops",
                "priority": "high",
                "issue_type": "duplicate_charge",
                "status": "new",
                "requested_fields": [],
                "reply": "",
                "internal_note": "",
            }
        },
    )
    assert step_response.status_code == 200
    step_payload = step_response.json()
    assert "score" in step_payload
    assert 0.0 < step_payload["score"] < 1.0

    state_response = client.get("/state")
    assert state_response.status_code == 200
    state_payload = state_response.json()
    assert state_payload["episode_id"] == "http-episode"
    assert state_payload["step_count"] == 1
    assert state_payload["case"]["queue"] == "billing_ops"
    assert state_payload["case"]["priority"] == "high"
    assert state_payload["case"]["issue_type"] == "duplicate_charge"


@_requires_test_client
def test_http_explicit_episode_helpers_work():
    """The /episodes/{id}/step and /episodes/{id}/state routes track one episode."""
    from server.app import app

    client = TestClient(app)
    episode_id = "explicit-http-episode"

    reset_response = client.post("/reset", json={"episode_id": episode_id})
    assert reset_response.status_code == 200

    step_response = client.post(
        f"/episodes/{episode_id}/step",
        json={
            "action": {
                "operation": "classify",
                "queue": "billing_ops",
                "priority": "high",
                "issue_type": "duplicate_charge",
            }
        },
    )
    assert step_response.status_code == 200
    step_payload = step_response.json()
    assert "score" in step_payload
    assert 0.0 < step_payload["score"] < 1.0

    state_response = client.get(f"/episodes/{episode_id}/state")
    assert state_response.status_code == 200
    state_payload = state_response.json()
    assert state_payload["episode_id"] == episode_id
    assert state_payload["step_count"] == 1
    assert state_payload["case"]["queue"] == "billing_ops"
    assert state_payload["case"]["priority"] == "high"
    assert state_payload["case"]["issue_type"] == "duplicate_charge"


@_requires_test_client
def test_http_tasks_include_truthy_grader_field():
    """GET /tasks lists at least four tasks, each with a ``graders:`` reference."""
    from server.app import app

    client = TestClient(app)

    tasks_response = client.get("/tasks")
    assert tasks_response.status_code == 200
    payload = tasks_response.json()
    assert payload["total_tasks"] >= 4
    assert len(payload["tasks"]) >= 4
    for task in payload["tasks"]:
        assert task["grader"].startswith("graders:")