Spaces:
Sleeping
Sleeping
| """Smoke tests for the SupportDesk environment.""" | |
| import pytest | |
| try: | |
| from fastapi.testclient import TestClient | |
| except RuntimeError: | |
| TestClient = None # type: ignore[assignment] | |
| from supportdesk_env.graders import grade_case | |
| from supportdesk_env.models import SupportDeskAction | |
| from supportdesk_env.server.supportdesk_environment import SupportDeskEnvironment | |
| from supportdesk_env.tasks import get_task, list_task_ids | |
def test_all_tasks_are_registered():
    """The task registry lists exactly the three known task ids, in order."""
    expected_task_ids = [
        "billing_refund_easy",
        "account_takeover_medium",
        "api_incident_hard",
    ]
    # List equality is order-sensitive, so this also pins the registry order.
    assert list_task_ids() == expected_task_ids
def test_environment_reset_and_state():
    """A fresh reset reports the requested task, zero steps, and a 0.15 score."""
    task_id = "billing_refund_easy"
    env = SupportDeskEnvironment(task_id=task_id)
    first_observation = env.reset()

    assert first_observation.task_id == task_id
    assert env.state.step_count == 0
    # Compare the float score with a tolerance rather than exact equality so
    # the test does not depend on how the score is computed internally.
    assert env.state.current_score == pytest.approx(0.15)
def test_perfect_solution_grades_full_score():
    """Submitting the task's gold values step by step grades to a full score.

    The episode classifies with the gold queue/priority/issue type, drafts a
    reply, adds an internal note, and submits with the gold status and
    resolution code; the resulting case is then graded against the task.
    """
    task = get_task("billing_refund_easy")
    env = SupportDeskEnvironment(task_id=task.task_id)
    env.reset()

    gold_actions = (
        SupportDeskAction(
            operation="classify",
            queue=task.gold_queue,
            priority=task.gold_priority,
            issue_type=task.gold_issue_type,
        ),
        SupportDeskAction(
            operation="draft_reply",
            reply="Refund approved for the duplicate charge and it should arrive within 5-7 business days.",
        ),
        SupportDeskAction(
            operation="add_internal_note",
            internal_note="Duplicate charge verified and refund approved.",
        ),
        SupportDeskAction(
            operation="submit",
            status=task.gold_status,
            resolution_code=task.gold_resolution_code,
        ),
    )
    # Order matters: the actions are applied in the sequence listed above.
    for action in gold_actions:
        env.step(action)

    breakdown = grade_case(task, env.state.case)
    # Tolerant float comparison: a total built from fractional components may
    # not be bit-for-bit equal to 1.0.
    assert breakdown.total_score == pytest.approx(1.0)
def test_max_steps_ends_episode():
    """After six bare ``classify`` steps the episode is reported as done."""
    step_budget = 6
    env = SupportDeskEnvironment(task_id="billing_refund_easy")
    env.reset()

    # Build a fresh action per step, as each call should be independent.
    observations = [
        env.step(SupportDeskAction(operation="classify"))
        for _ in range(step_budget)
    ]
    last_observation = observations[-1]

    assert last_observation.done is True
    assert env.state.step_count == step_budget
def test_grade_is_bounded_between_zero_and_one():
    """Grading an untouched, freshly reset case yields a score within [0, 1]."""
    hard_task = get_task("api_incident_hard")
    env = SupportDeskEnvironment(task_id=hard_task.task_id)
    env.reset()

    score = grade_case(hard_task, env.state.case).total_score
    assert score >= 0.0
    assert score <= 1.0
def test_state_includes_episode_id_after_reset():
    """An ``episode_id`` passed to ``reset`` is exposed on the environment state."""
    requested_episode_id = "episode-123"
    env = SupportDeskEnvironment(task_id="billing_refund_easy")
    env.reset(episode_id=requested_episode_id)

    assert env.state.episode_id == requested_episode_id
def test_http_reset_step_state_are_session_consistent():
    """``/reset``, ``/step`` and ``/state`` share one session over HTTP.

    Resets with an explicit episode id, performs a single ``classify`` step,
    then checks that ``/state`` reflects both the episode id and the fields
    written by that step.
    """
    # The module-level import falls back to ``TestClient = None`` when the
    # import raises RuntimeError; skip instead of failing with
    # "'NoneType' object is not callable".
    if TestClient is None:
        pytest.skip("fastapi.testclient.TestClient is unavailable")

    # Imported lazily so that the other tests do not require the HTTP app.
    from supportdesk_env.server.app import app

    client = TestClient(app)

    reset_response = client.post("/reset", json={"episode_id": "http-episode"})
    assert reset_response.status_code == 200

    step_response = client.post(
        "/step",
        json={
            "action": {
                "operation": "classify",
                "queue": "billing_ops",
                "priority": "high",
                "issue_type": "duplicate_charge",
                "status": "new",
                "requested_fields": [],
                "reply": "",
                "internal_note": "",
            }
        },
    )
    assert step_response.status_code == 200

    state_response = client.get("/state")
    assert state_response.status_code == 200

    state_payload = state_response.json()
    assert state_payload["episode_id"] == "http-episode"
    assert state_payload["step_count"] == 1
    # The classify step's values must be visible in the reported case.
    assert state_payload["case"]["queue"] == "billing_ops"
    assert state_payload["case"]["priority"] == "high"
    assert state_payload["case"]["issue_type"] == "duplicate_charge"