HyperBrickCaseOps / tests /test_supportdesk.py
modelbuilderhq's picture
Upload folder using huggingface_hub
726cf7a verified
raw
history blame
3.91 kB
"""Smoke tests for the SupportDesk environment."""
import pytest
# Optional dependency guard for the HTTP smoke test below.
try:
    from fastapi.testclient import TestClient
except (ImportError, RuntimeError):
    # ImportError: FastAPI (or Starlette) is not installed at all.
    # RuntimeError: Starlette's testclient module raises this when its
    # httpx dependency is missing.
    # Either way the HTTP test is skipped rather than breaking collection
    # of every other test in this module.
    TestClient = None  # type: ignore[assignment]
from supportdesk_env.graders import grade_case
from supportdesk_env.models import SupportDeskAction
from supportdesk_env.server.supportdesk_environment import SupportDeskEnvironment
from supportdesk_env.tasks import get_task, list_task_ids
def test_all_tasks_are_registered():
    """The task registry lists exactly the three expected task ids, in order."""
    expected_ids = [
        "billing_refund_easy",
        "account_takeover_medium",
        "api_incident_hard",
    ]
    registered_ids = list_task_ids()
    assert registered_ids == expected_ids
def test_environment_reset_and_state():
    """Resetting yields an observation for the requested task and a fresh state.

    Checks that the observation carries the task id the environment was
    built with, that no steps have been counted yet, and that the initial
    score is 0.15.
    """
    env = SupportDeskEnvironment(task_id="billing_refund_easy")
    observation = env.reset()

    assert observation.task_id == "billing_refund_easy"
    assert env.state.step_count == 0
    # Compare the float score with a tolerance so the test does not depend
    # on how the grader happens to accumulate its partial credits.
    # NOTE(review): 0.15 is presumably the baseline score of an untouched
    # case for this task; confirm against the grader.
    assert env.state.current_score == pytest.approx(0.15)
def test_perfect_solution_grades_full_score():
    """Playing the gold answers for the easy billing task grades to 1.0.

    The episode runs classify -> draft_reply -> add_internal_note -> submit,
    using the task's own gold labels for every graded field, then grades the
    resulting case directly with ``grade_case``.
    """
    task = get_task("billing_refund_easy")
    env = SupportDeskEnvironment(task_id=task.task_id)
    env.reset()

    # The order matters: the case is classified and answered before it is
    # submitted.
    gold_actions = [
        SupportDeskAction(
            operation="classify",
            queue=task.gold_queue,
            priority=task.gold_priority,
            issue_type=task.gold_issue_type,
        ),
        SupportDeskAction(
            operation="draft_reply",
            reply="Refund approved for the duplicate charge and it should arrive within 5-7 business days.",
        ),
        SupportDeskAction(
            operation="add_internal_note",
            internal_note="Duplicate charge verified and refund approved.",
        ),
        SupportDeskAction(
            operation="submit",
            status=task.gold_status,
            resolution_code=task.gold_resolution_code,
        ),
    ]
    for action in gold_actions:
        env.step(action)

    breakdown = grade_case(task, env.state.case)
    # The total is a float built from several components, so compare it with
    # a tolerance rather than demanding bit-exact equality.
    assert breakdown.total_score == pytest.approx(1.0)
def test_max_steps_ends_episode():
    """After six bare classify steps the episode reports done with six steps counted."""
    # NOTE(review): 6 is presumably the step limit for this task; confirm
    # against the task definition.
    step_budget = 6

    env = SupportDeskEnvironment(task_id="billing_refund_easy")
    last_observation = env.reset()

    steps_taken = 0
    while steps_taken < step_budget:
        last_observation = env.step(SupportDeskAction(operation="classify"))
        steps_taken += 1

    assert last_observation.done is True
    assert env.state.step_count == 6
def test_grade_is_bounded_between_zero_and_one():
    """Grading a freshly reset hard-task case yields a total score within [0, 1]."""
    hard_task = get_task("api_incident_hard")
    env = SupportDeskEnvironment(task_id=hard_task.task_id)
    env.reset()

    # Grade the case exactly as it stands after reset, with no actions taken.
    score = grade_case(hard_task, env.state.case).total_score
    assert 0.0 <= score <= 1.0
def test_state_includes_episode_id_after_reset():
    """An episode id passed to ``reset`` is exposed on the environment state."""
    wanted_episode_id = "episode-123"

    env = SupportDeskEnvironment(task_id="billing_refund_easy")
    env.reset(episode_id=wanted_episode_id)

    assert env.state.episode_id == wanted_episode_id
@pytest.mark.skipif(
    TestClient is None,
    reason="httpx is not installed for FastAPI TestClient",
)
def test_http_reset_step_state_are_session_consistent():
    """POST /reset, POST /step and GET /state all act on the same episode.

    Resets with an explicit episode id, sends one classify action, then
    checks that the state endpoint reports that episode id, a step count of
    one, and the classification fields that were just submitted.
    """
    from supportdesk_env.server.app import app

    http = TestClient(app)

    reset_result = http.post("/reset", json={"episode_id": "http-episode"})
    assert reset_result.status_code == 200

    classify_action = {
        "operation": "classify",
        "queue": "billing_ops",
        "priority": "high",
        "issue_type": "duplicate_charge",
        "status": "new",
        "requested_fields": [],
        "reply": "",
        "internal_note": "",
    }
    step_result = http.post("/step", json={"action": classify_action})
    assert step_result.status_code == 200

    state_result = http.get("/state")
    assert state_result.status_code == 200

    state = state_result.json()
    assert state["episode_id"] == "http-episode"
    assert state["step_count"] == 1

    # The classification sent via /step must be visible on the stored case.
    case = state["case"]
    assert case["queue"] == "billing_ops"
    assert case["priority"] == "high"
    assert case["issue_type"] == "duplicate_charge"