HyperBrickCaseOps / tests /test_supportdesk.py
modelbuilderhq's picture
Upload folder using huggingface_hub
551c5bc verified
"""Smoke tests for the SupportDesk environment."""
import importlib
import pytest
import yaml
try:
from fastapi.testclient import TestClient
except RuntimeError:
TestClient = None # type: ignore[assignment]
from graders import grade_case
from models import SupportCaseProgress, SupportDeskAction
from server.supportdesk_environment import SupportDeskEnvironment
from tasks import get_task, list_task_ids
def test_all_tasks_are_registered():
    """The task registry lists exactly the four known task ids, in this order."""
    expected_task_ids = [
        "billing_refund_easy",
        "account_takeover_medium",
        "api_incident_hard",
        "regulated_export_exception_hard",
    ]
    registered_task_ids = list_task_ids()
    assert registered_task_ids == expected_task_ids
def test_environment_reset_and_state():
    """Check the first observation and the state produced by ``reset()``.

    Covers the task id, the initial workflow stage, the suggested next
    action, the SLA budget, the step counter and the starting score for the
    ``billing_refund_easy`` task.
    """
    env = SupportDeskEnvironment(task_id="billing_refund_easy")
    observation = env.reset()

    assert observation.task_id == "billing_refund_easy"
    assert observation.workflow_stage == "intake"
    assert "classify" in observation.required_next_actions
    assert observation.current_sla_minutes_remaining == 240
    assert env.state.step_count == 0
    # The score is a float; compare with a tolerance so the assertion does
    # not depend on the exact bit pattern the grader happens to produce.
    assert env.state.current_score == pytest.approx(0.15)
def test_perfect_solution_grades_full_score():
    """Replay the gold trajectory for ``billing_refund_easy`` and grade it.

    The episode classifies the case with the task's gold labels, drafts a
    reply, adds an internal note and submits with the gold status and
    resolution code. The resulting case is expected to grade at 0.99.
    """
    task = get_task("billing_refund_easy")
    env = SupportDeskEnvironment(task_id=task.task_id)
    env.reset()

    env.step(
        SupportDeskAction(
            operation="classify",
            queue=task.gold_queue,
            priority=task.gold_priority,
            issue_type=task.gold_issue_type,
        )
    )
    env.step(
        SupportDeskAction(
            operation="draft_reply",
            reply="Refund approved for the duplicate charge and it should arrive within 5-7 business days.",
        )
    )
    env.step(
        SupportDeskAction(
            operation="add_internal_note",
            internal_note="Duplicate charge verified and refund approved.",
        )
    )
    env.step(
        SupportDeskAction(
            operation="submit",
            status=task.gold_status,
            resolution_code=task.gold_resolution_code,
        )
    )

    breakdown = grade_case(task, env.state.case)
    # Tolerant float comparison: exact ``==`` on a computed score is brittle.
    assert breakdown.total_score == pytest.approx(0.99)
def test_max_steps_ends_episode():
    """After six ``classify`` steps the episode is done and all six are counted."""
    env = SupportDeskEnvironment(task_id="billing_refund_easy")
    latest_observation = env.reset()

    steps_taken = 0
    while steps_taken < 6:
        # A fresh action object is built for every step, as in the loop above.
        latest_observation = env.step(SupportDeskAction(operation="classify"))
        steps_taken += 1

    assert latest_observation.done is True
    assert env.state.step_count == 6
def test_grade_is_bounded_between_zero_and_one():
    """Grading a freshly reset case gives a score strictly inside (0, 1)."""
    hard_task = get_task("regulated_export_exception_hard")
    env = SupportDeskEnvironment(task_id=hard_task.task_id)
    env.reset()

    score = grade_case(hard_task, env.state.case).total_score
    assert score > 0.0
    assert score < 1.0
def test_task_specific_graders_are_importable_and_clamped():
    """Each per-task grader class imports and grades an empty case.

    A default-constructed ``SupportCaseProgress`` is graded by all four
    task-specific graders and the scores are compared with the expected
    values.
    """
    # Imported here so that a missing grader class fails only this test.
    from graders import (
        AccountTakeoverMediumGrader,
        ApiIncidentHardGrader,
        BillingRefundEasyGrader,
        RegulatedExportExceptionHardGrader,
    )

    # ``SupportCaseProgress`` is already imported at module level, so it is
    # not re-imported locally.
    case = SupportCaseProgress()
    scores = [
        BillingRefundEasyGrader().grade(case),
        AccountTakeoverMediumGrader().grade(case),
        ApiIncidentHardGrader().grade(case),
        RegulatedExportExceptionHardGrader().grade(case),
    ]
    # Element-wise tolerant comparison instead of exact float equality.
    assert scores == pytest.approx([0.15, 0.01, 0.01, 0.01])
def test_openenv_manifest_graders_are_importable():
    """Validate the ``tasks`` section of ``openenv.yaml``.

    The manifest is read from ``openenv.yaml`` relative to the current
    working directory. It must declare at least four tasks, and every task
    must carry a ``grader`` mapping whose ``type`` is ``"llm"`` and whose
    ``prompt_template`` is a string.
    """
    # Use a context manager so the file handle is closed deterministically
    # rather than being left for the garbage collector.
    with open("openenv.yaml", encoding="utf-8") as manifest_file:
        manifest = yaml.safe_load(manifest_file)

    assert "tasks" in manifest
    assert len(manifest["tasks"]) >= 4
    for task in manifest["tasks"]:
        grader_block = task["grader"]
        assert isinstance(grader_block, dict)
        assert grader_block.get("type") == "llm"
        assert isinstance(grader_block.get("prompt_template"), str)
def test_state_includes_episode_id_after_reset():
    """An ``episode_id`` passed to ``reset()`` is reflected in the state.

    Also checks the initial workflow stage and one expected risk flag.
    """
    requested_episode_id = "episode-123"
    env = SupportDeskEnvironment(task_id="billing_refund_easy")
    env.reset(episode_id=requested_episode_id)

    assert env.state.episode_id == requested_episode_id
    assert env.state.workflow_stage == "intake"
    assert "finance_close_risk" in env.state.risk_flags
def test_premature_submit_gets_penalized():
    """Submitting right after reset yields a negative reward and ends the episode."""
    env = SupportDeskEnvironment(task_id="api_incident_hard")
    env.reset()

    early_submit = SupportDeskAction(
        operation="submit",
        status="resolved",
        resolution_code="incident_opened",
    )
    result = env.step(early_submit)

    assert result.reward < 0
    assert result.done is True
def test_follow_up_arrives_after_wait():
    """The customer follow-up goes from pending to partial after a ``wait``.

    Sequence: classify, request three fields (follow-up is ``pending``), then
    wait (follow-up becomes ``partial`` and ``customer_reply_incomplete`` is
    flagged).
    """
    env = SupportDeskEnvironment(task_id="account_takeover_medium")
    env.reset()

    classify_action = SupportDeskAction(
        operation="classify",
        queue="trust_and_safety",
        priority="urgent",
        issue_type="account_compromise",
    )
    env.step(classify_action)

    request_action = SupportDeskAction(
        operation="request_info",
        requested_fields=["workspace_id", "last_successful_login", "billing_email"],
    )
    after_request = env.step(request_action)
    assert after_request.case.customer_follow_up.status == "pending"

    after_wait = env.step(SupportDeskAction(operation="wait"))
    assert after_wait.case.customer_follow_up.status == "partial"
    assert "customer_reply_incomplete" in after_wait.risk_flags
@pytest.mark.skipif(TestClient is None, reason="httpx is not installed for FastAPI TestClient")
def test_http_reset_step_state_are_session_consistent():
    """``/reset``, ``/step`` and ``/state`` operate on the same episode.

    Resets with an explicit episode id, sends one ``classify`` step, then
    reads the state back and checks it reflects that step.
    """
    from server.app import app

    http = TestClient(app)

    reset_reply = http.post("/reset", json={"episode_id": "http-episode"})
    assert reset_reply.status_code == 200
    reset_body = reset_reply.json()
    assert "score" in reset_body
    assert 0.0 < reset_body["score"] < 1.0

    classify_action = {
        "operation": "classify",
        "queue": "billing_ops",
        "priority": "high",
        "issue_type": "duplicate_charge",
        "status": "new",
        "requested_fields": [],
        "reply": "",
        "internal_note": "",
    }
    step_reply = http.post("/step", json={"action": classify_action})
    assert step_reply.status_code == 200
    step_body = step_reply.json()
    assert "score" in step_body
    assert 0.0 < step_body["score"] < 1.0

    state_reply = http.get("/state")
    assert state_reply.status_code == 200
    state_body = state_reply.json()
    assert state_body["episode_id"] == "http-episode"
    assert state_body["step_count"] == 1
    # The classification sent in the step must be visible on the stored case.
    assert state_body["case"]["queue"] == "billing_ops"
    assert state_body["case"]["priority"] == "high"
    assert state_body["case"]["issue_type"] == "duplicate_charge"
@pytest.mark.skipif(TestClient is None, reason="httpx is not installed for FastAPI TestClient")
def test_http_explicit_episode_helpers_work():
    """The ``/episodes/{id}/step`` and ``/episodes/{id}/state`` routes work.

    Resets with an explicit episode id, steps through the episode-scoped
    route, then reads the state from the episode-scoped route.
    """
    from server.app import app

    http = TestClient(app)
    episode_id = "explicit-http-episode"

    reset_reply = http.post("/reset", json={"episode_id": episode_id})
    assert reset_reply.status_code == 200

    classify_action = {
        "operation": "classify",
        "queue": "billing_ops",
        "priority": "high",
        "issue_type": "duplicate_charge",
    }
    step_reply = http.post(
        f"/episodes/{episode_id}/step",
        json={"action": classify_action},
    )
    assert step_reply.status_code == 200
    step_body = step_reply.json()
    assert "score" in step_body
    assert 0.0 < step_body["score"] < 1.0

    state_reply = http.get(f"/episodes/{episode_id}/state")
    assert state_reply.status_code == 200
    state_body = state_reply.json()
    assert state_body["episode_id"] == episode_id
    assert state_body["step_count"] == 1
    # The classification sent in the step must be visible on the stored case.
    assert state_body["case"]["queue"] == "billing_ops"
    assert state_body["case"]["priority"] == "high"
    assert state_body["case"]["issue_type"] == "duplicate_charge"
@pytest.mark.skipif(TestClient is None, reason="httpx is not installed for FastAPI TestClient")
def test_http_tasks_include_truthy_grader_field():
    """``GET /tasks`` lists at least four tasks, each with a ``graders:`` reference."""
    from server.app import app

    http = TestClient(app)
    reply = http.get("/tasks")
    assert reply.status_code == 200

    body = reply.json()
    assert body["total_tasks"] >= 4
    assert len(body["tasks"]) >= 4
    for task_entry in body["tasks"]:
        grader_ref = task_entry["grader"]
        assert grader_ref.startswith("graders:")