"""Task definitions and graders for the ticket-triage tasks.

Defines the ticket observations, one grading function per difficulty level,
and the ``TASKS`` / ``TASK_MAP`` registries that tie them together.
"""

import math
from dataclasses import dataclass
from typing import Any, Callable, Dict, List, Tuple

from env_models import (
    Department,
    EscalationDecision,
    TicketCategory,
    TicketObservation,
    TicketPriority,
    TriageAction,
)

# ───────────────────────────────────────────────────────
# STRICT CLAMP (NO EDGE FAILURES)
# ───────────────────────────────────────────────────────

# Reported scores are kept strictly inside the open interval (0, 1).
_MIN_SCORE = 0.01
_MAX_SCORE = 0.99


def _clamp(score: float) -> float:
    """Return *score* as a float limited to ``[0.01, 0.99]``.

    NaN is mapped to the lower bound. Without that guard,
    ``min(0.99, nan)`` evaluates to ``0.99`` and an undefined score
    would be reported as the best possible one.
    """
    value = float(score)
    if math.isnan(value):
        return _MIN_SCORE
    return max(_MIN_SCORE, min(_MAX_SCORE, value))


def _response_text(action: TriageAction) -> str:
    """Return ``action.response`` if it is a string, otherwise ``""``."""
    response = action.response
    return response if isinstance(response, str) else ""


def _result(raw_score: float) -> Tuple[float, Dict[str, Any]]:
    """Clamp *raw_score* and wrap it in the ``(score, breakdown)`` pair."""
    final = _clamp(raw_score)
    return final, {"score": final}


# ───────────────────────────────────────────────────────
# TICKETS
# ───────────────────────────────────────────────────────

TICKET_EASY = TicketObservation(
    ticket_id="T1",
    subject="Laptop not booting",
    body="My laptop shows black screen",
    reporter_name="User",
    reporter_role="Employee",
    system_info="Windows",
    timestamp="now",
    previous_tickets=0,
    task_instruction="Classify issue",
    valid_categories=[c.value for c in TicketCategory],
    valid_priorities=[p.value for p in TicketPriority],
    valid_departments=[d.value for d in Department],
)

# NOTE(review): the medium and hard tasks reuse the very same ticket object
# as the easy task. Their graders reward network/access and security triage
# respectively, which this hardware ticket does not describe — presumably
# placeholders; confirm and replace with dedicated tickets.
TICKET_MEDIUM = TICKET_EASY
TICKET_HARD = TICKET_EASY


# ───────────────────────────────────────────────────────
# GRADERS
# ───────────────────────────────────────────────────────

def grade_easy(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
    """Grade the easy task.

    Awards 0.6 for the HARDWARE category, 0.25 for HIGH or CRITICAL
    priority, and 0.15 for a string response longer than 10 characters.

    Returns:
        ``(score, breakdown)`` where ``score`` is clamped to
        ``[0.01, 0.99]`` and ``breakdown`` is ``{"score": score}``.
    """
    score = 0.0

    if action.category == TicketCategory.HARDWARE:
        score += 0.6
    if action.priority in {TicketPriority.HIGH, TicketPriority.CRITICAL}:
        score += 0.25
    if len(_response_text(action)) > 10:
        score += 0.15

    return _result(score)


def grade_medium(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
    """Grade the medium task.

    Awards 0.25 each for a NETWORK/ACCESS category, a HIGH/CRITICAL
    priority and a SYSADMIN/TIER2_SUPPORT department, 0.1 for escalating,
    and 0.15 for a string response longer than 20 characters.

    Returns:
        ``(score, breakdown)`` where ``score`` is clamped to
        ``[0.01, 0.99]`` and ``breakdown`` is ``{"score": score}``.
    """
    score = 0.0

    if action.category in {TicketCategory.NETWORK, TicketCategory.ACCESS}:
        score += 0.25
    if action.priority in {TicketPriority.HIGH, TicketPriority.CRITICAL}:
        score += 0.25
    if action.department in {Department.SYSADMIN, Department.TIER2_SUPPORT}:
        score += 0.25
    if action.escalate == EscalationDecision.ESCALATE:
        score += 0.1
    if len(_response_text(action)) > 20:
        score += 0.15

    return _result(score)


def grade_hard(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
    """Grade the hard task.

    Points: 0.2 each for SECURITY category, CRITICAL priority and
    SECURITY_TEAM department; 0.1 for escalating; 0.15 if the response
    mentions "disconnect"; 0.15 if it contains "do not" or "don't".

    Penalties: 0.4 if the response mentions "decrypt" or "recover",
    0.3 if it never mentions "disconnect", 0.2 if the ticket is not
    escalated.

    A missing or non-string response is treated as empty text, in line
    with the other graders, instead of raising.

    Returns:
        ``(score, breakdown)`` where ``score`` is points minus penalties,
        clamped to ``[0.01, 0.99]``, and ``breakdown`` is
        ``{"score": score}``.
    """
    escalated = action.escalate == EscalationDecision.ESCALATE
    text = _response_text(action).lower()
    mentions_disconnect = "disconnect" in text

    score = 0.0
    if action.category == TicketCategory.SECURITY:
        score += 0.2
    if action.priority == TicketPriority.CRITICAL:
        score += 0.2
    if action.department == Department.SECURITY_TEAM:
        score += 0.2
    if escalated:
        score += 0.1
    if mentions_disconnect:
        score += 0.15
    if "do not" in text or "don't" in text:
        score += 0.15

    # penalties
    penalty = 0.0
    # NOTE(review): plain substring match, so this also fires when the
    # response says e.g. "do not decrypt" — confirm that is intended.
    if "decrypt" in text or "recover" in text:
        penalty += 0.4
    if not mentions_disconnect:
        penalty += 0.3
    if not escalated:
        penalty += 0.2

    return _result(score - penalty)


# ───────────────────────────────────────────────────────
# TASK STRUCTURE
# ───────────────────────────────────────────────────────

@dataclass
class Task:
    """A gradable task: one ticket paired with the function that scores it."""

    task_id: str  # key used in TASK_MAP
    name: str
    description: str
    difficulty: str
    ticket: TicketObservation
    # Takes the submitted action, returns (score, breakdown).
    grader: Callable[[TriageAction], Tuple[float, Dict[str, Any]]]
    max_steps: int = 1


TASKS: List[Task] = [
    Task(
        task_id="task_easy",
        name="Easy",
        description="Basic classification",
        difficulty="easy",
        ticket=TICKET_EASY,
        grader=grade_easy,
    ),
    Task(
        task_id="task_medium",
        name="Medium",
        description="Full triage",
        difficulty="medium",
        ticket=TICKET_MEDIUM,
        grader=grade_medium,
    ),
    Task(
        task_id="task_hard",
        name="Hard",
        description="Security incident",
        difficulty="hard",
        ticket=TICKET_HARD,
        grader=grade_hard,
    ),
]

# Lookup of tasks by their task_id.
TASK_MAP = {t.task_id: t for t in TASKS}