File size: 4,796 Bytes
485c977
 
 
 
 
 
 
 
 
 
 
 
e2d9024
 
 
8fc6354
e2d9024
3dba410
485c977
e2d9024
 
 
485c977
e2d9024
 
 
485c977
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e2d9024
 
 
3dba410
 
485c977
3dba410
ec4ef2f
485c977
3dba410
ec4ef2f
3dba410
 
e2d9024
3dba410
 
8fc6354
485c977
e2d9024
3dba410
 
 
 
485c977
3dba410
ec4ef2f
3dba410
 
ec4ef2f
3dba410
 
ec4ef2f
3dba410
 
ec4ef2f
485c977
ec4ef2f
e2d9024
3dba410
 
8fc6354
485c977
e2d9024
3dba410
 
 
 
 
485c977
3dba410
ec4ef2f
485c977
3dba410
ec4ef2f
485c977
3dba410
ec4ef2f
485c977
3dba410
ec4ef2f
485c977
3dba410
e2d9024
3dba410
485c977
3dba410
 
e2d9024
3dba410
 
e2d9024
 
485c977
3dba410
f188811
485c977
3dba410
f188811
485c977
3dba410
f188811
3dba410
e2d9024
485c977
e2d9024
485c977
 
e2d9024
 
 
485c977
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
from dataclasses import dataclass
from typing import Dict, Any, Tuple, List

from env_models import (
    TicketObservation,
    TriageAction,
    TicketCategory,
    TicketPriority,
    Department,
    EscalationDecision,
)

# ───────────────────────────────────────────────────────
# STRICT CLAMP (NO EDGE FAILURES)
# ───────────────────────────────────────────────────────
def _clamp(score: float) -> float:
    return max(0.01, min(0.99, float(score)))


# ───────────────────────────────────────────────────────
# TICKETS
# ───────────────────────────────────────────────────────
# Sample ticket: a Windows laptop that shows a black screen and will not boot.
TICKET_EASY = TicketObservation(
    ticket_id="T1",
    subject="Laptop not booting",
    body="My laptop shows black screen",
    reporter_name="User",
    reporter_role="Employee",
    system_info="Windows",
    timestamp="now",
    previous_tickets=0,
    task_instruction="Classify issue",
    # Every member value of each enum is offered as an allowed choice.
    valid_categories=[category.value for category in TicketCategory],
    valid_priorities=[priority.value for priority in TicketPriority],
    valid_departments=[department.value for department in Department],
)

# Both names are aliases of the very same TICKET_EASY object (no copy is made).
# NOTE(review): grade_medium rewards NETWORK/ACCESS and grade_hard rewards
# SECURITY, yet both tasks present the laptop ticket — confirm whether
# dedicated tickets are intended here.
TICKET_MEDIUM = TICKET_HARD = TICKET_EASY


# ───────────────────────────────────────────────────────
# GRADERS
# ───────────────────────────────────────────────────────
def grade_easy(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
    """Score a triage action for the easy task.

    Points awarded:
        * 0.6  – category is ``TicketCategory.HARDWARE``
        * 0.25 – priority is HIGH or CRITICAL
        * 0.15 – response is a string longer than 10 characters

    Args:
        action: The triage action to grade.

    Returns:
        ``(score, breakdown)`` where ``score`` is the total after ``_clamp``
        and ``breakdown`` holds that same value under the ``"score"`` key.
    """
    total = 0.0
    total += 0.6 if action.category == TicketCategory.HARDWARE else 0.0
    total += (
        0.25
        if action.priority in {TicketPriority.HIGH, TicketPriority.CRITICAL}
        else 0.0
    )
    total += (
        0.15
        if isinstance(action.response, str) and len(action.response) > 10
        else 0.0
    )

    final = float(_clamp(total))
    return final, {"score": final}


def grade_medium(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
    """Score a triage action for the medium task.

    Points awarded:
        * 0.25 – category is NETWORK or ACCESS
        * 0.25 – priority is HIGH or CRITICAL
        * 0.25 – department is SYSADMIN or TIER2_SUPPORT
        * 0.1  – escalation decision is ESCALATE
        * 0.15 – response is a string longer than 20 characters

    Args:
        action: The triage action to grade.

    Returns:
        ``(score, breakdown)`` where ``score`` is the total after ``_clamp``
        and ``breakdown`` holds that same value under the ``"score"`` key.
    """
    # (criterion met?, points) pairs, applied in order.
    checks = (
        (action.category in {TicketCategory.NETWORK, TicketCategory.ACCESS}, 0.25),
        (action.priority in {TicketPriority.HIGH, TicketPriority.CRITICAL}, 0.25),
        (action.department in {Department.SYSADMIN, Department.TIER2_SUPPORT}, 0.25),
        (action.escalate == EscalationDecision.ESCALATE, 0.1),
        (isinstance(action.response, str) and len(action.response) > 20, 0.15),
    )

    total = 0.0
    for earned, points in checks:
        if earned:
            total += points

    final = float(_clamp(total))
    return final, {"score": final}


def grade_hard(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
    """Score a triage action for the hard task, including penalties.

    Points awarded:
        * 0.2  – category is ``TicketCategory.SECURITY``
        * 0.2  – priority is ``TicketPriority.CRITICAL``
        * 0.2  – department is ``Department.SECURITY_TEAM``
        * 0.1  – escalation decision is ESCALATE
        * 0.15 – response contains "disconnect"
        * 0.15 – response contains "do not" or "don't"

    Penalties subtracted:
        * 0.4 – response contains "decrypt" or "recover"
        * 0.3 – response does not contain "disconnect"
        * 0.2 – escalation decision is not ESCALATE

    Response checks are case-insensitive substring matches.

    Args:
        action: The triage action to grade.

    Returns:
        ``(score, breakdown)`` where ``score`` is ``points - penalties`` after
        ``_clamp`` and ``breakdown`` holds that same value under ``"score"``.
    """
    score = 0.0
    penalty = 0.0

    if action.category == TicketCategory.SECURITY:
        score += 0.2

    if action.priority == TicketPriority.CRITICAL:
        score += 0.2

    if action.department == Department.SECURITY_TEAM:
        score += 0.2

    escalated = action.escalate == EscalationDecision.ESCALATE
    if escalated:
        score += 0.1

    # Same isinstance guard as grade_easy/grade_medium: a missing or
    # non-string response is graded as empty text instead of raising
    # AttributeError on ``.lower()``.
    resp = action.response.lower() if isinstance(action.response, str) else ""
    mentions_disconnect = "disconnect" in resp

    if mentions_disconnect:
        score += 0.15

    if "do not" in resp or "don't" in resp:
        score += 0.15

    # Penalties
    if "decrypt" in resp or "recover" in resp:
        penalty += 0.4

    if not mentions_disconnect:
        penalty += 0.3

    if not escalated:
        penalty += 0.2

    final = float(_clamp(score - penalty))
    return final, {"score": final}


# ───────────────────────────────────────────────────────
# TASK STRUCTURE
# ───────────────────────────────────────────────────────
@dataclass
class Task:
    """One gradable task: a ticket, the function that scores it, and metadata.

    Field order matters: the TASKS list builds instances positionally.
    """

    # Unique key; TASK_MAP indexes tasks by this value.
    task_id: str
    # Short label (e.g. "Easy").
    name: str
    # One-line summary of what the task covers.
    description: str
    # Difficulty label; the registry uses "easy", "medium" and "hard".
    difficulty: str
    # Ticket associated with the task.
    ticket: TicketObservation
    # Scoring callable; the registry passes the grade_* functions, which take
    # a TriageAction and return (score, breakdown).
    grader: Any
    # Defaults to 1 — presumably the number of steps allowed per episode;
    # TODO confirm against the environment that consumes it.
    max_steps: int = 1


# Registry of the available tasks, listed easy -> medium -> hard.
TASKS: List[Task] = [
    Task(
        task_id="task_easy",
        name="Easy",
        description="Basic classification",
        difficulty="easy",
        ticket=TICKET_EASY,
        grader=grade_easy,
    ),
    Task(
        task_id="task_medium",
        name="Medium",
        description="Full triage",
        difficulty="medium",
        ticket=TICKET_MEDIUM,
        grader=grade_medium,
    ),
    Task(
        task_id="task_hard",
        name="Hard",
        description="Security incident",
        difficulty="hard",
        ticket=TICKET_HARD,
        grader=grade_hard,
    ),
]

# Lookup table from task_id to its Task.
TASK_MAP = {task.task_id: task for task in TASKS}