kevanthonyP committed on
Commit
ec4ef2f
·
verified ·
1 Parent(s): 8fc6354

Update env_tasks.py

Browse files
Files changed (1) hide show
  1. env_tasks.py +86 -86
env_tasks.py CHANGED
@@ -1,7 +1,18 @@
1
- # ─── Graders ─────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  def _clamp(score: float) -> float:
4
- """Ensure score is strictly between (0,1)"""
5
  if score >= 1.0:
6
  return 0.95
7
  elif score <= 0.0:
@@ -9,131 +20,120 @@ def _clamp(score: float) -> float:
9
  return score
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def grade_easy(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
13
- breakdown: Dict[str, Any] = {}
14
  score = 0.0
 
15
 
16
- correct_category = action.category == TicketCategory.HARDWARE
17
- breakdown["category_correct"] = correct_category
18
- if correct_category:
19
- score += 0.60
20
 
21
- acceptable_priorities = {TicketPriority.HIGH, TicketPriority.CRITICAL}
22
- priority_ok = action.priority in acceptable_priorities
23
- breakdown["priority_correct"] = priority_ok
24
- if priority_ok:
25
  score += 0.25
26
 
27
- response_ok = (
28
- len(action.response) >= 30 and
29
- any(kw in action.response.lower() for kw in
30
- ["laptop", "boot", "restart", "hardware", "technician", "engineer", "update"])
31
- )
32
- breakdown["response_quality_ok"] = response_ok
33
- if response_ok:
34
  score += 0.15
35
 
36
  score = _clamp(score)
37
- breakdown["final_score"] = round(score, 4)
38
- return round(score, 4), breakdown
39
 
40
 
41
  def grade_medium(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
42
- breakdown: Dict[str, Any] = {}
43
  score = 0.0
 
44
 
45
- correct_cat = action.category in {TicketCategory.NETWORK, TicketCategory.ACCESS}
46
- breakdown["category_correct"] = correct_cat
47
- if correct_cat:
48
  score += 0.25
49
 
50
- correct_pri = action.priority in {TicketPriority.HIGH, TicketPriority.CRITICAL}
51
- breakdown["priority_correct"] = correct_pri
52
- if correct_pri:
53
  score += 0.25
54
 
55
- correct_dept = action.department in {Department.SYSADMIN, Department.TIER2_SUPPORT}
56
- breakdown["department_correct"] = correct_dept
57
- if correct_dept:
58
  score += 0.25
59
 
60
- correct_esc = action.escalate == EscalationDecision.ESCALATE
61
- breakdown["escalation_correct"] = correct_esc
62
- if correct_esc:
63
- score += 0.10
64
-
65
- response_ok = (
66
- len(action.response) >= 50 and
67
- any(kw in action.response.lower() for kw in
68
- ["team", "shared", "drive", "access", "investigating", "priority", "urgent"])
69
- )
70
- breakdown["response_quality_ok"] = response_ok
71
- if response_ok:
72
  score += 0.15
73
 
74
  score = _clamp(score)
75
- breakdown["final_score"] = round(score, 4)
76
- return round(score, 4), breakdown
77
 
78
 
79
  def grade_hard(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
80
- breakdown: Dict[str, Any] = {}
81
  score = 0.0
82
  penalty = 0.0
 
83
 
84
- correct_cat = action.category == TicketCategory.SECURITY
85
- breakdown["category_correct"] = correct_cat
86
- if correct_cat:
87
- score += 0.20
88
 
89
- correct_pri = action.priority == TicketPriority.CRITICAL
90
- breakdown["priority_correct"] = correct_pri
91
- if correct_pri:
92
- score += 0.20
93
 
94
- correct_dept = action.department == Department.SECURITY_TEAM
95
- breakdown["department_correct"] = correct_dept
96
- if correct_dept:
97
- score += 0.20
98
 
99
- correct_esc = action.escalate == EscalationDecision.ESCALATE
100
- breakdown["escalation_correct"] = correct_esc
101
- if correct_esc:
102
- score += 0.10
103
 
104
- resp_lower = action.response.lower()
105
 
106
- disconnect_mentioned = any(kw in resp_lower for kw in
107
- ["disconnect", "unplug", "network", "ethernet", "wifi", "internet", "isolate"])
108
- if disconnect_mentioned:
109
  score += 0.15
110
 
111
- no_self_recover_instruction = any(kw in resp_lower for kw in
112
- ["do not", "don't", "avoid", "do not attempt", "do not try"])
113
- if no_self_recover_instruction:
114
  score += 0.15
115
 
116
- dangerous_self_recovery = any(kw in resp_lower for kw in
117
- ["try the decryption", "use the tool", "recover yourself", "attempt recovery"])
118
- if dangerous_self_recovery:
119
- penalty += 0.40
 
 
 
120
 
121
- if not disconnect_mentioned:
122
- penalty += 0.30
123
 
124
- if not correct_esc:
125
- penalty += 0.20
126
 
127
- mentions_ransom = any(kw in resp_lower for kw in ["ransom", "btc", "bitcoin", "pay"])
128
- explicitly_says_do_not_pay = any(kw in resp_lower for kw in
129
- ["do not pay", "don't pay", "never pay"])
130
- if mentions_ransom and not explicitly_says_do_not_pay:
131
- penalty += 0.20
 
 
 
 
132
 
133
- raw_score = score - penalty
134
 
135
- # 🔥 FIXED LINE (CRITICAL)
136
- final_score = max(0.05, min(0.95, raw_score))
 
 
 
137
 
138
- breakdown["final_score"] = round(final_score, 4)
139
- return round(final_score, 4), breakdown
 
1
+ from dataclasses import dataclass
2
+ from typing import Dict, Any, Tuple, List
3
+
4
+ from env_models import (
5
+ TicketObservation,
6
+ TriageAction,
7
+ TicketCategory,
8
+ TicketPriority,
9
+ Department,
10
+ EscalationDecision,
11
+ )
12
+
13
+ # ─── Helper ─────────────────────────────────────────────
14
 
15
  def _clamp(score: float) -> float:
 
16
  if score >= 1.0:
17
  return 0.95
18
  elif score <= 0.0:
 
20
  return score
21
 
22
 
23
+ # ─── Dummy Tickets (keep minimal, your existing ones work too) ───────────────
24
+
25
def _make_ticket(
    ticket_id: str,
    subject: str,
    body: str,
    system_info: str,
    task_instruction: str,
) -> TicketObservation:
    """Build one ticket observation with the shared boilerplate filled in.

    Every call creates fresh ``valid_*`` lists from the enums, so the three
    task tickets never share mutable state with each other.
    """
    return TicketObservation(
        ticket_id=ticket_id,
        subject=subject,
        body=body,
        reporter_name="User",
        reporter_role="Employee",
        system_info=system_info,
        timestamp="now",
        previous_tickets=0,
        task_instruction=task_instruction,
        valid_categories=[c.value for c in TicketCategory],
        valid_priorities=[p.value for p in TicketPriority],
        valid_departments=[d.value for d in Department],
    )


# One distinct ticket per difficulty. Previously the medium and hard tickets
# were aliases of the laptop ticket, so the observation contradicted what the
# medium grader (network/access) and the hard grader (security) reward.
# NOTE(review): the medium/hard wording below is placeholder text inferred
# from what each grader rewards - replace with the production tickets.
TICKET_EASY = _make_ticket(
    ticket_id="1",
    subject="Laptop not working",
    body="Laptop not booting",
    system_info="Windows",
    task_instruction="Classify",
)

TICKET_MEDIUM = _make_ticket(
    ticket_id="2",
    subject="Team cannot access shared drive",
    body=(
        "Our whole team lost access to the shared network drive this "
        "morning and work is blocked."
    ),
    system_info="Windows",
    task_instruction=(
        "Classify, set a priority, route to a department and decide "
        "whether to escalate"
    ),
)

TICKET_HARD = _make_ticket(
    ticket_id="3",
    subject="Files encrypted and ransom note on screen",
    body=(
        "All my files were renamed and a note demands payment in bitcoin "
        "for a decryption tool."
    ),
    system_info="Windows",
    task_instruction=(
        "Classify, set a priority, route to a department, decide whether "
        "to escalate and write safe first-response guidance"
    ),
)
42
+
43
+
44
+ # ─── Graders ─────────────────────────────────────────────
45
+
def grade_easy(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
    """Grade a triage action for the easy (hardware) task.

    Weights: 0.6 for the HARDWARE category, 0.25 for a HIGH or CRITICAL
    priority, 0.15 for a response longer than 10 characters.

    Args:
        action: The agent's triage decision.

    Returns:
        ``(score, breakdown)`` where ``score`` is rounded to 4 decimals after
        being passed through ``_clamp``, and ``breakdown`` records which
        criteria were met plus the final score.
    """
    category_ok = action.category == TicketCategory.HARDWARE
    priority_ok = action.priority in {TicketPriority.HIGH, TicketPriority.CRITICAL}
    # A missing response counts as empty instead of crashing in len().
    response_ok = len(action.response or "") > 10

    score = 0.0
    if category_ok:
        score += 0.6
    if priority_ok:
        score += 0.25
    if response_ok:
        score += 0.15

    # Round first so float noise just under 1.0 cannot slip past the cap.
    score = _clamp(round(score, 4))

    # The breakdown used to be returned empty, which left callers with no
    # explanation of the score.
    breakdown: Dict[str, Any] = {
        "category_correct": category_ok,
        "priority_correct": priority_ok,
        "response_quality_ok": response_ok,
        "final_score": score,
    }
    return score, breakdown
 
61
 
62
 
def grade_medium(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
    """Grade a triage action for the medium (full triage) task.

    Weights: 0.25 each for an accepted category (NETWORK or ACCESS), an
    accepted priority (HIGH or CRITICAL) and an accepted department
    (SYSADMIN or TIER2_SUPPORT); 0.1 for escalating; 0.15 for a response
    longer than 20 characters.

    Args:
        action: The agent's triage decision.

    Returns:
        ``(score, breakdown)`` where ``score`` is rounded to 4 decimals after
        being passed through ``_clamp``, and ``breakdown`` records which
        criteria were met plus the final score.
    """
    category_ok = action.category in {TicketCategory.NETWORK, TicketCategory.ACCESS}
    priority_ok = action.priority in {TicketPriority.HIGH, TicketPriority.CRITICAL}
    department_ok = action.department in {
        Department.SYSADMIN,
        Department.TIER2_SUPPORT,
    }
    escalation_ok = action.escalate == EscalationDecision.ESCALATE
    # A missing response counts as empty instead of crashing in len().
    response_ok = len(action.response or "") > 20

    score = 0.0
    if category_ok:
        score += 0.25
    if priority_ok:
        score += 0.25
    if department_ok:
        score += 0.25
    if escalation_ok:
        score += 0.1
    if response_ok:
        score += 0.15

    # Round first so float noise just under 1.0 cannot slip past the cap.
    score = _clamp(round(score, 4))

    # The breakdown used to be returned empty, which left callers with no
    # explanation of the score.
    breakdown: Dict[str, Any] = {
        "category_correct": category_ok,
        "priority_correct": priority_ok,
        "department_correct": department_ok,
        "escalation_correct": escalation_ok,
        "response_quality_ok": response_ok,
        "final_score": score,
    }
    return score, breakdown
 
84
 
85
 
def grade_hard(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
    """Grade a triage action for the hard (security incident) task.

    Weights: 0.2 each for the SECURITY category, CRITICAL priority and
    SECURITY_TEAM department; 0.1 for escalating; 0.15 when the response
    mentions "disconnect"; 0.15 when it contains a prohibition ("do not" or
    "don't"). A 0.4 penalty applies when the response mentions decryption
    without any prohibition.

    Args:
        action: The agent's triage decision.

    Returns:
        ``(score, breakdown)`` where ``score`` is ``raw - penalty`` limited to
        the range 0.05..0.95 and rounded to 4 decimals, and ``breakdown``
        records each criterion, the penalty and the final score.
    """
    category_ok = action.category == TicketCategory.SECURITY
    priority_ok = action.priority == TicketPriority.CRITICAL
    department_ok = action.department == Department.SECURITY_TEAM
    escalation_ok = action.escalate == EscalationDecision.ESCALATE

    # A missing response counts as empty instead of crashing in .lower().
    resp = (action.response or "").lower()
    advises_disconnect = "disconnect" in resp
    warns_user = "do not" in resp or "don't" in resp
    # Penalise decryption talk only when nothing tells the user to refrain.
    # A bare substring match also punished safe advice such as "do not
    # attempt to decrypt the files", cancelling the bonus for the warning.
    # This is still a heuristic: a warning about something else also
    # suppresses the penalty.
    unsafe_decrypt = "decrypt" in resp and not warns_user

    score = 0.0
    if category_ok:
        score += 0.2
    if priority_ok:
        score += 0.2
    if department_ok:
        score += 0.2
    if escalation_ok:
        score += 0.1
    if advises_disconnect:
        score += 0.15
    if warns_user:
        score += 0.15

    penalty = 0.4 if unsafe_decrypt else 0.0

    raw = score - penalty
    # Rounding removes accumulated float noise (0.2 + 0.2 + 0.2 != 0.6).
    final = round(max(0.05, min(0.95, raw)), 4)

    # The breakdown used to be returned empty, which left callers with no
    # explanation of the score.
    breakdown: Dict[str, Any] = {
        "category_correct": category_ok,
        "priority_correct": priority_ok,
        "department_correct": department_ok,
        "escalation_correct": escalation_ok,
        "disconnect_advised": advises_disconnect,
        "warning_given": warns_user,
        "unsafe_decrypt_advice": unsafe_decrypt,
        "penalty": penalty,
        "final_score": final,
    }
    return final, breakdown
118
 
 
 
119
 
120
+ # ─── Task Registry ───────────────────────────────────────
 
121
 
122
@dataclass
class Task:
    """One entry in the task registry: a ticket paired with its grader."""

    # Registry key; TASK_MAP is indexed by this value.
    task_id: str
    # Short display name.
    name: str
    # One-line summary of what the task asks for.
    description: str
    # Difficulty label; the registry uses "easy", "medium" and "hard".
    difficulty: str
    # The observation presented for this task.
    ticket: TicketObservation
    # Callable that scores a TriageAction; the registered graders return
    # a (score, breakdown) tuple.
    grader: Any
    # Step budget for the task; defaults to a single step.
    max_steps: int = 1
131
 
 
132
 
133
# All registered tasks, ordered from easiest to hardest.
TASKS: List[Task] = [
    Task(
        task_id="task_easy",
        name="Easy",
        description="Basic classification",
        difficulty="easy",
        ticket=TICKET_EASY,
        grader=grade_easy,
    ),
    Task(
        task_id="task_medium",
        name="Medium",
        description="Full triage",
        difficulty="medium",
        ticket=TICKET_MEDIUM,
        grader=grade_medium,
    ),
    Task(
        task_id="task_hard",
        name="Hard",
        description="Security incident",
        difficulty="hard",
        ticket=TICKET_HARD,
        grader=grade_hard,
    ),
]

# Lookup table from task_id to its Task.
TASK_MAP = {task.task_id: task for task in TASKS}