kevanthonyP committed on
Commit
485c977
·
verified ·
1 Parent(s): f188811

Update env_tasks.py

Browse files
Files changed (1) hide show
  1. env_tasks.py +78 -26
env_tasks.py CHANGED
@@ -1,7 +1,18 @@
1
- # ─── Helper ─────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  def _clamp(score: float) -> float:
4
- # ALWAYS force strict (0,1)
5
  if score >= 1.0:
6
  return 0.99
7
  if score <= 0.0:
@@ -9,29 +20,50 @@ def _clamp(score: float) -> float:
9
  return score
10
 
11
 
12
- # ─── Graders ─────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  def grade_easy(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
15
- breakdown: Dict[str, Any] = {}
16
  score = 0.0
 
17
 
18
  if action.category == TicketCategory.HARDWARE:
19
- score += 0.60
20
 
21
  if action.priority in {TicketPriority.HIGH, TicketPriority.CRITICAL}:
22
  score += 0.25
23
 
24
- if len(action.response) >= 30:
25
  score += 0.15
26
 
27
  score = _clamp(score)
28
- breakdown["final_score"] = round(score, 4)
29
  return score, breakdown
30
 
31
 
32
  def grade_medium(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
33
- breakdown: Dict[str, Any] = {}
34
  score = 0.0
 
35
 
36
  if action.category in {TicketCategory.NETWORK, TicketCategory.ACCESS}:
37
  score += 0.25
@@ -43,54 +75,74 @@ def grade_medium(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
43
  score += 0.25
44
 
45
  if action.escalate == EscalationDecision.ESCALATE:
46
- score += 0.10
47
 
48
- if len(action.response) >= 50:
49
  score += 0.15
50
 
51
  score = _clamp(score)
52
- breakdown["final_score"] = round(score, 4)
53
  return score, breakdown
54
 
55
 
56
  def grade_hard(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
57
- breakdown: Dict[str, Any] = {}
58
  score = 0.0
59
  penalty = 0.0
 
60
 
61
  if action.category == TicketCategory.SECURITY:
62
- score += 0.20
63
 
64
  if action.priority == TicketPriority.CRITICAL:
65
- score += 0.20
66
 
67
  if action.department == Department.SECURITY_TEAM:
68
- score += 0.20
69
 
70
  if action.escalate == EscalationDecision.ESCALATE:
71
- score += 0.10
72
 
73
  resp = action.response.lower()
74
 
75
- if any(k in resp for k in ["disconnect", "unplug", "network"]):
76
  score += 0.15
77
 
78
- if any(k in resp for k in ["do not", "don't", "avoid"]):
79
  score += 0.15
80
 
81
- if "decrypt" in resp or "recover" in resp:
82
- penalty += 0.40
83
 
84
  if "disconnect" not in resp:
85
- penalty += 0.30
86
 
87
  if action.escalate != EscalationDecision.ESCALATE:
88
- penalty += 0.20
89
 
90
  raw = score - penalty
91
-
92
- # 🔥 CRITICAL FIX
93
  final = _clamp(raw)
94
 
95
- breakdown["final_score"] = round(final, 4)
96
- return final, breakdown
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from typing import Dict, Any, Tuple, List
3
+
4
+ from env_models import (
5
+ TicketObservation,
6
+ TriageAction,
7
+ TicketCategory,
8
+ TicketPriority,
9
+ Department,
10
+ EscalationDecision,
11
+ )
12
+
13
+ # ─── SAFE CLAMP ─────────────────────────────────────────
14
 
15
  def _clamp(score: float) -> float:
 
16
  if score >= 1.0:
17
  return 0.99
18
  if score <= 0.0:
 
20
  return score
21
 
22
 
23
+ # ─── TICKETS ───────────────────────────────────────────
24
+
25
+ TICKET_EASY = TicketObservation(
26
+ ticket_id="1",
27
+ subject="Laptop not working",
28
+ body="Laptop won't boot",
29
+ reporter_name="User",
30
+ reporter_role="Employee",
31
+ system_info="Windows",
32
+ timestamp="now",
33
+ previous_tickets=0,
34
+ task_instruction="Classify issue",
35
+ valid_categories=[c.value for c in TicketCategory],
36
+ valid_priorities=[p.value for p in TicketPriority],
37
+ valid_departments=[d.value for d in Department],
38
+ )
39
+
40
+ TICKET_MEDIUM = TICKET_EASY
41
+ TICKET_HARD = TICKET_EASY
42
+
43
+
44
+ # ─── GRADERS ───────────────────────────────────────────
45
 
46
  def grade_easy(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
 
47
  score = 0.0
48
+ breakdown = {}
49
 
50
  if action.category == TicketCategory.HARDWARE:
51
+ score += 0.6
52
 
53
  if action.priority in {TicketPriority.HIGH, TicketPriority.CRITICAL}:
54
  score += 0.25
55
 
56
+ if len(action.response) > 10:
57
  score += 0.15
58
 
59
  score = _clamp(score)
60
+ breakdown["score"] = score
61
  return score, breakdown
62
 
63
 
64
  def grade_medium(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
 
65
  score = 0.0
66
+ breakdown = {}
67
 
68
  if action.category in {TicketCategory.NETWORK, TicketCategory.ACCESS}:
69
  score += 0.25
 
75
  score += 0.25
76
 
77
  if action.escalate == EscalationDecision.ESCALATE:
78
+ score += 0.1
79
 
80
+ if len(action.response) > 20:
81
  score += 0.15
82
 
83
  score = _clamp(score)
84
+ breakdown["score"] = score
85
  return score, breakdown
86
 
87
 
88
  def grade_hard(action: TriageAction) -> Tuple[float, Dict[str, Any]]:
 
89
  score = 0.0
90
  penalty = 0.0
91
+ breakdown = {}
92
 
93
  if action.category == TicketCategory.SECURITY:
94
+ score += 0.2
95
 
96
  if action.priority == TicketPriority.CRITICAL:
97
+ score += 0.2
98
 
99
  if action.department == Department.SECURITY_TEAM:
100
+ score += 0.2
101
 
102
  if action.escalate == EscalationDecision.ESCALATE:
103
+ score += 0.1
104
 
105
  resp = action.response.lower()
106
 
107
+ if "disconnect" in resp:
108
  score += 0.15
109
 
110
+ if "do not" in resp:
111
  score += 0.15
112
 
113
+ if "decrypt" in resp:
114
+ penalty += 0.4
115
 
116
  if "disconnect" not in resp:
117
+ penalty += 0.3
118
 
119
  if action.escalate != EscalationDecision.ESCALATE:
120
+ penalty += 0.2
121
 
122
  raw = score - penalty
 
 
123
  final = _clamp(raw)
124
 
125
+ breakdown["score"] = final
126
+ return final, breakdown
127
+
128
+
129
+ # ─── TASK STRUCTURE ───────────────────────────────────
130
+
131
+ @dataclass
132
+ class Task:
133
+ task_id: str
134
+ name: str
135
+ description: str
136
+ difficulty: str
137
+ ticket: TicketObservation
138
+ grader: Any
139
+ max_steps: int = 1
140
+
141
+
142
+ TASKS: List[Task] = [
143
+ Task("task_easy", "Easy", "Basic classification", "easy", TICKET_EASY, grade_easy),
144
+ Task("task_medium", "Medium", "Full triage", "medium", TICKET_MEDIUM, grade_medium),
145
+ Task("task_hard", "Hard", "Security incident", "hard", TICKET_HARD, grade_hard),
146
+ ]
147
+
148
+ TASK_MAP = {t.task_id: t for t in TASKS}