modelbuilderhq committed on
Commit
e0f127e
·
verified ·
1 Parent(s): 2ade2c6

Delete graders.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. graders.py +0 -178
graders.py DELETED
@@ -1,178 +0,0 @@
1
- """Deterministic graders and reward helpers for SupportDesk."""
2
-
3
- from __future__ import annotations
4
-
5
- import re
6
- from dataclasses import dataclass
7
-
8
- from models import SupportCaseProgress
9
- from tasks import SupportTaskSpec, get_task
10
-
11
- STRICT_SCORE_EPSILON = 0.01
12
-
13
-
14
@dataclass(frozen=True)
class GradeBreakdown:
    """Immutable report of how close a case is to the gold solution.

    The fields are declared in the order the generated constructor accepts
    them positionally, so the order must not be rearranged.
    """

    # Overall score for the case.
    total_score: float

    # Per-component scores.
    queue_score: float
    priority_score: float
    issue_type_score: float
    requested_fields_score: float
    reply_score: float
    note_score: float
    status_score: float
    resolution_score: float

    # Names of the milestones regarded as completed.
    completed_milestones: tuple[str, ...]
28
-
29
-
30
- def _normalize(text: str | None) -> str:
31
- if not text:
32
- return ""
33
- normalized = text.lower().replace("-", " ")
34
- return re.sub(r"[^a-z0-9\s]", " ", normalized)
35
-
36
-
37
def _marker_group_score(text: str | None, marker_groups: tuple[tuple[str, ...], ...]) -> float:
    """Return the fraction of marker groups that *text* satisfies.

    A group counts as satisfied when at least one of its markers, once
    normalized, occurs as a substring of the normalized text. With no groups
    at all the score is 1.0; if the text normalizes to an empty string the
    score is 0.0.
    """
    if not marker_groups:
        return 1.0

    haystack = _normalize(text)
    if not haystack:
        return 0.0

    def _group_hit(group: tuple[str, ...]) -> bool:
        # One matching marker is enough for the whole group.
        for marker in group:
            if _normalize(marker) in haystack:
                return True
        return False

    satisfied = sum(1 for group in marker_groups if _group_hit(group))
    return satisfied / len(marker_groups)
50
-
51
-
52
def _requested_fields_score(case: SupportCaseProgress, task: SupportTaskSpec) -> float:
    """Score the fields requested on *case* against those *task* requires.

    When the task requires no fields, the score is 1.0 only if nothing was
    requested. Otherwise the score is the share of required fields that were
    requested, minus 0.05 per unneeded field (the deduction is capped at
    0.25), and never below 0.0.
    """
    wanted = set(task.required_requested_fields)
    asked = set(case.requested_fields)

    if not wanted:
        # Nothing should be requested: any request at all forfeits the score.
        return 0.0 if asked else 1.0
    if not asked:
        return 0.0

    coverage = len(wanted & asked) / len(wanted)
    surplus_penalty = min(0.25, len(asked - wanted) * 0.05)
    return max(0.0, coverage - surplus_penalty)
66
-
67
-
68
def _reply_penalty(case: SupportCaseProgress, task: SupportTaskSpec) -> float:
    """Return 0.5 if the reply contains any forbidden marker, otherwise 0.0.

    Both the reply and each marker are normalized before the substring check.
    A reply that normalizes to an empty string is never penalized.
    """
    reply_text = _normalize(case.reply)
    if not reply_text:
        return 0.0

    for marker in task.forbidden_reply_markers:
        if _normalize(marker) in reply_text:
            return 0.5
    return 0.0
73
-
74
-
75
def _strict_open_unit_interval(score: float) -> float:
    """Clamp *score* so it stays strictly between 0 and 1.

    The result lies in ``[STRICT_SCORE_EPSILON, 1 - STRICT_SCORE_EPSILON]``,
    which keeps final task scores compatible with the evaluator.
    """
    upper_bound = 1.0 - STRICT_SCORE_EPSILON
    raised = max(STRICT_SCORE_EPSILON, score)
    return min(upper_bound, raised)
79
-
80
-
81
def grade_case(task: SupportTaskSpec, case: SupportCaseProgress) -> GradeBreakdown:
    """Grade *case* against *task* deterministically.

    Each component is scored in ``[0, 1]``, the components are combined with
    fixed weights, and the weighted total is clamped strictly inside
    ``(0, 1)`` before being rounded to four decimals.
    """

    def _exact(actual: object, gold: object) -> float:
        # All-or-nothing credit for fields that must equal the gold value.
        return 1.0 if actual == gold else 0.0

    queue_score = _exact(case.queue, task.gold_queue)
    priority_score = _exact(case.priority, task.gold_priority)
    issue_type_score = _exact(case.issue_type, task.gold_issue_type)
    requested_fields_score = _requested_fields_score(case, task)
    reply_marker_score = _marker_group_score(case.reply, task.required_reply_markers)
    reply_score = max(0.0, reply_marker_score - _reply_penalty(case, task))
    note_score = _marker_group_score(case.internal_note, task.required_note_markers)
    status_score = _exact(case.status, task.gold_status)
    resolution_score = _exact(case.resolution_code, task.gold_resolution_code)

    # (milestone name, component score, weight)
    components = (
        ("queue", queue_score, 0.15),
        ("priority", priority_score, 0.10),
        ("issue_type", issue_type_score, 0.10),
        ("requested_fields", requested_fields_score, 0.15),
        ("reply", reply_score, 0.25),
        ("internal_note", note_score, 0.10),
        ("status", status_score, 0.10),
        ("resolution_code", resolution_score, 0.05),
    )

    # Accumulate strictly left to right so the floating-point result matches
    # a plain chained ``a + b + c + ...`` expression over the same terms.
    weighted_total = 0.0
    for _name, score, weight in components:
        weighted_total += score * weight

    # Exact-match components are always 0.0 or 1.0, so the 0.99 threshold
    # is equivalent to a truthiness check for them.
    milestones = tuple(name for name, score, _weight in components if score >= 0.99)

    return GradeBreakdown(
        total_score=round(_strict_open_unit_interval(weighted_total), 4),
        queue_score=queue_score,
        priority_score=priority_score,
        issue_type_score=issue_type_score,
        requested_fields_score=round(requested_fields_score, 4),
        reply_score=round(reply_score, 4),
        note_score=round(note_score, 4),
        status_score=status_score,
        resolution_score=resolution_score,
        completed_milestones=milestones,
    )
134
-
135
-
136
def grade_task_id(task_id: str, case: SupportCaseProgress) -> GradeBreakdown:
    """Look up the task for *task_id* and grade *case* against it.

    A convenience wrapper used by tests and evaluation scripts.
    """
    task_spec = get_task(task_id)
    return grade_case(task_spec, case)
140
-
141
-
142
class _TaskSpecificGrader:
    """Base for importable per-task graders used by validator task discovery.

    A grader can be used through ``grade(case)`` or called directly; both
    return the total score of the case for the grader's ``task_id``.
    """

    # Identifier of the task to grade against; the base class leaves it empty.
    task_id: str = ""

    def grade(self, case: SupportCaseProgress) -> float:
        """Return the total score of *case* for this grader's task."""
        breakdown = grade_task_id(self.task_id, case)
        return breakdown.total_score

    def __call__(self, case: SupportCaseProgress) -> float:
        """Delegate to ``grade`` so the instance works as a plain callable."""
        return self.grade(case)
152
-
153
-
154
class BillingRefundEasyGrader(_TaskSpecificGrader):
    """Grader bound to the ``billing_refund_easy`` task."""

    task_id = "billing_refund_easy"
156
-
157
-
158
class AccountTakeoverMediumGrader(_TaskSpecificGrader):
    """Grader bound to the ``account_takeover_medium`` task."""

    task_id = "account_takeover_medium"
160
-
161
-
162
class ApiIncidentHardGrader(_TaskSpecificGrader):
    """Grader bound to the ``api_incident_hard`` task."""

    task_id = "api_incident_hard"
164
-
165
-
166
class RegulatedExportExceptionHardGrader(_TaskSpecificGrader):
    """Grader bound to the ``regulated_export_exception_hard`` task."""

    task_id = "regulated_export_exception_hard"
168
-
169
-
170
- __all__ = [
171
- "AccountTakeoverMediumGrader",
172
- "ApiIncidentHardGrader",
173
- "BillingRefundEasyGrader",
174
- "GradeBreakdown",
175
- "RegulatedExportExceptionHardGrader",
176
- "grade_case",
177
- "grade_task_id",
178
- ]