vicky1428 committed on
Commit
b9ad6f9
·
verified ·
1 Parent(s): dab441f

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +3 -3
  2. inference.py +5 -3
  3. models.py +2 -2
  4. server/environment.py +20 -11
README.md CHANGED
@@ -307,9 +307,9 @@ When an OpenAI-compatible endpoint is available, the script uses the OpenAI clie
307
 
308
  Deterministic fallback baseline on bundled tasks:
309
 
310
- - `email_easy`: `1.00`
311
- - `email_medium`: `1.00`
312
- - `email_hard`: `1.00`
313
 
314
  ## Hugging Face Spaces
315
 
 
307
 
308
  Deterministic fallback baseline on bundled tasks:
309
 
310
+ - `email_easy`: `0.99`
311
+ - `email_medium`: `0.99`
312
+ - `email_hard`: `0.99`
313
 
314
  ## Hugging Face Spaces
315
 
inference.py CHANGED
@@ -37,6 +37,8 @@ MAX_STEPS = 12
37
  TEMPERATURE = 0.4
38
  MAX_TOKENS = 25000
39
  SUCCESS_SCORE_THRESHOLD = 0.95
 
 
40
 
41
  @dataclass
42
  class LocalStepResult:
@@ -80,8 +82,8 @@ def sanitize(value: Any) -> str:
80
 
81
 
82
  def clamp_score(score: float) -> float:
83
- """Clamp score into [0, 1]."""
84
- return min(max(score, 0.0), 1.0)
85
 
86
 
87
  def compact_action(action: Optional[SupportAction]) -> str:
@@ -281,7 +283,7 @@ async def run_episode(task_id: str, client: Optional[OpenAI]) -> None:
281
  history: List[str] = []
282
  rewards: List[float] = []
283
  steps_taken = 0
284
- score = 0.0
285
  success = False
286
  action_for_log: Optional[SupportAction] = None
287
 
 
37
  TEMPERATURE = 0.4
38
  MAX_TOKENS = 25000
39
  SUCCESS_SCORE_THRESHOLD = 0.95
40
+ MIN_SCORE = 0.01
41
+ MAX_SCORE = 0.99
42
 
43
  @dataclass
44
  class LocalStepResult:
 
82
 
83
 
84
  def clamp_score(score: float) -> float:
85
+ """Clamp score into the open interval (0, 1)."""
86
+ return min(max(score, MIN_SCORE), MAX_SCORE)
87
 
88
 
89
  def compact_action(action: Optional[SupportAction]) -> str:
 
283
  history: List[str] = []
284
  rewards: List[float] = []
285
  steps_taken = 0
286
+ score = MIN_SCORE
287
  success = False
288
  action_for_log: Optional[SupportAction] = None
289
 
models.py CHANGED
@@ -77,7 +77,7 @@ class SupportObservation(Observation):
77
  description="Compact summaries of prior attempts in the episode.",
78
  )
79
  feedback: str = Field(default="", description="Step-level grader feedback.")
80
- score: float = Field(default=0.0, description="Current cumulative score.")
81
  attempts_remaining: int = Field(
82
  default=0,
83
  description="How many attempts remain before the episode ends.",
@@ -89,6 +89,6 @@ class SupportState(State):
89
 
90
  task_id: str | None = None
91
  difficulty: str | None = None
92
- score: float = 0.0
93
  matched_fields: List[str] = Field(default_factory=list)
94
  attempts_remaining: int = 0
 
77
  description="Compact summaries of prior attempts in the episode.",
78
  )
79
  feedback: str = Field(default="", description="Step-level grader feedback.")
80
+ score: float = Field(default=0.01, description="Current cumulative score.")
81
  attempts_remaining: int = Field(
82
  default=0,
83
  description="How many attempts remain before the episode ends.",
 
89
 
90
  task_id: str | None = None
91
  difficulty: str | None = None
92
+ score: float = 0.01
93
  matched_fields: List[str] = Field(default_factory=list)
94
  attempts_remaining: int = 0
server/environment.py CHANGED
@@ -37,6 +37,8 @@ class SupermailEnvironment(Environment):
37
  """Deterministic customer support email triage environment."""
38
 
39
  SUPPORTS_CONCURRENT_SESSIONS: bool = True
 
 
40
 
41
  def __init__(self, task_id: str | None = None):
42
  self._requested_task_id = task_id
@@ -45,8 +47,12 @@ class SupermailEnvironment(Environment):
45
  self._task: TaskDefinition | None = None
46
  self._matched_fields: set[str] = set()
47
  self._history: list[str] = []
48
- self._score = 0.0
49
- self._state = SupportState(episode_id=str(uuid4()), step_count=0)
 
 
 
 
50
 
51
  @property
52
  def benchmark(self) -> str:
@@ -65,13 +71,13 @@ class SupermailEnvironment(Environment):
65
  self._task = self._select_task()
66
  self._matched_fields = set()
67
  self._history = []
68
- self._score = 0.0
69
  self._state = SupportState(
70
  episode_id=str(uuid4()),
71
  step_count=0,
72
  task_id=self._task.task_id,
73
  difficulty=self._task.difficulty,
74
- score=0.0,
75
  matched_fields=[],
76
  attempts_remaining=self._task.max_attempts,
77
  )
@@ -143,6 +149,14 @@ class SupermailEnvironment(Environment):
143
  decision[field_name] = value
144
  return decision
145
 
 
 
 
 
 
 
 
 
146
  def _assess(self, decision: dict[str, str]) -> StepAssessment:
147
  if self._task is None:
148
  raise RuntimeError("Task not initialized.")
@@ -186,13 +200,8 @@ class SupermailEnvironment(Environment):
186
  if self._state.step_count > 3 and matched_fields != set(self._task.required_fields):
187
  reward -= 0.05
188
 
189
- score = round(
190
- min(
191
- 1.0,
192
- sum(self._task.field_weights[field] for field in matched_fields),
193
- ),
194
- 2,
195
- )
196
 
197
  success = matched_fields == set(self._task.required_fields)
198
  done = success or self._state.step_count >= self._task.max_attempts
 
37
  """Deterministic customer support email triage environment."""
38
 
39
  SUPPORTS_CONCURRENT_SESSIONS: bool = True
40
+ MIN_SCORE: float = 0.01
41
+ MAX_SCORE: float = 0.99
42
 
43
  def __init__(self, task_id: str | None = None):
44
  self._requested_task_id = task_id
 
47
  self._task: TaskDefinition | None = None
48
  self._matched_fields: set[str] = set()
49
  self._history: list[str] = []
50
+ self._score = self._bounded_score(0.0)
51
+ self._state = SupportState(
52
+ episode_id=str(uuid4()),
53
+ step_count=0,
54
+ score=self._score,
55
+ )
56
 
57
  @property
58
  def benchmark(self) -> str:
 
71
  self._task = self._select_task()
72
  self._matched_fields = set()
73
  self._history = []
74
+ self._score = self._bounded_score(0.0)
75
  self._state = SupportState(
76
  episode_id=str(uuid4()),
77
  step_count=0,
78
  task_id=self._task.task_id,
79
  difficulty=self._task.difficulty,
80
+ score=self._score,
81
  matched_fields=[],
82
  attempts_remaining=self._task.max_attempts,
83
  )
 
149
  decision[field_name] = value
150
  return decision
151
 
152
+ def _bounded_score(self, raw_score: float) -> float:
153
+ """Map raw progress into the open interval (0, 1)."""
154
+ clamped_raw_score = min(max(raw_score, 0.0), 1.0)
155
+ scaled_score = self.MIN_SCORE + (
156
+ clamped_raw_score * (self.MAX_SCORE - self.MIN_SCORE)
157
+ )
158
+ return round(scaled_score, 2)
159
+
160
  def _assess(self, decision: dict[str, str]) -> StepAssessment:
161
  if self._task is None:
162
  raise RuntimeError("Task not initialized.")
 
200
  if self._state.step_count > 3 and matched_fields != set(self._task.required_fields):
201
  reward -= 0.05
202
 
203
+ raw_score = sum(self._task.field_weights[field] for field in matched_fields)
204
+ score = self._bounded_score(raw_score)
 
 
 
 
 
205
 
206
  success = matched_fields == set(self._task.required_fields)
207
  done = success or self._state.step_count >= self._task.max_attempts