jampuramprem committed on
Commit
1af5ac7
·
1 Parent(s): d0b62f7

Clamp final scores to strictly (0, 1) for success check

Browse files
Files changed (2) hide show
  1. server/app.py +2 -0
  2. server/environment.py +9 -7
server/app.py CHANGED
@@ -66,9 +66,11 @@ def grader():
66
  score = env.last_grader_score
67
  if score is None and env.episode_actions:
68
  score = env.compute_final_score()
 
69
  return {
70
  "task_id": env.task_id,
71
  "score": score,
 
72
  "done": env.done,
73
  "processed_count": len(env.processed_emails),
74
  "total_emails": len(env.email_queue),
 
66
  score = env.last_grader_score
67
  if score is None and env.episode_actions:
68
  score = env.compute_final_score()
69
+ success = score is not None and 0.0 < score < 1.0
70
  return {
71
  "task_id": env.task_id,
72
  "score": score,
73
+ "success": success,
74
  "done": env.done,
75
  "processed_count": len(env.processed_emails),
76
  "total_emails": len(env.email_queue),
server/environment.py CHANGED
@@ -386,14 +386,16 @@ class EmailSortingEnvironment:
386
 
387
  def compute_final_score(self) -> float:
388
  if not self.episode_actions:
389
- return 0.0
390
  if self.task_id == "email_classification":
391
- return self.email_classification_score()
392
- if self.task_id == "response_drafting":
393
- return self.response_drafting_score()
394
- if self.task_id == "support_session":
395
- return self.support_session_score()
396
- return 0.0
 
 
397
 
398
  def step(self, action: Action) -> Tuple[Observation, Reward, bool, Dict]:
399
  if self.done:
 
386
 
387
  def compute_final_score(self) -> float:
388
  if not self.episode_actions:
389
+ return 0.001
390
  if self.task_id == "email_classification":
391
+ score = self.email_classification_score()
392
+ elif self.task_id == "response_drafting":
393
+ score = self.response_drafting_score()
394
+ elif self.task_id == "support_session":
395
+ score = self.support_session_score()
396
+ else:
397
+ return 0.001
398
+ return round(max(0.001, min(0.999, score)), 3)
399
 
400
  def step(self, action: Action) -> Tuple[Observation, Reward, bool, Dict]:
401
  if self.done: