Spaces:

jampuramprem
/

sieve

Sleeping

jampuramprem committed on Apr 7

Commit

1af5ac7

1 Parent(s): d0b62f7

Clamp final scores to strictly (0, 1) for success check

Files changed (2) hide show

server/app.py CHANGED Viewed

@@ -66,9 +66,11 @@ def grader():
     score = env.last_grader_score
     if score is None and env.episode_actions:
         score = env.compute_final_score()
     return {
         "task_id": env.task_id,
         "score": score,
         "done": env.done,
         "processed_count": len(env.processed_emails),
         "total_emails": len(env.email_queue),

     score = env.last_grader_score
     if score is None and env.episode_actions:
         score = env.compute_final_score()
+    success = score is not None and 0.0 < score < 1.0
     return {
         "task_id": env.task_id,
         "score": score,
+        "success": success,
         "done": env.done,
         "processed_count": len(env.processed_emails),
         "total_emails": len(env.email_queue),

server/environment.py CHANGED Viewed

@@ -386,14 +386,16 @@ class EmailSortingEnvironment:
     def compute_final_score(self) -> float:
         if not self.episode_actions:
-            return 0.0
         if self.task_id == "email_classification":
-            return self.email_classification_score()
-        if self.task_id == "response_drafting":
-            return self.response_drafting_score()
-        if self.task_id == "support_session":
-            return self.support_session_score()
-        return 0.0
     def step(self, action: Action) -> Tuple[Observation, Reward, bool, Dict]:
         if self.done:

     def compute_final_score(self) -> float:
         if not self.episode_actions:
+            return 0.001
         if self.task_id == "email_classification":
+            score = self.email_classification_score()
+        elif self.task_id == "response_drafting":
+            score = self.response_drafting_score()
+        elif self.task_id == "support_session":
+            score = self.support_session_score()
+        else:
+            return 0.001
+        return round(max(0.001, min(0.999, score)), 3)
     def step(self, action: Action) -> Tuple[Observation, Reward, bool, Dict]:
         if self.done: