Spaces:
Sleeping
Sleeping
Commit · 1af5ac7
1
Parent(s): d0b62f7
Clamp final scores to strictly (0, 1) for success check
Browse files
- server/app.py +2 -0
- server/environment.py +9 -7
server/app.py
CHANGED
|
@@ -66,9 +66,11 @@ def grader():
|
|
| 66 |
score = env.last_grader_score
|
| 67 |
if score is None and env.episode_actions:
|
| 68 |
score = env.compute_final_score()
|
|
|
|
| 69 |
return {
|
| 70 |
"task_id": env.task_id,
|
| 71 |
"score": score,
|
|
|
|
| 72 |
"done": env.done,
|
| 73 |
"processed_count": len(env.processed_emails),
|
| 74 |
"total_emails": len(env.email_queue),
|
|
|
|
| 66 |
score = env.last_grader_score
|
| 67 |
if score is None and env.episode_actions:
|
| 68 |
score = env.compute_final_score()
|
| 69 |
+
success = score is not None and 0.0 < score < 1.0
|
| 70 |
return {
|
| 71 |
"task_id": env.task_id,
|
| 72 |
"score": score,
|
| 73 |
+
"success": success,
|
| 74 |
"done": env.done,
|
| 75 |
"processed_count": len(env.processed_emails),
|
| 76 |
"total_emails": len(env.email_queue),
|
server/environment.py
CHANGED
|
@@ -386,14 +386,16 @@ class EmailSortingEnvironment:
|
|
| 386 |
|
| 387 |
def compute_final_score(self) -> float:
|
| 388 |
if not self.episode_actions:
|
| 389 |
-
return 0.
|
| 390 |
if self.task_id == "email_classification":
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
|
|
|
|
|
|
| 397 |
|
| 398 |
def step(self, action: Action) -> Tuple[Observation, Reward, bool, Dict]:
|
| 399 |
if self.done:
|
|
|
|
| 386 |
|
| 387 |
def compute_final_score(self) -> float:
|
| 388 |
if not self.episode_actions:
|
| 389 |
+
return 0.001
|
| 390 |
if self.task_id == "email_classification":
|
| 391 |
+
score = self.email_classification_score()
|
| 392 |
+
elif self.task_id == "response_drafting":
|
| 393 |
+
score = self.response_drafting_score()
|
| 394 |
+
elif self.task_id == "support_session":
|
| 395 |
+
score = self.support_session_score()
|
| 396 |
+
else:
|
| 397 |
+
return 0.001
|
| 398 |
+
return round(max(0.001, min(0.999, score)), 3)
|
| 399 |
|
| 400 |
def step(self, action: Action) -> Tuple[Observation, Reward, bool, Dict]:
|
| 401 |
if self.done:
|