Spaces:

prashantmatlani
/

csa01

Sleeping

App Files Files Community

prashantmatlani committed on 28 days ago

Commit

b03ffc7

1 Parent(s): 5683339

modified task grader

Browse files

Files changed (1) hide show

inference.py +45 -38

inference.py CHANGED Viewed

@@ -29,32 +29,30 @@ def format_action(action: dict) -> str:
     return str(action)
-def main():
-    env = CustomerSupportEnv()
-    obs = env.reset()
-    model_name = os.getenv("MODEL_NAME", "unknown-model")
-    #model_name="llama-3.1-8b-instant"
-    api_base_url = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
-    #api_base_url = os.getenv("API_BASE_URL")
-    print(f"[CONFIG] api_base_url={api_base_url}")
-    task_name = "customer-support"
-    benchmark = "openenv"
     step_count = 0
     rewards = []
     success = False
-    # =========================
-    # START
-    # =========================
-    print(f"[START] task={task_name} env={benchmark} model={model_name}")
     try:
         done = False
@@ -76,11 +74,8 @@ def main():
             step_count += 1
             rewards.append(reward)
-            # =========================
-            # STEP
-            # =========================
             print(
-                f"[STEP] step={step_count} "
                 f"action={format_action(action)} "
                 f"reward={reward:.2f} "
                 f"done={'true' if done else 'false'} "
@@ -89,35 +84,47 @@ def main():
             obs = next_obs
-        # success from env
         success = info.get("task_success", False)
     except Exception as e:
-        # still must print END
         print(
-            f"[STEP] step={step_count+1} "
             f"action=null reward=0.00 done=true error={str(e)}"
         )
-    finally:
-        # =========================
-        # END
-        # =========================
-        rewards_str = ",".join(f"{r:.2f}" for r in rewards)
-        score = 1.0 if success else 0.0
-        #print(
-        #    f"[END] success={'true' if success else 'false'} "
-        #    f"steps={step_count} "
-        #    f"rewards={rewards_str}"
-        #)
-        print(
-        f"[END] success={'true' if success else 'false'} "
         f"steps={step_count} "
         f"score={score:.2f} "
         f"rewards={rewards_str}"
-        )
 if __name__ == "__main__":
     main()

     return str(action)
def compute_score(success, steps, rewards):
    """
    Blend task success, step efficiency and average reward into one score.

    The score is a weighted sum of three terms:

    * 0.5 when ``success`` is truthy, otherwise 0;
    * 0.3 * 1 / (1 + steps), so runs that finish in fewer steps score higher;
    * 0.2 * the mean of ``rewards`` clipped to [0, 1] (0 when ``rewards``
      is empty).

    Args:
        success: Whether the task was completed.
        steps: Number of steps taken. Negative values are treated as 0.
        rewards: Sequence of per-step numeric rewards; may be empty.

    Returns:
        A float in [0.01, 0.99], i.e. strictly inside the open interval (0, 1).
    """
    success_weight = 0.5
    efficiency_weight = 0.3
    reward_weight = 0.2

    # A negative step count is meaningless; without this guard steps == -1
    # divides by zero and smaller values make the efficiency term negative.
    steps = max(0, steps)

    # max(1, ...) keeps the mean well-defined for an empty reward list.
    avg_reward = sum(rewards) / max(1, len(rewards))

    score = (
        success_weight * (1.0 if success else 0.0)
        + efficiency_weight * (1 / (1 + steps))
        + reward_weight * max(0, min(1, avg_reward))
    )

    # Keep the result strictly inside (0, 1): never exactly 0 or 1.
    return max(0.01, min(0.99, score))
+def run_single_task(task_id):
+    env = CustomerSupportEnv()
+    obs = env.reset()
     step_count = 0
     rewards = []
     success = False
     try:
         done = False
             step_count += 1
             rewards.append(reward)
             print(
+                f"[STEP] task={task_id} step={step_count} "
                 f"action={format_action(action)} "
                 f"reward={reward:.2f} "
                 f"done={'true' if done else 'false'} "
             obs = next_obs
         success = info.get("task_success", False)
     except Exception as e:
         print(
+            f"[STEP] task={task_id} step={step_count+1} "
             f"action=null reward=0.00 done=true error={str(e)}"
         )
+    score = compute_score(success, step_count, rewards)
+    rewards_str = ",".join(f"{r:.2f}" for r in rewards)
+    print(
+        f"[END] task={task_id} "
+        f"success={'true' if success else 'false'} "
         f"steps={step_count} "
         f"score={score:.2f} "
         f"rewards={rewards_str}"
+    )
def main():
    """Print the configuration and start banners, then run every task.

    Reads ``MODEL_NAME`` and ``API_BASE_URL`` from the environment (with
    fallbacks), prints the ``[CONFIG]`` and ``[START]`` lines, and calls
    ``run_single_task`` once per task with 1-based task ids.
    """
    benchmark = "openenv"
    task_name = "customer-support"
    api_base_url = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
    model_name = os.getenv("MODEL_NAME", "unknown-model")

    print(f"[CONFIG] api_base_url={api_base_url}")
    print(f"[START] task={task_name} env={benchmark} model={model_name}")

    # Several tasks are run back to back; ids start at 1.
    NUM_TASKS = 3
    for task_id in range(1, NUM_TASKS + 1):
        run_single_task(task_id=task_id)
 if __name__ == "__main__":
     main()