akhiilll
/

forgeenv-source

akhiilll committed 14 days ago

Commit

9c7f0da

verified ·

1 Parent(s): 77cbc5a

fix: capture rewards/reward_repair_function/mean for plot

Files changed (1) hide show

scripts/jobs/train_repair_agent.py CHANGED Viewed

@@ -211,11 +211,28 @@ training_rewards: list[float] = []
 if trainer_state.exists():
     state = json.loads(trainer_state.read_text())
     for log in state.get("log_history", []):
-        for k in ("rewards/mean", "reward", "train/reward"):
             if k in log:
                 training_rewards.append(float(log[k]))
                 break
 print(f"[job] {len(training_rewards)} reward log points", flush=True)
 plot_reward_curve(
     training_rewards or [0.0],

 if trainer_state.exists():
     state = json.loads(trainer_state.read_text())
     for log in state.get("log_history", []):
+        # TRL emits a few different reward keys depending on version;
+        # try the most specific first, then fall back.
+        candidates = [
+            "rewards/reward_repair_function/mean",
+            "rewards/mean",
+            "reward",
+            "train/reward",
+        ]
+        # also pick up any key matching rewards/<name>/mean
+        for k in list(log.keys()):
+            if k.startswith("rewards/") and k.endswith("/mean") and k not in candidates:
+                candidates.append(k)
+        for k in candidates:
             if k in log:
                 training_rewards.append(float(log[k]))
                 break
 print(f"[job] {len(training_rewards)} reward log points", flush=True)
+if training_rewards:
+    print(
+        f"[job] reward range: {min(training_rewards):.3f}..{max(training_rewards):.3f}",
+        flush=True,
+    )
 plot_reward_curve(
     training_rewards or [0.0],