Spaces:

K446
/

Opengrid

Running

K446 committed on 12 days ago

Commit

81257d9

1 Parent(s): a76abcc

Print every reward call so terminal shows continuous progress

Files changed (1) hide show

training/train_grpo.py CHANGED Viewed

@@ -261,8 +261,7 @@ def compute_grpo_reward_env(
     """
     global _REWARD_CALL_COUNT
     _REWARD_CALL_COUNT += 1
-    if _REWARD_CALL_COUNT <= 5 or _REWARD_CALL_COUNT % 100 == 0:
-        print(f"  [reward_fn] call #{_REWARD_CALL_COUNT} | n={len(completions)}", flush=True)
     rewards = []
     for completion, obs_dict in zip(completions, observations):

     """
     global _REWARD_CALL_COUNT
     _REWARD_CALL_COUNT += 1
+    print(f"  [reward] #{_REWARD_CALL_COUNT} | n={len(completions)}", flush=True)
     rewards = []
     for completion, obs_dict in zip(completions, observations):