Print every reward call so the terminal shows continuous progress
Browse files
- training/train_grpo.py +1 -2
training/train_grpo.py
CHANGED
|
@@ -261,8 +261,7 @@ def compute_grpo_reward_env(
|
|
| 261 |
"""
|
| 262 |
global _REWARD_CALL_COUNT
|
| 263 |
_REWARD_CALL_COUNT += 1
|
| 264 |
-
|
| 265 |
-
print(f" [reward_fn] call #{_REWARD_CALL_COUNT} | n={len(completions)}", flush=True)
|
| 266 |
|
| 267 |
rewards = []
|
| 268 |
for completion, obs_dict in zip(completions, observations):
|
|
|
|
| 261 |
"""
|
| 262 |
global _REWARD_CALL_COUNT
|
| 263 |
_REWARD_CALL_COUNT += 1
|
| 264 |
+
print(f" [reward] #{_REWARD_CALL_COUNT} | n={len(completions)}", flush=True)
|
|
|
|
| 265 |
|
| 266 |
rewards = []
|
| 267 |
for completion, obs_dict in zip(completions, observations):
|