K446 committed on
Commit
81257d9
·
1 Parent(s): a76abcc

Print every reward call so terminal shows continuous progress

Browse files
Files changed (1) hide show
  1. training/train_grpo.py +1 -2
training/train_grpo.py CHANGED
@@ -261,8 +261,7 @@ def compute_grpo_reward_env(
261
  """
262
  global _REWARD_CALL_COUNT
263
  _REWARD_CALL_COUNT += 1
264
- if _REWARD_CALL_COUNT <= 5 or _REWARD_CALL_COUNT % 100 == 0:
265
- print(f" [reward_fn] call #{_REWARD_CALL_COUNT} | n={len(completions)}", flush=True)
266
 
267
  rewards = []
268
  for completion, obs_dict in zip(completions, observations):
 
261
  """
262
  global _REWARD_CALL_COUNT
263
  _REWARD_CALL_COUNT += 1
264
+ print(f" [reward] #{_REWARD_CALL_COUNT} | n={len(completions)}", flush=True)
 
265
 
266
  rewards = []
267
  for completion, obs_dict in zip(completions, observations):