shank committed on
Commit
8b16369
·
1 Parent(s): a5c67b3

Fix GRPOConfig: rename max_new_tokens to max_completion_length for trl==0.14.0

Browse files
Files changed (1) hide show
  1. training/train_grpo.py +1 -1
training/train_grpo.py CHANGED
@@ -413,7 +413,7 @@ config = GRPOConfig(
413
  lr_scheduler_type="cosine",
414
  warmup_steps=20 if args.test else 40,
415
  num_generations=8, # GRPO key: more rollouts = stronger learning signal (was 4)
416
- max_new_tokens=512, # longer responses = more complete fixes (was 400)
417
  temperature=0.9, # slightly higher temp = more diverse rollouts for GRPO
418
  logging_steps=5 if args.test else 5, # log every 5 steps for dense W&B curve
419
  save_steps=50 if args.test else 100,
 
413
  lr_scheduler_type="cosine",
414
  warmup_steps=20 if args.test else 40,
415
  num_generations=8, # GRPO key: more rollouts = stronger learning signal (was 4)
416
+ max_completion_length=512, # longer responses = more complete fixes (was 400)
417
  temperature=0.9, # slightly higher temp = more diverse rollouts for GRPO
418
  logging_steps=5 if args.test else 5, # log every 5 steps for dense W&B curve
419
  save_steps=50 if args.test else 100,