shank committed on
Commit ·
8b16369
1
Parent(s): a5c67b3
Fix GRPOConfig: rename max_new_tokens to max_completion_length for trl==0.14.0
Browse files
- training/train_grpo.py +1 -1
training/train_grpo.py
CHANGED
|
@@ -413,7 +413,7 @@ config = GRPOConfig(
|
|
| 413 |
lr_scheduler_type="cosine",
|
| 414 |
warmup_steps=20 if args.test else 40,
|
| 415 |
num_generations=8, # GRPO key: more rollouts = stronger learning signal (was 4)
|
| 416 |
-
max_new_tokens=512, # longer responses = more complete fixes (was 400)
|
| 417 |
temperature=0.9, # slightly higher temp = more diverse rollouts for GRPO
|
| 418 |
logging_steps=5 if args.test else 5, # log every 5 steps for dense W&B curve
|
| 419 |
save_steps=50 if args.test else 100,
|
|
|
|
| 413 |
lr_scheduler_type="cosine",
|
| 414 |
warmup_steps=20 if args.test else 40,
|
| 415 |
num_generations=8, # GRPO key: more rollouts = stronger learning signal (was 4)
|
| 416 |
+
max_completion_length=512, # longer responses = more complete fixes (was 400)
|
| 417 |
temperature=0.9, # slightly higher temp = more diverse rollouts for GRPO
|
| 418 |
logging_steps=5 if args.test else 5, # log every 5 steps for dense W&B curve
|
| 419 |
save_steps=50 if args.test else 100,
|