shank committed on
Commit ·
9487853
1
Parent(s): 6bf2fbb
Reduce max_completion_length to 160 for T4 speed: target 1000 steps in <8hrs
Browse files
- training/train_grpo.py +1 -1
training/train_grpo.py
CHANGED
|
@@ -413,7 +413,7 @@ config = GRPOConfig(
|
|
| 413 |
lr_scheduler_type="cosine",
|
| 414 |
warmup_steps=10 if args.test else 30,
|
| 415 |
num_generations=4, # P100: halved from 8 to fit in 16GB
|
| 416 |
-
max_completion_length=
|
| 417 |
temperature=0.9,
|
| 418 |
logging_steps=5,
|
| 419 |
save_steps=50 if args.test else 50,
|
|
|
|
| 413 |
lr_scheduler_type="cosine",
|
| 414 |
warmup_steps=10 if args.test else 30,
|
| 415 |
num_generations=4, # P100: halved from 8 to fit in 16GB
|
| 416 |
+
max_completion_length=160, # T4: shorter completions = faster generation (bottleneck)
|
| 417 |
temperature=0.9,
|
| 418 |
logging_steps=5,
|
| 419 |
save_steps=50 if args.test else 50,
|