shank committed on
Commit
9487853
·
1 Parent(s): 6bf2fbb

Reduce max_completion_length from 256 to 160 for T4 speed: target 1000 steps in <8 hrs

Browse files
Files changed (1) hide show
  1. training/train_grpo.py +1 -1
training/train_grpo.py CHANGED
@@ -413,7 +413,7 @@ config = GRPOConfig(
413
  lr_scheduler_type="cosine",
414
  warmup_steps=10 if args.test else 30,
415
  num_generations=4, # P100: halved from 8 to fit in 16GB
416
- max_completion_length=256, # P100: halved from 512 to fit in 16GB
417
  temperature=0.9,
418
  logging_steps=5,
419
  save_steps=50 if args.test else 50,
 
413
  lr_scheduler_type="cosine",
414
  warmup_steps=10 if args.test else 30,
415
  num_generations=4, # P100: halved from 8 to fit in 16GB
416
+ max_completion_length=160, # T4: shorter completions = faster generation (bottleneck)
417
  temperature=0.9,
418
  logging_steps=5,
419
  save_steps=50 if args.test else 50,