shank committed on
Commit
18b4e8a
·
1 Parent(s): 024f3c7

Fix: Fixing

Browse files
Files changed (1) hide show
  1. training/train_grpo.py +4 -3
training/train_grpo.py CHANGED
@@ -47,8 +47,8 @@ if not args.test_local:
47
  "wandb==0.18.7",
48
  "datasets==3.0.2",
49
  "transformers==4.44.2",
50
- "accelerate==1.0.1",
51
- "trl==0.12.2",
52
  "peft==0.13.2",
53
  "bitsandbytes==0.43.3",
54
  ]
@@ -466,6 +466,7 @@ config = GRPOConfig(
466
  warmup_steps=10 if args.test else 30,
467
  num_generations=_num_gen,
468
  max_completion_length=_max_comp,
 
469
  logging_steps=5,
470
  save_steps=50,
471
  report_to="wandb" if WANDB_API_KEY else "none",
@@ -476,7 +477,7 @@ trainer = GRPOTrainer(
476
  args=config,
477
  train_dataset=make_dataset(0),
478
  reward_funcs=reward_fn,
479
- tokenizer=tokenizer,
480
  )
481
 
482
  # ── Curriculum callback ───────────────────────────────────────────────────────
 
47
  "wandb==0.18.7",
48
  "datasets==3.0.2",
49
  "transformers==4.44.2",
50
+ "accelerate==0.34.2",
51
+ "trl==0.15.2",
52
  "peft==0.13.2",
53
  "bitsandbytes==0.43.3",
54
  ]
 
466
  warmup_steps=10 if args.test else 30,
467
  num_generations=_num_gen,
468
  max_completion_length=_max_comp,
469
+ temperature=0.9,
470
  logging_steps=5,
471
  save_steps=50,
472
  report_to="wandb" if WANDB_API_KEY else "none",
 
477
  args=config,
478
  train_dataset=make_dataset(0),
479
  reward_funcs=reward_fn,
480
+ processing_class=tokenizer,
481
  )
482
 
483
  # ── Curriculum callback ───────────────────────────────────────────────────────