shank committed on
Commit
024f3c7
·
1 Parent(s): cb09ef1

Fix: Trying to fix dependency issues

Browse files
Files changed (1) hide show
  1. training/train_grpo.py +4 -5
training/train_grpo.py CHANGED
@@ -46,11 +46,11 @@ if not args.test_local:
46
  _TRAIN_DEPS = [
47
  "wandb==0.18.7",
48
  "datasets==3.0.2",
49
- "transformers==4.46.3",
50
  "accelerate==1.0.1",
51
- "trl==0.14.0",
52
  "peft==0.13.2",
53
- "bitsandbytes>=0.49.0",
54
  ]
55
  print("Installing training dependencies...", flush=True)
56
  ret = os.system(
@@ -466,7 +466,6 @@ config = GRPOConfig(
466
  warmup_steps=10 if args.test else 30,
467
  num_generations=_num_gen,
468
  max_completion_length=_max_comp,
469
- temperature=0.9,
470
  logging_steps=5,
471
  save_steps=50,
472
  report_to="wandb" if WANDB_API_KEY else "none",
@@ -477,7 +476,7 @@ trainer = GRPOTrainer(
477
  args=config,
478
  train_dataset=make_dataset(0),
479
  reward_funcs=reward_fn,
480
- processing_class=tokenizer,
481
  )
482
 
483
  # ── Curriculum callback ───────────────────────────────────────────────────────
 
46
  _TRAIN_DEPS = [
47
  "wandb==0.18.7",
48
  "datasets==3.0.2",
49
+ "transformers==4.44.2",
50
  "accelerate==1.0.1",
51
+ "trl==0.12.2",
52
  "peft==0.13.2",
53
+ "bitsandbytes==0.43.3",
54
  ]
55
  print("Installing training dependencies...", flush=True)
56
  ret = os.system(
 
466
  warmup_steps=10 if args.test else 30,
467
  num_generations=_num_gen,
468
  max_completion_length=_max_comp,
 
469
  logging_steps=5,
470
  save_steps=50,
471
  report_to="wandb" if WANDB_API_KEY else "none",
 
476
  args=config,
477
  train_dataset=make_dataset(0),
478
  reward_funcs=reward_fn,
479
+ tokenizer=tokenizer,
480
  )
481
 
482
  # ── Curriculum callback ───────────────────────────────────────────────────────