shank committed on
Commit ·
024f3c7
1
Parent(s): cb09ef1
Fix: Trying to fix dependency issues
Browse files- training/train_grpo.py +4 -5
training/train_grpo.py
CHANGED
|
@@ -46,11 +46,11 @@ if not args.test_local:
|
|
| 46 |
_TRAIN_DEPS = [
|
| 47 |
"wandb==0.18.7",
|
| 48 |
"datasets==3.0.2",
|
| 49 |
-
"transformers==4.
|
| 50 |
"accelerate==1.0.1",
|
| 51 |
-
"trl==0.
|
| 52 |
"peft==0.13.2",
|
| 53 |
-
"bitsandbytes
|
| 54 |
]
|
| 55 |
print("Installing training dependencies...", flush=True)
|
| 56 |
ret = os.system(
|
|
@@ -466,7 +466,6 @@ config = GRPOConfig(
|
|
| 466 |
warmup_steps=10 if args.test else 30,
|
| 467 |
num_generations=_num_gen,
|
| 468 |
max_completion_length=_max_comp,
|
| 469 |
-
temperature=0.9,
|
| 470 |
logging_steps=5,
|
| 471 |
save_steps=50,
|
| 472 |
report_to="wandb" if WANDB_API_KEY else "none",
|
|
@@ -477,7 +476,7 @@ trainer = GRPOTrainer(
|
|
| 477 |
args=config,
|
| 478 |
train_dataset=make_dataset(0),
|
| 479 |
reward_funcs=reward_fn,
|
| 480 |
-
|
| 481 |
)
|
| 482 |
|
| 483 |
# ── Curriculum callback ───────────────────────────────────────────────────────
|
|
|
|
| 46 |
_TRAIN_DEPS = [
|
| 47 |
"wandb==0.18.7",
|
| 48 |
"datasets==3.0.2",
|
| 49 |
+
"transformers==4.44.2",
|
| 50 |
"accelerate==1.0.1",
|
| 51 |
+
"trl==0.12.2",
|
| 52 |
"peft==0.13.2",
|
| 53 |
+
"bitsandbytes==0.43.3",
|
| 54 |
]
|
| 55 |
print("Installing training dependencies...", flush=True)
|
| 56 |
ret = os.system(
|
|
|
|
| 466 |
warmup_steps=10 if args.test else 30,
|
| 467 |
num_generations=_num_gen,
|
| 468 |
max_completion_length=_max_comp,
|
|
|
|
| 469 |
logging_steps=5,
|
| 470 |
save_steps=50,
|
| 471 |
report_to="wandb" if WANDB_API_KEY else "none",
|
|
|
|
| 476 |
args=config,
|
| 477 |
train_dataset=make_dataset(0),
|
| 478 |
reward_funcs=reward_fn,
|
| 479 |
+
tokenizer=tokenizer,
|
| 480 |
)
|
| 481 |
|
| 482 |
# ── Curriculum callback ───────────────────────────────────────────────────────
|