shank committed on
Commit ·
18b4e8a
1
Parent(s): 024f3c7
Fix: Fixing
Browse files
- training/train_grpo.py +4 -3
training/train_grpo.py
CHANGED
|
@@ -47,8 +47,8 @@ if not args.test_local:
|
|
| 47 |
"wandb==0.18.7",
|
| 48 |
"datasets==3.0.2",
|
| 49 |
"transformers==4.44.2",
|
| 50 |
-
"accelerate==
|
| 51 |
-
"trl==0.
|
| 52 |
"peft==0.13.2",
|
| 53 |
"bitsandbytes==0.43.3",
|
| 54 |
]
|
|
@@ -466,6 +466,7 @@ config = GRPOConfig(
|
|
| 466 |
warmup_steps=10 if args.test else 30,
|
| 467 |
num_generations=_num_gen,
|
| 468 |
max_completion_length=_max_comp,
|
|
|
|
| 469 |
logging_steps=5,
|
| 470 |
save_steps=50,
|
| 471 |
report_to="wandb" if WANDB_API_KEY else "none",
|
|
@@ -476,7 +477,7 @@ trainer = GRPOTrainer(
|
|
| 476 |
args=config,
|
| 477 |
train_dataset=make_dataset(0),
|
| 478 |
reward_funcs=reward_fn,
|
| 479 |
-
|
| 480 |
)
|
| 481 |
|
| 482 |
# ── Curriculum callback ───────────────────────────────────────────────────────
|
|
|
|
| 47 |
"wandb==0.18.7",
|
| 48 |
"datasets==3.0.2",
|
| 49 |
"transformers==4.44.2",
|
| 50 |
+
"accelerate==0.34.2",
|
| 51 |
+
"trl==0.15.2",
|
| 52 |
"peft==0.13.2",
|
| 53 |
"bitsandbytes==0.43.3",
|
| 54 |
]
|
|
|
|
| 466 |
warmup_steps=10 if args.test else 30,
|
| 467 |
num_generations=_num_gen,
|
| 468 |
max_completion_length=_max_comp,
|
| 469 |
+
temperature=0.9,
|
| 470 |
logging_steps=5,
|
| 471 |
save_steps=50,
|
| 472 |
report_to="wandb" if WANDB_API_KEY else "none",
|
|
|
|
| 477 |
args=config,
|
| 478 |
train_dataset=make_dataset(0),
|
| 479 |
reward_funcs=reward_fn,
|
| 480 |
+
processing_class=tokenizer,
|
| 481 |
)
|
| 482 |
|
| 483 |
# ── Curriculum callback ───────────────────────────────────────────────────────
|