Jayant-Kernel committed on
Commit ·
3fdbb3c
1
Parent(s): ad9cdcc
fix: use /tmp for all output directories
Browse files
train.py
CHANGED
|
@@ -176,7 +176,7 @@ trainer = GRPOTrainer(
|
|
| 176 |
processing_class=tokenizer,
|
| 177 |
reward_funcs=[reward_fn],
|
| 178 |
args=GRPOConfig(
|
| 179 |
-
output_dir="
|
| 180 |
max_steps=150,
|
| 181 |
per_device_train_batch_size=4,
|
| 182 |
num_generations=4,
|
|
@@ -195,8 +195,8 @@ wandb.finish()
|
|
| 195 |
print("Training done!")
|
| 196 |
|
| 197 |
# Save Level 1 checkpoint
|
| 198 |
-
model.save_pretrained("deceit-1.5b-l1")
|
| 199 |
-
tokenizer.save_pretrained("deceit-1.5b-l1")
|
| 200 |
print("Level 1 checkpoint saved locally")
|
| 201 |
|
| 202 |
# Load Level 2 dataset
|
|
@@ -268,7 +268,7 @@ trainer_l2 = GRPOTrainer(
|
|
| 268 |
processing_class=tokenizer,
|
| 269 |
reward_funcs=[reward_fn_l2],
|
| 270 |
args=GRPOConfig(
|
| 271 |
-
output_dir="
|
| 272 |
max_steps=80,
|
| 273 |
per_device_train_batch_size=4,
|
| 274 |
num_generations=4,
|
|
@@ -287,8 +287,8 @@ wandb.finish()
|
|
| 287 |
print("Level 2 training done!")
|
| 288 |
|
| 289 |
# Save final model
|
| 290 |
-
model.save_pretrained("deceit-1.5b-final")
|
| 291 |
-
tokenizer.save_pretrained("deceit-1.5b-final")
|
| 292 |
model.push_to_hub(HF_REPO_ID)
|
| 293 |
tokenizer.push_to_hub(HF_REPO_ID)
|
| 294 |
print(f"Final model saved to {HF_REPO_ID}")
|
|
|
|
| 176 |
processing_class=tokenizer,
|
| 177 |
reward_funcs=[reward_fn],
|
| 178 |
args=GRPOConfig(
|
| 179 |
+
output_dir="/tmp/deceit-1.5b",
|
| 180 |
max_steps=150,
|
| 181 |
per_device_train_batch_size=4,
|
| 182 |
num_generations=4,
|
|
|
|
| 195 |
print("Training done!")
|
| 196 |
|
| 197 |
# Save Level 1 checkpoint
|
| 198 |
+
model.save_pretrained("/tmp/deceit-1.5b-l1")
|
| 199 |
+
tokenizer.save_pretrained("/tmp/deceit-1.5b-l1")
|
| 200 |
print("Level 1 checkpoint saved locally")
|
| 201 |
|
| 202 |
# Load Level 2 dataset
|
|
|
|
| 268 |
processing_class=tokenizer,
|
| 269 |
reward_funcs=[reward_fn_l2],
|
| 270 |
args=GRPOConfig(
|
| 271 |
+
output_dir="/tmp/deceit-1.5b-l2",
|
| 272 |
max_steps=80,
|
| 273 |
per_device_train_batch_size=4,
|
| 274 |
num_generations=4,
|
|
|
|
| 287 |
print("Level 2 training done!")
|
| 288 |
|
| 289 |
# Save final model
|
| 290 |
+
model.save_pretrained("/tmp/deceit-1.5b-final")
|
| 291 |
+
tokenizer.save_pretrained("/tmp/deceit-1.5b-final")
|
| 292 |
model.push_to_hub(HF_REPO_ID)
|
| 293 |
tokenizer.push_to_hub(HF_REPO_ID)
|
| 294 |
print(f"Final model saved to {HF_REPO_ID}")
|