Switch to bert-base-uncased, larger batch, fp16
Browse files
- train_sidecar.py +9 -8
train_sidecar.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
-
"""Train a
|
| 3 |
import json, random, argparse, ast, sys
|
| 4 |
import numpy as np
|
| 5 |
import torch
|
|
@@ -173,12 +173,12 @@ def tokenize_and_align(examples, tokenizer):
|
|
| 173 |
|
| 174 |
def main():
|
| 175 |
parser = argparse.ArgumentParser()
|
| 176 |
-
parser.add_argument("--base_model", default="
|
| 177 |
-
parser.add_argument("--output_model", default="narcolepticchicken/privacy-filter-sidecar-
|
| 178 |
parser.add_argument("--epochs", type=int, default=5)
|
| 179 |
-
parser.add_argument("--batch_size", type=int, default=
|
| 180 |
-
parser.add_argument("--grad_accum", type=int, default=
|
| 181 |
-
parser.add_argument("--lr", type=float, default=
|
| 182 |
parser.add_argument("--max_synthetic", type=int, default=5000)
|
| 183 |
parser.add_argument("--max_nemotron_train", type=int, default=5000)
|
| 184 |
parser.add_argument("--max_nemotron_eval", type=int, default=1000)
|
|
@@ -264,10 +264,11 @@ def main():
|
|
| 264 |
push_to_hub=True,
|
| 265 |
hub_model_id=args.output_model,
|
| 266 |
report_to="trackio",
|
| 267 |
-
run_name=f"sidecar-
|
| 268 |
project="privacy-filter-enhanced",
|
| 269 |
seed=args.seed,
|
| 270 |
-
bf16=
|
|
|
|
| 271 |
gradient_accumulation_steps=args.grad_accum,
|
| 272 |
dataloader_num_workers=2,
|
| 273 |
warmup_ratio=0.1,
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
+
"""Train a BERT sidecar NER model for 3 new PII categories."""
|
| 3 |
import json, random, argparse, ast, sys
|
| 4 |
import numpy as np
|
| 5 |
import torch
|
|
|
|
| 173 |
|
| 174 |
def main():
|
| 175 |
parser = argparse.ArgumentParser()
|
| 176 |
+
parser.add_argument("--base_model", default="bert-base-uncased")
|
| 177 |
+
parser.add_argument("--output_model", default="narcolepticchicken/privacy-filter-sidecar-bert")
|
| 178 |
parser.add_argument("--epochs", type=int, default=5)
|
| 179 |
+
parser.add_argument("--batch_size", type=int, default=32)
|
| 180 |
+
parser.add_argument("--grad_accum", type=int, default=1)
|
| 181 |
+
parser.add_argument("--lr", type=float, default=5e-5)
|
| 182 |
parser.add_argument("--max_synthetic", type=int, default=5000)
|
| 183 |
parser.add_argument("--max_nemotron_train", type=int, default=5000)
|
| 184 |
parser.add_argument("--max_nemotron_eval", type=int, default=1000)
|
|
|
|
| 264 |
push_to_hub=True,
|
| 265 |
hub_model_id=args.output_model,
|
| 266 |
report_to="trackio",
|
| 267 |
+
run_name=f"sidecar-bert-lr{args.lr}-bs{args.batch_size}",
|
| 268 |
project="privacy-filter-enhanced",
|
| 269 |
seed=args.seed,
|
| 270 |
+
bf16=False,
|
| 271 |
+
fp16=True,
|
| 272 |
gradient_accumulation_steps=args.grad_accum,
|
| 273 |
dataloader_num_workers=2,
|
| 274 |
warmup_ratio=0.1,
|