narcolepticchicken committed on
Commit
0899f8d
·
verified ·
1 Parent(s): dda6dd9

Switch to bert-base-uncased, per-device batch 32 (grad_accum 1), lr 5e-5, fp16

Browse files
Files changed (1) hide show
  1. train_sidecar.py +9 -8
train_sidecar.py CHANGED
@@ -1,5 +1,5 @@
1
  #!/usr/bin/env python3
2
- """Train a DeBERTa-v3 sidecar NER model for 3 new PII categories."""
3
  import json, random, argparse, ast, sys
4
  import numpy as np
5
  import torch
@@ -173,12 +173,12 @@ def tokenize_and_align(examples, tokenizer):
173
 
174
  def main():
175
  parser = argparse.ArgumentParser()
176
- parser.add_argument("--base_model", default="microsoft/deberta-v3-base")
177
- parser.add_argument("--output_model", default="narcolepticchicken/privacy-filter-sidecar-v3")
178
  parser.add_argument("--epochs", type=int, default=5)
179
- parser.add_argument("--batch_size", type=int, default=16)
180
- parser.add_argument("--grad_accum", type=int, default=2)
181
- parser.add_argument("--lr", type=float, default=3e-5)
182
  parser.add_argument("--max_synthetic", type=int, default=5000)
183
  parser.add_argument("--max_nemotron_train", type=int, default=5000)
184
  parser.add_argument("--max_nemotron_eval", type=int, default=1000)
@@ -264,10 +264,11 @@ def main():
264
  push_to_hub=True,
265
  hub_model_id=args.output_model,
266
  report_to="trackio",
267
- run_name=f"sidecar-{args.base_model.split('/')[-1]}-lr{args.lr}-bs{args.batch_size}",
268
  project="privacy-filter-enhanced",
269
  seed=args.seed,
270
- bf16=True,
 
271
  gradient_accumulation_steps=args.grad_accum,
272
  dataloader_num_workers=2,
273
  warmup_ratio=0.1,
 
1
  #!/usr/bin/env python3
2
+ """Train a BERT sidecar NER model for 3 new PII categories."""
3
  import json, random, argparse, ast, sys
4
  import numpy as np
5
  import torch
 
173
 
174
  def main():
175
  parser = argparse.ArgumentParser()
176
+ parser.add_argument("--base_model", default="bert-base-uncased")
177
+ parser.add_argument("--output_model", default="narcolepticchicken/privacy-filter-sidecar-bert")
178
  parser.add_argument("--epochs", type=int, default=5)
179
+ parser.add_argument("--batch_size", type=int, default=32)
180
+ parser.add_argument("--grad_accum", type=int, default=1)
181
+ parser.add_argument("--lr", type=float, default=5e-5)
182
  parser.add_argument("--max_synthetic", type=int, default=5000)
183
  parser.add_argument("--max_nemotron_train", type=int, default=5000)
184
  parser.add_argument("--max_nemotron_eval", type=int, default=1000)
 
264
  push_to_hub=True,
265
  hub_model_id=args.output_model,
266
  report_to="trackio",
267
+ run_name=f"sidecar-bert-lr{args.lr}-bs{args.batch_size}",
268
  project="privacy-filter-enhanced",
269
  seed=args.seed,
270
+ bf16=False,
271
+ fp16=True,
272
  gradient_accumulation_steps=args.grad_accum,
273
  dataloader_num_workers=2,
274
  warmup_ratio=0.1,