av-codes committed on
Commit
da6eea2
·
verified ·
1 Parent(s): dbfd261

add hub push on every save + save every 4000 steps for crash recovery

Browse files
Files changed (1) hide show
  1. train_hrm_text_pi.py +6 -4
train_hrm_text_pi.py CHANGED
@@ -929,19 +929,21 @@ def main():
929
  warmup_steps=500 if not args.test else 0,
930
  lr_scheduler_type="cosine",
931
  eval_strategy="epoch",
932
- save_strategy="epoch",
 
933
  load_best_model_at_end=True,
934
  metric_for_best_model="f1",
935
  greater_is_better=True,
936
- save_total_limit=2,
937
  logging_strategy="steps",
938
  logging_first_step=True,
939
  logging_steps=5 if args.test else 20,
940
  disable_tqdm=False if args.test else True,
941
  fp16=use_cuda,
942
  bf16=False,
943
- push_to_hub=False,
944
- hub_model_id=None,
 
945
  use_cpu=not use_cuda,
946
  dataloader_num_workers=4,
947
  seed=args.seed,
 
929
  warmup_steps=500 if not args.test else 0,
930
  lr_scheduler_type="cosine",
931
  eval_strategy="epoch",
932
+ save_strategy="steps",
933
+ save_steps=4000,
934
  load_best_model_at_end=True,
935
  metric_for_best_model="f1",
936
  greater_is_better=True,
937
+ save_total_limit=3,
938
  logging_strategy="steps",
939
  logging_first_step=True,
940
  logging_steps=5 if args.test else 20,
941
  disable_tqdm=False if args.test else True,
942
  fp16=use_cuda,
943
  bf16=False,
944
+ push_to_hub=True,
945
+ hub_model_id=args.push_to_hub,
946
+ hub_strategy="every_save",
947
  use_cpu=not use_cuda,
948
  dataloader_num_workers=4,
949
  seed=args.seed,