GuizMeuh committed on
Commit
a3528da
·
verified ·
1 Parent(s): c12a7c8

Upload train_math.py

Browse files
Files changed (1) hide show
  1. train_math.py +3 -3
train_math.py CHANGED
@@ -18,7 +18,7 @@ LR = 2e-4
18
  PER_DEVICE_BATCH = 4
19
  GRADIENT_ACCUMULATION = 32
20
  MAX_SEQ_LENGTH = 4096
21
- WARMUP_RATIO = 0.1
22
  LORA_R = 32
23
  LORA_ALPHA = 16
24
  LORA_DROPOUT = 0.05
@@ -98,7 +98,7 @@ def main():
98
  per_device_train_batch_size=PER_DEVICE_BATCH,
99
  gradient_accumulation_steps=GRADIENT_ACCUMULATION,
100
  learning_rate=LR, bf16=True,
101
- lr_scheduler_type="cosine", warmup_ratio=WARMUP_RATIO,
102
  logging_steps=10, save_strategy="epoch", save_total_limit=2,
103
  gradient_checkpointing=True, push_to_hub=True, hub_model_id=HUB_MODEL_ID,
104
  hub_private_repo=False, report_to="trackio", disable_tqdm=True,
@@ -107,7 +107,7 @@ def main():
107
 
108
  print("Initializing SFTTrainer...")
109
  trainer = SFTTrainer(
110
- model=model, tokenizer=tokenizer, train_dataset=train_dataset,
111
  peft_config=peft_config, args=training_args,
112
  max_seq_length=MAX_SEQ_LENGTH,
113
  )
 
18
  PER_DEVICE_BATCH = 4
19
  GRADIENT_ACCUMULATION = 32
20
  MAX_SEQ_LENGTH = 4096
21
+ WARMUP_STEPS = 500
22
  LORA_R = 32
23
  LORA_ALPHA = 16
24
  LORA_DROPOUT = 0.05
 
98
  per_device_train_batch_size=PER_DEVICE_BATCH,
99
  gradient_accumulation_steps=GRADIENT_ACCUMULATION,
100
  learning_rate=LR, bf16=True,
101
+ lr_scheduler_type="cosine", warmup_steps=WARMUP_STEPS,
102
  logging_steps=10, save_strategy="epoch", save_total_limit=2,
103
  gradient_checkpointing=True, push_to_hub=True, hub_model_id=HUB_MODEL_ID,
104
  hub_private_repo=False, report_to="trackio", disable_tqdm=True,
 
107
 
108
  print("Initializing SFTTrainer...")
109
  trainer = SFTTrainer(
110
+ model=model, processing_class=tokenizer, train_dataset=train_dataset,
111
  peft_config=peft_config, args=training_args,
112
  max_seq_length=MAX_SEQ_LENGTH,
113
  )