model:
  model_name: meta-llama/Llama-2-7b-hf
  # Alternatives tried: facebook/opt-125m, openai-community/gpt2,
  # EleutherAI/pythia-160m, Qwen/Qwen2.5-0.5B
  # adapter_path: "./run_all/exnr15/ft2"
  # adapter_path: './run_all/run_exps9/ft2'
  # adapter_path: "./exp395/run_ex07/ft2"
  data_collator_mode: 'dynamic'
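  # 'dynamic' presumably selects per-batch dynamic padding (pad each batch to
  # its longest sequence rather than a fixed max length); the exact behavior
  # is defined by the training script's collator, not by this file.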
  rotation_adapter_config:
    r: 16
    num_rotations: 1
    # Full attention + MLP projection set:
    # target_modules: ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
    target_modules: ["q_proj", "v_proj"]
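    # The names follow LoRA-style conventions: presumably r is the size of each
    # learned rotation block and num_rotations is how many rotations are applied
    # per target module; the precise semantics come from this repo's
    # rotation-adapter implementation rather than from the config itself.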
data:
  dataset_name: 'math'
  split_ratio: 0.00258
  # path: "./data/gsm8k_test.jsonl"
  # path: ./data/MetaMathQA-40K/MetaMathQA-40K.json
  path: ./data/MetaMathQA/MetaMathQA-395K.json
  dataset_split: train[:100000]
  # dataset_field: [question, answer]
  dataset_field: [query, response]
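  # MetaMathQA records are JSON objects whose prompt/target live under the keys
  # named in dataset_field; an illustrative (abridged) record:
  #   {"type": "GSM_Rephrased", "query": "...", "response": "..."}
  # If split_ratio is the held-out eval fraction (an assumption), then
  # 0.00258 * 100000 ≈ 258 examples of train[:100000] go to evaluation.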
trainer_args:
  learning_rate: 2e-4
  warmup_ratio: 0.01
  # eval_strategy: steps
  per_device_train_batch_size: 32
  per_device_eval_batch_size: 64
  # accumulate_grad_batches: 1
  # save_steps: 1000
  gradient_checkpointing: False  # disabled: faster training at the cost of extra activation memory
  output_dir: "./exps/run_exps"
  # save_path: "runs"
  report_to: wandb
  logging_steps: 200
  # eval_steps: 1000
  # dataloader_num_workers: 4
  num_train_epochs: 2.0
  # max_steps: 21
  # device: 'cuda'
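# Rough step count (assuming a single device and no gradient accumulation):
# ~99,742 train examples (100,000 minus the ~258 eval split) / batch size 32
# ≈ 3,117 optimizer steps per epoch, ≈ 6,234 steps over num_train_epochs 2.0.
#
# Usage (assumed; the launcher name is hypothetical): configs like this are
# typically passed to the repo's training entry point, e.g.
#   python train.py --config <path-to-this-file>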