---
# Experiment configuration: model selection, rotation-adapter
# hyperparameters, dataset location, and trainer arguments.
#
# NOTE(review): the original file had lost its indentation, so the nesting
# below is reconstructed from key names and ordering. In particular, confirm
# against the config loader that `rotation_adapter_config` is a top-level
# section (rather than a child of `model`) and that `data_collator_mode`
# belongs under `model`.

model:
  # Alternatives that were tried previously:
  #   openai-community/gpt2, EleutherAI/pythia-160m, Qwen/Qwen2.5-0.5B
  model_name: facebook/opt-125m
  # adapter_path: './nl_tasks/run_exps/ft2'
  data_collator_mode: 'dynamic'

rotation_adapter_config:
  r: 4
  num_rotations: 2
  target_modules: [q_proj, v_proj]

data:
  dataset_name: 'math'
  # path: './nl_tasks/data/MetaMathQA-40K'  # or MetaMathQA-40K.json
  # NOTE(review): the active path points at a *test* file while the split
  # below is named `train[:200]` — presumably intentional for a quick run;
  # confirm.
  path: './data/gsm8k_test.jsonl'
  # Quoted so the brackets/colon are always read as a plain string.
  dataset_split: 'train[:200]'
  dataset_field: [question, answer]

trainer_args:
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve `2e-4` as a string, whereas `2.0e-4` is a float everywhere.
  learning_rate: 2.0e-4
  # accumulate_grad_batches: 1
  # dataloader_workers: 5
  # save_interval: 1000
  # sample_interval: 100
  # max_steps: -1
  gradient_checkpointing: false  # turned off for faster training
  output_dir: './run_exps'
  # save_path: 'runs'
  max_steps: 40
  # device: 'cuda'