| base_model: NousResearch/Llama-3.2-1B |
| batch_size: 32 |
| bf16: true |
| chat_template: tokenizer_default_fallback_alpaca |
| datasets: |
| - data_files: |
| - bb5c3bd8ee309eb0_train_data.json |
| ds_type: json |
| format: custom |
| path: /workspace/input_data/bb5c3bd8ee309eb0_train_data.json |
| type: |
| field_input: system_prompt |
| field_instruction: question |
| field_output: response |
| format: '{instruction} {input}' |
| no_input_format: '{instruction}' |
| system_format: '{system}' |
| system_prompt: '' |
| eval_steps: 20 |
| flash_attention: true |
| gpu_memory_limit: 80GiB |
| gradient_checkpointing: true |
| group_by_length: true |
| hub_model_id: SystemAdmin123/60e7a811-5939-479a-aa70-b97de53f693d |
| hub_strategy: checkpoint |
| learning_rate: 0.0002 |
| logging_steps: 10 |
| lr_scheduler: cosine |
| micro_batch_size: 19 |
| model_type: AutoModelForCausalLM |
| num_epochs: 10 |
| optimizer: adamw_bnb_8bit |
| output_dir: /workspace/axolotl/configs |
| pad_to_sequence_len: true |
| resize_token_embeddings_to_32x: false |
| sample_packing: false |
| save_steps: 40 |
| save_total_limit: 1 |
| sequence_len: 2048 |
| special_tokens: |
| pad_token: <|end_of_text|> |
| tokenizer_type: PreTrainedTokenizerFast |
| train_on_inputs: false |
| trust_remote_code: true |
| val_set_size: 0.1 |
| wandb_entity: '' |
| wandb_mode: online |
| wandb_name: NousResearch/Llama-3.2-1B-/tmp/bb5c3bd8ee309eb0_train_data.json |
| wandb_project: Gradients-On-Demand |
| wandb_run: your_name |
| wandb_runid: default |
| warmup_ratio: 0.05 |
| xformers_attention: false |
|
|