base_model: bigscience/bloomz-560m
batch_size: 32
bf16: true
chat_template: tokenizer_default_fallback_alpaca
datasets:
- data_files:
  - a6a22929b7211ec8_train_data.json
  ds_type: json
  format: custom
  path: /workspace/input_data/a6a22929b7211ec8_train_data.json
  type:
    field_input: privacy_mask
    field_instruction: masked_text
    field_output: unmasked_text
    format: '{instruction} {input}'
    no_input_format: '{instruction}'
    system_format: '{system}'
    system_prompt: ''
eval_steps: 20
flash_attention: true
gpu_memory_limit: 80GiB
gradient_checkpointing: true
group_by_length: true
hub_model_id: SystemAdmin123/275c513f-c7ea-4c16-b6eb-7e9453a6a0ac
hub_strategy: checkpoint
learning_rate: 0.0002
logging_steps: 10
lr_scheduler: cosine
micro_batch_size: 19
model_type: AutoModelForCausalLM
num_epochs: 10
optimizer: adamw_bnb_8bit
output_dir: /workspace/axolotl/configs
pad_to_sequence_len: true
resize_token_embeddings_to_32x: false
sample_packing: false
save_steps: 40
save_total_limit: 1
sequence_len: 2048
tokenizer_type: BloomTokenizerFast
train_on_inputs: false
trust_remote_code: true
val_set_size: 0.1
wandb_entity: ''
wandb_mode: online
wandb_name: bigscience/bloomz-560m-/tmp/a6a22929b7211ec8_train_data.json
wandb_project: Gradients-On-Demand
wandb_run: your_name
wandb_runid: default
warmup_ratio: 0.05
xformers_attention: true