batch_size: 64
decoder_assistance_role: reflect
decoder_base_conv_format: default
decoder_base_conv_format_kwargs: null
dropout: 0.0
eval_batch_size: 64
eval_data:
  heldout:
  - single_token_mc
  - ../../llama31_8b_data/eval_synthsys/heldout.pkl
  non_heldout:
  - single_token_mc
  - ../../llama31_8b_data/eval_synthsys/non_heldout.pkl
eval_interval: 100
eval_name_mapping:
  test_templ_mc: test_mc
  train_templ_mc: train_mc
eval_num_datapoints: 6400
eval_num_steps: null
eval_patch_regions:
- user
fsdp_config:
  fsdp_activation_checkpointing: true
  fsdp_cpu_offload: false
  replica_group_size: 0
  sharding_group_size: 0
  sharding_strategy: 1
gradient_accumulation_steps: null
gradient_clipping: false
gradient_clipping_threshold: 1.0
hf_model_id: meta-llama/Llama-3.1-8B-Instruct
layer_mapping:
  '0': 15
layer_to_read: 21
layer_to_write: 1
log_interval: 100
lora_config:
  lora_alpha: 32
  lora_bias: none
  lora_dropout: 0.05
  lora_r: 16
  lora_target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - gate_proj
  - up_proj
  - down_proj
  - lm_head
  lora_task_type: CAUSAL_LM
lr: 0.0001
micro_batch_size: 8
min_lr_ratio: 0.1
new_context_tokens: null
num_steps: 5000
pretrain_data_config: null
read_layer_module_keys:
- layer: 15
  module: residual
save_final_checkpoint: true
save_interval: 500
save_path: latentqa/llama31_8b_experiments/15_to_0
seed: 7236
train_data_path: ../../llama31_8b_data/train.pkl
train_patch_regions:
- user
use_fsdp: false
use_peft: true
use_wandb: true
valid_data_path: null
wandb_group_name: llama31_8b_experiments
wandb_project: latentqa
wandb_run_name: 15_to_0
warmup_steps: 0
weight_decay: 0.01
write_layer_module_keys:
- layer: 0
  module: residual