# 15_to_0/train_lqa_config.yaml
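# Training config for a LatentQA decoder (summary inferred from the keys
# below): LoRA-tunes meta-llama/Llama-3.1-8B-Instruct to decode residual-stream
# activations read at layer 15 and written into layer 0, hence the run name
# 15_to_0.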
batch_size: 64
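# Decoder prompting options; exact semantics are defined by the LatentQA
# training code (assumption: "reflect" selects the decoder's assistance role
# and "default" its base conversation template).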
decoder_assistance_role: reflect
decoder_base_conv_format: default
decoder_base_conv_format_kwargs: null
dropout: 0.0
eval_batch_size: 64
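# Evaluation data, given as (task type, pickle path) pairs. "heldout" vs.
# "non_heldout" presumably distinguishes eval systems excluded from training
# (an assumption based on the file names).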
eval_data:
  heldout:
  - single_token_mc
  - ../../llama31_8b_data/eval_synthsys/heldout.pkl
  non_heldout:
  - single_token_mc
  - ../../llama31_8b_data/eval_synthsys/non_heldout.pkl
eval_interval: 100
eval_name_mapping:
  test_templ_mc: test_mc
  train_templ_mc: train_mc
eval_num_datapoints: 6400
eval_num_steps: null
eval_patch_regions:
- user
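# FSDP settings; inert in this run since use_fsdp is false below. If the
# trainer interprets sharding_strategy as PyTorch's ShardingStrategy enum,
# 1 would be FULL_SHARD (an assumption about the training code).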
fsdp_config:
  fsdp_activation_checkpointing: true
  fsdp_cpu_offload: false
  replica_group_size: 0
  sharding_group_size: 0
  sharding_strategy: 1
gradient_accumulation_steps: null
gradient_clipping: false
gradient_clipping_threshold: 1.0
hf_model_id: meta-llama/Llama-3.1-8B-Instruct
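# Layer routing: layer_mapping sends activations read at layer 15 into layer 0
# (matching the 15_to_0 run name). layer_to_read / layer_to_write look like
# separate defaults that the read/write_layer_module_keys at the bottom of
# this file supersede (an assumption from the key names).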
layer_mapping:
  '0': 15
layer_to_read: 21
layer_to_write: 1
log_interval: 100
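# LoRA adapters: rank 16 with alpha 32 (scaling alpha/r = 2) on all attention
# and MLP projections plus the LM head.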
lora_config:
  lora_alpha: 32
  lora_bias: none
  lora_dropout: 0.05
  lora_r: 16
  lora_target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - gate_proj
  - up_proj
  - down_proj
  - lm_head
  lora_task_type: CAUSAL_LM
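# Optimization: with batch_size 64 and micro_batch_size 8, each step presumably
# accumulates 64 / 8 = 8 micro-batches (an assumption, since
# gradient_accumulation_steps above is null and is likely derived). If the lr
# schedule decays, min_lr_ratio floors it at 0.1 * 1e-4 = 1e-5.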
lr: 0.0001
micro_batch_size: 8
min_lr_ratio: 0.1
new_context_tokens: null
num_steps: 5000
pretrain_data_config: null
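# Read hook: capture activations at decoder layer 15; "residual" presumably
# names the layer's residual-stream output (an assumption from the key name).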
read_layer_module_keys:
- layer: 15
  module: residual
save_final_checkpoint: true
save_interval: 500
save_path: latentqa/llama31_8b_experiments/15_to_0
seed: 7236
train_data_path: ../../llama31_8b_data/train.pkl
train_patch_regions:
- user
use_fsdp: false
use_peft: true
use_wandb: true
valid_data_path: null
wandb_group_name: llama31_8b_experiments
wandb_project: latentqa
wandb_run_name: 15_to_0
warmup_steps: 0
weight_decay: 0.01
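# Write hook: patch the captured layer-15 activations into the residual stream
# at layer 0, per layer_mapping above.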
write_layer_module_keys:
- layer: 0
  module: residual