canrager committed on
Commit
10e8c61
·
verified ·
1 Parent(s): cc40ade

Upload llama-3.1-8B/layer_26/temporal/conf.yaml with huggingface_hub

Browse files
llama-3.1-8B/layer_26/temporal/conf.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data:
2
+ batch_size: 100
3
+ cache_dir: ../../activations/precomputed_activations/precomputed_activations_llama3/
4
+ context_length: 500
5
+ dtype: bfloat16
6
+ epochs: 1
7
+ hf_name: monology/pile-uncopyrighted
8
+ num_total_steps: 200000
9
+ num_workers: 2
10
+ deploy: true
11
+ device_id: cuda:0
12
+ eval:
13
+ save_tables: false
14
+ llm:
15
+ dimin: 4096
16
+ model_hf_name: meta-llama/Llama-3.1-8B
17
+ tokenizer_hf_name: meta-llama/Llama-3.1-8B
18
+ log:
19
+ log_interval: 10
20
+ save_interval: 1000
21
+ save_multiple: false
22
+ wandb_project_name: TemporalSAE_test
23
+ optimizer:
24
+ beta1: 0.9
25
+ beta2: 0.95
26
+ decay_lr: true
27
+ grad_clip: 1.0
28
+ learning_rate: 0.001
29
+ min_lr: 0.0009
30
+ warmup_iters: 200
31
+ weight_decay: 0.0001
32
+ sae:
33
+ block_id: 1
34
+ bottleneck_factor: 1
35
+ exp_factor: 4
36
+ gamma_reg: 10
37
+ kval_topk: 256
38
+ n_attn_layers: 1
39
+ n_heads: 4
40
+ sae_diff_type: topk
41
+ sae_type: temporal
42
+ scaling_factor: 0.029
43
+ tied_weights: true
44
+ seed: 42
45
+ tag: scratch