amueller committed on
Commit
36bb368
·
verified ·
1 Parent(s): c0662fb

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. 2718/gemma-2-2b/MP_corr_0.1/latest_ckpt.pt +3 -0
  2. 2718/gemma-2-2b/MP_corr_0.2/conf.yaml +40 -0
  3. 2718/gemma-2-2b/MP_corr_0.2/latest_ckpt.pt +3 -0
  4. 2718/gemma-2-2b/MP_corr_0.5/conf.yaml +40 -0
  5. 2718/gemma-2-2b/MP_corr_0.5/latest_ckpt.pt +3 -0
  6. 2718/gemma-2-2b/MP_corr_0.9/conf.yaml +40 -0
  7. 2718/gemma-2-2b/MP_corr_0.9/latest_ckpt.pt +3 -0
  8. 2718/gemma-2-2b/MP_corr_1.0/conf.yaml +40 -0
  9. 2718/gemma-2-2b/MP_corr_1.0/latest_ckpt.pt +3 -0
  10. 2718/gemma-2-2b/MP_corr_null/conf.yaml +40 -0
  11. 2718/gemma-2-2b/MP_corr_null/latest_ckpt.pt +3 -0
  12. 2718/gemma-2-2b/old/MP_corr_0.1/latest_ckpt.pt +3 -0
  13. 2718/gemma-2-2b/old/MP_corr_0.2/latest_ckpt.pt +3 -0
  14. 2718/gemma-2-2b/old/MP_corr_0.5/latest_ckpt.pt +3 -0
  15. 2718/gemma-2-2b/old/MP_corr_0.9/latest_ckpt.pt +3 -0
  16. 2718/gemma-2-2b/old/MP_corr_1.0/latest_ckpt.pt +3 -0
  17. 2718/gemma-2-2b/old/MP_uniform/latest_ckpt.pt +3 -0
  18. 2718/gemma-2-2b/old/sparsemax_dist_corr_0.1/latest_ckpt.pt +3 -0
  19. 2718/gemma-2-2b/old/sparsemax_dist_corr_0.2/latest_ckpt.pt +3 -0
  20. 2718/gemma-2-2b/old/sparsemax_dist_corr_0.5/latest_ckpt.pt +3 -0
  21. 2718/gemma-2-2b/old/sparsemax_dist_corr_0.9/latest_ckpt.pt +3 -0
  22. 2718/gemma-2-2b/old/sparsemax_dist_corr_1.0/latest_ckpt.pt +3 -0
  23. 2718/gemma-2-2b/old/sparsemax_dist_uniform/latest_ckpt.pt +3 -0
  24. 2718/gemma-2-2b/relu_corr_0.1/latest_ckpt.pt +3 -0
  25. 2718/gemma-2-2b/relu_corr_0.2/latest_ckpt.pt +3 -0
  26. 2718/gemma-2-2b/relu_corr_0.5/latest_ckpt.pt +3 -0
  27. 2718/gemma-2-2b/relu_corr_0.9/latest_ckpt.pt +3 -0
  28. 2718/gemma-2-2b/relu_corr_1.0/latest_ckpt.pt +3 -0
  29. 2718/gemma-2-2b/relu_uniform/latest_ckpt.pt +3 -0
  30. 2718/gemma-2-2b/sparsemax_dist_corr_0.1/conf.yaml +40 -0
  31. 2718/gemma-2-2b/sparsemax_dist_corr_0.1/latest_ckpt.pt +3 -0
  32. 2718/gemma-2-2b/sparsemax_dist_corr_0.2/conf.yaml +40 -0
  33. 2718/gemma-2-2b/sparsemax_dist_corr_0.2/latest_ckpt.pt +3 -0
  34. 2718/gemma-2-2b/sparsemax_dist_corr_0.5/conf.yaml +40 -0
  35. 2718/gemma-2-2b/sparsemax_dist_corr_0.5/latest_ckpt.pt +3 -0
  36. 2718/gemma-2-2b/sparsemax_dist_corr_0.9/conf.yaml +40 -0
  37. 2718/gemma-2-2b/sparsemax_dist_corr_0.9/latest_ckpt.pt +3 -0
  38. 2718/gemma-2-2b/sparsemax_dist_corr_1.0/conf.yaml +40 -0
  39. 2718/gemma-2-2b/sparsemax_dist_corr_1.0/latest_ckpt.pt +3 -0
  40. 2718/gemma-2-2b/sparsemax_dist_uniform/conf.yaml +40 -0
  41. 2718/gemma-2-2b/sparsemax_dist_uniform/latest_ckpt.pt +3 -0
  42. 2718/gemma-2-2b/topk_corr_0.1/latest_ckpt.pt +3 -0
  43. 2718/gemma-2-2b/topk_corr_0.2/latest_ckpt.pt +3 -0
  44. 2718/gemma-2-2b/topk_corr_0.5/latest_ckpt.pt +3 -0
  45. 2718/gemma-2-2b/topk_corr_0.9/latest_ckpt.pt +3 -0
  46. 2718/gemma-2-2b/topk_corr_1.0/latest_ckpt.pt +3 -0
  47. 2718/gemma-2-2b/topk_uniform/latest_ckpt.pt +3 -0
  48. 2718/pythia-70m-deduped/MP_corr_0.1/latest_ckpt.pt +3 -0
  49. 2718/pythia-70m-deduped/MP_corr_0.2/latest_ckpt.pt +3 -0
  50. 2718/pythia-70m-deduped/MP_corr_0.5/latest_ckpt.pt +3 -0
2718/gemma-2-2b/MP_corr_0.1/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d39693c502d39e0ffe7a76b71ff3d0ba35bbbceedfc8f2fdf8c7457f86e7c553
3
+ size 509644272
2718/gemma-2-2b/MP_corr_0.2/conf.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bf16: false
2
+ data:
3
+ batch_size: 64
4
+ corr_config: ../configs/corr_ds-sp_0.2.jsonl
5
+ dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
6
+ max_sample_length: 64
7
+ num_iters: 10000
8
+ num_workers: 2
9
+ path: ../data/labeled_sentences_large_deduped_train.jsonl
10
+ run_validation: false
11
+ deploy: true
12
+ device: cuda:0
13
+ epochs: 2
14
+ eval:
15
+ save_tables: false
16
+ log:
17
+ eval_interval: 1000
18
+ log_interval: 10
19
+ save_interval: 1000
20
+ save_multiple: false
21
+ wandb_project_name: entangled_saes
22
+ model_name: google/gemma-2-2b
23
+ optimizer:
24
+ beta1: 0.9
25
+ beta2: 0.95
26
+ decay_lr: true
27
+ grad_clip: 1.0
28
+ learning_rate: 0.001
29
+ min_lr: 0.0009
30
+ warmup_iters: 200
31
+ weight_decay: 0.0001
32
+ sae:
33
+ encoder_reg: true
34
+ exp_factor: 8
35
+ gamma_reg: 10
36
+ kval_topk: 128
37
+ mp_kval: 128
38
+ sae_type: MP
39
+ seed: 2718
40
+ tag: MP_corr_0.2
2718/gemma-2-2b/MP_corr_0.2/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eea75cdc4365dce23a95475a0e46ec87483b19e4efe47db98c2b5c07dbad7e39
3
+ size 509644272
2718/gemma-2-2b/MP_corr_0.5/conf.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bf16: false
2
+ data:
3
+ batch_size: 64
4
+ corr_config: ../configs/corr_ds-sp_0.5.jsonl
5
+ dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
6
+ max_sample_length: 64
7
+ num_iters: 10000
8
+ num_workers: 2
9
+ path: ../data/labeled_sentences_large_deduped_train.jsonl
10
+ run_validation: false
11
+ deploy: true
12
+ device: cuda:0
13
+ epochs: 2
14
+ eval:
15
+ save_tables: false
16
+ log:
17
+ eval_interval: 1000
18
+ log_interval: 10
19
+ save_interval: 1000
20
+ save_multiple: false
21
+ wandb_project_name: entangled_saes
22
+ model_name: google/gemma-2-2b
23
+ optimizer:
24
+ beta1: 0.9
25
+ beta2: 0.95
26
+ decay_lr: true
27
+ grad_clip: 1.0
28
+ learning_rate: 0.001
29
+ min_lr: 0.0009
30
+ warmup_iters: 200
31
+ weight_decay: 0.0001
32
+ sae:
33
+ encoder_reg: true
34
+ exp_factor: 8
35
+ gamma_reg: 10
36
+ kval_topk: 128
37
+ mp_kval: 128
38
+ sae_type: MP
39
+ seed: 2718
40
+ tag: MP_corr_0.5
2718/gemma-2-2b/MP_corr_0.5/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3af8b95802375420d4789abfba4d23701d28aa269b952a0dbe275495e6a58201
3
+ size 509644272
2718/gemma-2-2b/MP_corr_0.9/conf.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bf16: false
2
+ data:
3
+ batch_size: 64
4
+ corr_config: ../configs/corr_ds-sp_0.9.jsonl
5
+ dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
6
+ max_sample_length: 64
7
+ num_iters: 10000
8
+ num_workers: 2
9
+ path: ../data/labeled_sentences_large_deduped_train.jsonl
10
+ run_validation: false
11
+ deploy: true
12
+ device: cuda:0
13
+ epochs: 2
14
+ eval:
15
+ save_tables: false
16
+ log:
17
+ eval_interval: 1000
18
+ log_interval: 10
19
+ save_interval: 1000
20
+ save_multiple: false
21
+ wandb_project_name: entangled_saes
22
+ model_name: google/gemma-2-2b
23
+ optimizer:
24
+ beta1: 0.9
25
+ beta2: 0.95
26
+ decay_lr: true
27
+ grad_clip: 1.0
28
+ learning_rate: 0.001
29
+ min_lr: 0.0009
30
+ warmup_iters: 200
31
+ weight_decay: 0.0001
32
+ sae:
33
+ encoder_reg: true
34
+ exp_factor: 8
35
+ gamma_reg: 10
36
+ kval_topk: 128
37
+ mp_kval: 128
38
+ sae_type: MP
39
+ seed: 2718
40
+ tag: MP_corr_0.9
2718/gemma-2-2b/MP_corr_0.9/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:705ccfff48c38623a7f5cd1a8b0ade64da836e2423f7cb8940165daef420063c
3
+ size 509644272
2718/gemma-2-2b/MP_corr_1.0/conf.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bf16: false
2
+ data:
3
+ batch_size: 64
4
+ corr_config: ../configs/corr_ds-sp_1.0.jsonl
5
+ dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
6
+ max_sample_length: 64
7
+ num_iters: 10000
8
+ num_workers: 2
9
+ path: ../data/labeled_sentences_large_deduped_train.jsonl
10
+ run_validation: false
11
+ deploy: true
12
+ device: cuda:0
13
+ epochs: 2
14
+ eval:
15
+ save_tables: false
16
+ log:
17
+ eval_interval: 1000
18
+ log_interval: 10
19
+ save_interval: 1000
20
+ save_multiple: false
21
+ wandb_project_name: entangled_saes
22
+ model_name: google/gemma-2-2b
23
+ optimizer:
24
+ beta1: 0.9
25
+ beta2: 0.95
26
+ decay_lr: true
27
+ grad_clip: 1.0
28
+ learning_rate: 0.001
29
+ min_lr: 0.0009
30
+ warmup_iters: 200
31
+ weight_decay: 0.0001
32
+ sae:
33
+ encoder_reg: true
34
+ exp_factor: 8
35
+ gamma_reg: 10
36
+ kval_topk: 128
37
+ mp_kval: 128
38
+ sae_type: MP
39
+ seed: 2718
40
+ tag: MP_corr_1.0
2718/gemma-2-2b/MP_corr_1.0/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0625571f7c3c35d3f18e3bbd18f0f0631e02a2827cea6156dcb959eb9f88a9d0
3
+ size 509644272
2718/gemma-2-2b/MP_corr_null/conf.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bf16: false
2
+ data:
3
+ batch_size: 64
4
+ corr_config: null
5
+ dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
6
+ max_sample_length: 64
7
+ num_iters: 10000
8
+ num_workers: 2
9
+ path: ../data/labeled_sentences_large_deduped_train.jsonl
10
+ run_validation: false
11
+ deploy: true
12
+ device: cuda:0
13
+ epochs: 2
14
+ eval:
15
+ save_tables: false
16
+ log:
17
+ eval_interval: 1000
18
+ log_interval: 10
19
+ save_interval: 1000
20
+ save_multiple: false
21
+ wandb_project_name: entangled_saes
22
+ model_name: google/gemma-2-2b
23
+ optimizer:
24
+ beta1: 0.9
25
+ beta2: 0.95
26
+ decay_lr: true
27
+ grad_clip: 1.0
28
+ learning_rate: 0.001
29
+ min_lr: 0.0009
30
+ warmup_iters: 200
31
+ weight_decay: 0.0001
32
+ sae:
33
+ encoder_reg: true
34
+ exp_factor: 8
35
+ gamma_reg: 10
36
+ kval_topk: 128
37
+ mp_kval: 128
38
+ sae_type: MP
39
+ seed: 2718
40
+ tag: MP_corr_null
2718/gemma-2-2b/MP_corr_null/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6e82b6bd928880a8af38db16fe803399a29b4940dfacb6357334e44897d6c96
3
+ size 509644272
2718/gemma-2-2b/old/MP_corr_0.1/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02ff2c96aefc0a32c1d3e912f46d84663c9add744a5ba358a2e69edd023433ec
3
+ size 509642565
2718/gemma-2-2b/old/MP_corr_0.2/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25c194a4497949862f8d4d03ab3a8f52deb639b812f449041acc0860a1d123a6
3
+ size 509642565
2718/gemma-2-2b/old/MP_corr_0.5/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20e61ee462fe89e30e56d2c3129285bb3b1324c104e3f98f46ed074269f168de
3
+ size 169883973
2718/gemma-2-2b/old/MP_corr_0.9/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54fb45658f25415aeefc1b18cad52d15dd0f9b2344929ab2cb0ec9bb688008fe
3
+ size 169883973
2718/gemma-2-2b/old/MP_corr_1.0/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88d32b9e1ec567ac87c0057888aaf81cfe96665e2d658d2f22feed7ce9ba896d
3
+ size 169883973
2718/gemma-2-2b/old/MP_uniform/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c45b13cb96fd4b8d82382e0b7e88b375003a39b79d4e524657174b13287da19
3
+ size 509642565
2718/gemma-2-2b/old/sparsemax_dist_corr_0.1/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b1c4bcda70b36f16d0d2d5acbed86634f12a8dd56e10040a77b790bcff1b604
3
+ size 1019307337
2718/gemma-2-2b/old/sparsemax_dist_corr_0.2/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0d33902e89e1484c8be5cade4a8fe927193dca7c733ff258a74ec8fc93fe205
3
+ size 1019307337
2718/gemma-2-2b/old/sparsemax_dist_corr_0.5/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ca38ebd3e1d3d900e6373c5a7a38c62818dad8ea27a1ef88edd089e10a83265
3
+ size 1019307337
2718/gemma-2-2b/old/sparsemax_dist_corr_0.9/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef016fe06a3ba88a5a28909c33eb3d2ce1c4a5f13c292d1d13b0cfdfcaf25ed0
3
+ size 339827525
2718/gemma-2-2b/old/sparsemax_dist_corr_1.0/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd71f0b1aa3c7acc93dca34981c29ea10ffdc125dff4b70e7eb11ff51cc3d7b
3
+ size 339827525
2718/gemma-2-2b/old/sparsemax_dist_uniform/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fae6c71e797598042a3c331a377aca76ff2b96720c00ea138a34c6ef1ed17b4
3
+ size 1019307337
2718/gemma-2-2b/relu_corr_0.1/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c509d711bc0bc539992c9a0a76fd7f7ec594569c44edf5b9d0579eb995fd0966
3
+ size 1019474124
2718/gemma-2-2b/relu_corr_0.2/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:787c65865dcb02173a9674367b33e8b18aeb6714909f3cb2768123df70c837b4
3
+ size 1019474124
2718/gemma-2-2b/relu_corr_0.5/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0464d34498b0d63e7bbb67224fc54f63ddde4b8b1fc8b3c26681eb3e82c2cbf9
3
+ size 1019474124
2718/gemma-2-2b/relu_corr_0.9/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:027edc4ba0105c43452732a5a1a8c356e0bac8e8768a5b31ce2ed4a2285ca108
3
+ size 1019474124
2718/gemma-2-2b/relu_corr_1.0/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e916efb0d32b2dd08b645dbdeea356176c456a07b2a79a3c7639f61f7f449c6
3
+ size 1019474124
2718/gemma-2-2b/relu_uniform/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c9c334af32b2ea14ca3163240ce43d2f82fc5600aa12f07a7dbf6b71123bfde
3
+ size 1019474060
2718/gemma-2-2b/sparsemax_dist_corr_0.1/conf.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bf16: false
2
+ data:
3
+ batch_size: 64
4
+ corr_config: ../configs/corr_ds-sp_0.1.jsonl
5
+ dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
6
+ max_sample_length: 64
7
+ num_iters: 10000
8
+ num_workers: 2
9
+ path: ../data/labeled_sentences_large_deduped_train.jsonl
10
+ run_validation: false
11
+ deploy: true
12
+ device: cuda:0
13
+ epochs: 1
14
+ eval:
15
+ save_tables: false
16
+ log:
17
+ eval_interval: 1000
18
+ log_interval: 10
19
+ save_interval: 1000
20
+ save_multiple: false
21
+ wandb_project_name: entangled_saes
22
+ model_name: google/gemma-2-2b
23
+ optimizer:
24
+ beta1: 0.9
25
+ beta2: 0.95
26
+ decay_lr: true
27
+ grad_clip: 1.0
28
+ learning_rate: 0.001
29
+ min_lr: 0.0009
30
+ warmup_iters: 200
31
+ weight_decay: 0.0001
32
+ sae:
33
+ encoder_reg: true
34
+ exp_factor: 8
35
+ gamma_reg: 10
36
+ kval_topk: 128
37
+ mp_kval: 128
38
+ sae_type: sparsemax_dist
39
+ seed: 2718
40
+ tag: sparsemax_dist_corr_0.1
2718/gemma-2-2b/sparsemax_dist_corr_0.1/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d51c83b47dbbe78f767ebe84eb4826e58876e4ed30dbdc8a8f6ed5738d2ffe7
3
+ size 1019308980
2718/gemma-2-2b/sparsemax_dist_corr_0.2/conf.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bf16: false
2
+ data:
3
+ batch_size: 64
4
+ corr_config: ../configs/corr_ds-sp_0.2.jsonl
5
+ dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
6
+ max_sample_length: 64
7
+ num_iters: 10000
8
+ num_workers: 2
9
+ path: ../data/labeled_sentences_large_deduped_train.jsonl
10
+ run_validation: false
11
+ deploy: true
12
+ device: cuda:0
13
+ epochs: 1
14
+ eval:
15
+ save_tables: false
16
+ log:
17
+ eval_interval: 1000
18
+ log_interval: 10
19
+ save_interval: 1000
20
+ save_multiple: false
21
+ wandb_project_name: entangled_saes
22
+ model_name: google/gemma-2-2b
23
+ optimizer:
24
+ beta1: 0.9
25
+ beta2: 0.95
26
+ decay_lr: true
27
+ grad_clip: 1.0
28
+ learning_rate: 0.001
29
+ min_lr: 0.0009
30
+ warmup_iters: 200
31
+ weight_decay: 0.0001
32
+ sae:
33
+ encoder_reg: true
34
+ exp_factor: 8
35
+ gamma_reg: 10
36
+ kval_topk: 128
37
+ mp_kval: 128
38
+ sae_type: sparsemax_dist
39
+ seed: 2718
40
+ tag: sparsemax_dist_corr_0.2
2718/gemma-2-2b/sparsemax_dist_corr_0.2/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc02068ccd4616e41fc8fe039ba605fb4a19a2e78118edfafce365b93bbc5949
3
+ size 1019308980
2718/gemma-2-2b/sparsemax_dist_corr_0.5/conf.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bf16: false
2
+ data:
3
+ batch_size: 64
4
+ corr_config: ../configs/corr_ds-sp_0.5.jsonl
5
+ dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
6
+ max_sample_length: 64
7
+ num_iters: 10000
8
+ num_workers: 2
9
+ path: ../data/labeled_sentences_large_deduped_train.jsonl
10
+ run_validation: false
11
+ deploy: true
12
+ device: cuda:0
13
+ epochs: 1
14
+ eval:
15
+ save_tables: false
16
+ log:
17
+ eval_interval: 1000
18
+ log_interval: 10
19
+ save_interval: 1000
20
+ save_multiple: false
21
+ wandb_project_name: entangled_saes
22
+ model_name: google/gemma-2-2b
23
+ optimizer:
24
+ beta1: 0.9
25
+ beta2: 0.95
26
+ decay_lr: true
27
+ grad_clip: 1.0
28
+ learning_rate: 0.001
29
+ min_lr: 0.0009
30
+ warmup_iters: 200
31
+ weight_decay: 0.0001
32
+ sae:
33
+ encoder_reg: true
34
+ exp_factor: 8
35
+ gamma_reg: 10
36
+ kval_topk: 128
37
+ mp_kval: 128
38
+ sae_type: sparsemax_dist
39
+ seed: 2718
40
+ tag: sparsemax_dist_corr_0.5
2718/gemma-2-2b/sparsemax_dist_corr_0.5/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c173d0c866da58dcd3afddb9d836c578ed8645d5450170008a33234fc9f4a0a2
3
+ size 1019308980
2718/gemma-2-2b/sparsemax_dist_corr_0.9/conf.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bf16: false
2
+ data:
3
+ batch_size: 64
4
+ corr_config: ../configs/corr_ds-sp_0.9.jsonl
5
+ dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
6
+ max_sample_length: 64
7
+ num_iters: 10000
8
+ num_workers: 2
9
+ path: ../data/labeled_sentences_large_deduped_train.jsonl
10
+ run_validation: false
11
+ deploy: true
12
+ device: cuda:0
13
+ epochs: 1
14
+ eval:
15
+ save_tables: false
16
+ log:
17
+ eval_interval: 1000
18
+ log_interval: 10
19
+ save_interval: 1000
20
+ save_multiple: false
21
+ wandb_project_name: entangled_saes
22
+ model_name: google/gemma-2-2b
23
+ optimizer:
24
+ beta1: 0.9
25
+ beta2: 0.95
26
+ decay_lr: true
27
+ grad_clip: 1.0
28
+ learning_rate: 0.001
29
+ min_lr: 0.0009
30
+ warmup_iters: 200
31
+ weight_decay: 0.0001
32
+ sae:
33
+ encoder_reg: true
34
+ exp_factor: 8
35
+ gamma_reg: 10
36
+ kval_topk: 128
37
+ mp_kval: 128
38
+ sae_type: sparsemax_dist
39
+ seed: 2718
40
+ tag: sparsemax_dist_corr_0.9
2718/gemma-2-2b/sparsemax_dist_corr_0.9/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74c7728aed5a0efc2dd4336718b8dc0ea7e1e7dc0bb9159cdb639cea862b02e1
3
+ size 1019308980
2718/gemma-2-2b/sparsemax_dist_corr_1.0/conf.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bf16: false
2
+ data:
3
+ batch_size: 64
4
+ corr_config: ../configs/corr_ds-sp_1.0.jsonl
5
+ dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
6
+ max_sample_length: 64
7
+ num_iters: 10000
8
+ num_workers: 2
9
+ path: ../data/labeled_sentences_large_deduped_train.jsonl
10
+ run_validation: false
11
+ deploy: true
12
+ device: cuda:0
13
+ epochs: 1
14
+ eval:
15
+ save_tables: false
16
+ log:
17
+ eval_interval: 1000
18
+ log_interval: 10
19
+ save_interval: 1000
20
+ save_multiple: false
21
+ wandb_project_name: entangled_saes
22
+ model_name: google/gemma-2-2b
23
+ optimizer:
24
+ beta1: 0.9
25
+ beta2: 0.95
26
+ decay_lr: true
27
+ grad_clip: 1.0
28
+ learning_rate: 0.001
29
+ min_lr: 0.0009
30
+ warmup_iters: 200
31
+ weight_decay: 0.0001
32
+ sae:
33
+ encoder_reg: true
34
+ exp_factor: 8
35
+ gamma_reg: 10
36
+ kval_topk: 128
37
+ mp_kval: 128
38
+ sae_type: sparsemax_dist
39
+ seed: 2718
40
+ tag: sparsemax_dist_corr_1.0
2718/gemma-2-2b/sparsemax_dist_corr_1.0/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f12116aad4b5ad6b821ebadd06e662ada6ec43337b54291582b9a24249f57863
3
+ size 1019308980
2718/gemma-2-2b/sparsemax_dist_uniform/conf.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bf16: false
2
+ data:
3
+ batch_size: 64
4
+ corr_config: null
5
+ dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
6
+ max_sample_length: 64
7
+ num_iters: 10000
8
+ num_workers: 2
9
+ path: ../data/labeled_sentences_large_deduped_train.jsonl
10
+ run_validation: false
11
+ deploy: true
12
+ device: cuda:0
13
+ epochs: 1
14
+ eval:
15
+ save_tables: false
16
+ log:
17
+ eval_interval: 1000
18
+ log_interval: 10
19
+ save_interval: 1000
20
+ save_multiple: false
21
+ wandb_project_name: entangled_saes
22
+ model_name: google/gemma-2-2b
23
+ optimizer:
24
+ beta1: 0.9
25
+ beta2: 0.95
26
+ decay_lr: true
27
+ grad_clip: 1.0
28
+ learning_rate: 0.001
29
+ min_lr: 0.0009
30
+ warmup_iters: 200
31
+ weight_decay: 0.0001
32
+ sae:
33
+ encoder_reg: true
34
+ exp_factor: 8
35
+ gamma_reg: 10
36
+ kval_topk: 128
37
+ mp_kval: 128
38
+ sae_type: sparsemax_dist
39
+ seed: 2718
40
+ tag: sparsemax_dist_corr_null
2718/gemma-2-2b/sparsemax_dist_uniform/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c8b27f1d1297d3aa242f0b50fbcb4c636ab6708d2e4a7b2ab9bf93b8425bf17
3
+ size 1019308980
2718/gemma-2-2b/topk_corr_0.1/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06fb8c4fddad7a54277e3fd6eecb462818007bf918cf85a0d9fa864b84a2cc32
3
+ size 1019325769
2718/gemma-2-2b/topk_corr_0.2/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cd796d98615e8a7fe529b1e974be27119493ead6786775af6a92b01984a5373
3
+ size 1019325769
2718/gemma-2-2b/topk_corr_0.5/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:044b82213487f4005b6f26338268665cd8d53f8f861e4edcb668b9203634cc96
3
+ size 1019325769
2718/gemma-2-2b/topk_corr_0.9/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34e196d8515e53ff2d4f9d62fc7df444a65dab339c4d18b2dd24fe819b0c5a8a
3
+ size 1019325769
2718/gemma-2-2b/topk_corr_1.0/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:173c2fd8375237eb24a3670b44fc361526500cf8cfa635e2cef507e42d75527c
3
+ size 1019325769
2718/gemma-2-2b/topk_uniform/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f1f57a676ac1580e902259f8732e99468e87f86dbce96067506a826aa6e099a
3
+ size 1019325705
2718/pythia-70m-deduped/MP_corr_0.1/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:070af424678728c094e14666e1d635b1d873622dcf84fa461cb86cfdf74320a8
3
+ size 25179013
2718/pythia-70m-deduped/MP_corr_0.2/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55faec7200aa9b3a481aa1d7bdbcf5df544163eee59e5fb4401e52524efa0e41
3
+ size 25179013
2718/pythia-70m-deduped/MP_corr_0.5/latest_ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e72aef8e6c9207a4dbd5efec6f7a03b542fa3d0feb463c6a8c91ff65e3c3cac0
3
+ size 25179013