Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- 2718/gemma-2-2b/MP_corr_0.1/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/MP_corr_0.2/conf.yaml +40 -0
- 2718/gemma-2-2b/MP_corr_0.2/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/MP_corr_0.5/conf.yaml +40 -0
- 2718/gemma-2-2b/MP_corr_0.5/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/MP_corr_0.9/conf.yaml +40 -0
- 2718/gemma-2-2b/MP_corr_0.9/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/MP_corr_1.0/conf.yaml +40 -0
- 2718/gemma-2-2b/MP_corr_1.0/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/MP_corr_null/conf.yaml +40 -0
- 2718/gemma-2-2b/MP_corr_null/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/old/MP_corr_0.1/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/old/MP_corr_0.2/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/old/MP_corr_0.5/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/old/MP_corr_0.9/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/old/MP_corr_1.0/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/old/MP_uniform/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/old/sparsemax_dist_corr_0.1/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/old/sparsemax_dist_corr_0.2/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/old/sparsemax_dist_corr_0.5/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/old/sparsemax_dist_corr_0.9/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/old/sparsemax_dist_corr_1.0/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/old/sparsemax_dist_uniform/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/relu_corr_0.1/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/relu_corr_0.2/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/relu_corr_0.5/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/relu_corr_0.9/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/relu_corr_1.0/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/relu_uniform/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/sparsemax_dist_corr_0.1/conf.yaml +40 -0
- 2718/gemma-2-2b/sparsemax_dist_corr_0.1/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/sparsemax_dist_corr_0.2/conf.yaml +40 -0
- 2718/gemma-2-2b/sparsemax_dist_corr_0.2/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/sparsemax_dist_corr_0.5/conf.yaml +40 -0
- 2718/gemma-2-2b/sparsemax_dist_corr_0.5/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/sparsemax_dist_corr_0.9/conf.yaml +40 -0
- 2718/gemma-2-2b/sparsemax_dist_corr_0.9/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/sparsemax_dist_corr_1.0/conf.yaml +40 -0
- 2718/gemma-2-2b/sparsemax_dist_corr_1.0/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/sparsemax_dist_uniform/conf.yaml +40 -0
- 2718/gemma-2-2b/sparsemax_dist_uniform/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/topk_corr_0.1/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/topk_corr_0.2/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/topk_corr_0.5/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/topk_corr_0.9/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/topk_corr_1.0/latest_ckpt.pt +3 -0
- 2718/gemma-2-2b/topk_uniform/latest_ckpt.pt +3 -0
- 2718/pythia-70m-deduped/MP_corr_0.1/latest_ckpt.pt +3 -0
- 2718/pythia-70m-deduped/MP_corr_0.2/latest_ckpt.pt +3 -0
- 2718/pythia-70m-deduped/MP_corr_0.5/latest_ckpt.pt +3 -0
2718/gemma-2-2b/MP_corr_0.1/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d39693c502d39e0ffe7a76b71ff3d0ba35bbbceedfc8f2fdf8c7457f86e7c553
|
| 3 |
+
size 509644272
|
2718/gemma-2-2b/MP_corr_0.2/conf.yaml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bf16: false
|
| 2 |
+
data:
|
| 3 |
+
batch_size: 64
|
| 4 |
+
corr_config: ../configs/corr_ds-sp_0.2.jsonl
|
| 5 |
+
dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
|
| 6 |
+
max_sample_length: 64
|
| 7 |
+
num_iters: 10000
|
| 8 |
+
num_workers: 2
|
| 9 |
+
path: ../data/labeled_sentences_large_deduped_train.jsonl
|
| 10 |
+
run_validation: false
|
| 11 |
+
deploy: true
|
| 12 |
+
device: cuda:0
|
| 13 |
+
epochs: 2
|
| 14 |
+
eval:
|
| 15 |
+
save_tables: false
|
| 16 |
+
log:
|
| 17 |
+
eval_interval: 1000
|
| 18 |
+
log_interval: 10
|
| 19 |
+
save_interval: 1000
|
| 20 |
+
save_multiple: false
|
| 21 |
+
wandb_project_name: entangled_saes
|
| 22 |
+
model_name: google/gemma-2-2b
|
| 23 |
+
optimizer:
|
| 24 |
+
beta1: 0.9
|
| 25 |
+
beta2: 0.95
|
| 26 |
+
decay_lr: true
|
| 27 |
+
grad_clip: 1.0
|
| 28 |
+
learning_rate: 0.001
|
| 29 |
+
min_lr: 0.0009
|
| 30 |
+
warmup_iters: 200
|
| 31 |
+
weight_decay: 0.0001
|
| 32 |
+
sae:
|
| 33 |
+
encoder_reg: true
|
| 34 |
+
exp_factor: 8
|
| 35 |
+
gamma_reg: 10
|
| 36 |
+
kval_topk: 128
|
| 37 |
+
mp_kval: 128
|
| 38 |
+
sae_type: MP
|
| 39 |
+
seed: 2718
|
| 40 |
+
tag: MP_corr_0.2
|
2718/gemma-2-2b/MP_corr_0.2/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eea75cdc4365dce23a95475a0e46ec87483b19e4efe47db98c2b5c07dbad7e39
|
| 3 |
+
size 509644272
|
2718/gemma-2-2b/MP_corr_0.5/conf.yaml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bf16: false
|
| 2 |
+
data:
|
| 3 |
+
batch_size: 64
|
| 4 |
+
corr_config: ../configs/corr_ds-sp_0.5.jsonl
|
| 5 |
+
dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
|
| 6 |
+
max_sample_length: 64
|
| 7 |
+
num_iters: 10000
|
| 8 |
+
num_workers: 2
|
| 9 |
+
path: ../data/labeled_sentences_large_deduped_train.jsonl
|
| 10 |
+
run_validation: false
|
| 11 |
+
deploy: true
|
| 12 |
+
device: cuda:0
|
| 13 |
+
epochs: 2
|
| 14 |
+
eval:
|
| 15 |
+
save_tables: false
|
| 16 |
+
log:
|
| 17 |
+
eval_interval: 1000
|
| 18 |
+
log_interval: 10
|
| 19 |
+
save_interval: 1000
|
| 20 |
+
save_multiple: false
|
| 21 |
+
wandb_project_name: entangled_saes
|
| 22 |
+
model_name: google/gemma-2-2b
|
| 23 |
+
optimizer:
|
| 24 |
+
beta1: 0.9
|
| 25 |
+
beta2: 0.95
|
| 26 |
+
decay_lr: true
|
| 27 |
+
grad_clip: 1.0
|
| 28 |
+
learning_rate: 0.001
|
| 29 |
+
min_lr: 0.0009
|
| 30 |
+
warmup_iters: 200
|
| 31 |
+
weight_decay: 0.0001
|
| 32 |
+
sae:
|
| 33 |
+
encoder_reg: true
|
| 34 |
+
exp_factor: 8
|
| 35 |
+
gamma_reg: 10
|
| 36 |
+
kval_topk: 128
|
| 37 |
+
mp_kval: 128
|
| 38 |
+
sae_type: MP
|
| 39 |
+
seed: 2718
|
| 40 |
+
tag: MP_corr_0.5
|
2718/gemma-2-2b/MP_corr_0.5/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3af8b95802375420d4789abfba4d23701d28aa269b952a0dbe275495e6a58201
|
| 3 |
+
size 509644272
|
2718/gemma-2-2b/MP_corr_0.9/conf.yaml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bf16: false
|
| 2 |
+
data:
|
| 3 |
+
batch_size: 64
|
| 4 |
+
corr_config: ../configs/corr_ds-sp_0.9.jsonl
|
| 5 |
+
dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
|
| 6 |
+
max_sample_length: 64
|
| 7 |
+
num_iters: 10000
|
| 8 |
+
num_workers: 2
|
| 9 |
+
path: ../data/labeled_sentences_large_deduped_train.jsonl
|
| 10 |
+
run_validation: false
|
| 11 |
+
deploy: true
|
| 12 |
+
device: cuda:0
|
| 13 |
+
epochs: 2
|
| 14 |
+
eval:
|
| 15 |
+
save_tables: false
|
| 16 |
+
log:
|
| 17 |
+
eval_interval: 1000
|
| 18 |
+
log_interval: 10
|
| 19 |
+
save_interval: 1000
|
| 20 |
+
save_multiple: false
|
| 21 |
+
wandb_project_name: entangled_saes
|
| 22 |
+
model_name: google/gemma-2-2b
|
| 23 |
+
optimizer:
|
| 24 |
+
beta1: 0.9
|
| 25 |
+
beta2: 0.95
|
| 26 |
+
decay_lr: true
|
| 27 |
+
grad_clip: 1.0
|
| 28 |
+
learning_rate: 0.001
|
| 29 |
+
min_lr: 0.0009
|
| 30 |
+
warmup_iters: 200
|
| 31 |
+
weight_decay: 0.0001
|
| 32 |
+
sae:
|
| 33 |
+
encoder_reg: true
|
| 34 |
+
exp_factor: 8
|
| 35 |
+
gamma_reg: 10
|
| 36 |
+
kval_topk: 128
|
| 37 |
+
mp_kval: 128
|
| 38 |
+
sae_type: MP
|
| 39 |
+
seed: 2718
|
| 40 |
+
tag: MP_corr_0.9
|
2718/gemma-2-2b/MP_corr_0.9/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:705ccfff48c38623a7f5cd1a8b0ade64da836e2423f7cb8940165daef420063c
|
| 3 |
+
size 509644272
|
2718/gemma-2-2b/MP_corr_1.0/conf.yaml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bf16: false
|
| 2 |
+
data:
|
| 3 |
+
batch_size: 64
|
| 4 |
+
corr_config: ../configs/corr_ds-sp_1.0.jsonl
|
| 5 |
+
dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
|
| 6 |
+
max_sample_length: 64
|
| 7 |
+
num_iters: 10000
|
| 8 |
+
num_workers: 2
|
| 9 |
+
path: ../data/labeled_sentences_large_deduped_train.jsonl
|
| 10 |
+
run_validation: false
|
| 11 |
+
deploy: true
|
| 12 |
+
device: cuda:0
|
| 13 |
+
epochs: 2
|
| 14 |
+
eval:
|
| 15 |
+
save_tables: false
|
| 16 |
+
log:
|
| 17 |
+
eval_interval: 1000
|
| 18 |
+
log_interval: 10
|
| 19 |
+
save_interval: 1000
|
| 20 |
+
save_multiple: false
|
| 21 |
+
wandb_project_name: entangled_saes
|
| 22 |
+
model_name: google/gemma-2-2b
|
| 23 |
+
optimizer:
|
| 24 |
+
beta1: 0.9
|
| 25 |
+
beta2: 0.95
|
| 26 |
+
decay_lr: true
|
| 27 |
+
grad_clip: 1.0
|
| 28 |
+
learning_rate: 0.001
|
| 29 |
+
min_lr: 0.0009
|
| 30 |
+
warmup_iters: 200
|
| 31 |
+
weight_decay: 0.0001
|
| 32 |
+
sae:
|
| 33 |
+
encoder_reg: true
|
| 34 |
+
exp_factor: 8
|
| 35 |
+
gamma_reg: 10
|
| 36 |
+
kval_topk: 128
|
| 37 |
+
mp_kval: 128
|
| 38 |
+
sae_type: MP
|
| 39 |
+
seed: 2718
|
| 40 |
+
tag: MP_corr_1.0
|
2718/gemma-2-2b/MP_corr_1.0/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0625571f7c3c35d3f18e3bbd18f0f0631e02a2827cea6156dcb959eb9f88a9d0
|
| 3 |
+
size 509644272
|
2718/gemma-2-2b/MP_corr_null/conf.yaml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bf16: false
|
| 2 |
+
data:
|
| 3 |
+
batch_size: 64
|
| 4 |
+
corr_config: null
|
| 5 |
+
dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
|
| 6 |
+
max_sample_length: 64
|
| 7 |
+
num_iters: 10000
|
| 8 |
+
num_workers: 2
|
| 9 |
+
path: ../data/labeled_sentences_large_deduped_train.jsonl
|
| 10 |
+
run_validation: false
|
| 11 |
+
deploy: true
|
| 12 |
+
device: cuda:0
|
| 13 |
+
epochs: 2
|
| 14 |
+
eval:
|
| 15 |
+
save_tables: false
|
| 16 |
+
log:
|
| 17 |
+
eval_interval: 1000
|
| 18 |
+
log_interval: 10
|
| 19 |
+
save_interval: 1000
|
| 20 |
+
save_multiple: false
|
| 21 |
+
wandb_project_name: entangled_saes
|
| 22 |
+
model_name: google/gemma-2-2b
|
| 23 |
+
optimizer:
|
| 24 |
+
beta1: 0.9
|
| 25 |
+
beta2: 0.95
|
| 26 |
+
decay_lr: true
|
| 27 |
+
grad_clip: 1.0
|
| 28 |
+
learning_rate: 0.001
|
| 29 |
+
min_lr: 0.0009
|
| 30 |
+
warmup_iters: 200
|
| 31 |
+
weight_decay: 0.0001
|
| 32 |
+
sae:
|
| 33 |
+
encoder_reg: true
|
| 34 |
+
exp_factor: 8
|
| 35 |
+
gamma_reg: 10
|
| 36 |
+
kval_topk: 128
|
| 37 |
+
mp_kval: 128
|
| 38 |
+
sae_type: MP
|
| 39 |
+
seed: 2718
|
| 40 |
+
tag: MP_corr_null
|
2718/gemma-2-2b/MP_corr_null/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6e82b6bd928880a8af38db16fe803399a29b4940dfacb6357334e44897d6c96
|
| 3 |
+
size 509644272
|
2718/gemma-2-2b/old/MP_corr_0.1/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02ff2c96aefc0a32c1d3e912f46d84663c9add744a5ba358a2e69edd023433ec
|
| 3 |
+
size 509642565
|
2718/gemma-2-2b/old/MP_corr_0.2/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25c194a4497949862f8d4d03ab3a8f52deb639b812f449041acc0860a1d123a6
|
| 3 |
+
size 509642565
|
2718/gemma-2-2b/old/MP_corr_0.5/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20e61ee462fe89e30e56d2c3129285bb3b1324c104e3f98f46ed074269f168de
|
| 3 |
+
size 169883973
|
2718/gemma-2-2b/old/MP_corr_0.9/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54fb45658f25415aeefc1b18cad52d15dd0f9b2344929ab2cb0ec9bb688008fe
|
| 3 |
+
size 169883973
|
2718/gemma-2-2b/old/MP_corr_1.0/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88d32b9e1ec567ac87c0057888aaf81cfe96665e2d658d2f22feed7ce9ba896d
|
| 3 |
+
size 169883973
|
2718/gemma-2-2b/old/MP_uniform/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c45b13cb96fd4b8d82382e0b7e88b375003a39b79d4e524657174b13287da19
|
| 3 |
+
size 509642565
|
2718/gemma-2-2b/old/sparsemax_dist_corr_0.1/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b1c4bcda70b36f16d0d2d5acbed86634f12a8dd56e10040a77b790bcff1b604
|
| 3 |
+
size 1019307337
|
2718/gemma-2-2b/old/sparsemax_dist_corr_0.2/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0d33902e89e1484c8be5cade4a8fe927193dca7c733ff258a74ec8fc93fe205
|
| 3 |
+
size 1019307337
|
2718/gemma-2-2b/old/sparsemax_dist_corr_0.5/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ca38ebd3e1d3d900e6373c5a7a38c62818dad8ea27a1ef88edd089e10a83265
|
| 3 |
+
size 1019307337
|
2718/gemma-2-2b/old/sparsemax_dist_corr_0.9/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef016fe06a3ba88a5a28909c33eb3d2ce1c4a5f13c292d1d13b0cfdfcaf25ed0
|
| 3 |
+
size 339827525
|
2718/gemma-2-2b/old/sparsemax_dist_corr_1.0/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bd71f0b1aa3c7acc93dca34981c29ea10ffdc125dff4b70e7eb11ff51cc3d7b
|
| 3 |
+
size 339827525
|
2718/gemma-2-2b/old/sparsemax_dist_uniform/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9fae6c71e797598042a3c331a377aca76ff2b96720c00ea138a34c6ef1ed17b4
|
| 3 |
+
size 1019307337
|
2718/gemma-2-2b/relu_corr_0.1/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c509d711bc0bc539992c9a0a76fd7f7ec594569c44edf5b9d0579eb995fd0966
|
| 3 |
+
size 1019474124
|
2718/gemma-2-2b/relu_corr_0.2/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:787c65865dcb02173a9674367b33e8b18aeb6714909f3cb2768123df70c837b4
|
| 3 |
+
size 1019474124
|
2718/gemma-2-2b/relu_corr_0.5/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0464d34498b0d63e7bbb67224fc54f63ddde4b8b1fc8b3c26681eb3e82c2cbf9
|
| 3 |
+
size 1019474124
|
2718/gemma-2-2b/relu_corr_0.9/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:027edc4ba0105c43452732a5a1a8c356e0bac8e8768a5b31ce2ed4a2285ca108
|
| 3 |
+
size 1019474124
|
2718/gemma-2-2b/relu_corr_1.0/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e916efb0d32b2dd08b645dbdeea356176c456a07b2a79a3c7639f61f7f449c6
|
| 3 |
+
size 1019474124
|
2718/gemma-2-2b/relu_uniform/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c9c334af32b2ea14ca3163240ce43d2f82fc5600aa12f07a7dbf6b71123bfde
|
| 3 |
+
size 1019474060
|
2718/gemma-2-2b/sparsemax_dist_corr_0.1/conf.yaml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bf16: false
|
| 2 |
+
data:
|
| 3 |
+
batch_size: 64
|
| 4 |
+
corr_config: ../configs/corr_ds-sp_0.1.jsonl
|
| 5 |
+
dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
|
| 6 |
+
max_sample_length: 64
|
| 7 |
+
num_iters: 10000
|
| 8 |
+
num_workers: 2
|
| 9 |
+
path: ../data/labeled_sentences_large_deduped_train.jsonl
|
| 10 |
+
run_validation: false
|
| 11 |
+
deploy: true
|
| 12 |
+
device: cuda:0
|
| 13 |
+
epochs: 1
|
| 14 |
+
eval:
|
| 15 |
+
save_tables: false
|
| 16 |
+
log:
|
| 17 |
+
eval_interval: 1000
|
| 18 |
+
log_interval: 10
|
| 19 |
+
save_interval: 1000
|
| 20 |
+
save_multiple: false
|
| 21 |
+
wandb_project_name: entangled_saes
|
| 22 |
+
model_name: google/gemma-2-2b
|
| 23 |
+
optimizer:
|
| 24 |
+
beta1: 0.9
|
| 25 |
+
beta2: 0.95
|
| 26 |
+
decay_lr: true
|
| 27 |
+
grad_clip: 1.0
|
| 28 |
+
learning_rate: 0.001
|
| 29 |
+
min_lr: 0.0009
|
| 30 |
+
warmup_iters: 200
|
| 31 |
+
weight_decay: 0.0001
|
| 32 |
+
sae:
|
| 33 |
+
encoder_reg: true
|
| 34 |
+
exp_factor: 8
|
| 35 |
+
gamma_reg: 10
|
| 36 |
+
kval_topk: 128
|
| 37 |
+
mp_kval: 128
|
| 38 |
+
sae_type: sparsemax_dist
|
| 39 |
+
seed: 2718
|
| 40 |
+
tag: sparsemax_dist_corr_0.1
|
2718/gemma-2-2b/sparsemax_dist_corr_0.1/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d51c83b47dbbe78f767ebe84eb4826e58876e4ed30dbdc8a8f6ed5738d2ffe7
|
| 3 |
+
size 1019308980
|
2718/gemma-2-2b/sparsemax_dist_corr_0.2/conf.yaml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bf16: false
|
| 2 |
+
data:
|
| 3 |
+
batch_size: 64
|
| 4 |
+
corr_config: ../configs/corr_ds-sp_0.2.jsonl
|
| 5 |
+
dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
|
| 6 |
+
max_sample_length: 64
|
| 7 |
+
num_iters: 10000
|
| 8 |
+
num_workers: 2
|
| 9 |
+
path: ../data/labeled_sentences_large_deduped_train.jsonl
|
| 10 |
+
run_validation: false
|
| 11 |
+
deploy: true
|
| 12 |
+
device: cuda:0
|
| 13 |
+
epochs: 1
|
| 14 |
+
eval:
|
| 15 |
+
save_tables: false
|
| 16 |
+
log:
|
| 17 |
+
eval_interval: 1000
|
| 18 |
+
log_interval: 10
|
| 19 |
+
save_interval: 1000
|
| 20 |
+
save_multiple: false
|
| 21 |
+
wandb_project_name: entangled_saes
|
| 22 |
+
model_name: google/gemma-2-2b
|
| 23 |
+
optimizer:
|
| 24 |
+
beta1: 0.9
|
| 25 |
+
beta2: 0.95
|
| 26 |
+
decay_lr: true
|
| 27 |
+
grad_clip: 1.0
|
| 28 |
+
learning_rate: 0.001
|
| 29 |
+
min_lr: 0.0009
|
| 30 |
+
warmup_iters: 200
|
| 31 |
+
weight_decay: 0.0001
|
| 32 |
+
sae:
|
| 33 |
+
encoder_reg: true
|
| 34 |
+
exp_factor: 8
|
| 35 |
+
gamma_reg: 10
|
| 36 |
+
kval_topk: 128
|
| 37 |
+
mp_kval: 128
|
| 38 |
+
sae_type: sparsemax_dist
|
| 39 |
+
seed: 2718
|
| 40 |
+
tag: sparsemax_dist_corr_0.2
|
2718/gemma-2-2b/sparsemax_dist_corr_0.2/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc02068ccd4616e41fc8fe039ba605fb4a19a2e78118edfafce365b93bbc5949
|
| 3 |
+
size 1019308980
|
2718/gemma-2-2b/sparsemax_dist_corr_0.5/conf.yaml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bf16: false
|
| 2 |
+
data:
|
| 3 |
+
batch_size: 64
|
| 4 |
+
corr_config: ../configs/corr_ds-sp_0.5.jsonl
|
| 5 |
+
dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
|
| 6 |
+
max_sample_length: 64
|
| 7 |
+
num_iters: 10000
|
| 8 |
+
num_workers: 2
|
| 9 |
+
path: ../data/labeled_sentences_large_deduped_train.jsonl
|
| 10 |
+
run_validation: false
|
| 11 |
+
deploy: true
|
| 12 |
+
device: cuda:0
|
| 13 |
+
epochs: 1
|
| 14 |
+
eval:
|
| 15 |
+
save_tables: false
|
| 16 |
+
log:
|
| 17 |
+
eval_interval: 1000
|
| 18 |
+
log_interval: 10
|
| 19 |
+
save_interval: 1000
|
| 20 |
+
save_multiple: false
|
| 21 |
+
wandb_project_name: entangled_saes
|
| 22 |
+
model_name: google/gemma-2-2b
|
| 23 |
+
optimizer:
|
| 24 |
+
beta1: 0.9
|
| 25 |
+
beta2: 0.95
|
| 26 |
+
decay_lr: true
|
| 27 |
+
grad_clip: 1.0
|
| 28 |
+
learning_rate: 0.001
|
| 29 |
+
min_lr: 0.0009
|
| 30 |
+
warmup_iters: 200
|
| 31 |
+
weight_decay: 0.0001
|
| 32 |
+
sae:
|
| 33 |
+
encoder_reg: true
|
| 34 |
+
exp_factor: 8
|
| 35 |
+
gamma_reg: 10
|
| 36 |
+
kval_topk: 128
|
| 37 |
+
mp_kval: 128
|
| 38 |
+
sae_type: sparsemax_dist
|
| 39 |
+
seed: 2718
|
| 40 |
+
tag: sparsemax_dist_corr_0.5
|
2718/gemma-2-2b/sparsemax_dist_corr_0.5/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c173d0c866da58dcd3afddb9d836c578ed8645d5450170008a33234fc9f4a0a2
|
| 3 |
+
size 1019308980
|
2718/gemma-2-2b/sparsemax_dist_corr_0.9/conf.yaml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bf16: false
|
| 2 |
+
data:
|
| 3 |
+
batch_size: 64
|
| 4 |
+
corr_config: ../configs/corr_ds-sp_0.9.jsonl
|
| 5 |
+
dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
|
| 6 |
+
max_sample_length: 64
|
| 7 |
+
num_iters: 10000
|
| 8 |
+
num_workers: 2
|
| 9 |
+
path: ../data/labeled_sentences_large_deduped_train.jsonl
|
| 10 |
+
run_validation: false
|
| 11 |
+
deploy: true
|
| 12 |
+
device: cuda:0
|
| 13 |
+
epochs: 1
|
| 14 |
+
eval:
|
| 15 |
+
save_tables: false
|
| 16 |
+
log:
|
| 17 |
+
eval_interval: 1000
|
| 18 |
+
log_interval: 10
|
| 19 |
+
save_interval: 1000
|
| 20 |
+
save_multiple: false
|
| 21 |
+
wandb_project_name: entangled_saes
|
| 22 |
+
model_name: google/gemma-2-2b
|
| 23 |
+
optimizer:
|
| 24 |
+
beta1: 0.9
|
| 25 |
+
beta2: 0.95
|
| 26 |
+
decay_lr: true
|
| 27 |
+
grad_clip: 1.0
|
| 28 |
+
learning_rate: 0.001
|
| 29 |
+
min_lr: 0.0009
|
| 30 |
+
warmup_iters: 200
|
| 31 |
+
weight_decay: 0.0001
|
| 32 |
+
sae:
|
| 33 |
+
encoder_reg: true
|
| 34 |
+
exp_factor: 8
|
| 35 |
+
gamma_reg: 10
|
| 36 |
+
kval_topk: 128
|
| 37 |
+
mp_kval: 128
|
| 38 |
+
sae_type: sparsemax_dist
|
| 39 |
+
seed: 2718
|
| 40 |
+
tag: sparsemax_dist_corr_0.9
|
2718/gemma-2-2b/sparsemax_dist_corr_0.9/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74c7728aed5a0efc2dd4336718b8dc0ea7e1e7dc0bb9159cdb639cea862b02e1
|
| 3 |
+
size 1019308980
|
2718/gemma-2-2b/sparsemax_dist_corr_1.0/conf.yaml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bf16: false
|
| 2 |
+
data:
|
| 3 |
+
batch_size: 64
|
| 4 |
+
corr_config: ../configs/corr_ds-sp_1.0.jsonl
|
| 5 |
+
dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
|
| 6 |
+
max_sample_length: 64
|
| 7 |
+
num_iters: 10000
|
| 8 |
+
num_workers: 2
|
| 9 |
+
path: ../data/labeled_sentences_large_deduped_train.jsonl
|
| 10 |
+
run_validation: false
|
| 11 |
+
deploy: true
|
| 12 |
+
device: cuda:0
|
| 13 |
+
epochs: 1
|
| 14 |
+
eval:
|
| 15 |
+
save_tables: false
|
| 16 |
+
log:
|
| 17 |
+
eval_interval: 1000
|
| 18 |
+
log_interval: 10
|
| 19 |
+
save_interval: 1000
|
| 20 |
+
save_multiple: false
|
| 21 |
+
wandb_project_name: entangled_saes
|
| 22 |
+
model_name: google/gemma-2-2b
|
| 23 |
+
optimizer:
|
| 24 |
+
beta1: 0.9
|
| 25 |
+
beta2: 0.95
|
| 26 |
+
decay_lr: true
|
| 27 |
+
grad_clip: 1.0
|
| 28 |
+
learning_rate: 0.001
|
| 29 |
+
min_lr: 0.0009
|
| 30 |
+
warmup_iters: 200
|
| 31 |
+
weight_decay: 0.0001
|
| 32 |
+
sae:
|
| 33 |
+
encoder_reg: true
|
| 34 |
+
exp_factor: 8
|
| 35 |
+
gamma_reg: 10
|
| 36 |
+
kval_topk: 128
|
| 37 |
+
mp_kval: 128
|
| 38 |
+
sae_type: sparsemax_dist
|
| 39 |
+
seed: 2718
|
| 40 |
+
tag: sparsemax_dist_corr_1.0
|
2718/gemma-2-2b/sparsemax_dist_corr_1.0/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f12116aad4b5ad6b821ebadd06e662ada6ec43337b54291582b9a24249f57863
|
| 3 |
+
size 1019308980
|
2718/gemma-2-2b/sparsemax_dist_uniform/conf.yaml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bf16: false
|
| 2 |
+
data:
|
| 3 |
+
batch_size: 64
|
| 4 |
+
corr_config: null
|
| 5 |
+
dev_path: ../data/labeled_sentences_large_deduped_dev.jsonl
|
| 6 |
+
max_sample_length: 64
|
| 7 |
+
num_iters: 10000
|
| 8 |
+
num_workers: 2
|
| 9 |
+
path: ../data/labeled_sentences_large_deduped_train.jsonl
|
| 10 |
+
run_validation: false
|
| 11 |
+
deploy: true
|
| 12 |
+
device: cuda:0
|
| 13 |
+
epochs: 1
|
| 14 |
+
eval:
|
| 15 |
+
save_tables: false
|
| 16 |
+
log:
|
| 17 |
+
eval_interval: 1000
|
| 18 |
+
log_interval: 10
|
| 19 |
+
save_interval: 1000
|
| 20 |
+
save_multiple: false
|
| 21 |
+
wandb_project_name: entangled_saes
|
| 22 |
+
model_name: google/gemma-2-2b
|
| 23 |
+
optimizer:
|
| 24 |
+
beta1: 0.9
|
| 25 |
+
beta2: 0.95
|
| 26 |
+
decay_lr: true
|
| 27 |
+
grad_clip: 1.0
|
| 28 |
+
learning_rate: 0.001
|
| 29 |
+
min_lr: 0.0009
|
| 30 |
+
warmup_iters: 200
|
| 31 |
+
weight_decay: 0.0001
|
| 32 |
+
sae:
|
| 33 |
+
encoder_reg: true
|
| 34 |
+
exp_factor: 8
|
| 35 |
+
gamma_reg: 10
|
| 36 |
+
kval_topk: 128
|
| 37 |
+
mp_kval: 128
|
| 38 |
+
sae_type: sparsemax_dist
|
| 39 |
+
seed: 2718
|
| 40 |
+
tag: sparsemax_dist_corr_null
|
2718/gemma-2-2b/sparsemax_dist_uniform/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c8b27f1d1297d3aa242f0b50fbcb4c636ab6708d2e4a7b2ab9bf93b8425bf17
|
| 3 |
+
size 1019308980
|
2718/gemma-2-2b/topk_corr_0.1/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06fb8c4fddad7a54277e3fd6eecb462818007bf918cf85a0d9fa864b84a2cc32
|
| 3 |
+
size 1019325769
|
2718/gemma-2-2b/topk_corr_0.2/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cd796d98615e8a7fe529b1e974be27119493ead6786775af6a92b01984a5373
|
| 3 |
+
size 1019325769
|
2718/gemma-2-2b/topk_corr_0.5/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:044b82213487f4005b6f26338268665cd8d53f8f861e4edcb668b9203634cc96
|
| 3 |
+
size 1019325769
|
2718/gemma-2-2b/topk_corr_0.9/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34e196d8515e53ff2d4f9d62fc7df444a65dab339c4d18b2dd24fe819b0c5a8a
|
| 3 |
+
size 1019325769
|
2718/gemma-2-2b/topk_corr_1.0/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:173c2fd8375237eb24a3670b44fc361526500cf8cfa635e2cef507e42d75527c
|
| 3 |
+
size 1019325769
|
2718/gemma-2-2b/topk_uniform/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f1f57a676ac1580e902259f8732e99468e87f86dbce96067506a826aa6e099a
|
| 3 |
+
size 1019325705
|
2718/pythia-70m-deduped/MP_corr_0.1/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:070af424678728c094e14666e1d635b1d873622dcf84fa461cb86cfdf74320a8
|
| 3 |
+
size 25179013
|
2718/pythia-70m-deduped/MP_corr_0.2/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55faec7200aa9b3a481aa1d7bdbcf5df544163eee59e5fb4401e52524efa0e41
|
| 3 |
+
size 25179013
|
2718/pythia-70m-deduped/MP_corr_0.5/latest_ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e72aef8e6c9207a4dbd5efec6f7a03b542fa3d0feb463c6a8c91ff65e3c3cac0
|
| 3 |
+
size 25179013
|