Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- absurd-dust-9/hyperparameters.yaml +70 -0
- absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_10235904.pt +3 -0
- absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_102395904.pt +3 -0
- absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_102395904_log_feature_sparsity.pt +3 -0
- absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_20475904.pt +3 -0
- absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_20475904_log_feature_sparsity.pt +3 -0
- absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_30715904.pt +3 -0
- absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_40955904.pt +3 -0
- absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_40955904_log_feature_sparsity.pt +3 -0
- absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_51195904.pt +3 -0
- absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_61435904.pt +3 -0
- absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_61435904_log_feature_sparsity.pt +3 -0
- absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_71675904.pt +3 -0
- absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_81915904.pt +3 -0
- absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_81915904_log_feature_sparsity.pt +3 -0
- absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_92155904.pt +3 -0
- desert-oath-8/hyperparameters.yaml +70 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_10235904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_102395904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_102395904_log_feature_sparsity.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_112635904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_122875904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_122875904_log_feature_sparsity.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_133115904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_143355904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_143355904_log_feature_sparsity.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_153595904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_163835904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_163835904_log_feature_sparsity.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_174075904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_184315904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_184315904_log_feature_sparsity.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_194555904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_20475904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_20475904_log_feature_sparsity.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_204795904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_204795904_log_feature_sparsity.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_215035904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_225275904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_225275904_log_feature_sparsity.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_235515904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_245755904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_245755904_log_feature_sparsity.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_255995904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_266235904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_266235904_log_feature_sparsity.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_276475904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_30715904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_40955904.pt +3 -0
- desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_40955904_log_feature_sparsity.pt +3 -0
absurd-dust-9/hyperparameters.yaml
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
b_dec_init_method: zeros
|
| 4 |
+
cached_activations_path: null
|
| 5 |
+
checkpoint_path: ../outputs/checkpoints
|
| 6 |
+
clip_grad_norm: false
|
| 7 |
+
context_size: 256
|
| 8 |
+
custom_loss: null
|
| 9 |
+
d_in: 2560
|
| 10 |
+
d_out: null
|
| 11 |
+
dataset_path: Skylion007/openwebtext
|
| 12 |
+
dense_loss_coefficient: 0
|
| 13 |
+
device: cuda
|
| 14 |
+
different_output: false
|
| 15 |
+
dtype: float32
|
| 16 |
+
epsilon_l0_approx: 0.2
|
| 17 |
+
eval_frequency: 500
|
| 18 |
+
expansion_factor: 8
|
| 19 |
+
feature_reinit_scale: 0.2
|
| 20 |
+
feature_resampling_method: null
|
| 21 |
+
flatten_activations_over_layer: false
|
| 22 |
+
flatten_activations_over_layer_output: false
|
| 23 |
+
from_pretrained_path: null
|
| 24 |
+
hook_point: blocks.10.hook_resid_pre
|
| 25 |
+
hook_point_head_index: null
|
| 26 |
+
hook_point_head_index_output: null
|
| 27 |
+
hook_point_layer: 10
|
| 28 |
+
hook_point_layer_output: null
|
| 29 |
+
hook_point_output: null
|
| 30 |
+
initial_decoder_norm: 0.1
|
| 31 |
+
initialise_encoder_to_decoder_transpose: false
|
| 32 |
+
is_dataset_tokenized: false
|
| 33 |
+
l0_coefficient: 9.0e-05
|
| 34 |
+
l0_warmup: false
|
| 35 |
+
l0_warmup_steps: 1000
|
| 36 |
+
l1_coefficient: 0
|
| 37 |
+
l1_warmup: false
|
| 38 |
+
l1_warmup_steps: 1000
|
| 39 |
+
log_to_wandb: true
|
| 40 |
+
lr: 0.0004
|
| 41 |
+
lr_scheduler_name: constant
|
| 42 |
+
lr_warm_up_steps: 500
|
| 43 |
+
max_resample_step: 100000
|
| 44 |
+
max_sparsity_target: 1
|
| 45 |
+
min_sparsity_for_resample: 0
|
| 46 |
+
min_sparsity_target: 1.0e-05
|
| 47 |
+
model_name: EleutherAI/pythia-2.8b-deduped
|
| 48 |
+
mse_loss_coefficient: 1
|
| 49 |
+
mse_loss_type: centered
|
| 50 |
+
multiple_runs: false
|
| 51 |
+
n_batches_in_store_buffer: 128
|
| 52 |
+
n_checkpoints: 80
|
| 53 |
+
n_running_sparsity: 300
|
| 54 |
+
normalise_initial_decoder_weights: false
|
| 55 |
+
normalise_w_dec: true
|
| 56 |
+
resample_batches: 128
|
| 57 |
+
resample_frequency: 25000
|
| 58 |
+
scale_input_norm: false
|
| 59 |
+
seed: 42
|
| 60 |
+
sparse_loss_coefficient: 1.0e-06
|
| 61 |
+
sparsity_log_frequency: 5000
|
| 62 |
+
store_batch_size: 8
|
| 63 |
+
subtract_b_dec_from_inputs: false
|
| 64 |
+
total_training_steps: 200000
|
| 65 |
+
train_batch_size: 4096
|
| 66 |
+
use_cached_activations: false
|
| 67 |
+
use_gated_sparse_autoencoder: false
|
| 68 |
+
wandb_log_frequency: 10
|
| 69 |
+
wandb_project: test_gemma_2b
|
| 70 |
+
weight_l1_by_decoder_norms: false
|
absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_10235904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9a8295791a69c2e364b5055448bfecf290ca8da9dec752f98c3faf00df62fe5
|
| 3 |
+
size 419527608
|
absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_102395904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4b76133fafcd25eda06150e4f8d9fb8e0a64ed240eefeb36d31be2c9decd13d
|
| 3 |
+
size 419527616
|
absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_102395904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f49870ecba0405fff3c7ba76cbb6c3c49adc9dce8b9adf19511952544bb1f432
|
| 3 |
+
size 83795
|
absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_20475904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8f04783dfd0e0d3bf58eddb34ee1e30172d6d3c767c6aa56c70fccfa435a841
|
| 3 |
+
size 419527608
|
absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_20475904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1ef0f233d81f2d88e072cf88e934c553b3136ac8eb802c64a5a73a493e10927
|
| 3 |
+
size 83790
|
absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_30715904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e1644c6cba75be53df6e3a4615016868ebac8fd93417e80aaa97a786e5e4f3f
|
| 3 |
+
size 419527608
|
absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_40955904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2eecb31e5cace00f0070da387f4eaf3c68ea380562cf80b1e673e013d37618e4
|
| 3 |
+
size 419527608
|
absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_40955904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7915326437cf68f5dce41c2d746a23df15c2a568248e8e20e58510c57ed3f83c
|
| 3 |
+
size 83790
|
absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_51195904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9229e96b912d9bfa5bd9227e39857e3891113c68cc6d51e2405fdc1560f00a97
|
| 3 |
+
size 419527608
|
absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_61435904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6503d24e7f2c9bd8104a919dc46bd858a381bae4f366d856c9dd375d291a24fa
|
| 3 |
+
size 419527608
|
absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_61435904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4bb6ecda90e88862bfa977c75064f712ef95b4c99c2a926161b8f2771c471685
|
| 3 |
+
size 83790
|
absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_71675904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bde3554dee701880ef6b7e5e873b24ac4fabae46e3ed60a5c3a1df53ca278dc2
|
| 3 |
+
size 419527608
|
absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_81915904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea8c86f910a721d91060c08218cc54749428b8f3cc2361de5f93c573648a1958
|
| 3 |
+
size 419527608
|
absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_81915904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2f4c4c2f6553ddc4b6108675304b61da978c2f05df7a8f1f2329277eb6ec898
|
| 3 |
+
size 83790
|
absurd-dust-9/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.10.hook_resid_pre_s20480_92155904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68290e4020d7d87b5cd085e7bed215c4be6043bc939d9e4dd174390e80f42f37
|
| 3 |
+
size 419527608
|
desert-oath-8/hyperparameters.yaml
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
b_dec_init_method: zeros
|
| 4 |
+
cached_activations_path: null
|
| 5 |
+
checkpoint_path: ../outputs/checkpoints
|
| 6 |
+
clip_grad_norm: false
|
| 7 |
+
context_size: 256
|
| 8 |
+
custom_loss: null
|
| 9 |
+
d_in: 2560
|
| 10 |
+
d_out: null
|
| 11 |
+
dataset_path: Skylion007/openwebtext
|
| 12 |
+
dense_loss_coefficient: 0
|
| 13 |
+
device: cuda
|
| 14 |
+
different_output: false
|
| 15 |
+
dtype: float32
|
| 16 |
+
epsilon_l0_approx: 0.2
|
| 17 |
+
eval_frequency: 500
|
| 18 |
+
expansion_factor: 8
|
| 19 |
+
feature_reinit_scale: 0.2
|
| 20 |
+
feature_resampling_method: null
|
| 21 |
+
flatten_activations_over_layer: false
|
| 22 |
+
flatten_activations_over_layer_output: false
|
| 23 |
+
from_pretrained_path: null
|
| 24 |
+
hook_point: blocks.24.hook_resid_pre
|
| 25 |
+
hook_point_head_index: null
|
| 26 |
+
hook_point_head_index_output: null
|
| 27 |
+
hook_point_layer: 24
|
| 28 |
+
hook_point_layer_output: null
|
| 29 |
+
hook_point_output: null
|
| 30 |
+
initial_decoder_norm: 0.1
|
| 31 |
+
initialise_encoder_to_decoder_transpose: false
|
| 32 |
+
is_dataset_tokenized: false
|
| 33 |
+
l0_coefficient: 9.0e-05
|
| 34 |
+
l0_warmup: false
|
| 35 |
+
l0_warmup_steps: 1000
|
| 36 |
+
l1_coefficient: 0
|
| 37 |
+
l1_warmup: false
|
| 38 |
+
l1_warmup_steps: 1000
|
| 39 |
+
log_to_wandb: true
|
| 40 |
+
lr: 0.0004
|
| 41 |
+
lr_scheduler_name: constant
|
| 42 |
+
lr_warm_up_steps: 500
|
| 43 |
+
max_resample_step: 100000
|
| 44 |
+
max_sparsity_target: 1
|
| 45 |
+
min_sparsity_for_resample: 0
|
| 46 |
+
min_sparsity_target: 1.0e-05
|
| 47 |
+
model_name: EleutherAI/pythia-2.8b-deduped
|
| 48 |
+
mse_loss_coefficient: 1
|
| 49 |
+
mse_loss_type: centered
|
| 50 |
+
multiple_runs: false
|
| 51 |
+
n_batches_in_store_buffer: 128
|
| 52 |
+
n_checkpoints: 80
|
| 53 |
+
n_running_sparsity: 300
|
| 54 |
+
normalise_initial_decoder_weights: false
|
| 55 |
+
normalise_w_dec: true
|
| 56 |
+
resample_batches: 128
|
| 57 |
+
resample_frequency: 25000
|
| 58 |
+
scale_input_norm: false
|
| 59 |
+
seed: 42
|
| 60 |
+
sparse_loss_coefficient: 1.0e-06
|
| 61 |
+
sparsity_log_frequency: 5000
|
| 62 |
+
store_batch_size: 8
|
| 63 |
+
subtract_b_dec_from_inputs: false
|
| 64 |
+
total_training_steps: 200000
|
| 65 |
+
train_batch_size: 4096
|
| 66 |
+
use_cached_activations: false
|
| 67 |
+
use_gated_sparse_autoencoder: false
|
| 68 |
+
wandb_log_frequency: 10
|
| 69 |
+
wandb_project: test_gemma_2b
|
| 70 |
+
weight_l1_by_decoder_norms: false
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_10235904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33bfbb7da1d97bd78d5a38b54ed4438875159bb51e3f7bb6cca1c0d3f3e8ff1b
|
| 3 |
+
size 419527608
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_102395904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac0793431bbd2afb03e286e11874f871f576d4769520a72d9ad178505dcc134d
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_102395904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62fc95b86d2876fc139f948da224cad531932c8df26c3d902b89447b350466dc
|
| 3 |
+
size 83795
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_112635904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8432421f0c9ce3b82852cd84e75b7d7eedf25d9ddadaec0f0bfc8e47d3c9b7c
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_122875904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84c1743a32b4b8b6bcb8b530247e534008da70b3a574888936481474ab4e494f
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_122875904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63f37e3694cbaf47429bad066179fb80ead8a27fd84bf19ee6b3fe8213b5c8fe
|
| 3 |
+
size 83795
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_133115904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:205d43b9dad4e13d13dc04a31476d1ab489524a98beeec47e33ad79d2a6909df
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_143355904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed988e292c82eee6b48f42776e7797274ac00147a874d202578b52b16c29799e
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_143355904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f5b25b5d72129474416dcd830b85e3061e0adc6849ea186307d72989eabfbba
|
| 3 |
+
size 83795
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_153595904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7eddec3192a320df219fa8132ba030b37406ea45f5142029842061f3cde7aebf
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_163835904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2cd2331a119ca8496ecb2bb39747b251e8e310d6872ab0a4ee1e7620f52df85d
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_163835904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8adf8b2e1a39cc292149b4b74553815f68c3c0df548b143e077b68a9ac8ba561
|
| 3 |
+
size 83795
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_174075904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e52d4b02e771ab668d669626ee6018fac7540ab88f3bc7739ea25071e76220b
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_184315904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c2ae18905e47d035d13b1662263753897a6dee1648f037cfed96762b5c793ac
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_184315904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2f8d38909a966362dd30d1696c7f47da6fbf3411738f5f426a9c6f7b38da489
|
| 3 |
+
size 83795
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_194555904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09de86909c5622e787c2a538c47ad3ab3515e26db8556c7a16ee7b624696b329
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_20475904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e419d7785c36e9da66b9dced8a2bb876cf2317bcad8eec12192fa5c7288d3824
|
| 3 |
+
size 419527608
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_20475904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ddd0488a9a455f50d92f8849c72c1cf0840276ca917669f05e74de9b11a4f99
|
| 3 |
+
size 83790
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_204795904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1e8db26517cc91fe3206671d95e3e1a9c4d329b0b64ce26a703f50316dcd3a8
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_204795904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c51626e44efed73f2acc5bbed5fc898f86e53a5bfa75493d519969c59cd9b65
|
| 3 |
+
size 83795
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_215035904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef4b11749a738f67162195f2c424b8fe686dc95892f41dab47e0788ba4eca9f4
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_225275904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8895df8c79c658e53620b394e08db0b1d2c278ea509057d96fd8959533c08e12
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_225275904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d32eb541ceaef794214b4aa66aa02cb24753ec417f0d52c5be977e8983240d3
|
| 3 |
+
size 83795
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_235515904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:874a5f62382c44777e61e0332925260c10e6a6d403e2667420bb9df040525847
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_245755904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e782df84a41cafbf576fbbaba8471e21a66c025b842bae7693a59feb8d37d7a7
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_245755904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd06b093626be000d28042faf65bcd36e7476cfd4a4ab8930f588be4967b9b8e
|
| 3 |
+
size 83795
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_255995904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:481f3da929f1348fdcc434516c22bec8249ff55eebc5f01ad566601edce57031
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_266235904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a70cdfd1bcfb3561b86005c3661025437082cdb1cc9d255716b8cfab4f0859d
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_266235904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ceaa7ab397ba8264d582a5532956de345fb22f3cd1ed266c50b2a27fd4bc3a1c
|
| 3 |
+
size 83795
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_276475904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25077f1350eabe5b9db55698dccd92b0fcc9499d693a52bf0fe98ad7c1918dd4
|
| 3 |
+
size 419527616
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_30715904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b7a66d1390de499f782caffb0dcff4264c88af6cd15c53ce517dda366b9fcda2
|
| 3 |
+
size 419527608
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_40955904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02ff4a06c1698c2717799837a0235e00530864c6cf9a22068af0be5d354d7c42
|
| 3 |
+
size 419527608
|
desert-oath-8/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.24.hook_resid_pre_s20480_40955904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:277115d941bb5b44facc9e2ffb66e1ed56f2664eb57b63927c53c37c0f74910d
|
| 3 |
+
size 83790
|