Upload folder using huggingface_hub
Browse files- autumn-fog-10/hyperparameters.yaml +70 -0
- autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_10235904.pt +3 -0
- autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_20475904.pt +3 -0
- autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_20475904_log_feature_sparsity.pt +3 -0
- autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_30715904.pt +3 -0
- autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_40955904.pt +3 -0
- autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_40955904_log_feature_sparsity.pt +3 -0
- autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_51195904.pt +3 -0
- autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_61435904.pt +3 -0
- autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_61435904_log_feature_sparsity.pt +3 -0
- autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_71675904.pt +3 -0
- polished-wildflower-12/hyperparameters.yaml +70 -0
- polished-wildflower-12/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_10235904.pt +3 -0
- polished-wildflower-12/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_20475904.pt +3 -0
- polished-wildflower-12/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_20475904_log_feature_sparsity.pt +3 -0
- tough-capybara-11/hyperparameters.yaml +70 -0
autumn-fog-10/hyperparameters.yaml
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
b_dec_init_method: zeros
|
| 4 |
+
cached_activations_path: null
|
| 5 |
+
checkpoint_path: ../outputs/checkpoints
|
| 6 |
+
clip_grad_norm: false
|
| 7 |
+
context_size: 256
|
| 8 |
+
custom_loss: null
|
| 9 |
+
d_in: 2560
|
| 10 |
+
d_out: null
|
| 11 |
+
dataset_path: Skylion007/openwebtext
|
| 12 |
+
dense_loss_coefficient: 0
|
| 13 |
+
device: cuda
|
| 14 |
+
different_output: false
|
| 15 |
+
dtype: float32
|
| 16 |
+
epsilon_l0_approx: 0.2
|
| 17 |
+
eval_frequency: 500
|
| 18 |
+
expansion_factor: 8
|
| 19 |
+
feature_reinit_scale: 0.2
|
| 20 |
+
feature_resampling_method: null
|
| 21 |
+
flatten_activations_over_layer: false
|
| 22 |
+
flatten_activations_over_layer_output: false
|
| 23 |
+
from_pretrained_path: null
|
| 24 |
+
hook_point: blocks.11.hook_resid_pre
|
| 25 |
+
hook_point_head_index: null
|
| 26 |
+
hook_point_head_index_output: null
|
| 27 |
+
hook_point_layer: 11
|
| 28 |
+
hook_point_layer_output: null
|
| 29 |
+
hook_point_output: null
|
| 30 |
+
initial_decoder_norm: 0.1
|
| 31 |
+
initialise_encoder_to_decoder_transpose: false
|
| 32 |
+
is_dataset_tokenized: false
|
| 33 |
+
l0_coefficient: 9.0e-05
|
| 34 |
+
l0_warmup: false
|
| 35 |
+
l0_warmup_steps: 1000
|
| 36 |
+
l1_coefficient: 0
|
| 37 |
+
l1_warmup: false
|
| 38 |
+
l1_warmup_steps: 1000
|
| 39 |
+
log_to_wandb: true
|
| 40 |
+
lr: 0.0004
|
| 41 |
+
lr_scheduler_name: constant
|
| 42 |
+
lr_warm_up_steps: 500
|
| 43 |
+
max_resample_step: 100000
|
| 44 |
+
max_sparsity_target: 1
|
| 45 |
+
min_sparsity_for_resample: 0
|
| 46 |
+
min_sparsity_target: 8.0e-06
|
| 47 |
+
model_name: EleutherAI/pythia-2.8b-deduped
|
| 48 |
+
mse_loss_coefficient: 1
|
| 49 |
+
mse_loss_type: centered
|
| 50 |
+
multiple_runs: false
|
| 51 |
+
n_batches_in_store_buffer: 128
|
| 52 |
+
n_checkpoints: 80
|
| 53 |
+
n_running_sparsity: 300
|
| 54 |
+
normalise_initial_decoder_weights: false
|
| 55 |
+
normalise_w_dec: true
|
| 56 |
+
resample_batches: 128
|
| 57 |
+
resample_frequency: 25000
|
| 58 |
+
scale_input_norm: false
|
| 59 |
+
seed: 42
|
| 60 |
+
sparse_loss_coefficient: 1.0e-06
|
| 61 |
+
sparsity_log_frequency: 5000
|
| 62 |
+
store_batch_size: 8
|
| 63 |
+
subtract_b_dec_from_inputs: false
|
| 64 |
+
total_training_steps: 200000
|
| 65 |
+
train_batch_size: 4096
|
| 66 |
+
use_cached_activations: false
|
| 67 |
+
use_gated_sparse_autoencoder: false
|
| 68 |
+
wandb_log_frequency: 10
|
| 69 |
+
wandb_project: test_gemma_2b
|
| 70 |
+
weight_l1_by_decoder_norms: false
|
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_10235904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b0286db76bb1405babbe0d01606c35ab6593f08fa140a69e7018311991373b9
|
| 3 |
+
size 419527608
|
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_20475904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f91633a9191d97bb1198191eeb12ef12121c3cf56021441337e0b8186bda777
|
| 3 |
+
size 419527608
|
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_20475904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5139601ee8e2dca3e719d1d6294235be7c458116ba60ceafac4abc504b2a5d01
|
| 3 |
+
size 83790
|
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_30715904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c5c058bc5341fde64f6fa5f83c1cb59b4c707c42a00b90d6c2c397e092f77a9
|
| 3 |
+
size 419527608
|
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_40955904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ec3c08e9f11b092cd0b0bbfc2d19f5b094e8d65e8717c5c135412e383819739
|
| 3 |
+
size 419527608
|
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_40955904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55dea2e2d89926b886ea5d0658a84bac46c000ffae9885b744d6323e39f86ffc
|
| 3 |
+
size 83790
|
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_51195904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1651fb7ce66320fabb781007d36dfaa7f1a00d0910033990976a8b1205b8f418
|
| 3 |
+
size 419527608
|
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_61435904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:140537a71edab4fd332fe0a9fa40835afcb34ab502f0317d5fe94f72351d9da1
|
| 3 |
+
size 419527608
|
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_61435904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46e68602b6c8297b96b7c168ec9b35b46aa448d4dfeb3857f6b2d8e2d705be71
|
| 3 |
+
size 83790
|
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_71675904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc5440500ab0c6672f7f5a466bce23b80cb4fa764ff12f3e5a85874758facbe6
|
| 3 |
+
size 419527608
|
polished-wildflower-12/hyperparameters.yaml
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
b_dec_init_method: zeros
|
| 4 |
+
cached_activations_path: null
|
| 5 |
+
checkpoint_path: ../outputs/checkpoints
|
| 6 |
+
clip_grad_norm: false
|
| 7 |
+
context_size: 256
|
| 8 |
+
custom_loss: null
|
| 9 |
+
d_in: 2560
|
| 10 |
+
d_out: null
|
| 11 |
+
dataset_path: Skylion007/openwebtext
|
| 12 |
+
dense_loss_coefficient: 0
|
| 13 |
+
device: cuda
|
| 14 |
+
different_output: false
|
| 15 |
+
dtype: float32
|
| 16 |
+
epsilon_l0_approx: 0.2
|
| 17 |
+
eval_frequency: 500
|
| 18 |
+
expansion_factor: 8
|
| 19 |
+
feature_reinit_scale: 0.2
|
| 20 |
+
feature_resampling_method: null
|
| 21 |
+
flatten_activations_over_layer: false
|
| 22 |
+
flatten_activations_over_layer_output: false
|
| 23 |
+
from_pretrained_path: null
|
| 24 |
+
hook_point: blocks.11.hook_resid_pre
|
| 25 |
+
hook_point_head_index: null
|
| 26 |
+
hook_point_head_index_output: null
|
| 27 |
+
hook_point_layer: 11
|
| 28 |
+
hook_point_layer_output: null
|
| 29 |
+
hook_point_output: null
|
| 30 |
+
initial_decoder_norm: 0.1
|
| 31 |
+
initialise_encoder_to_decoder_transpose: false
|
| 32 |
+
is_dataset_tokenized: false
|
| 33 |
+
l0_coefficient: 9.0e-05
|
| 34 |
+
l0_warmup: false
|
| 35 |
+
l0_warmup_steps: 1000
|
| 36 |
+
l1_coefficient: 0
|
| 37 |
+
l1_warmup: false
|
| 38 |
+
l1_warmup_steps: 1000
|
| 39 |
+
log_to_wandb: true
|
| 40 |
+
lr: 0.0004
|
| 41 |
+
lr_scheduler_name: constant
|
| 42 |
+
lr_warm_up_steps: 500
|
| 43 |
+
max_resample_step: 100000
|
| 44 |
+
max_sparsity_target: 1
|
| 45 |
+
min_sparsity_for_resample: 0
|
| 46 |
+
min_sparsity_target: 8.0e-06
|
| 47 |
+
model_name: EleutherAI/pythia-2.8b-deduped
|
| 48 |
+
mse_loss_coefficient: 1
|
| 49 |
+
mse_loss_type: centered
|
| 50 |
+
multiple_runs: false
|
| 51 |
+
n_batches_in_store_buffer: 128
|
| 52 |
+
n_checkpoints: 80
|
| 53 |
+
n_running_sparsity: 300
|
| 54 |
+
normalise_initial_decoder_weights: false
|
| 55 |
+
normalise_w_dec: true
|
| 56 |
+
resample_batches: 128
|
| 57 |
+
resample_frequency: 25000
|
| 58 |
+
scale_input_norm: false
|
| 59 |
+
seed: 42
|
| 60 |
+
sparse_loss_coefficient: 1.0e-06
|
| 61 |
+
sparsity_log_frequency: 5000
|
| 62 |
+
store_batch_size: 8
|
| 63 |
+
subtract_b_dec_from_inputs: false
|
| 64 |
+
total_training_steps: 200000
|
| 65 |
+
train_batch_size: 4096
|
| 66 |
+
use_cached_activations: false
|
| 67 |
+
use_gated_sparse_autoencoder: false
|
| 68 |
+
wandb_log_frequency: 10
|
| 69 |
+
wandb_project: test_gemma_2b
|
| 70 |
+
weight_l1_by_decoder_norms: false
|
polished-wildflower-12/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_10235904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61ba732653b5157241d96cf52fdd1271784f13b574c9ea5de21cd716c5b0de36
|
| 3 |
+
size 419527608
|
polished-wildflower-12/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_20475904.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f3d7a51ce38b49e78b4b73cb87f71cd4036b1680e0e334bd77c5d2e3a8f31f4
|
| 3 |
+
size 419527608
|
polished-wildflower-12/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_20475904_log_feature_sparsity.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5139601ee8e2dca3e719d1d6294235be7c458116ba60ceafac4abc504b2a5d01
|
| 3 |
+
size 83790
|
tough-capybara-11/hyperparameters.yaml
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adam_beta1: 0.9
|
| 2 |
+
adam_beta2: 0.999
|
| 3 |
+
b_dec_init_method: zeros
|
| 4 |
+
cached_activations_path: null
|
| 5 |
+
checkpoint_path: ../outputs/checkpoints
|
| 6 |
+
clip_grad_norm: false
|
| 7 |
+
context_size: 256
|
| 8 |
+
custom_loss: null
|
| 9 |
+
d_in: 2560
|
| 10 |
+
d_out: null
|
| 11 |
+
dataset_path: Skylion007/openwebtext
|
| 12 |
+
dense_loss_coefficient: 0
|
| 13 |
+
device: cuda
|
| 14 |
+
different_output: false
|
| 15 |
+
dtype: float32
|
| 16 |
+
epsilon_l0_approx: 0.2
|
| 17 |
+
eval_frequency: 500
|
| 18 |
+
expansion_factor: 8
|
| 19 |
+
feature_reinit_scale: 0.2
|
| 20 |
+
feature_resampling_method: null
|
| 21 |
+
flatten_activations_over_layer: false
|
| 22 |
+
flatten_activations_over_layer_output: false
|
| 23 |
+
from_pretrained_path: null
|
| 24 |
+
hook_point: blocks.11.hook_resid_pre
|
| 25 |
+
hook_point_head_index: null
|
| 26 |
+
hook_point_head_index_output: null
|
| 27 |
+
hook_point_layer: 11
|
| 28 |
+
hook_point_layer_output: null
|
| 29 |
+
hook_point_output: null
|
| 30 |
+
initial_decoder_norm: 0.1
|
| 31 |
+
initialise_encoder_to_decoder_transpose: false
|
| 32 |
+
is_dataset_tokenized: false
|
| 33 |
+
l0_coefficient: 9.0e-05
|
| 34 |
+
l0_warmup: false
|
| 35 |
+
l0_warmup_steps: 1000
|
| 36 |
+
l1_coefficient: 0
|
| 37 |
+
l1_warmup: false
|
| 38 |
+
l1_warmup_steps: 1000
|
| 39 |
+
log_to_wandb: true
|
| 40 |
+
lr: 0.0004
|
| 41 |
+
lr_scheduler_name: constant
|
| 42 |
+
lr_warm_up_steps: 500
|
| 43 |
+
max_resample_step: 100000
|
| 44 |
+
max_sparsity_target: 1
|
| 45 |
+
min_sparsity_for_resample: 0
|
| 46 |
+
min_sparsity_target: 8.0e-06
|
| 47 |
+
model_name: EleutherAI/pythia-2.8b-deduped
|
| 48 |
+
mse_loss_coefficient: 1
|
| 49 |
+
mse_loss_type: centered
|
| 50 |
+
multiple_runs: false
|
| 51 |
+
n_batches_in_store_buffer: 128
|
| 52 |
+
n_checkpoints: 80
|
| 53 |
+
n_running_sparsity: 300
|
| 54 |
+
normalise_initial_decoder_weights: false
|
| 55 |
+
normalise_w_dec: true
|
| 56 |
+
resample_batches: 128
|
| 57 |
+
resample_frequency: 25000
|
| 58 |
+
scale_input_norm: false
|
| 59 |
+
seed: 42
|
| 60 |
+
sparse_loss_coefficient: 1.0e-06
|
| 61 |
+
sparsity_log_frequency: 5000
|
| 62 |
+
store_batch_size: 8
|
| 63 |
+
subtract_b_dec_from_inputs: false
|
| 64 |
+
total_training_steps: 200000
|
| 65 |
+
train_batch_size: 4096
|
| 66 |
+
use_cached_activations: false
|
| 67 |
+
use_gated_sparse_autoencoder: false
|
| 68 |
+
wandb_log_frequency: 10
|
| 69 |
+
wandb_project: test_gemma_2b
|
| 70 |
+
weight_l1_by_decoder_norms: false
|