eoinf committed on
Commit
d9a2822
·
verified ·
1 Parent(s): 40f7f43

Upload folder using huggingface_hub

Browse files
Files changed (16) hide show
  1. autumn-fog-10/hyperparameters.yaml +70 -0
  2. autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_10235904.pt +3 -0
  3. autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_20475904.pt +3 -0
  4. autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_20475904_log_feature_sparsity.pt +3 -0
  5. autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_30715904.pt +3 -0
  6. autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_40955904.pt +3 -0
  7. autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_40955904_log_feature_sparsity.pt +3 -0
  8. autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_51195904.pt +3 -0
  9. autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_61435904.pt +3 -0
  10. autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_61435904_log_feature_sparsity.pt +3 -0
  11. autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_71675904.pt +3 -0
  12. polished-wildflower-12/hyperparameters.yaml +70 -0
  13. polished-wildflower-12/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_10235904.pt +3 -0
  14. polished-wildflower-12/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_20475904.pt +3 -0
  15. polished-wildflower-12/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_20475904_log_feature_sparsity.pt +3 -0
  16. tough-capybara-11/hyperparameters.yaml +70 -0
autumn-fog-10/hyperparameters.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ b_dec_init_method: zeros
4
+ cached_activations_path: null
5
+ checkpoint_path: ../outputs/checkpoints
6
+ clip_grad_norm: false
7
+ context_size: 256
8
+ custom_loss: null
9
+ d_in: 2560
10
+ d_out: null
11
+ dataset_path: Skylion007/openwebtext
12
+ dense_loss_coefficient: 0
13
+ device: cuda
14
+ different_output: false
15
+ dtype: float32
16
+ epsilon_l0_approx: 0.2
17
+ eval_frequency: 500
18
+ expansion_factor: 8
19
+ feature_reinit_scale: 0.2
20
+ feature_resampling_method: null
21
+ flatten_activations_over_layer: false
22
+ flatten_activations_over_layer_output: false
23
+ from_pretrained_path: null
24
+ hook_point: blocks.11.hook_resid_pre
25
+ hook_point_head_index: null
26
+ hook_point_head_index_output: null
27
+ hook_point_layer: 11
28
+ hook_point_layer_output: null
29
+ hook_point_output: null
30
+ initial_decoder_norm: 0.1
31
+ initialise_encoder_to_decoder_transpose: false
32
+ is_dataset_tokenized: false
33
+ l0_coefficient: 9.0e-05
34
+ l0_warmup: false
35
+ l0_warmup_steps: 1000
36
+ l1_coefficient: 0
37
+ l1_warmup: false
38
+ l1_warmup_steps: 1000
39
+ log_to_wandb: true
40
+ lr: 0.0004
41
+ lr_scheduler_name: constant
42
+ lr_warm_up_steps: 500
43
+ max_resample_step: 100000
44
+ max_sparsity_target: 1
45
+ min_sparsity_for_resample: 0
46
+ min_sparsity_target: 8.0e-06
47
+ model_name: EleutherAI/pythia-2.8b-deduped
48
+ mse_loss_coefficient: 1
49
+ mse_loss_type: centered
50
+ multiple_runs: false
51
+ n_batches_in_store_buffer: 128
52
+ n_checkpoints: 80
53
+ n_running_sparsity: 300
54
+ normalise_initial_decoder_weights: false
55
+ normalise_w_dec: true
56
+ resample_batches: 128
57
+ resample_frequency: 25000
58
+ scale_input_norm: false
59
+ seed: 42
60
+ sparse_loss_coefficient: 1.0e-06
61
+ sparsity_log_frequency: 5000
62
+ store_batch_size: 8
63
+ subtract_b_dec_from_inputs: false
64
+ total_training_steps: 200000
65
+ train_batch_size: 4096
66
+ use_cached_activations: false
67
+ use_gated_sparse_autoencoder: false
68
+ wandb_log_frequency: 10
69
+ wandb_project: test_gemma_2b
70
+ weight_l1_by_decoder_norms: false
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_10235904.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b0286db76bb1405babbe0d01606c35ab6593f08fa140a69e7018311991373b9
3
+ size 419527608
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_20475904.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f91633a9191d97bb1198191eeb12ef12121c3cf56021441337e0b8186bda777
3
+ size 419527608
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_20475904_log_feature_sparsity.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5139601ee8e2dca3e719d1d6294235be7c458116ba60ceafac4abc504b2a5d01
3
+ size 83790
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_30715904.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c5c058bc5341fde64f6fa5f83c1cb59b4c707c42a00b90d6c2c397e092f77a9
3
+ size 419527608
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_40955904.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ec3c08e9f11b092cd0b0bbfc2d19f5b094e8d65e8717c5c135412e383819739
3
+ size 419527608
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_40955904_log_feature_sparsity.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55dea2e2d89926b886ea5d0658a84bac46c000ffae9885b744d6323e39f86ffc
3
+ size 83790
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_51195904.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1651fb7ce66320fabb781007d36dfaa7f1a00d0910033990976a8b1205b8f418
3
+ size 419527608
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_61435904.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:140537a71edab4fd332fe0a9fa40835afcb34ab502f0317d5fe94f72351d9da1
3
+ size 419527608
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_61435904_log_feature_sparsity.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46e68602b6c8297b96b7c168ec9b35b46aa448d4dfeb3857f6b2d8e2d705be71
3
+ size 83790
autumn-fog-10/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_71675904.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc5440500ab0c6672f7f5a466bce23b80cb4fa764ff12f3e5a85874758facbe6
3
+ size 419527608
polished-wildflower-12/hyperparameters.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ b_dec_init_method: zeros
4
+ cached_activations_path: null
5
+ checkpoint_path: ../outputs/checkpoints
6
+ clip_grad_norm: false
7
+ context_size: 256
8
+ custom_loss: null
9
+ d_in: 2560
10
+ d_out: null
11
+ dataset_path: Skylion007/openwebtext
12
+ dense_loss_coefficient: 0
13
+ device: cuda
14
+ different_output: false
15
+ dtype: float32
16
+ epsilon_l0_approx: 0.2
17
+ eval_frequency: 500
18
+ expansion_factor: 8
19
+ feature_reinit_scale: 0.2
20
+ feature_resampling_method: null
21
+ flatten_activations_over_layer: false
22
+ flatten_activations_over_layer_output: false
23
+ from_pretrained_path: null
24
+ hook_point: blocks.11.hook_resid_pre
25
+ hook_point_head_index: null
26
+ hook_point_head_index_output: null
27
+ hook_point_layer: 11
28
+ hook_point_layer_output: null
29
+ hook_point_output: null
30
+ initial_decoder_norm: 0.1
31
+ initialise_encoder_to_decoder_transpose: false
32
+ is_dataset_tokenized: false
33
+ l0_coefficient: 9.0e-05
34
+ l0_warmup: false
35
+ l0_warmup_steps: 1000
36
+ l1_coefficient: 0
37
+ l1_warmup: false
38
+ l1_warmup_steps: 1000
39
+ log_to_wandb: true
40
+ lr: 0.0004
41
+ lr_scheduler_name: constant
42
+ lr_warm_up_steps: 500
43
+ max_resample_step: 100000
44
+ max_sparsity_target: 1
45
+ min_sparsity_for_resample: 0
46
+ min_sparsity_target: 8.0e-06
47
+ model_name: EleutherAI/pythia-2.8b-deduped
48
+ mse_loss_coefficient: 1
49
+ mse_loss_type: centered
50
+ multiple_runs: false
51
+ n_batches_in_store_buffer: 128
52
+ n_checkpoints: 80
53
+ n_running_sparsity: 300
54
+ normalise_initial_decoder_weights: false
55
+ normalise_w_dec: true
56
+ resample_batches: 128
57
+ resample_frequency: 25000
58
+ scale_input_norm: false
59
+ seed: 42
60
+ sparse_loss_coefficient: 1.0e-06
61
+ sparsity_log_frequency: 5000
62
+ store_batch_size: 8
63
+ subtract_b_dec_from_inputs: false
64
+ total_training_steps: 200000
65
+ train_batch_size: 4096
66
+ use_cached_activations: false
67
+ use_gated_sparse_autoencoder: false
68
+ wandb_log_frequency: 10
69
+ wandb_project: test_gemma_2b
70
+ weight_l1_by_decoder_norms: false
polished-wildflower-12/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_10235904.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61ba732653b5157241d96cf52fdd1271784f13b574c9ea5de21cd716c5b0de36
3
+ size 419527608
polished-wildflower-12/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_20475904.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f3d7a51ce38b49e78b4b73cb87f71cd4036b1680e0e334bd77c5d2e3a8f31f4
3
+ size 419527608
polished-wildflower-12/sparse_autoencoder_EleutherAI/pythia-2.8b-deduped_blocks.11.hook_resid_pre_s20480_20475904_log_feature_sparsity.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5139601ee8e2dca3e719d1d6294235be7c458116ba60ceafac4abc504b2a5d01
3
+ size 83790
tough-capybara-11/hyperparameters.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ b_dec_init_method: zeros
4
+ cached_activations_path: null
5
+ checkpoint_path: ../outputs/checkpoints
6
+ clip_grad_norm: false
7
+ context_size: 256
8
+ custom_loss: null
9
+ d_in: 2560
10
+ d_out: null
11
+ dataset_path: Skylion007/openwebtext
12
+ dense_loss_coefficient: 0
13
+ device: cuda
14
+ different_output: false
15
+ dtype: float32
16
+ epsilon_l0_approx: 0.2
17
+ eval_frequency: 500
18
+ expansion_factor: 8
19
+ feature_reinit_scale: 0.2
20
+ feature_resampling_method: null
21
+ flatten_activations_over_layer: false
22
+ flatten_activations_over_layer_output: false
23
+ from_pretrained_path: null
24
+ hook_point: blocks.11.hook_resid_pre
25
+ hook_point_head_index: null
26
+ hook_point_head_index_output: null
27
+ hook_point_layer: 11
28
+ hook_point_layer_output: null
29
+ hook_point_output: null
30
+ initial_decoder_norm: 0.1
31
+ initialise_encoder_to_decoder_transpose: false
32
+ is_dataset_tokenized: false
33
+ l0_coefficient: 9.0e-05
34
+ l0_warmup: false
35
+ l0_warmup_steps: 1000
36
+ l1_coefficient: 0
37
+ l1_warmup: false
38
+ l1_warmup_steps: 1000
39
+ log_to_wandb: true
40
+ lr: 0.0004
41
+ lr_scheduler_name: constant
42
+ lr_warm_up_steps: 500
43
+ max_resample_step: 100000
44
+ max_sparsity_target: 1
45
+ min_sparsity_for_resample: 0
46
+ min_sparsity_target: 8.0e-06
47
+ model_name: EleutherAI/pythia-2.8b-deduped
48
+ mse_loss_coefficient: 1
49
+ mse_loss_type: centered
50
+ multiple_runs: false
51
+ n_batches_in_store_buffer: 128
52
+ n_checkpoints: 80
53
+ n_running_sparsity: 300
54
+ normalise_initial_decoder_weights: false
55
+ normalise_w_dec: true
56
+ resample_batches: 128
57
+ resample_frequency: 25000
58
+ scale_input_norm: false
59
+ seed: 42
60
+ sparse_loss_coefficient: 1.0e-06
61
+ sparsity_log_frequency: 5000
62
+ store_batch_size: 8
63
+ subtract_b_dec_from_inputs: false
64
+ total_training_steps: 200000
65
+ train_batch_size: 4096
66
+ use_cached_activations: false
67
+ use_gated_sparse_autoencoder: false
68
+ wandb_log_frequency: 10
69
+ wandb_project: test_gemma_2b
70
+ weight_l1_by_decoder_norms: false