eoinf committed on
Commit
5d03afa
·
verified ·
1 Parent(s): 6403b53

Adds 300M resid_5_pre SAE trained on the pile

Browse files
fragrant-disco-34/hyperparameters.yaml ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ b_dec_init_method: zeros
4
+ cached_activations_path: null
5
+ checkpoint_path: ./outputs/checkpoints
6
+ clip_grad_norm: true
7
+ context_size: 256
8
+ custom_loss: null
9
+ d_in: 2560
10
+ d_out: null
11
+ dataset: monology/pile-uncopyrighted
12
+ dense_loss_coefficient: 0
13
+ device: cuda
14
+ different_output: false
15
+ dtype: float32
16
+ epsilon_l0_approx: 0.5
17
+ eval_frequency: 500
18
+ expansion_factor: 8
19
+ feature_reinit_scale: 0.2
20
+ feature_resampling_method: null
21
+ fine_tune_dataset: false
22
+ flatten_activations_over_layer: false
23
+ flatten_activations_over_layer_output: false
24
+ from_pretrained_path: null
25
+ hook_point: blocks.5.hook_resid_pre
26
+ hook_point_head_index: null
27
+ hook_point_head_index_output: null
28
+ hook_point_layer: 5
29
+ hook_point_layer_output: null
30
+ hook_point_output: null
31
+ initial_decoder_norm: 0.1
32
+ initialise_encoder_to_decoder_transpose: false
33
+ is_dataset_tokenized: false
34
+ l0_coefficient: 0
35
+ l0_warmup: false
36
+ l0_warmup_steps: 1000
37
+ l1_coefficient: 5.5
38
+ l1_warmup: true
39
+ l1_warmup_steps: 10000
40
+ log_to_wandb: true
41
+ loop_dataset: false
42
+ lr: 0.0001
43
+ lr_scheduler_name: constant_with_warmup
44
+ lr_warm_up_steps: 1000
45
+ max_resample_step: 100000
46
+ max_sparsity_target: 1
47
+ min_sparsity_for_resample: 0
48
+ min_sparsity_target: 0
49
+ model_name: EleutherAI/pythia-2.8b-deduped
50
+ mse_loss_coefficient: 1
51
+ mse_loss_type: standard
52
+ multiple_runs: false
53
+ n_batches_in_store_buffer: 128
54
+ n_checkpoints: 80
55
+ n_running_sparsity: 100
56
+ normalise_initial_decoder_weights: false
57
+ normalise_w_dec: false
58
+ resample_batches: 128
59
+ resample_frequency: 25000
60
+ scale_input_norm: false
61
+ seed: 42
62
+ sparse_loss_coefficient: 0
63
+ sparsity_log_frequency: 5000
64
+ store_batch_size: 8
65
+ subtract_b_dec_from_inputs: false
66
+ total_training_steps: 73242
67
+ train_batch_size: 4096
68
+ use_cached_activations: false
69
+ use_gated_sparse_autoencoder: false
70
+ wandb_log_frequency: 10
71
+ wandb_project: test_pythia-mlp
72
+ weight_l1_by_decoder_norms: true
fragrant-disco-34/sparse_autoencoder/pythia-2.8b-deduped_blocks.0.hook_mlp_out_s20480_300M_tokens_thepile_L0-16.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cace92f94e4310515a813e0076c8aa94ce1aeef5f54c1491cbd185e0f40a0fb0
3
+ size 419527672