chanind commited on
Commit
3a54b02
·
verified ·
1 Parent(s): ce3db2d

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +10 -0
  2. denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.7/sae_weights.safetensors +3 -0
  3. denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.7/saebench_autointerp_custom_sae_eval_results.json +3 -0
  4. denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.7/sparsity.safetensors +3 -0
  5. denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-150/seed-0/model.layers.12/w-32768/t-300M/l0-143.3/sae_weights.safetensors +3 -0
  6. denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-150/seed-0/model.layers.12/w-32768/t-300M/l0-143.3/saebench_autointerp_custom_sae_eval_results.json +3 -0
  7. denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-150/seed-0/model.layers.12/w-32768/t-300M/l0-143.3/sparsity.safetensors +3 -0
  8. denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-200/seed-0/model.layers.12/w-32768/t-300M/l0-193.4/sae_weights.safetensors +3 -0
  9. denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-200/seed-0/model.layers.12/w-32768/t-300M/l0-193.4/saebench_autointerp_custom_sae_eval_results.json +3 -0
  10. denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-200/seed-0/model.layers.12/w-32768/t-300M/l0-193.4/sparsity.safetensors +3 -0
  11. denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-250/seed-0/model.layers.12/w-32768/t-300M/l0-243.5/sae_weights.safetensors +3 -0
  12. denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-250/seed-0/model.layers.12/w-32768/t-300M/l0-243.5/saebench_autointerp_custom_sae_eval_results.json +3 -0
  13. denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-250/seed-0/model.layers.12/w-32768/t-300M/l0-243.5/sparsity.safetensors +3 -0
  14. denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-50/seed-0/model.layers.12/w-32768/t-300M/l0-44.8/sae_weights.safetensors +3 -0
  15. denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-50/seed-0/model.layers.12/w-32768/t-300M/l0-44.8/saebench_autointerp_custom_sae_eval_results.json +3 -0
  16. denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-50/seed-0/model.layers.12/w-32768/t-300M/l0-44.8/sparsity.safetensors +3 -0
  17. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/41_truthqa_tf_blocks.12.hook_resid_post_l1.json +45 -0
  18. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/44_phys_tf_blocks.12.hook_resid_post_l1.json +45 -0
  19. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/48_cm_correct_blocks.12.hook_resid_post_l1.json +45 -0
  20. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/49_cm_isshort_blocks.12.hook_resid_post_l1.json +45 -0
  21. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/50_deon_isvalid_blocks.12.hook_resid_post_l1.json +45 -0
  22. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/52_virtue_is_blocks.12.hook_resid_post_l1.json +45 -0
  23. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/54_cs_tf_blocks.12.hook_resid_post_l1.json +45 -0
  24. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/56_wikidatasex_or_gender_blocks.12.hook_resid_post_l1.json +45 -0
  25. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/57_wikidatais_alive_blocks.12.hook_resid_post_l1.json +45 -0
  26. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/58_wikidatapolitical_party_blocks.12.hook_resid_post_l1.json +45 -0
  27. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/59_wikidata_occupation_isjournalist_blocks.12.hook_resid_post_l1.json +45 -0
  28. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/5_hist_fig_ismale_blocks.12.hook_resid_post_l1.json +45 -0
  29. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/60_wikidata_occupation_isathlete_blocks.12.hook_resid_post_l1.json +45 -0
  30. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/61_wikidata_occupation_isactor_blocks.12.hook_resid_post_l1.json +45 -0
  31. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/62_wikidata_occupation_ispolitician_blocks.12.hook_resid_post_l1.json +45 -0
  32. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/63_wikidata_occupation_issinger_blocks.12.hook_resid_post_l1.json +45 -0
  33. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/64_wikidata_occupation_isresearcher_blocks.12.hook_resid_post_l1.json +45 -0
  34. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/65_high-school_blocks.12.hook_resid_post_l1.json +45 -0
  35. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/66_living-room_blocks.12.hook_resid_post_l1.json +45 -0
  36. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/67_social-security_blocks.12.hook_resid_post_l1.json +45 -0
  37. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/68_credit-card_blocks.12.hook_resid_post_l1.json +45 -0
  38. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/69_blood-pressure_blocks.12.hook_resid_post_l1.json +45 -0
  39. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/6_hist_fig_isamerican_blocks.12.hook_resid_post_l1.json +45 -0
  40. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/70_prime-factors_blocks.12.hook_resid_post_l1.json +45 -0
  41. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/71_social-media_blocks.12.hook_resid_post_l1.json +45 -0
  42. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/72_gene-expression_blocks.12.hook_resid_post_l1.json +45 -0
  43. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/73_control-group_blocks.12.hook_resid_post_l1.json +45 -0
  44. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/74_magnetic-field_blocks.12.hook_resid_post_l1.json +45 -0
  45. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/75_cell-lines_blocks.12.hook_resid_post_l1.json +45 -0
  46. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/76_trial-court_blocks.12.hook_resid_post_l1.json +45 -0
  47. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/7_hist_fig_ispolitician_blocks.12.hook_resid_post_l1.json +45 -0
  48. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/84_clinical-trials_blocks.12.hook_resid_post_l1.json +45 -0
  49. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/87_glue_cola_blocks.12.hook_resid_post_l1.json +45 -0
  50. denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/90_glue_qnli_blocks.12.hook_resid_post_l1.json +45 -0
.gitattributes CHANGED
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.7/saebench_autointerp_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
37
+ denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-150/seed-0/model.layers.12/w-32768/t-300M/l0-143.3/saebench_autointerp_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
38
+ denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-200/seed-0/model.layers.12/w-32768/t-300M/l0-193.4/saebench_autointerp_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
39
+ denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-250/seed-0/model.layers.12/w-32768/t-300M/l0-243.5/saebench_autointerp_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
40
+ denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/saebench_autointerp_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
41
+ denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-200/seed-0/model.layers.12/w-32768/t-300M/l0-193.2/saebench_autointerp_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
42
+ denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-50/seed-0/model.layers.12/w-32768/t-300M/l0-44.8/saebench_autointerp_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
43
+ denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-50/seed-0/model.layers.12/w-32768/t-300M/l0-44.8/saebench_autointerp_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
44
+ denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-150/seed-0/model.layers.12/w-32768/t-300M/l0-143.2/saebench_autointerp_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
45
+ denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-250/seed-0/model.layers.12/w-32768/t-300M/l0-243.3/saebench_autointerp_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.7/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5abd351e928eaf1d81a02b647349cb65421e657e8a6c791bf46f09a85db94594
3
+ size 604251536
denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.7/saebench_autointerp_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb299406ed1b213b7f390899999703a6c8a82e53059487db264429a24261e567
3
+ size 27888928
denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.7/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2814dec8fb637346e78190dd01e3d74347a92f904a3866ac6588efd53d22ee7b
3
+ size 131152
denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-150/seed-0/model.layers.12/w-32768/t-300M/l0-143.3/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b721cae653319ba5eb75bc917aaa95dae55e7c5a610bbe720be119066181d1d
3
+ size 604251536
denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-150/seed-0/model.layers.12/w-32768/t-300M/l0-143.3/saebench_autointerp_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98f36d48cc9e5d7bde8a6f3df1bf29dd00c4ebf269b39cbb70118e5d47852dfa
3
+ size 27719786
denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-150/seed-0/model.layers.12/w-32768/t-300M/l0-143.3/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8ab44a988479caddaebb0eb1bbdb32990058650cadc59855bb7927d5b09ce65
3
+ size 131152
denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-200/seed-0/model.layers.12/w-32768/t-300M/l0-193.4/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c1dbd24acb2c90312d64988baa91252ad61aa3fc0ef989bfbb0475aeb834137
3
+ size 604251536
denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-200/seed-0/model.layers.12/w-32768/t-300M/l0-193.4/saebench_autointerp_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be94929205776f9fed6ce15f3a79d9d2a2f5413c15c832117434e7e4907c207d
3
+ size 27351353
denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-200/seed-0/model.layers.12/w-32768/t-300M/l0-193.4/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6b71e91cde1c4f455809be25ac14254897a6fcbffd3b04cf0c9b6e15f0f7a8a
3
+ size 131152
denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-250/seed-0/model.layers.12/w-32768/t-300M/l0-243.5/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c17b8a3fff788c10c6df323d8fb6499e9e53a1456fea66d3cc2559614fca7f10
3
+ size 604251536
denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-250/seed-0/model.layers.12/w-32768/t-300M/l0-243.5/saebench_autointerp_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9515327a674ad5d754d709b7bd1daca2d71e969e31706a8056cb509e4785329
3
+ size 27709911
denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-250/seed-0/model.layers.12/w-32768/t-300M/l0-243.5/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d3c1313dd62815a766eec4e1163638f4397af07639f4c95e2e3e8c170891076
3
+ size 131152
denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-50/seed-0/model.layers.12/w-32768/t-300M/l0-44.8/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a79da8ed5cfca009472b6462c0d93db81c082bfe9aed7e8fe0356315270c3b8d
3
+ size 604251536
denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-50/seed-0/model.layers.12/w-32768/t-300M/l0-44.8/saebench_autointerp_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6e79b4958abb42792b44bd0d045f061f1dd18067d35121af0b3621503af5075
3
+ size 27597584
denoised/gemma-2-2b-btk-softmax_topp_zero-p0.8/k-50/seed-0/model.layers.12/w-32768/t-300M/l0-44.8/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e99259f3212552e05a751a822ee0703ae66673e95c41a605035f0cc0a3e293f3
3
+ size 131152
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/41_truthqa_tf_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.535068825510002,
4
+ "test_acc": 0.5353535353535354,
5
+ "test_auc": 0.5893877551020408,
6
+ "val_auc": 0.6009634684865516,
7
+ "k": 1,
8
+ "dataset": "41_truthqa_tf",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 22911
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.6347975240510105,
18
+ "test_acc": 0.6363636363636364,
19
+ "test_auc": 0.7191836734693877,
20
+ "val_auc": 0.8446407065435568,
21
+ "k": 16,
22
+ "dataset": "41_truthqa_tf",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 22911,
28
+ 16807,
29
+ 26727,
30
+ 2786,
31
+ 5565,
32
+ 26735,
33
+ 18696,
34
+ 19848,
35
+ 25584,
36
+ 5237,
37
+ 20014,
38
+ 12055,
39
+ 19708,
40
+ 23700,
41
+ 2046,
42
+ 27474
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/44_phys_tf_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.6273262844661285,
4
+ "test_acc": 0.6274213836477988,
5
+ "test_auc": 0.6637401155802454,
6
+ "val_auc": 0.5720288115246098,
7
+ "k": 1,
8
+ "dataset": "44_phys_tf",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 19063
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.6392422692148372,
18
+ "test_acc": 0.639245283018868,
19
+ "test_auc": 0.7066322190819805,
20
+ "val_auc": 0.6398559423769506,
21
+ "k": 16,
22
+ "dataset": "44_phys_tf",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 19063,
28
+ 29863,
29
+ 25223,
30
+ 595,
31
+ 923,
32
+ 19463,
33
+ 32491,
34
+ 28979,
35
+ 27330,
36
+ 10632,
37
+ 20380,
38
+ 4276,
39
+ 23903,
40
+ 3081,
41
+ 31295,
42
+ 2051
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/48_cm_correct_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.5575186765570808,
4
+ "test_acc": 0.579874213836478,
5
+ "test_auc": 0.5920507949559459,
6
+ "val_auc": 0.611844737895158,
7
+ "k": 1,
8
+ "dataset": "48_cm_correct",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 19063
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.6291969408048274,
18
+ "test_acc": 0.6294339622641509,
19
+ "test_auc": 0.7149887244959441,
20
+ "val_auc": 0.7511004401760704,
21
+ "k": 16,
22
+ "dataset": "48_cm_correct",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 19063,
28
+ 11294,
29
+ 31233,
30
+ 5988,
31
+ 19157,
32
+ 941,
33
+ 11274,
34
+ 7405,
35
+ 15288,
36
+ 28979,
37
+ 17637,
38
+ 29710,
39
+ 29762,
40
+ 24931,
41
+ 1588,
42
+ 3153
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/49_cm_isshort_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.9962264050636651,
4
+ "test_acc": 0.9962264150943396,
5
+ "test_auc": 0.9972265652293225,
6
+ "val_auc": 1.0,
7
+ "k": 1,
8
+ "dataset": "49_cm_isshort",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 29214
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.9989937106918239,
18
+ "test_acc": 0.9989937106918239,
19
+ "test_auc": 0.999987089117493,
20
+ "val_auc": 1.0,
21
+ "k": 16,
22
+ "dataset": "49_cm_isshort",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 29214,
28
+ 1876,
29
+ 5608,
30
+ 26735,
31
+ 16791,
32
+ 6125,
33
+ 28803,
34
+ 4096,
35
+ 13371,
36
+ 8867,
37
+ 31084,
38
+ 1255,
39
+ 3153,
40
+ 16191,
41
+ 27330,
42
+ 21571
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/50_deon_isvalid_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.6183594594341082,
4
+ "test_acc": 0.6264150943396226,
5
+ "test_auc": 0.6397195452534027,
6
+ "val_auc": 0.689875950380152,
7
+ "k": 1,
8
+ "dataset": "50_deon_isvalid",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 19063
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.698576863629426,
18
+ "test_acc": 0.6986163522012578,
19
+ "test_auc": 0.7741818297808998,
20
+ "val_auc": 0.8167266906762705,
21
+ "k": 16,
22
+ "dataset": "50_deon_isvalid",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 19063,
28
+ 26924,
29
+ 28979,
30
+ 24931,
31
+ 21879,
32
+ 24844,
33
+ 4096,
34
+ 21571,
35
+ 24699,
36
+ 7405,
37
+ 10803,
38
+ 29710,
39
+ 25223,
40
+ 27781,
41
+ 1379,
42
+ 6125
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/52_virtue_is_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.7910570865188057,
4
+ "test_acc": 0.7910643889618922,
5
+ "test_auc": 0.8565597073191756,
6
+ "val_auc": 0.7932000000000001,
7
+ "k": 1,
8
+ "dataset": "52_virtue_is",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 2789
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.844654857611397,
18
+ "test_acc": 0.8446780551905387,
19
+ "test_auc": 0.917528524610817,
20
+ "val_auc": 0.8752,
21
+ "k": 16,
22
+ "dataset": "52_virtue_is",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 2789,
28
+ 7473,
29
+ 1556,
30
+ 14641,
31
+ 32430,
32
+ 8333,
33
+ 21738,
34
+ 9410,
35
+ 19291,
36
+ 9734,
37
+ 21086,
38
+ 5709,
39
+ 31931,
40
+ 5185,
41
+ 24006,
42
+ 22146
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/54_cs_tf_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.5155798695295019,
4
+ "test_acc": 0.5285534591194968,
5
+ "test_auc": 0.5487289615903777,
6
+ "val_auc": 0.5906362545018007,
7
+ "k": 1,
8
+ "dataset": "54_cs_tf",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 24699
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.5686057910066935,
18
+ "test_acc": 0.5690566037735849,
19
+ "test_auc": 0.6040757630837871,
20
+ "val_auc": 0.6842737094837936,
21
+ "k": 16,
22
+ "dataset": "54_cs_tf",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 24699,
28
+ 24732,
29
+ 5608,
30
+ 16791,
31
+ 19063,
32
+ 12753,
33
+ 7305,
34
+ 11274,
35
+ 1473,
36
+ 7518,
37
+ 19848,
38
+ 12563,
39
+ 2776,
40
+ 4956,
41
+ 1588,
42
+ 3134
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/56_wikidatasex_or_gender_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.9916978231678448,
4
+ "test_acc": 0.9916981132075472,
5
+ "test_auc": 0.9971356827426563,
6
+ "val_auc": 0.9995998399359743,
7
+ "k": 1,
8
+ "dataset": "56_wikidatasex_or_gender",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 13440
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.99522012518114,
18
+ "test_acc": 0.9952201257861635,
19
+ "test_auc": 0.9995878643780145,
20
+ "val_auc": 1.0,
21
+ "k": 16,
22
+ "dataset": "56_wikidatasex_or_gender",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 13440,
28
+ 16807,
29
+ 13590,
30
+ 8117,
31
+ 4211,
32
+ 13296,
33
+ 8781,
34
+ 6986,
35
+ 5074,
36
+ 7989,
37
+ 30776,
38
+ 8743,
39
+ 25310,
40
+ 27478,
41
+ 5676,
42
+ 24080
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/57_wikidatais_alive_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.8129862271548718,
4
+ "test_acc": 0.8153459119496855,
5
+ "test_auc": 0.8342447741304395,
6
+ "val_auc": 0.7919167667066827,
7
+ "k": 1,
8
+ "dataset": "57_wikidatais_alive",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 7989
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.9625067489398731,
18
+ "test_acc": 0.9625157232704402,
19
+ "test_auc": 0.9928818001111854,
20
+ "val_auc": 0.9839935974389755,
21
+ "k": 16,
22
+ "dataset": "57_wikidatais_alive",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 7989,
28
+ 24080,
29
+ 30776,
30
+ 13042,
31
+ 16807,
32
+ 32185,
33
+ 5676,
34
+ 8117,
35
+ 13590,
36
+ 11333,
37
+ 29789,
38
+ 25750,
39
+ 5736,
40
+ 3155,
41
+ 13296,
42
+ 25310
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/58_wikidatapolitical_party_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.8559719312637949,
4
+ "test_acc": 0.8572151898734177,
5
+ "test_auc": 0.867901135818269,
6
+ "val_auc": 0.8399359743897558,
7
+ "k": 1,
8
+ "dataset": "58_wikidatapolitical_party",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 20868
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.8712420160105138,
18
+ "test_acc": 0.8713924050632912,
19
+ "test_auc": 0.942794281120149,
20
+ "val_auc": 0.9579831932773109,
21
+ "k": 16,
22
+ "dataset": "58_wikidatapolitical_party",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 20868,
28
+ 10602,
29
+ 4211,
30
+ 26855,
31
+ 963,
32
+ 27478,
33
+ 21898,
34
+ 23966,
35
+ 6179,
36
+ 19463,
37
+ 23231,
38
+ 18412,
39
+ 5605,
40
+ 4347,
41
+ 29018,
42
+ 1911
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/59_wikidata_occupation_isjournalist_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.7470021225559578,
4
+ "test_acc": 0.7497435897435898,
5
+ "test_auc": 0.7856187093950922,
6
+ "val_auc": 0.729906160750714,
7
+ "k": 1,
8
+ "dataset": "59_wikidata_occupation_isjournalist",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 8117
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.856457512639249,
18
+ "test_acc": 0.8574358974358974,
19
+ "test_auc": 0.9152472481233379,
20
+ "val_auc": 0.8935128518971848,
21
+ "k": 16,
22
+ "dataset": "59_wikidata_occupation_isjournalist",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 8117,
28
+ 16380,
29
+ 22302,
30
+ 8467,
31
+ 22322,
32
+ 9062,
33
+ 21898,
34
+ 17208,
35
+ 30776,
36
+ 28542,
37
+ 16418,
38
+ 26735,
39
+ 32185,
40
+ 5684,
41
+ 27972,
42
+ 22911
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/5_hist_fig_ismale_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.8880076430409011,
4
+ "test_acc": 0.8885534591194969,
5
+ "test_auc": 0.9005940524880537,
6
+ "val_auc": 0.931172468987595,
7
+ "k": 1,
8
+ "dataset": "5_hist_fig_ismale",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 13440
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.9159623333124473,
18
+ "test_acc": 0.9159748427672956,
19
+ "test_auc": 0.9796620690423365,
20
+ "val_auc": 0.9903961584633854,
21
+ "k": 16,
22
+ "dataset": "5_hist_fig_ismale",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 13440,
28
+ 8743,
29
+ 19252,
30
+ 15635,
31
+ 22911,
32
+ 16964,
33
+ 30776,
34
+ 10799,
35
+ 12400,
36
+ 29782,
37
+ 27398,
38
+ 14693,
39
+ 29612,
40
+ 22926,
41
+ 16170,
42
+ 29742
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/60_wikidata_occupation_isathlete_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.8458515648432096,
4
+ "test_acc": 0.8471794871794872,
5
+ "test_auc": 0.8640724408388595,
6
+ "val_auc": 0.8653610771113831,
7
+ "k": 1,
8
+ "dataset": "60_wikidata_occupation_isathlete",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 4645
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.8728915459038216,
18
+ "test_acc": 0.8738461538461538,
19
+ "test_auc": 0.9110731477429562,
20
+ "val_auc": 0.9184006527947776,
21
+ "k": 16,
22
+ "dataset": "60_wikidata_occupation_isathlete",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 4645,
28
+ 22911,
29
+ 25750,
30
+ 21898,
31
+ 17208,
32
+ 4723,
33
+ 21085,
34
+ 21722,
35
+ 5074,
36
+ 25310,
37
+ 26176,
38
+ 6986,
39
+ 7897,
40
+ 212,
41
+ 18921,
42
+ 27304
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/61_wikidata_occupation_isactor_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.7986625911999248,
4
+ "test_acc": 0.8020512820512821,
5
+ "test_auc": 0.8293457770895747,
6
+ "val_auc": 0.8006935944512443,
7
+ "k": 1,
8
+ "dataset": "61_wikidata_occupation_isactor",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 32185
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.8631622511194406,
18
+ "test_acc": 0.8635897435897436,
19
+ "test_auc": 0.9220427845288989,
20
+ "val_auc": 0.9530803753569972,
21
+ "k": 16,
22
+ "dataset": "61_wikidata_occupation_isactor",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 32185,
28
+ 25321,
29
+ 17208,
30
+ 19428,
31
+ 25750,
32
+ 5736,
33
+ 16885,
34
+ 6179,
35
+ 11918,
36
+ 25083,
37
+ 26240,
38
+ 8467,
39
+ 12261,
40
+ 6146,
41
+ 26735,
42
+ 21898
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/62_wikidata_occupation_ispolitician_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.7208036118609906,
4
+ "test_acc": 0.721025641025641,
5
+ "test_auc": 0.765324670952974,
6
+ "val_auc": 0.7350061199510405,
7
+ "k": 1,
8
+ "dataset": "62_wikidata_occupation_ispolitician",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 16807
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.8523011667096096,
18
+ "test_acc": 0.8523076923076923,
19
+ "test_auc": 0.9241971589187732,
20
+ "val_auc": 0.8996328029375764,
21
+ "k": 16,
22
+ "dataset": "62_wikidata_occupation_ispolitician",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 16807,
28
+ 1683,
29
+ 25750,
30
+ 13296,
31
+ 13590,
32
+ 29782,
33
+ 19463,
34
+ 32185,
35
+ 16179,
36
+ 13042,
37
+ 6986,
38
+ 16564,
39
+ 27972,
40
+ 5676,
41
+ 5074,
42
+ 16964
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/63_wikidata_occupation_issinger_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.8198099470622835,
4
+ "test_acc": 0.8215384615384616,
5
+ "test_auc": 0.8389394755444844,
6
+ "val_auc": 0.8182374541003672,
7
+ "k": 1,
8
+ "dataset": "63_wikidata_occupation_issinger",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 10854
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.8456892963970287,
18
+ "test_acc": 0.8461538461538461,
19
+ "test_auc": 0.9010229070589424,
20
+ "val_auc": 0.9290085679314567,
21
+ "k": 16,
22
+ "dataset": "63_wikidata_occupation_issinger",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 10854,
28
+ 26793,
29
+ 25750,
30
+ 13296,
31
+ 9062,
32
+ 19253,
33
+ 4210,
34
+ 11048,
35
+ 22911,
36
+ 8781,
37
+ 26379,
38
+ 23205,
39
+ 8137,
40
+ 16885,
41
+ 18418,
42
+ 4684
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/64_wikidata_occupation_isresearcher_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.809395423339481,
4
+ "test_acc": 0.8096256684491978,
5
+ "test_auc": 0.8256396529951135,
6
+ "val_auc": 0.7993939393939394,
7
+ "k": 1,
8
+ "dataset": "64_wikidata_occupation_isresearcher",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 16885
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.8930393191131495,
18
+ "test_acc": 0.893048128342246,
19
+ "test_auc": 0.951993996961877,
20
+ "val_auc": 0.9612121212121213,
21
+ "k": 16,
22
+ "dataset": "64_wikidata_occupation_isresearcher",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 16885,
28
+ 21722,
29
+ 7897,
30
+ 22302,
31
+ 9062,
32
+ 16807,
33
+ 5074,
34
+ 24080,
35
+ 29789,
36
+ 22911,
37
+ 25750,
38
+ 15785,
39
+ 6729,
40
+ 24699,
41
+ 31106,
42
+ 12885
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/65_high-school_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.9737927932131458,
4
+ "test_acc": 0.9737931034482759,
5
+ "test_auc": 0.9941994967260133,
6
+ "val_auc": 1.0,
7
+ "k": 1,
8
+ "dataset": "65_high-school",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 15746
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.9788503869393446,
18
+ "test_acc": 0.9788505747126437,
19
+ "test_auc": 0.9957908301315006,
20
+ "val_auc": 1.0,
21
+ "k": 16,
22
+ "dataset": "65_high-school",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 15746,
28
+ 24665,
29
+ 16885,
30
+ 6986,
31
+ 23717,
32
+ 26491,
33
+ 31598,
34
+ 3081,
35
+ 29782,
36
+ 11717,
37
+ 13371,
38
+ 8771,
39
+ 27503,
40
+ 31341,
41
+ 5163,
42
+ 13662
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/66_living-room_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.9660773853725189,
4
+ "test_acc": 0.9660774008600096,
5
+ "test_auc": 0.9853355028753736,
6
+ "val_auc": 0.9987129987129987,
7
+ "k": 1,
8
+ "dataset": "66_living-room",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 9197
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.9904439412521912,
18
+ "test_acc": 0.9904443382704252,
19
+ "test_auc": 0.9983509289036689,
20
+ "val_auc": 1.0,
21
+ "k": 16,
22
+ "dataset": "66_living-room",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 9197,
28
+ 21851,
29
+ 30799,
30
+ 16885,
31
+ 6787,
32
+ 27207,
33
+ 20677,
34
+ 8001,
35
+ 1193,
36
+ 595,
37
+ 20435,
38
+ 15141,
39
+ 13699,
40
+ 9203,
41
+ 10642,
42
+ 14573
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/67_social-security_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.9926436128484677,
4
+ "test_acc": 0.9926436781609196,
5
+ "test_auc": 0.9989346014394718,
6
+ "val_auc": 1.0,
7
+ "k": 1,
8
+ "dataset": "67_social-security",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 687
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.9940229859788012,
18
+ "test_acc": 0.9940229885057471,
19
+ "test_auc": 0.998727440608258,
20
+ "val_auc": 1.0,
21
+ "k": 16,
22
+ "dataset": "67_social-security",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 687,
28
+ 756,
29
+ 16885,
30
+ 27474,
31
+ 21160,
32
+ 17989,
33
+ 13384,
34
+ 1503,
35
+ 3778,
36
+ 16896,
37
+ 5163,
38
+ 1185,
39
+ 16094,
40
+ 31957,
41
+ 26491,
42
+ 7029
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/68_credit-card_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.9627469502282924,
4
+ "test_acc": 0.9627586206896551,
5
+ "test_auc": 0.9879745251366416,
6
+ "val_auc": 0.9655032467532467,
7
+ "k": 1,
8
+ "dataset": "68_credit-card",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 32204
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.9788503243482447,
18
+ "test_acc": 0.9788505747126437,
19
+ "test_auc": 0.996735314410953,
20
+ "val_auc": 0.9736201298701298,
21
+ "k": 16,
22
+ "dataset": "68_credit-card",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 32204,
28
+ 16885,
29
+ 6986,
30
+ 28026,
31
+ 27331,
32
+ 11250,
33
+ 4211,
34
+ 8781,
35
+ 1531,
36
+ 963,
37
+ 23419,
38
+ 8137,
39
+ 26713,
40
+ 20435,
41
+ 9772,
42
+ 32486
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/69_blood-pressure_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.9935631421929442,
4
+ "test_acc": 0.9935632183908046,
5
+ "test_auc": 0.9996922181936252,
6
+ "val_auc": 0.9951298701298701,
7
+ "k": 1,
8
+ "dataset": "69_blood-pressure",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 26075
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.9954022871877355,
18
+ "test_acc": 0.9954022988505747,
19
+ "test_auc": 0.9998511824232913,
20
+ "val_auc": 0.9979707792207793,
21
+ "k": 16,
22
+ "dataset": "69_blood-pressure",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 26075,
28
+ 10769,
29
+ 25545,
30
+ 16885,
31
+ 29067,
32
+ 18310,
33
+ 28026,
34
+ 28175,
35
+ 8412,
36
+ 19303,
37
+ 27474,
38
+ 6986,
39
+ 25584,
40
+ 20937,
41
+ 8013,
42
+ 4211
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/6_hist_fig_isamerican_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.7377335329080565,
4
+ "test_acc": 0.7411320754716981,
5
+ "test_auc": 0.7800188144468219,
6
+ "val_auc": 0.7923169267707083,
7
+ "k": 1,
8
+ "dataset": "6_hist_fig_isamerican",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 29789
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.9197209813352412,
18
+ "test_acc": 0.919748427672956,
19
+ "test_auc": 0.9631196843871482,
20
+ "val_auc": 0.9853941576630653,
21
+ "k": 16,
22
+ "dataset": "6_hist_fig_isamerican",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 29789,
28
+ 31884,
29
+ 22302,
30
+ 29742,
31
+ 5684,
32
+ 7897,
33
+ 27972,
34
+ 5794,
35
+ 11333,
36
+ 31106,
37
+ 16964,
38
+ 3988,
39
+ 6179,
40
+ 21124,
41
+ 30073,
42
+ 8897
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/70_prime-factors_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.999080459770115,
4
+ "test_acc": 0.999080459770115,
5
+ "test_auc": 0.9999915444558688,
6
+ "val_auc": 1.0,
7
+ "k": 1,
8
+ "dataset": "70_prime-factors",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 19329
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.9995402298850574,
18
+ "test_acc": 0.9995402298850574,
19
+ "test_auc": 0.9999932355646951,
20
+ "val_auc": 1.0,
21
+ "k": 16,
22
+ "dataset": "70_prime-factors",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 19329,
28
+ 23875,
29
+ 7948,
30
+ 5342,
31
+ 32434,
32
+ 24227,
33
+ 24075,
34
+ 13708,
35
+ 21709,
36
+ 24962,
37
+ 16885,
38
+ 25584,
39
+ 31654,
40
+ 9255,
41
+ 31005,
42
+ 14612
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/71_social-media_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.9885047265436513,
4
+ "test_acc": 0.9885057471264368,
5
+ "test_auc": 0.9956124181503327,
6
+ "val_auc": 1.0,
7
+ "k": 1,
8
+ "dataset": "71_social-media",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 5476
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.9912642348581793,
18
+ "test_acc": 0.991264367816092,
19
+ "test_auc": 0.9983105822825912,
20
+ "val_auc": 1.0,
21
+ "k": 16,
22
+ "dataset": "71_social-media",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 5476,
28
+ 20088,
29
+ 13672,
30
+ 16885,
31
+ 6986,
32
+ 11592,
33
+ 20998,
34
+ 20588,
35
+ 2105,
36
+ 30096,
37
+ 26523,
38
+ 16807,
39
+ 31598,
40
+ 12531,
41
+ 24795,
42
+ 20642
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/72_gene-expression_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.8337075552390707,
4
+ "test_acc": 0.8344827586206897,
5
+ "test_auc": 0.9071593937713078,
6
+ "val_auc": 0.8687094155844156,
7
+ "k": 1,
8
+ "dataset": "72_gene-expression",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 31810
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.951723709319247,
18
+ "test_acc": 0.9517241379310345,
19
+ "test_auc": 0.9899505858000974,
20
+ "val_auc": 0.9910714285714286,
21
+ "k": 16,
22
+ "dataset": "72_gene-expression",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 31810,
28
+ 6986,
29
+ 16885,
30
+ 31498,
31
+ 13134,
32
+ 25556,
33
+ 23010,
34
+ 28546,
35
+ 26855,
36
+ 11592,
37
+ 2017,
38
+ 25584,
39
+ 21200,
40
+ 13590,
41
+ 26523,
42
+ 4502
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/73_control-group_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.9006075873834957,
4
+ "test_acc": 0.9006896551724138,
5
+ "test_auc": 0.9346902226852102,
6
+ "val_auc": 0.9109172077922078,
7
+ "k": 1,
8
+ "dataset": "73_control-group",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 8980
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.9613784122849459,
18
+ "test_acc": 0.9613793103448276,
19
+ "test_auc": 0.9867806023053196,
20
+ "val_auc": 0.9963474025974025,
21
+ "k": 16,
22
+ "dataset": "73_control-group",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 8980,
28
+ 7453,
29
+ 6986,
30
+ 6638,
31
+ 27478,
32
+ 2819,
33
+ 30994,
34
+ 7454,
35
+ 15785,
36
+ 29194,
37
+ 9805,
38
+ 18978,
39
+ 11973,
40
+ 20596,
41
+ 11059,
42
+ 31066
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/74_magnetic-field_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.9512528061366141,
4
+ "test_acc": 0.9512643678160919,
5
+ "test_auc": 0.9819770076843984,
6
+ "val_auc": 0.9821428571428572,
7
+ "k": 1,
8
+ "dataset": "74_magnetic-field",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 31304
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.964124872203006,
18
+ "test_acc": 0.9641379310344828,
19
+ "test_auc": 0.9902955720006493,
20
+ "val_auc": 0.9939123376623378,
21
+ "k": 16,
22
+ "dataset": "74_magnetic-field",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 31304,
28
+ 6986,
29
+ 4211,
30
+ 10769,
31
+ 7518,
32
+ 10141,
33
+ 16885,
34
+ 11063,
35
+ 25584,
36
+ 27609,
37
+ 12768,
38
+ 29793,
39
+ 13310,
40
+ 4543,
41
+ 26849,
42
+ 27177
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/75_cell-lines_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.975629577196123,
4
+ "test_acc": 0.975632183908046,
5
+ "test_auc": 0.9924305968937712,
6
+ "val_auc": 0.9935064935064934,
7
+ "k": 1,
8
+ "dataset": "75_cell-lines",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 31765
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.9829882468290856,
18
+ "test_acc": 0.9829885057471265,
19
+ "test_auc": 0.9970989028085935,
20
+ "val_auc": 0.9963474025974026,
21
+ "k": 16,
22
+ "dataset": "75_cell-lines",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 31765,
28
+ 10716,
29
+ 28776,
30
+ 15464,
31
+ 26830,
32
+ 29782,
33
+ 28309,
34
+ 6986,
35
+ 24006,
36
+ 26129,
37
+ 6787,
38
+ 2430,
39
+ 5938,
40
+ 4121,
41
+ 27478,
42
+ 14329
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/76_trial-court_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.9848264892438243,
4
+ "test_acc": 0.9848275862068966,
5
+ "test_auc": 0.9968887825369338,
6
+ "val_auc": 0.9983766233766234,
7
+ "k": 1,
8
+ "dataset": "76_trial-court",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 23599
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.9889653073070783,
18
+ "test_acc": 0.9889655172413793,
19
+ "test_auc": 0.9981473902808593,
20
+ "val_auc": 1.0,
21
+ "k": 16,
22
+ "dataset": "76_trial-court",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 23599,
28
+ 21889,
29
+ 781,
30
+ 17946,
31
+ 8713,
32
+ 16885,
33
+ 31579,
34
+ 17155,
35
+ 6986,
36
+ 30437,
37
+ 23645,
38
+ 21695,
39
+ 25226,
40
+ 19463,
41
+ 8220,
42
+ 26523
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/7_hist_fig_ispolitician_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.5707146755861021,
4
+ "test_acc": 0.5773584905660377,
5
+ "test_auc": 0.5949865017989163,
6
+ "val_auc": 0.6122448979591837,
7
+ "k": 1,
8
+ "dataset": "7_hist_fig_ispolitician",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 30776
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.6271556532123238,
18
+ "test_acc": 0.6271698113207547,
19
+ "test_auc": 0.6715824640849628,
20
+ "val_auc": 0.6746698679471789,
21
+ "k": 16,
22
+ "dataset": "7_hist_fig_ispolitician",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 30776,
28
+ 16964,
29
+ 23393,
30
+ 25584,
31
+ 16807,
32
+ 22911,
33
+ 19252,
34
+ 19414,
35
+ 6787,
36
+ 19529,
37
+ 29782,
38
+ 17052,
39
+ 16170,
40
+ 8897,
41
+ 6729,
42
+ 32207
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/84_clinical-trials_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.8094194116514002,
4
+ "test_acc": 0.8096551724137931,
5
+ "test_auc": 0.8932149331673791,
6
+ "val_auc": 0.8946834415584415,
7
+ "k": 1,
8
+ "dataset": "84_clinical-trials",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 4804
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.9562943999682111,
18
+ "test_acc": 0.9563218390804598,
19
+ "test_auc": 0.9889638237999892,
20
+ "val_auc": 0.9849837662337663,
21
+ "k": 16,
22
+ "dataset": "84_clinical-trials",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 4804,
28
+ 16885,
29
+ 6986,
30
+ 16141,
31
+ 11874,
32
+ 18282,
33
+ 4810,
34
+ 2936,
35
+ 30326,
36
+ 4891,
37
+ 30421,
38
+ 11321,
39
+ 19463,
40
+ 11592,
41
+ 23667,
42
+ 15166
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/87_glue_cola_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.6393833901698838,
4
+ "test_acc": 0.6405031446540881,
5
+ "test_auc": 0.6659348390291422,
6
+ "val_auc": 0.5966386554621849,
7
+ "k": 1,
8
+ "dataset": "87_glue_cola",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 13024
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.6978015977397342,
18
+ "test_acc": 0.6991194968553459,
19
+ "test_auc": 0.7707565220209025,
20
+ "val_auc": 0.7511004401760705,
21
+ "k": 16,
22
+ "dataset": "87_glue_cola",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 13024,
28
+ 9734,
29
+ 30525,
30
+ 18826,
31
+ 21391,
32
+ 24006,
33
+ 29762,
34
+ 10528,
35
+ 20189,
36
+ 1588,
37
+ 12753,
38
+ 10803,
39
+ 12959,
40
+ 4096,
41
+ 14286,
42
+ 1306
43
+ ]
44
+ }
45
+ ]
denoised/gemma-2-2b-btk-vnorm_topp-p0.9/k-100/seed-0/model.layers.12/w-32768/t-300M/l0-93.5/sae_probes/sae_probes_gemma-2-2b/normal_setting/90_glue_qnli_blocks.12.hook_resid_post_l1.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "test_f1": 0.6796542660165885,
4
+ "test_acc": 0.6963522012578617,
5
+ "test_auc": 0.7019145572984966,
6
+ "val_auc": 0.6660664265706282,
7
+ "k": 1,
8
+ "dataset": "90_glue_qnli",
9
+ "hook_name": "blocks.12.hook_resid_post",
10
+ "reg_type": "l1",
11
+ "binarize": false,
12
+ "indices": [
13
+ 13220
14
+ ]
15
+ },
16
+ {
17
+ "test_f1": 0.7645983497467719,
18
+ "test_acc": 0.7650314465408805,
19
+ "test_auc": 0.8462053144230253,
20
+ "val_auc": 0.8147258903561424,
21
+ "k": 16,
22
+ "dataset": "90_glue_qnli",
23
+ "hook_name": "blocks.12.hook_resid_post",
24
+ "reg_type": "l1",
25
+ "binarize": false,
26
+ "indices": [
27
+ 13220,
28
+ 15387,
29
+ 9734,
30
+ 22755,
31
+ 19157,
32
+ 11274,
33
+ 17984,
34
+ 26891,
35
+ 10708,
36
+ 16807,
37
+ 24699,
38
+ 30983,
39
+ 1814,
40
+ 25884,
41
+ 14286,
42
+ 18904
43
+ ]
44
+ }
45
+ ]