Phase 5 release: 7 TopK SAEs + specificity / null-steering JSON artifacts
Browse files- .gitattributes +2 -34
- README.md +74 -0
- adhd_L1_hook_resid_post/config.json +19 -0
- adhd_L1_hook_resid_post/history.json +106 -0
- adhd_L1_hook_resid_post/sae.pt +3 -0
- adhd_L2_hook_resid_post/config.json +19 -0
- adhd_L2_hook_resid_post/history.json +106 -0
- adhd_L2_hook_resid_post/sae.pt +3 -0
- adhd_L3_hook_resid_post/config.json +19 -0
- adhd_L3_hook_resid_post/history.json +106 -0
- adhd_L3_hook_resid_post/sae.pt +3 -0
- causal_nulls_per_seed.json +104 -0
- deepdive_steering.json +238 -0
- feature_diff.json +120 -0
- loading_example.py +10 -0
- standard_L0_hook_resid_post/analysis.json +444 -0
- standard_L0_hook_resid_post/config.json +22 -0
- standard_L0_hook_resid_post/history.json +38 -0
- standard_L0_hook_resid_post/sae.pt +3 -0
- standard_L1_hook_resid_post/config.json +19 -0
- standard_L1_hook_resid_post/history.json +106 -0
- standard_L1_hook_resid_post/sae.pt +3 -0
- standard_L2_hook_resid_post/config.json +19 -0
- standard_L2_hook_resid_post/history.json +106 -0
- standard_L2_hook_resid_post/sae.pt +3 -0
- standard_L3_hook_resid_post/config.json +19 -0
- standard_L3_hook_resid_post/history.json +106 -0
- standard_L3_hook_resid_post/sae.pt +3 -0
- three_probes.json +62 -0
.gitattributes
CHANGED
|
@@ -1,35 +1,3 @@
|
|
| 1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
library_name: sae_lens
|
| 4 |
+
tags:
|
| 5 |
+
- interpretability
|
| 6 |
+
- sparse-autoencoder
|
| 7 |
+
- sae
|
| 8 |
+
- mechanistic-interpretability
|
| 9 |
+
- topk-sae
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# InterpGPT — Phase 5 TopK SAEs
|
| 13 |
+
|
| 14 |
+
Seven sparse autoencoders trained on the residual stream
|
| 15 |
+
(`hook_resid_post`) of the two Phase 1 InterpGPT models
|
| 16 |
+
([`interpgpt-standard-23M`](https://huggingface.co/connaaa/interpgpt-standard-23M),
|
| 17 |
+
[`interpgpt-adhd-23M`](https://huggingface.co/connaaa/interpgpt-adhd-23M)).
|
| 18 |
+
|
| 19 |
+
| Model | Layer | Hook | Subdir |
|
| 20 |
+
|---|---|---|---|
|
| 21 |
+
| standard | 0 | hook_resid_post | `standard_L0_hook_resid_post/` |
|
| 22 |
+
| standard | 1 | hook_resid_post | `standard_L1_hook_resid_post/` |
|
| 23 |
+
| standard | 2 | hook_resid_post | `standard_L2_hook_resid_post/` |
|
| 24 |
+
| standard | 3 | hook_resid_post | `standard_L3_hook_resid_post/` |
|
| 25 |
+
| adhd | 1 | hook_resid_post | `adhd_L1_hook_resid_post/` |
|
| 26 |
+
| adhd | 2 | hook_resid_post | `adhd_L2_hook_resid_post/` |
|
| 27 |
+
| adhd | 3 | hook_resid_post | `adhd_L3_hook_resid_post/` |
|
| 28 |
+
|
| 29 |
+
## Training setup
|
| 30 |
+
|
| 31 |
+
- Library: [`sae_lens`](https://github.com/jbloomAus/SAELens) TopK training SAE
|
| 32 |
+
- `k = 40`, `d_sae = 4096`
|
| 33 |
+
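- All 7 SAEs pass quality gates: FVE 0.87–0.92, dead features < 2% at end of training (held-out dead fraction is 1.0–1.5% for the L1–L3 SAEs, except `adhd_L3` at 2.2%)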
|
| 34 |
+
|
| 35 |
+
## Phase 5 result artifacts (included)
|
| 36 |
+
|
| 37 |
+
- `feature_diff.json` — 312 ADHD-L2 features firing at step-onset that the
|
| 38 |
+
standard model lacks. Feature 2504 highlighted (2000× cross-model asymmetry).
|
| 39 |
+
- `causal_nulls_per_seed.json` — 4-seed causal ablation nulls for the L3 swap (`n_seeds: 4`, 30 prompts per seed).
|
| 40 |
+
- `deepdive_steering.json` — feature 2504 four-panel steering results (all four
|
| 41 |
+
interventions Δ within ±0.025 of null, below 2 SEM).
|
| 42 |
+
- `three_probes.json` — three-probe causal-check outputs.
|
| 43 |
+
|
| 44 |
+
## Loading
|
| 45 |
+
|
| 46 |
+
### Minimal
|
| 47 |
+
|
| 48 |
+
```python
|
| 49 |
+
from huggingface_hub import snapshot_download
|
| 50 |
+
from sae_lens import SAE
|
| 51 |
+
|
| 52 |
+
repo = "connaaa/interpgpt-sae-phase5"
|
| 53 |
+
local = snapshot_download(repo_id=repo, allow_patterns=["adhd_L2_hook_resid_post/*"])
|
| 54 |
+
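# NOTE(review): SAE.load_from_disk expects sae_lens' own on-disk layout (cfg.json + sae_weights.safetensors);
# this repo ships config.json + sae.pt per subdir — confirm this call works, otherwise follow loading_example.py.
sae = SAE.load_from_disk(f"{local}/adhd_L2_hook_resid_post")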
|
| 55 |
+
print(sae)
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
### Pull everything
|
| 59 |
+
|
| 60 |
+
```python
|
| 61 |
+
from huggingface_hub import snapshot_download
|
| 62 |
+
local = snapshot_download(repo_id="connaaa/interpgpt-sae-phase5")
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
## Reproducibility
|
| 66 |
+
|
| 67 |
+
Training script: `phase5_sae.py` in
|
| 68 |
+
[github.com/cwklurks/interpgpt](https://github.com/cwklurks/interpgpt).
|
| 69 |
+
Production driver: `phase5_production.py`. Four-panel steering harness:
|
| 70 |
+
`phase5_steering_ci.py`.
|
| 71 |
+
|
| 72 |
+
## License
|
| 73 |
+
|
| 74 |
+
MIT.
|
adhd_L1_hook_resid_post/config.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "adhd",
|
| 3 |
+
"layer": 1,
|
| 4 |
+
"d_sae": 4096,
|
| 5 |
+
"k": 40,
|
| 6 |
+
"normalize_activations": "expected_average_only_in",
|
| 7 |
+
"n_tokens": 10000000,
|
| 8 |
+
"batch_size": 4096,
|
| 9 |
+
"lr": 0.0003,
|
| 10 |
+
"final_training": {
|
| 11 |
+
"step": 2441,
|
| 12 |
+
"loss": 3.830482244491577,
|
| 13 |
+
"l0": 40.0,
|
| 14 |
+
"fve": 0.9020028367114843,
|
| 15 |
+
"n_dead": 5,
|
| 16 |
+
"mse": 0.0073381345719099045
|
| 17 |
+
},
|
| 18 |
+
"dead_pct_heldout": 1.3671875
|
| 19 |
+
}
|
adhd_L1_hook_resid_post/history.json
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 200,
|
| 4 |
+
"loss": 7.214208602905273,
|
| 5 |
+
"l0": 40.0,
|
| 6 |
+
"fve": 0.8173658289454013,
|
| 7 |
+
"n_dead": 0,
|
| 8 |
+
"mse": 0.014090251177549362
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"step": 400,
|
| 12 |
+
"loss": 11.098737716674805,
|
| 13 |
+
"l0": 40.0,
|
| 14 |
+
"fve": 0.8590923953729561,
|
| 15 |
+
"n_dead": 488,
|
| 16 |
+
"mse": 0.011645233258605003
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"step": 600,
|
| 20 |
+
"loss": 7.969749450683594,
|
| 21 |
+
"l0": 40.0,
|
| 22 |
+
"fve": 0.8684529110409115,
|
| 23 |
+
"n_dead": 148,
|
| 24 |
+
"mse": 0.010252815671265125
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"step": 800,
|
| 28 |
+
"loss": 5.30736780166626,
|
| 29 |
+
"l0": 40.0,
|
| 30 |
+
"fve": 0.8735112871918154,
|
| 31 |
+
"n_dead": 18,
|
| 32 |
+
"mse": 0.00969421211630106
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"step": 1000,
|
| 36 |
+
"loss": 4.6798553466796875,
|
| 37 |
+
"l0": 40.0,
|
| 38 |
+
"fve": 0.8860032616097528,
|
| 39 |
+
"n_dead": 4,
|
| 40 |
+
"mse": 0.0090001430362463
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"step": 1200,
|
| 44 |
+
"loss": 4.3763346672058105,
|
| 45 |
+
"l0": 40.0,
|
| 46 |
+
"fve": 0.8946085205816868,
|
| 47 |
+
"n_dead": 1,
|
| 48 |
+
"mse": 0.008514291606843472
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"step": 1400,
|
| 52 |
+
"loss": 4.231931209564209,
|
| 53 |
+
"l0": 40.0,
|
| 54 |
+
"fve": 0.8952190554998088,
|
| 55 |
+
"n_dead": 0,
|
| 56 |
+
"mse": 0.008265490643680096
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"step": 1600,
|
| 60 |
+
"loss": 4.067999839782715,
|
| 61 |
+
"l0": 40.0,
|
| 62 |
+
"fve": 0.8984853600784243,
|
| 63 |
+
"n_dead": 0,
|
| 64 |
+
"mse": 0.007945312187075615
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"step": 1800,
|
| 68 |
+
"loss": 3.9912521839141846,
|
| 69 |
+
"l0": 40.0,
|
| 70 |
+
"fve": 0.897026286041148,
|
| 71 |
+
"n_dead": 0,
|
| 72 |
+
"mse": 0.007795413956046104
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"step": 2000,
|
| 76 |
+
"loss": 3.987309694290161,
|
| 77 |
+
"l0": 40.0,
|
| 78 |
+
"fve": 0.9006864810283836,
|
| 79 |
+
"n_dead": 1,
|
| 80 |
+
"mse": 0.007757413201034069
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"step": 2200,
|
| 84 |
+
"loss": 3.8046491146087646,
|
| 85 |
+
"l0": 40.0,
|
| 86 |
+
"fve": 0.9039961047080753,
|
| 87 |
+
"n_dead": 1,
|
| 88 |
+
"mse": 0.007402045652270317
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"step": 2400,
|
| 92 |
+
"loss": 3.7656211853027344,
|
| 93 |
+
"l0": 40.0,
|
| 94 |
+
"fve": 0.9085856363510846,
|
| 95 |
+
"n_dead": 4,
|
| 96 |
+
"mse": 0.00724159087985754
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"step": 2441,
|
| 100 |
+
"loss": 3.830482244491577,
|
| 101 |
+
"l0": 40.0,
|
| 102 |
+
"fve": 0.9020028367114843,
|
| 103 |
+
"n_dead": 5,
|
| 104 |
+
"mse": 0.0073381345719099045
|
| 105 |
+
}
|
| 106 |
+
]
|
adhd_L1_hook_resid_post/sae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bfe52fb2ee7e5992b148d7aecd779fa71dfe2ebfd701619c93b3ebe01c7db11
|
| 3 |
+
size 16798005
|
adhd_L2_hook_resid_post/config.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "adhd",
|
| 3 |
+
"layer": 2,
|
| 4 |
+
"d_sae": 4096,
|
| 5 |
+
"k": 40,
|
| 6 |
+
"normalize_activations": "expected_average_only_in",
|
| 7 |
+
"n_tokens": 10000000,
|
| 8 |
+
"batch_size": 4096,
|
| 9 |
+
"lr": 0.0003,
|
| 10 |
+
"final_training": {
|
| 11 |
+
"step": 2441,
|
| 12 |
+
"loss": 7.990331649780273,
|
| 13 |
+
"l0": 40.0,
|
| 14 |
+
"fve": 0.8937416797380295,
|
| 15 |
+
"n_dead": 1,
|
| 16 |
+
"mse": 0.015545391477644444
|
| 17 |
+
},
|
| 18 |
+
"dead_pct_heldout": 1.46484375
|
| 19 |
+
}
|
adhd_L2_hook_resid_post/history.json
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 200,
|
| 4 |
+
"loss": 14.795553207397461,
|
| 5 |
+
"l0": 40.0,
|
| 6 |
+
"fve": 0.8293616575146343,
|
| 7 |
+
"n_dead": 0,
|
| 8 |
+
"mse": 0.028897564858198166
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"step": 400,
|
| 12 |
+
"loss": 22.638526916503906,
|
| 13 |
+
"l0": 40.0,
|
| 14 |
+
"fve": 0.8359017185027313,
|
| 15 |
+
"n_dead": 663,
|
| 16 |
+
"mse": 0.023937705904245377
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"step": 600,
|
| 20 |
+
"loss": 19.389955520629883,
|
| 21 |
+
"l0": 40.0,
|
| 22 |
+
"fve": 0.8645987532704418,
|
| 23 |
+
"n_dead": 222,
|
| 24 |
+
"mse": 0.021326521411538124
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"step": 800,
|
| 28 |
+
"loss": 11.63227653503418,
|
| 29 |
+
"l0": 40.0,
|
| 30 |
+
"fve": 0.8760963178028377,
|
| 31 |
+
"n_dead": 35,
|
| 32 |
+
"mse": 0.02005489356815815
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"step": 1000,
|
| 36 |
+
"loss": 9.950961112976074,
|
| 37 |
+
"l0": 40.0,
|
| 38 |
+
"fve": 0.8784041379676274,
|
| 39 |
+
"n_dead": 8,
|
| 40 |
+
"mse": 0.018850065767765045
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"step": 1200,
|
| 44 |
+
"loss": 9.30925178527832,
|
| 45 |
+
"l0": 40.0,
|
| 46 |
+
"fve": 0.8774098117815495,
|
| 47 |
+
"n_dead": 2,
|
| 48 |
+
"mse": 0.018041376024484634
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"step": 1400,
|
| 52 |
+
"loss": 8.932659149169922,
|
| 53 |
+
"l0": 40.0,
|
| 54 |
+
"fve": 0.8964732688104713,
|
| 55 |
+
"n_dead": 1,
|
| 56 |
+
"mse": 0.01737871766090393
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"step": 1600,
|
| 60 |
+
"loss": 8.559926986694336,
|
| 61 |
+
"l0": 40.0,
|
| 62 |
+
"fve": 0.8907664732630304,
|
| 63 |
+
"n_dead": 0,
|
| 64 |
+
"mse": 0.016718603670597076
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"step": 1800,
|
| 68 |
+
"loss": 8.413354873657227,
|
| 69 |
+
"l0": 40.0,
|
| 70 |
+
"fve": 0.8881362105445537,
|
| 71 |
+
"n_dead": 0,
|
| 72 |
+
"mse": 0.01643233373761177
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"step": 2000,
|
| 76 |
+
"loss": 8.195343017578125,
|
| 77 |
+
"l0": 40.0,
|
| 78 |
+
"fve": 0.8944998626527212,
|
| 79 |
+
"n_dead": 1,
|
| 80 |
+
"mse": 0.015944253653287888
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"step": 2200,
|
| 84 |
+
"loss": 8.069483757019043,
|
| 85 |
+
"l0": 40.0,
|
| 86 |
+
"fve": 0.9000171435610246,
|
| 87 |
+
"n_dead": 1,
|
| 88 |
+
"mse": 0.01569938287138939
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"step": 2400,
|
| 92 |
+
"loss": 7.750567436218262,
|
| 93 |
+
"l0": 40.0,
|
| 94 |
+
"fve": 0.9054223585460653,
|
| 95 |
+
"n_dead": 0,
|
| 96 |
+
"mse": 0.015137827023863792
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"step": 2441,
|
| 100 |
+
"loss": 7.990331649780273,
|
| 101 |
+
"l0": 40.0,
|
| 102 |
+
"fve": 0.8937416797380295,
|
| 103 |
+
"n_dead": 1,
|
| 104 |
+
"mse": 0.015545391477644444
|
| 105 |
+
}
|
| 106 |
+
]
|
adhd_L2_hook_resid_post/sae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:725267a3fca5866bcf9c8dcab2fd77cddd7b27ecc42cb631250395d36cbcf62a
|
| 3 |
+
size 16798005
|
adhd_L3_hook_resid_post/config.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "adhd",
|
| 3 |
+
"layer": 3,
|
| 4 |
+
"d_sae": 4096,
|
| 5 |
+
"k": 40,
|
| 6 |
+
"normalize_activations": "expected_average_only_in",
|
| 7 |
+
"n_tokens": 10000000,
|
| 8 |
+
"batch_size": 4096,
|
| 9 |
+
"lr": 0.0003,
|
| 10 |
+
"final_training": {
|
| 11 |
+
"step": 2441,
|
| 12 |
+
"loss": 16.80815315246582,
|
| 13 |
+
"l0": 40.0,
|
| 14 |
+
"fve": 0.8691270860953982,
|
| 15 |
+
"n_dead": 0,
|
| 16 |
+
"mse": 0.032828424125909805
|
| 17 |
+
},
|
| 18 |
+
"dead_pct_heldout": 2.2216796875
|
| 19 |
+
}
|
adhd_L3_hook_resid_post/history.json
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 200,
|
| 4 |
+
"loss": 29.05353546142578,
|
| 5 |
+
"l0": 40.0,
|
| 6 |
+
"fve": 0.7888412205173247,
|
| 7 |
+
"n_dead": 0,
|
| 8 |
+
"mse": 0.05674518644809723
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"step": 400,
|
| 12 |
+
"loss": 44.51063537597656,
|
| 13 |
+
"l0": 40.0,
|
| 14 |
+
"fve": 0.828121429866145,
|
| 15 |
+
"n_dead": 769,
|
| 16 |
+
"mse": 0.04685475304722786
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"step": 600,
|
| 20 |
+
"loss": 40.60480880737305,
|
| 21 |
+
"l0": 40.0,
|
| 22 |
+
"fve": 0.8353118397308505,
|
| 23 |
+
"n_dead": 239,
|
| 24 |
+
"mse": 0.04323723167181015
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"step": 800,
|
| 28 |
+
"loss": 22.681713104248047,
|
| 29 |
+
"l0": 40.0,
|
| 30 |
+
"fve": 0.8478801929634973,
|
| 31 |
+
"n_dead": 29,
|
| 32 |
+
"mse": 0.03988751769065857
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"step": 1000,
|
| 36 |
+
"loss": 19.90431785583496,
|
| 37 |
+
"l0": 40.0,
|
| 38 |
+
"fve": 0.8681096795485392,
|
| 39 |
+
"n_dead": 4,
|
| 40 |
+
"mse": 0.03827929496765137
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"step": 1200,
|
| 44 |
+
"loss": 19.122100830078125,
|
| 45 |
+
"l0": 40.0,
|
| 46 |
+
"fve": 0.8613335049168376,
|
| 47 |
+
"n_dead": 2,
|
| 48 |
+
"mse": 0.037058740854263306
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"step": 1400,
|
| 52 |
+
"loss": 18.61587142944336,
|
| 53 |
+
"l0": 40.0,
|
| 54 |
+
"fve": 0.854929133645014,
|
| 55 |
+
"n_dead": 2,
|
| 56 |
+
"mse": 0.03607767075300217
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"step": 1600,
|
| 60 |
+
"loss": 17.96778106689453,
|
| 61 |
+
"l0": 40.0,
|
| 62 |
+
"fve": 0.8535506236950299,
|
| 63 |
+
"n_dead": 1,
|
| 64 |
+
"mse": 0.03495676815509796
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"step": 1800,
|
| 68 |
+
"loss": 17.5817928314209,
|
| 69 |
+
"l0": 40.0,
|
| 70 |
+
"fve": 0.8671142899286272,
|
| 71 |
+
"n_dead": 1,
|
| 72 |
+
"mse": 0.03420582413673401
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"step": 2000,
|
| 76 |
+
"loss": 17.37525749206543,
|
| 77 |
+
"l0": 40.0,
|
| 78 |
+
"fve": 0.8686602042934782,
|
| 79 |
+
"n_dead": 2,
|
| 80 |
+
"mse": 0.03367304056882858
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"step": 2200,
|
| 84 |
+
"loss": 16.68439483642578,
|
| 85 |
+
"l0": 40.0,
|
| 86 |
+
"fve": 0.877142112507239,
|
| 87 |
+
"n_dead": 1,
|
| 88 |
+
"mse": 0.03245990723371506
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"step": 2400,
|
| 92 |
+
"loss": 16.43060874938965,
|
| 93 |
+
"l0": 40.0,
|
| 94 |
+
"fve": 0.8754777867952352,
|
| 95 |
+
"n_dead": 0,
|
| 96 |
+
"mse": 0.03209103271365166
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"step": 2441,
|
| 100 |
+
"loss": 16.80815315246582,
|
| 101 |
+
"l0": 40.0,
|
| 102 |
+
"fve": 0.8691270860953982,
|
| 103 |
+
"n_dead": 0,
|
| 104 |
+
"mse": 0.032828424125909805
|
| 105 |
+
}
|
| 106 |
+
]
|
adhd_L3_hook_resid_post/sae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f314c6a8d95b68b24e83107e45a2a41003088be5556ec13d5b3ed019f68f7ad5
|
| 3 |
+
size 16798005
|
causal_nulls_per_seed.json
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"n_seeds": 4,
|
| 3 |
+
"n_prompts": 30,
|
| 4 |
+
"coefs": [
|
| 5 |
+
0.0,
|
| 6 |
+
2.0,
|
| 7 |
+
5.0,
|
| 8 |
+
10.0
|
| 9 |
+
],
|
| 10 |
+
"results": {
|
| 11 |
+
"L2_up_std": {
|
| 12 |
+
"0.0": [
|
| 13 |
+
0.08018867924528301,
|
| 14 |
+
0.08653846153846154,
|
| 15 |
+
0.09090909090909091,
|
| 16 |
+
0.06862745098039216
|
| 17 |
+
],
|
| 18 |
+
"2.0": [
|
| 19 |
+
0.08133971291866028,
|
| 20 |
+
0.08695652173913043,
|
| 21 |
+
0.08530805687203792,
|
| 22 |
+
0.07352941176470588
|
| 23 |
+
],
|
| 24 |
+
"5.0": [
|
| 25 |
+
0.07655502392344497,
|
| 26 |
+
0.08695652173913043,
|
| 27 |
+
0.09268292682926829,
|
| 28 |
+
0.07881773399014778
|
| 29 |
+
],
|
| 30 |
+
"10.0": [
|
| 31 |
+
0.08018867924528301,
|
| 32 |
+
0.08490566037735849,
|
| 33 |
+
0.11682242990654206,
|
| 34 |
+
0.08866995073891626
|
| 35 |
+
]
|
| 36 |
+
},
|
| 37 |
+
"L2_down_adhd": {
|
| 38 |
+
"0.0": [
|
| 39 |
+
0.4488888888888889,
|
| 40 |
+
0.3949771689497717,
|
| 41 |
+
0.44794188861985473,
|
| 42 |
+
0.42403628117913833
|
| 43 |
+
],
|
| 44 |
+
"2.0": [
|
| 45 |
+
0.43935926773455375,
|
| 46 |
+
0.382830626450116,
|
| 47 |
+
0.44364508393285373,
|
| 48 |
+
0.432183908045977
|
| 49 |
+
],
|
| 50 |
+
"5.0": [
|
| 51 |
+
0.4387990762124711,
|
| 52 |
+
0.39902676399026765,
|
| 53 |
+
0.42857142857142855,
|
| 54 |
+
0.42755344418052255
|
| 55 |
+
],
|
| 56 |
+
"10.0": [
|
| 57 |
+
0.39172749391727496,
|
| 58 |
+
0.3949771689497717,
|
| 59 |
+
0.40648379052369077,
|
| 60 |
+
0.4230769230769231
|
| 61 |
+
]
|
| 62 |
+
},
|
| 63 |
+
"L2_zero_adhd": {
|
| 64 |
+
"baseline": [
|
| 65 |
+
0.4488888888888889,
|
| 66 |
+
0.3949771689497717,
|
| 67 |
+
0.44794188861985473,
|
| 68 |
+
0.42403628117913833
|
| 69 |
+
],
|
| 70 |
+
"zero_step_onset": [
|
| 71 |
+
0.44742729306487694,
|
| 72 |
+
0.38672768878718533,
|
| 73 |
+
0.4348894348894349,
|
| 74 |
+
0.4217687074829932
|
| 75 |
+
]
|
| 76 |
+
},
|
| 77 |
+
"L1_up_std": {
|
| 78 |
+
"0.0": [
|
| 79 |
+
0.08018867924528301,
|
| 80 |
+
0.08653846153846154,
|
| 81 |
+
0.09090909090909091,
|
| 82 |
+
0.06862745098039216
|
| 83 |
+
],
|
| 84 |
+
"2.0": [
|
| 85 |
+
0.08095238095238096,
|
| 86 |
+
0.08695652173913043,
|
| 87 |
+
0.0861244019138756,
|
| 88 |
+
0.06862745098039216
|
| 89 |
+
],
|
| 90 |
+
"5.0": [
|
| 91 |
+
0.0761904761904762,
|
| 92 |
+
0.07766990291262135,
|
| 93 |
+
0.08695652173913043,
|
| 94 |
+
0.07352941176470588
|
| 95 |
+
],
|
| 96 |
+
"10.0": [
|
| 97 |
+
0.07692307692307693,
|
| 98 |
+
0.08163265306122448,
|
| 99 |
+
0.0784313725490196,
|
| 100 |
+
0.07881773399014778
|
| 101 |
+
]
|
| 102 |
+
}
|
| 103 |
+
}
|
| 104 |
+
}
|
deepdive_steering.json
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"feat_2504_deep_dive": {
|
| 3 |
+
"top_contexts": [
|
| 4 |
+
{
|
| 5 |
+
"act": 2.2637319564819336,
|
| 6 |
+
"pos_class": "step_onset",
|
| 7 |
+
"context": "for 2 minutes at easy pace *<|sep|>* sip water <|sep|> fist pump \u2014",
|
| 8 |
+
"variant": "adhd"
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"act": 2.1833174228668213,
|
| 12 |
+
"pos_class": "step_onset",
|
| 13 |
+
"context": "acy resources to prevent recur rence *<|sep|>* deep breath <|sep|> close eyes briefly",
|
| 14 |
+
"variant": "adhd"
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"act": 2.110867500305176,
|
| 18 |
+
"pos_class": "step_onset",
|
| 19 |
+
"context": "excess grout while wet using sponge *<|sep|>* close eyes briefly <|sep|> apply grout",
|
| 20 |
+
"variant": "adhd"
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"act": 2.1062774658203125,
|
| 24 |
+
"pos_class": "step_onset",
|
| 25 |
+
"context": "donate remaining items to charity organizations *<|sep|>* quick stretch <|sep|> pause and breathe",
|
| 26 |
+
"variant": "adhd"
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"act": 2.099095106124878,
|
| 30 |
+
"pos_class": "step_onset",
|
| 31 |
+
"context": "e red details and architectural elements *<|sep|>* quick stretch <|sep|> pause and breathe",
|
| 32 |
+
"variant": "adhd"
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"act": 2.096402645111084,
|
| 36 |
+
"pos_class": "step_onset",
|
| 37 |
+
"context": "new skills to build competen ce *<|sep|>* roll shoulders <|sep|> seek feedback from",
|
| 38 |
+
"variant": "adhd"
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"act": 2.071779251098633,
|
| 42 |
+
"pos_class": "step_onset",
|
| 43 |
+
"context": "up monthly and weekly spread s *<|sep|>* roll shoulders <|sep|> begin adding daily",
|
| 44 |
+
"variant": "adhd"
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"act": 2.044790029525757,
|
| 48 |
+
"pos_class": "step_onset",
|
| 49 |
+
"context": "methodology and divide data collection tasks *<|sep|>* close eyes briefly <|sep|> quick focus",
|
| 50 |
+
"variant": "adhd"
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"act": 2.0143749713897705,
|
| 54 |
+
"pos_class": "step_onset",
|
| 55 |
+
"context": "hydration products on longer training rides *<|sep|>* close eyes briefly <|sep|> complete a",
|
| 56 |
+
"variant": "adhd"
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"act": 2.014024257659912,
|
| 60 |
+
"pos_class": "step_onset",
|
| 61 |
+
"context": "briefly <|sep|> refin ed training intensity *<|sep|>* shake out hands <|sep|> complete weeks",
|
| 62 |
+
"variant": "adhd"
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"act": 2.0133330821990967,
|
| 66 |
+
"pos_class": "step_onset",
|
| 67 |
+
"context": "<|sep|> rinse all scrub bed surfaces *<|sep|>* roll shoulders <|sep|> scrub the lav",
|
| 68 |
+
"variant": "adhd"
|
| 69 |
+
},
|
| 70 |
+
{
|
| 71 |
+
"act": 2.0048322677612305,
|
| 72 |
+
"pos_class": "step_onset",
|
| 73 |
+
"context": "<|sep|> write results and discussion sections *<|sep|>* present findings in final report or",
|
| 74 |
+
"variant": "adhd"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"act": 1.988505244255066,
|
| 78 |
+
"pos_class": "step_onset",
|
| 79 |
+
"context": "roll shoulders <|sep|> increased cardio sessions *<|sep|>* deep breath <|sep|> 30 - second",
|
| 80 |
+
"variant": "adhd"
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"act": 1.963397741317749,
|
| 84 |
+
"pos_class": "step_onset",
|
| 85 |
+
"context": "increased cardio sessions <|sep|> deep breath *<|sep|>* 30 - second stretch <|sep|> execute",
|
| 86 |
+
"variant": "adhd"
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"act": 1.9471516609191895,
|
| 90 |
+
"pos_class": "step_onset",
|
| 91 |
+
"context": "with proper organization and archi ving *<|sep|>* wiggle fingers <|sep|> research family gen",
|
| 92 |
+
"variant": "adhd"
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"act": 1.9434456825256348,
|
| 96 |
+
"pos_class": "step_onset",
|
| 97 |
+
"context": "until you see no soap residue *<|sep|>* sip water <|sep|> wipe down with",
|
| 98 |
+
"variant": "adhd"
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"act": 1.936253547668457,
|
| 102 |
+
"pos_class": "step_onset",
|
| 103 |
+
"context": "for 5 minutes at easy pace *<|sep|>* quick stretch <|sep|> hydrate and refuel",
|
| 104 |
+
"variant": "adhd"
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"act": 1.9354044198989868,
|
| 108 |
+
"pos_class": "step_onset",
|
| 109 |
+
"context": "online and wait for approval decision *<|sep|>* wiggle fingers <|sep|> fist pump \u2014",
|
| 110 |
+
"variant": "adhd"
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"act": 1.9305871725082397,
|
| 114 |
+
"pos_class": "step_onset",
|
| 115 |
+
"context": "for user interface and user experience *<|sep|>* sip water <|sep|> choose development framework",
|
| 116 |
+
"variant": "adhd"
|
| 117 |
+
},
|
| 118 |
+
{
|
| 119 |
+
"act": 1.9305052757263184,
|
| 120 |
+
"pos_class": "step_onset",
|
| 121 |
+
"context": "<|sep|> add color and shading details *<|sep|>* deep breath <|sep|> pause and breathe",
|
| 122 |
+
"variant": "adhd"
|
| 123 |
+
}
|
| 124 |
+
],
|
| 125 |
+
"top_cofire_partners": [
|
| 126 |
+
2418,
|
| 127 |
+
653,
|
| 128 |
+
1216,
|
| 129 |
+
225,
|
| 130 |
+
1131,
|
| 131 |
+
2959,
|
| 132 |
+
1650,
|
| 133 |
+
3953,
|
| 134 |
+
702,
|
| 135 |
+
352
|
| 136 |
+
]
|
| 137 |
+
},
|
| 138 |
+
"symmetry_control_substantive_count": 5,
|
| 139 |
+
"symmetry_control_top_15": [
|
| 140 |
+
1406,
|
| 141 |
+
1258,
|
| 142 |
+
2097,
|
| 143 |
+
156,
|
| 144 |
+
697,
|
| 145 |
+
120,
|
| 146 |
+
1539,
|
| 147 |
+
1385,
|
| 148 |
+
3795,
|
| 149 |
+
1065,
|
| 150 |
+
531,
|
| 151 |
+
3280,
|
| 152 |
+
3927,
|
| 153 |
+
3967,
|
| 154 |
+
3846
|
| 155 |
+
],
|
| 156 |
+
"steering_results": {
|
| 157 |
+
"std_baseline": {
|
| 158 |
+
"sep_rate": 0.08425135764158262,
|
| 159 |
+
"mean_step_count": 6.8125,
|
| 160 |
+
"spearman_rho": 0.5305203306664215,
|
| 161 |
+
"spearman_pval": 4.155845216990538e-07,
|
| 162 |
+
"regulation_rate": 0.09357798165137615,
|
| 163 |
+
"n_tokens_total": 6445,
|
| 164 |
+
"n_steps_total": 545,
|
| 165 |
+
"reached_end_rate": 0.975
|
| 166 |
+
},
|
| 167 |
+
"std_coef_+2.0": {
|
| 168 |
+
"sep_rate": 0.08416458852867831,
|
| 169 |
+
"mean_step_count": 6.775,
|
| 170 |
+
"spearman_rho": 0.530739309117657,
|
| 171 |
+
"spearman_pval": 4.102184726171915e-07,
|
| 172 |
+
"regulation_rate": 0.0940959409594096,
|
| 173 |
+
"n_tokens_total": 6416,
|
| 174 |
+
"n_steps_total": 542,
|
| 175 |
+
"reached_end_rate": 0.975
|
| 176 |
+
},
|
| 177 |
+
"std_coef_+5.0": {
|
| 178 |
+
"sep_rate": 0.08429777916796997,
|
| 179 |
+
"mean_step_count": 6.7625,
|
| 180 |
+
"spearman_rho": 0.5327086311005121,
|
| 181 |
+
"spearman_pval": 3.64818647432245e-07,
|
| 182 |
+
"regulation_rate": 0.09057301293900184,
|
| 183 |
+
"n_tokens_total": 6394,
|
| 184 |
+
"n_steps_total": 541,
|
| 185 |
+
"reached_end_rate": 0.975
|
| 186 |
+
},
|
| 187 |
+
"std_coef_+10.0": {
|
| 188 |
+
"sep_rate": 0.08608971454463073,
|
| 189 |
+
"mean_step_count": 7.1375,
|
| 190 |
+
"spearman_rho": 0.4614572824649014,
|
| 191 |
+
"spearman_pval": 1.6459708212616308e-05,
|
| 192 |
+
"regulation_rate": 0.08756567425569177,
|
| 193 |
+
"n_tokens_total": 6621,
|
| 194 |
+
"n_steps_total": 571,
|
| 195 |
+
"reached_end_rate": 0.975
|
| 196 |
+
},
|
| 197 |
+
"adhd_baseline": {
|
| 198 |
+
"sep_rate": 0.12945795615661468,
|
| 199 |
+
"mean_step_count": 15.0625,
|
| 200 |
+
"spearman_rho": 0.8158904455366707,
|
| 201 |
+
"spearman_pval": 3.038475633405349e-20,
|
| 202 |
+
"regulation_rate": 0.41327800829875516,
|
| 203 |
+
"n_tokens_total": 9169,
|
| 204 |
+
"n_steps_total": 1205,
|
| 205 |
+
"reached_end_rate": 0.7625
|
| 206 |
+
},
|
| 207 |
+
"adhd_coef_-2.0": {
|
| 208 |
+
"sep_rate": 0.12845057880676758,
|
| 209 |
+
"mean_step_count": 14.625,
|
| 210 |
+
"spearman_rho": 0.7957172265026485,
|
| 211 |
+
"spearman_pval": 1.1613642559127553e-18,
|
| 212 |
+
"regulation_rate": 0.4008547008547009,
|
| 213 |
+
"n_tokens_total": 8984,
|
| 214 |
+
"n_steps_total": 1170,
|
| 215 |
+
"reached_end_rate": 0.7875
|
| 216 |
+
},
|
| 217 |
+
"adhd_coef_-5.0": {
|
| 218 |
+
"sep_rate": 0.1263146117699709,
|
| 219 |
+
"mean_step_count": 14.3375,
|
| 220 |
+
"spearman_rho": 0.780151209846579,
|
| 221 |
+
"spearman_pval": 1.4775933869589044e-17,
|
| 222 |
+
"regulation_rate": 0.4167393199651264,
|
| 223 |
+
"n_tokens_total": 8938,
|
| 224 |
+
"n_steps_total": 1147,
|
| 225 |
+
"reached_end_rate": 0.7625
|
| 226 |
+
},
|
| 227 |
+
"adhd_coef_-10.0": {
|
| 228 |
+
"sep_rate": 0.12151652624756967,
|
| 229 |
+
"mean_step_count": 14.225,
|
| 230 |
+
"spearman_rho": 0.7471782967641961,
|
| 231 |
+
"spearman_pval": 1.7289858832058174e-15,
|
| 232 |
+
"regulation_rate": 0.3945518453427065,
|
| 233 |
+
"n_tokens_total": 9258,
|
| 234 |
+
"n_steps_total": 1138,
|
| 235 |
+
"reached_end_rate": 0.725
|
| 236 |
+
}
|
| 237 |
+
}
|
| 238 |
+
}
|
feature_diff.json
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sae_results": {
|
| 3 |
+
"standard_L1": {
|
| 4 |
+
"final_fve": 0.9156635482984559,
|
| 5 |
+
"final_l0": 40.0,
|
| 6 |
+
"dead_pct_train": 0.0244140625,
|
| 7 |
+
"dead_pct_heldout": 1.5380859375
|
| 8 |
+
},
|
| 9 |
+
"standard_L2": {
|
| 10 |
+
"final_fve": 0.9112300092129357,
|
| 11 |
+
"final_l0": 40.0,
|
| 12 |
+
"dead_pct_train": 0.0244140625,
|
| 13 |
+
"dead_pct_heldout": 1.220703125
|
| 14 |
+
},
|
| 15 |
+
"standard_L3": {
|
| 16 |
+
"final_fve": 0.8956661837197387,
|
| 17 |
+
"final_l0": 40.0,
|
| 18 |
+
"dead_pct_train": 0.0244140625,
|
| 19 |
+
"dead_pct_heldout": 1.025390625
|
| 20 |
+
},
|
| 21 |
+
"adhd_L1": {
|
| 22 |
+
"final_fve": 0.9020028367114843,
|
| 23 |
+
"final_l0": 40.0,
|
| 24 |
+
"dead_pct_train": 0.1220703125,
|
| 25 |
+
"dead_pct_heldout": 1.3671875
|
| 26 |
+
},
|
| 27 |
+
"adhd_L2": {
|
| 28 |
+
"final_fve": 0.8937416797380295,
|
| 29 |
+
"final_l0": 40.0,
|
| 30 |
+
"dead_pct_train": 0.0244140625,
|
| 31 |
+
"dead_pct_heldout": 1.46484375
|
| 32 |
+
},
|
| 33 |
+
"adhd_L3": {
|
| 34 |
+
"final_fve": 0.8691270860953982,
|
| 35 |
+
"final_l0": 40.0,
|
| 36 |
+
"dead_pct_train": 0.0,
|
| 37 |
+
"dead_pct_heldout": 2.2216796875
|
| 38 |
+
}
|
| 39 |
+
},
|
| 40 |
+
"primary_count": 312,
|
| 41 |
+
"symmetry_count": 139,
|
| 42 |
+
"layer_control_count": 194,
|
| 43 |
+
"primary_top_features": [
|
| 44 |
+
{
|
| 45 |
+
"feat_id": 2418,
|
| 46 |
+
"adhd_rate": 0.8974166512489319,
|
| 47 |
+
"std_rate": 0.0010833332780748606
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"feat_id": 653,
|
| 51 |
+
"adhd_rate": 0.8386666774749756,
|
| 52 |
+
"std_rate": 0.0
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"feat_id": 1216,
|
| 56 |
+
"adhd_rate": 0.6754166483879089,
|
| 57 |
+
"std_rate": 0.009583333507180214
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"feat_id": 225,
|
| 61 |
+
"adhd_rate": 0.6380833387374878,
|
| 62 |
+
"std_rate": 0.0
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"feat_id": 1131,
|
| 66 |
+
"adhd_rate": 0.6314166784286499,
|
| 67 |
+
"std_rate": 0.0
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"feat_id": 2504,
|
| 71 |
+
"adhd_rate": 0.546999990940094,
|
| 72 |
+
"std_rate": 0.0005000000237487257
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"feat_id": 1650,
|
| 76 |
+
"adhd_rate": 0.5131666660308838,
|
| 77 |
+
"std_rate": 8.333333244081587e-05
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"feat_id": 2959,
|
| 81 |
+
"adhd_rate": 0.4754999876022339,
|
| 82 |
+
"std_rate": 0.00016666666488163173
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"feat_id": 3953,
|
| 86 |
+
"adhd_rate": 0.4663333296775818,
|
| 87 |
+
"std_rate": 0.00016666666488163173
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"feat_id": 352,
|
| 91 |
+
"adhd_rate": 0.42516666650772095,
|
| 92 |
+
"std_rate": 0.0016666667070239782
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"feat_id": 702,
|
| 96 |
+
"adhd_rate": 0.4244999885559082,
|
| 97 |
+
"std_rate": 0.0
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"feat_id": 2505,
|
| 101 |
+
"adhd_rate": 0.4099166691303253,
|
| 102 |
+
"std_rate": 0.0
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"feat_id": 1156,
|
| 106 |
+
"adhd_rate": 0.3932499885559082,
|
| 107 |
+
"std_rate": 0.0
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"feat_id": 2512,
|
| 111 |
+
"adhd_rate": 0.38883334398269653,
|
| 112 |
+
"std_rate": 0.0
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"feat_id": 1835,
|
| 116 |
+
"adhd_rate": 0.3605000078678131,
|
| 117 |
+
"std_rate": 0.00016666666488163173
|
| 118 |
+
}
|
| 119 |
+
]
|
| 120 |
+
}
|
loading_example.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Minimal example: load one of the InterpGPT Phase 5 TopK SAEs from HuggingFace.
|
| 3 |
+
"""
|
| 4 |
+
from huggingface_hub import snapshot_download
|
| 5 |
+
from sae_lens import SAE
|
| 6 |
+
|
| 7 |
+
# Hugging Face Hub repository that snapshot_download() below pulls from.
repo_id = "connaaa/interpgpt-sae-phase5"
|
| 8 |
+
# Download only the files under adhd_L2_hook_resid_post/ (allow_patterns filters
# the snapshot); `local` is the path of the local snapshot directory.
local = snapshot_download(repo_id=repo_id, allow_patterns=["adhd_L2_hook_resid_post/*"])
|
| 9 |
+
# NOTE(review): sae_lens' SAE.load_from_disk appears to expect `cfg.json` and
# `sae_weights.safetensors` inside the folder, but this commit's file list shows
# each SAE folder shipping `config.json`, `history.json` and `sae.pt` instead.
# Confirm this call actually works against the released layout; if it does not,
# the example presumably needs a torch.load-based loader for `sae.pt` -- TODO verify.
sae = SAE.load_from_disk(f"{local}/adhd_L2_hook_resid_post")
|
| 10 |
+
# Show the loaded SAE object.
print(sae)
|
standard_L0_hook_resid_post/analysis.json
ADDED
|
@@ -0,0 +1,444 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"act_rate_distribution": {
|
| 3 |
+
"mean": 0.009765625,
|
| 4 |
+
"dead_count": 768,
|
| 5 |
+
"high_count": 0
|
| 6 |
+
},
|
| 7 |
+
"sample_features": [
|
| 8 |
+
{
|
| 9 |
+
"feature_id": 159,
|
| 10 |
+
"act_rate": 0.1606599986553192,
|
| 11 |
+
"top_examples": [
|
| 12 |
+
{
|
| 13 |
+
"activation": 0.5935809016227722,
|
| 14 |
+
"position": 10,
|
| 15 |
+
"variant": "standard"
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"activation": 0.5935809016227722,
|
| 19 |
+
"position": 10,
|
| 20 |
+
"variant": "standard"
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"activation": 0.5628541707992554,
|
| 24 |
+
"position": 22,
|
| 25 |
+
"variant": "standard"
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"activation": 0.5628541707992554,
|
| 29 |
+
"position": 22,
|
| 30 |
+
"variant": "standard"
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"activation": 0.5378819108009338,
|
| 34 |
+
"position": 8,
|
| 35 |
+
"variant": "standard"
|
| 36 |
+
}
|
| 37 |
+
]
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"feature_id": 836,
|
| 41 |
+
"act_rate": 0.12184999883174896,
|
| 42 |
+
"top_examples": [
|
| 43 |
+
{
|
| 44 |
+
"activation": 0.6280968189239502,
|
| 45 |
+
"position": 15,
|
| 46 |
+
"variant": "standard"
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"activation": 0.6280968189239502,
|
| 50 |
+
"position": 15,
|
| 51 |
+
"variant": "standard"
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"activation": 0.5908636450767517,
|
| 55 |
+
"position": 1,
|
| 56 |
+
"variant": "standard"
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"activation": 0.5908636450767517,
|
| 60 |
+
"position": 1,
|
| 61 |
+
"variant": "standard"
|
| 62 |
+
},
|
| 63 |
+
{
|
| 64 |
+
"activation": 0.5908636450767517,
|
| 65 |
+
"position": 1,
|
| 66 |
+
"variant": "standard"
|
| 67 |
+
}
|
| 68 |
+
]
|
| 69 |
+
},
|
| 70 |
+
{
|
| 71 |
+
"feature_id": 2203,
|
| 72 |
+
"act_rate": 0.1174900010228157,
|
| 73 |
+
"top_examples": [
|
| 74 |
+
{
|
| 75 |
+
"activation": 0.6077227592468262,
|
| 76 |
+
"position": 7,
|
| 77 |
+
"variant": "standard"
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"activation": 0.6077227592468262,
|
| 81 |
+
"position": 7,
|
| 82 |
+
"variant": "standard"
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"activation": 0.6077227592468262,
|
| 86 |
+
"position": 7,
|
| 87 |
+
"variant": "standard"
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"activation": 0.6077227592468262,
|
| 91 |
+
"position": 7,
|
| 92 |
+
"variant": "standard"
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"activation": 0.6077227592468262,
|
| 96 |
+
"position": 7,
|
| 97 |
+
"variant": "standard"
|
| 98 |
+
}
|
| 99 |
+
]
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"feature_id": 3106,
|
| 103 |
+
"act_rate": 0.11097999662160873,
|
| 104 |
+
"top_examples": [
|
| 105 |
+
{
|
| 106 |
+
"activation": 0.458252489566803,
|
| 107 |
+
"position": 3,
|
| 108 |
+
"variant": "standard"
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"activation": 0.458252489566803,
|
| 112 |
+
"position": 3,
|
| 113 |
+
"variant": "standard"
|
| 114 |
+
},
|
| 115 |
+
{
|
| 116 |
+
"activation": 0.45644882321357727,
|
| 117 |
+
"position": 47,
|
| 118 |
+
"variant": "standard"
|
| 119 |
+
},
|
| 120 |
+
{
|
| 121 |
+
"activation": 0.45644882321357727,
|
| 122 |
+
"position": 47,
|
| 123 |
+
"variant": "standard"
|
| 124 |
+
},
|
| 125 |
+
{
|
| 126 |
+
"activation": 0.4534014165401459,
|
| 127 |
+
"position": 46,
|
| 128 |
+
"variant": "standard"
|
| 129 |
+
}
|
| 130 |
+
]
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"feature_id": 2939,
|
| 134 |
+
"act_rate": 0.10080999881029129,
|
| 135 |
+
"top_examples": [
|
| 136 |
+
{
|
| 137 |
+
"activation": 0.9360920190811157,
|
| 138 |
+
"position": 12,
|
| 139 |
+
"variant": "standard"
|
| 140 |
+
},
|
| 141 |
+
{
|
| 142 |
+
"activation": 0.9360920190811157,
|
| 143 |
+
"position": 12,
|
| 144 |
+
"variant": "standard"
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"activation": 0.9333191514015198,
|
| 148 |
+
"position": 5,
|
| 149 |
+
"variant": "standard"
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"activation": 0.9333191514015198,
|
| 153 |
+
"position": 5,
|
| 154 |
+
"variant": "standard"
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"activation": 0.8574860692024231,
|
| 158 |
+
"position": 20,
|
| 159 |
+
"variant": "standard"
|
| 160 |
+
}
|
| 161 |
+
]
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"feature_id": 61,
|
| 165 |
+
"act_rate": 0.0965299978852272,
|
| 166 |
+
"top_examples": [
|
| 167 |
+
{
|
| 168 |
+
"activation": 0.49229905009269714,
|
| 169 |
+
"position": 4,
|
| 170 |
+
"variant": "standard"
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"activation": 0.49229905009269714,
|
| 174 |
+
"position": 4,
|
| 175 |
+
"variant": "standard"
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"activation": 0.4829713702201843,
|
| 179 |
+
"position": 8,
|
| 180 |
+
"variant": "standard"
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"activation": 0.4829713702201843,
|
| 184 |
+
"position": 8,
|
| 185 |
+
"variant": "standard"
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"activation": 0.47879478335380554,
|
| 189 |
+
"position": 10,
|
| 190 |
+
"variant": "standard"
|
| 191 |
+
}
|
| 192 |
+
]
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"feature_id": 4034,
|
| 196 |
+
"act_rate": 0.09401000291109085,
|
| 197 |
+
"top_examples": [
|
| 198 |
+
{
|
| 199 |
+
"activation": 0.5372604131698608,
|
| 200 |
+
"position": 52,
|
| 201 |
+
"variant": "standard"
|
| 202 |
+
},
|
| 203 |
+
{
|
| 204 |
+
"activation": 0.5372604131698608,
|
| 205 |
+
"position": 52,
|
| 206 |
+
"variant": "standard"
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"activation": 0.5347657203674316,
|
| 210 |
+
"position": 108,
|
| 211 |
+
"variant": "standard"
|
| 212 |
+
},
|
| 213 |
+
{
|
| 214 |
+
"activation": 0.5347657203674316,
|
| 215 |
+
"position": 108,
|
| 216 |
+
"variant": "standard"
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"activation": 0.5321744680404663,
|
| 220 |
+
"position": 60,
|
| 221 |
+
"variant": "standard"
|
| 222 |
+
}
|
| 223 |
+
]
|
| 224 |
+
},
|
| 225 |
+
{
|
| 226 |
+
"feature_id": 3787,
|
| 227 |
+
"act_rate": 0.091279998421669,
|
| 228 |
+
"top_examples": [
|
| 229 |
+
{
|
| 230 |
+
"activation": 0.5871065258979797,
|
| 231 |
+
"position": 22,
|
| 232 |
+
"variant": "standard"
|
| 233 |
+
},
|
| 234 |
+
{
|
| 235 |
+
"activation": 0.5871065258979797,
|
| 236 |
+
"position": 22,
|
| 237 |
+
"variant": "standard"
|
| 238 |
+
},
|
| 239 |
+
{
|
| 240 |
+
"activation": 0.5592474937438965,
|
| 241 |
+
"position": 55,
|
| 242 |
+
"variant": "standard"
|
| 243 |
+
},
|
| 244 |
+
{
|
| 245 |
+
"activation": 0.5592474937438965,
|
| 246 |
+
"position": 55,
|
| 247 |
+
"variant": "standard"
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"activation": 0.5409792065620422,
|
| 251 |
+
"position": 91,
|
| 252 |
+
"variant": "standard"
|
| 253 |
+
}
|
| 254 |
+
]
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"feature_id": 280,
|
| 258 |
+
"act_rate": 0.09025000035762787,
|
| 259 |
+
"top_examples": [
|
| 260 |
+
{
|
| 261 |
+
"activation": 0.534440279006958,
|
| 262 |
+
"position": 7,
|
| 263 |
+
"variant": "standard"
|
| 264 |
+
},
|
| 265 |
+
{
|
| 266 |
+
"activation": 0.534440279006958,
|
| 267 |
+
"position": 7,
|
| 268 |
+
"variant": "standard"
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"activation": 0.5156826376914978,
|
| 272 |
+
"position": 9,
|
| 273 |
+
"variant": "standard"
|
| 274 |
+
},
|
| 275 |
+
{
|
| 276 |
+
"activation": 0.5156826376914978,
|
| 277 |
+
"position": 9,
|
| 278 |
+
"variant": "standard"
|
| 279 |
+
},
|
| 280 |
+
{
|
| 281 |
+
"activation": 0.5135127305984497,
|
| 282 |
+
"position": 9,
|
| 283 |
+
"variant": "standard"
|
| 284 |
+
}
|
| 285 |
+
]
|
| 286 |
+
},
|
| 287 |
+
{
|
| 288 |
+
"feature_id": 2159,
|
| 289 |
+
"act_rate": 0.0877000018954277,
|
| 290 |
+
"top_examples": [
|
| 291 |
+
{
|
| 292 |
+
"activation": 0.603478729724884,
|
| 293 |
+
"position": 9,
|
| 294 |
+
"variant": "standard"
|
| 295 |
+
},
|
| 296 |
+
{
|
| 297 |
+
"activation": 0.603478729724884,
|
| 298 |
+
"position": 9,
|
| 299 |
+
"variant": "standard"
|
| 300 |
+
},
|
| 301 |
+
{
|
| 302 |
+
"activation": 0.603478729724884,
|
| 303 |
+
"position": 9,
|
| 304 |
+
"variant": "standard"
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"activation": 0.5963488817214966,
|
| 308 |
+
"position": 9,
|
| 309 |
+
"variant": "standard"
|
| 310 |
+
},
|
| 311 |
+
{
|
| 312 |
+
"activation": 0.5963488817214966,
|
| 313 |
+
"position": 9,
|
| 314 |
+
"variant": "standard"
|
| 315 |
+
}
|
| 316 |
+
]
|
| 317 |
+
},
|
| 318 |
+
{
|
| 319 |
+
"feature_id": 3645,
|
| 320 |
+
"act_rate": 0.003819999983534217,
|
| 321 |
+
"top_examples": [
|
| 322 |
+
{
|
| 323 |
+
"activation": 0.21240393817424774,
|
| 324 |
+
"position": 202,
|
| 325 |
+
"variant": "standard"
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"activation": 0.21240393817424774,
|
| 329 |
+
"position": 202,
|
| 330 |
+
"variant": "standard"
|
| 331 |
+
},
|
| 332 |
+
{
|
| 333 |
+
"activation": 0.20894747972488403,
|
| 334 |
+
"position": 217,
|
| 335 |
+
"variant": "standard"
|
| 336 |
+
},
|
| 337 |
+
{
|
| 338 |
+
"activation": 0.20894747972488403,
|
| 339 |
+
"position": 217,
|
| 340 |
+
"variant": "standard"
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"activation": 0.20496299862861633,
|
| 344 |
+
"position": 40,
|
| 345 |
+
"variant": "standard"
|
| 346 |
+
}
|
| 347 |
+
]
|
| 348 |
+
},
|
| 349 |
+
{
|
| 350 |
+
"feature_id": 2682,
|
| 351 |
+
"act_rate": 0.003809999907389283,
|
| 352 |
+
"top_examples": [
|
| 353 |
+
{
|
| 354 |
+
"activation": 0.2780674993991852,
|
| 355 |
+
"position": 3,
|
| 356 |
+
"variant": "standard"
|
| 357 |
+
},
|
| 358 |
+
{
|
| 359 |
+
"activation": 0.2780674993991852,
|
| 360 |
+
"position": 3,
|
| 361 |
+
"variant": "standard"
|
| 362 |
+
},
|
| 363 |
+
{
|
| 364 |
+
"activation": 0.2724977433681488,
|
| 365 |
+
"position": 3,
|
| 366 |
+
"variant": "standard"
|
| 367 |
+
},
|
| 368 |
+
{
|
| 369 |
+
"activation": 0.2724977433681488,
|
| 370 |
+
"position": 3,
|
| 371 |
+
"variant": "standard"
|
| 372 |
+
},
|
| 373 |
+
{
|
| 374 |
+
"activation": 0.263476699590683,
|
| 375 |
+
"position": 3,
|
| 376 |
+
"variant": "standard"
|
| 377 |
+
}
|
| 378 |
+
]
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"feature_id": 2786,
|
| 382 |
+
"act_rate": 0.003800000064074993,
|
| 383 |
+
"top_examples": [
|
| 384 |
+
{
|
| 385 |
+
"activation": 0.3066694736480713,
|
| 386 |
+
"position": 2,
|
| 387 |
+
"variant": "standard"
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"activation": 0.3066694736480713,
|
| 391 |
+
"position": 2,
|
| 392 |
+
"variant": "standard"
|
| 393 |
+
},
|
| 394 |
+
{
|
| 395 |
+
"activation": 0.3066694438457489,
|
| 396 |
+
"position": 2,
|
| 397 |
+
"variant": "standard"
|
| 398 |
+
},
|
| 399 |
+
{
|
| 400 |
+
"activation": 0.3066694438457489,
|
| 401 |
+
"position": 2,
|
| 402 |
+
"variant": "standard"
|
| 403 |
+
},
|
| 404 |
+
{
|
| 405 |
+
"activation": 0.3066694438457489,
|
| 406 |
+
"position": 2,
|
| 407 |
+
"variant": "standard"
|
| 408 |
+
}
|
| 409 |
+
]
|
| 410 |
+
},
|
| 411 |
+
{
|
| 412 |
+
"feature_id": 1681,
|
| 413 |
+
"act_rate": 0.003800000064074993,
|
| 414 |
+
"top_examples": [
|
| 415 |
+
{
|
| 416 |
+
"activation": 0.21353764832019806,
|
| 417 |
+
"position": 40,
|
| 418 |
+
"variant": "standard"
|
| 419 |
+
},
|
| 420 |
+
{
|
| 421 |
+
"activation": 0.21353764832019806,
|
| 422 |
+
"position": 40,
|
| 423 |
+
"variant": "standard"
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"activation": 0.2118273675441742,
|
| 427 |
+
"position": 217,
|
| 428 |
+
"variant": "standard"
|
| 429 |
+
},
|
| 430 |
+
{
|
| 431 |
+
"activation": 0.2118273675441742,
|
| 432 |
+
"position": 217,
|
| 433 |
+
"variant": "standard"
|
| 434 |
+
},
|
| 435 |
+
{
|
| 436 |
+
"activation": 0.21169668436050415,
|
| 437 |
+
"position": 202,
|
| 438 |
+
"variant": "standard"
|
| 439 |
+
}
|
| 440 |
+
]
|
| 441 |
+
}
|
| 442 |
+
],
|
| 443 |
+
"n_total_features": 4096
|
| 444 |
+
}
|
standard_L0_hook_resid_post/config.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "standard",
|
| 3 |
+
"layer": 0,
|
| 4 |
+
"hook": "hook_resid_post",
|
| 5 |
+
"d_sae": 4096,
|
| 6 |
+
"k": 40,
|
| 7 |
+
"n_tokens": 500000,
|
| 8 |
+
"batch_size": 4096,
|
| 9 |
+
"lr": 0.0003,
|
| 10 |
+
"final": {
|
| 11 |
+
"step": 122,
|
| 12 |
+
"loss": 3.7668752670288086,
|
| 13 |
+
"losses": {
|
| 14 |
+
"mse_loss": 3.7668752670288086,
|
| 15 |
+
"auxiliary_reconstruction_loss": 0.0
|
| 16 |
+
},
|
| 17 |
+
"l0": 40.0,
|
| 18 |
+
"mse": 0.007357178255915642,
|
| 19 |
+
"fve": 0.7880726447111882,
|
| 20 |
+
"n_dead": 0
|
| 21 |
+
}
|
| 22 |
+
}
|
standard_L0_hook_resid_post/history.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 50,
|
| 4 |
+
"loss": 6.100651741027832,
|
| 5 |
+
"losses": {
|
| 6 |
+
"mse_loss": 6.100651741027832,
|
| 7 |
+
"auxiliary_reconstruction_loss": 0.0
|
| 8 |
+
},
|
| 9 |
+
"l0": 40.0,
|
| 10 |
+
"mse": 0.011915335431694984,
|
| 11 |
+
"fve": 0.6626028674075588,
|
| 12 |
+
"n_dead": 0
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"step": 100,
|
| 16 |
+
"loss": 4.176002502441406,
|
| 17 |
+
"losses": {
|
| 18 |
+
"mse_loss": 4.176002502441406,
|
| 19 |
+
"auxiliary_reconstruction_loss": 0.0
|
| 20 |
+
},
|
| 21 |
+
"l0": 40.0,
|
| 22 |
+
"mse": 0.008156255818903446,
|
| 23 |
+
"fve": 0.7637865209658066,
|
| 24 |
+
"n_dead": 0
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"step": 122,
|
| 28 |
+
"loss": 3.7668752670288086,
|
| 29 |
+
"losses": {
|
| 30 |
+
"mse_loss": 3.7668752670288086,
|
| 31 |
+
"auxiliary_reconstruction_loss": 0.0
|
| 32 |
+
},
|
| 33 |
+
"l0": 40.0,
|
| 34 |
+
"mse": 0.007357178255915642,
|
| 35 |
+
"fve": 0.7880726447111882,
|
| 36 |
+
"n_dead": 0
|
| 37 |
+
}
|
| 38 |
+
]
|
standard_L0_hook_resid_post/sae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d41284ab9bceb803122433b0ecdfbdbe5027bacb38b6fa2d54727dd395c26cc1
|
| 3 |
+
size 16798005
|
standard_L1_hook_resid_post/config.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "standard",
|
| 3 |
+
"layer": 1,
|
| 4 |
+
"d_sae": 4096,
|
| 5 |
+
"k": 40,
|
| 6 |
+
"normalize_activations": "expected_average_only_in",
|
| 7 |
+
"n_tokens": 10000000,
|
| 8 |
+
"batch_size": 4096,
|
| 9 |
+
"lr": 0.0003,
|
| 10 |
+
"final_training": {
|
| 11 |
+
"step": 2441,
|
| 12 |
+
"loss": 4.313589572906494,
|
| 13 |
+
"l0": 40.0,
|
| 14 |
+
"fve": 0.9156635482984559,
|
| 15 |
+
"n_dead": 1,
|
| 16 |
+
"mse": 0.008392197079956532
|
| 17 |
+
},
|
| 18 |
+
"dead_pct_heldout": 1.5380859375
|
| 19 |
+
}
|
standard_L1_hook_resid_post/history.json
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 200,
|
| 4 |
+
"loss": 8.472447395324707,
|
| 5 |
+
"l0": 40.0,
|
| 6 |
+
"fve": 0.8475441618823183,
|
| 7 |
+
"n_dead": 0,
|
| 8 |
+
"mse": 0.016547750681638718
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"step": 400,
|
| 12 |
+
"loss": 13.104448318481445,
|
| 13 |
+
"l0": 40.0,
|
| 14 |
+
"fve": 0.8741473969735887,
|
| 15 |
+
"n_dead": 602,
|
| 16 |
+
"mse": 0.013663655146956444
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"step": 600,
|
| 20 |
+
"loss": 11.666557312011719,
|
| 21 |
+
"l0": 40.0,
|
| 22 |
+
"fve": 0.897244935256183,
|
| 23 |
+
"n_dead": 294,
|
| 24 |
+
"mse": 0.012026979587972164
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"step": 800,
|
| 28 |
+
"loss": 8.557323455810547,
|
| 29 |
+
"l0": 40.0,
|
| 30 |
+
"fve": 0.9050136010213505,
|
| 31 |
+
"n_dead": 141,
|
| 32 |
+
"mse": 0.011152522638440132
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"step": 1000,
|
| 36 |
+
"loss": 5.929265975952148,
|
| 37 |
+
"l0": 40.0,
|
| 38 |
+
"fve": 0.8892356006901958,
|
| 39 |
+
"n_dead": 26,
|
| 40 |
+
"mse": 0.010530667379498482
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"step": 1200,
|
| 44 |
+
"loss": 5.310263633728027,
|
| 45 |
+
"l0": 40.0,
|
| 46 |
+
"fve": 0.9071743648944192,
|
| 47 |
+
"n_dead": 6,
|
| 48 |
+
"mse": 0.010134927928447723
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"step": 1400,
|
| 52 |
+
"loss": 5.067918300628662,
|
| 53 |
+
"l0": 40.0,
|
| 54 |
+
"fve": 0.9030405773914236,
|
| 55 |
+
"n_dead": 2,
|
| 56 |
+
"mse": 0.00982158724218607
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"step": 1600,
|
| 60 |
+
"loss": 4.7904052734375,
|
| 61 |
+
"l0": 40.0,
|
| 62 |
+
"fve": 0.9108113011155183,
|
| 63 |
+
"n_dead": 2,
|
| 64 |
+
"mse": 0.009283771738409996
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"step": 1800,
|
| 68 |
+
"loss": 4.771254062652588,
|
| 69 |
+
"l0": 40.0,
|
| 70 |
+
"fve": 0.9153215139625523,
|
| 71 |
+
"n_dead": 6,
|
| 72 |
+
"mse": 0.009105676785111427
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"step": 2000,
|
| 76 |
+
"loss": 4.466042518615723,
|
| 77 |
+
"l0": 40.0,
|
| 78 |
+
"fve": 0.9176716499875504,
|
| 79 |
+
"n_dead": 1,
|
| 80 |
+
"mse": 0.008688799105584621
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"step": 2200,
|
| 84 |
+
"loss": 4.474887371063232,
|
| 85 |
+
"l0": 40.0,
|
| 86 |
+
"fve": 0.9006545558548696,
|
| 87 |
+
"n_dead": 2,
|
| 88 |
+
"mse": 0.00867227278649807
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"step": 2400,
|
| 92 |
+
"loss": 4.392549991607666,
|
| 93 |
+
"l0": 40.0,
|
| 94 |
+
"fve": 0.9094376632806204,
|
| 95 |
+
"n_dead": 1,
|
| 96 |
+
"mse": 0.00854581780731678
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"step": 2441,
|
| 100 |
+
"loss": 4.313589572906494,
|
| 101 |
+
"l0": 40.0,
|
| 102 |
+
"fve": 0.9156635482984559,
|
| 103 |
+
"n_dead": 1,
|
| 104 |
+
"mse": 0.008392197079956532
|
| 105 |
+
}
|
| 106 |
+
]
|
standard_L1_hook_resid_post/sae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1099ad67ddea1fb3b4ef9752f9c57b6dba97c3cbfc88d595316e5c52c54704e1
|
| 3 |
+
size 16798005
|
standard_L2_hook_resid_post/config.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "standard",
|
| 3 |
+
"layer": 2,
|
| 4 |
+
"d_sae": 4096,
|
| 5 |
+
"k": 40,
|
| 6 |
+
"normalize_activations": "expected_average_only_in",
|
| 7 |
+
"n_tokens": 10000000,
|
| 8 |
+
"batch_size": 4096,
|
| 9 |
+
"lr": 0.0003,
|
| 10 |
+
"final_training": {
|
| 11 |
+
"step": 2441,
|
| 12 |
+
"loss": 9.37691879272461,
|
| 13 |
+
"l0": 40.0,
|
| 14 |
+
"fve": 0.9112300092129357,
|
| 15 |
+
"n_dead": 1,
|
| 16 |
+
"mse": 0.018243035301566124
|
| 17 |
+
},
|
| 18 |
+
"dead_pct_heldout": 1.220703125
|
| 19 |
+
}
|
standard_L2_hook_resid_post/history.json
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 200,
|
| 4 |
+
"loss": 17.65301513671875,
|
| 5 |
+
"l0": 40.0,
|
| 6 |
+
"fve": 0.8277783719921248,
|
| 7 |
+
"n_dead": 0,
|
| 8 |
+
"mse": 0.03447854518890381
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"step": 400,
|
| 12 |
+
"loss": 26.82003402709961,
|
| 13 |
+
"l0": 40.0,
|
| 14 |
+
"fve": 0.8783504287003407,
|
| 15 |
+
"n_dead": 885,
|
| 16 |
+
"mse": 0.02819967456161976
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"step": 600,
|
| 20 |
+
"loss": 24.623018264770508,
|
| 21 |
+
"l0": 40.0,
|
| 22 |
+
"fve": 0.8645155231541486,
|
| 23 |
+
"n_dead": 466,
|
| 24 |
+
"mse": 0.025606488808989525
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"step": 800,
|
| 28 |
+
"loss": 20.887020111083984,
|
| 29 |
+
"l0": 40.0,
|
| 30 |
+
"fve": 0.8902317708880264,
|
| 31 |
+
"n_dead": 211,
|
| 32 |
+
"mse": 0.02367589809000492
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"step": 1000,
|
| 36 |
+
"loss": 13.424010276794434,
|
| 37 |
+
"l0": 40.0,
|
| 38 |
+
"fve": 0.8956536633832358,
|
| 39 |
+
"n_dead": 45,
|
| 40 |
+
"mse": 0.02241015061736107
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"step": 1200,
|
| 44 |
+
"loss": 11.20499324798584,
|
| 45 |
+
"l0": 40.0,
|
| 46 |
+
"fve": 0.9072051860620202,
|
| 47 |
+
"n_dead": 9,
|
| 48 |
+
"mse": 0.021145537495613098
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"step": 1400,
|
| 52 |
+
"loss": 10.573310852050781,
|
| 53 |
+
"l0": 40.0,
|
| 54 |
+
"fve": 0.9040789272639144,
|
| 55 |
+
"n_dead": 3,
|
| 56 |
+
"mse": 0.02041182667016983
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"step": 1600,
|
| 60 |
+
"loss": 10.361445426940918,
|
| 61 |
+
"l0": 40.0,
|
| 62 |
+
"fve": 0.9104271378826995,
|
| 63 |
+
"n_dead": 1,
|
| 64 |
+
"mse": 0.020158452913165092
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"step": 1800,
|
| 68 |
+
"loss": 9.941110610961914,
|
| 69 |
+
"l0": 40.0,
|
| 70 |
+
"fve": 0.9068027304791877,
|
| 71 |
+
"n_dead": 1,
|
| 72 |
+
"mse": 0.01934068836271763
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"step": 2000,
|
| 76 |
+
"loss": 9.595905303955078,
|
| 77 |
+
"l0": 40.0,
|
| 78 |
+
"fve": 0.9189567435127948,
|
| 79 |
+
"n_dead": 1,
|
| 80 |
+
"mse": 0.01866907998919487
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"step": 2200,
|
| 84 |
+
"loss": 9.470767974853516,
|
| 85 |
+
"l0": 40.0,
|
| 86 |
+
"fve": 0.9136617571485228,
|
| 87 |
+
"n_dead": 1,
|
| 88 |
+
"mse": 0.01842562109231949
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"step": 2400,
|
| 92 |
+
"loss": 9.251761436462402,
|
| 93 |
+
"l0": 40.0,
|
| 94 |
+
"fve": 0.9031395392007785,
|
| 95 |
+
"n_dead": 1,
|
| 96 |
+
"mse": 0.01799953728914261
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"step": 2441,
|
| 100 |
+
"loss": 9.37691879272461,
|
| 101 |
+
"l0": 40.0,
|
| 102 |
+
"fve": 0.9112300092129357,
|
| 103 |
+
"n_dead": 1,
|
| 104 |
+
"mse": 0.018243035301566124
|
| 105 |
+
}
|
| 106 |
+
]
|
standard_L2_hook_resid_post/sae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c36a71a6c67b951c929ca6a65422b6a1490dd39bcb78f79480fd3576b95e341
|
| 3 |
+
size 16798005
|
standard_L3_hook_resid_post/config.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "standard",
|
| 3 |
+
"layer": 3,
|
| 4 |
+
"d_sae": 4096,
|
| 5 |
+
"k": 40,
|
| 6 |
+
"normalize_activations": "expected_average_only_in",
|
| 7 |
+
"n_tokens": 10000000,
|
| 8 |
+
"batch_size": 4096,
|
| 9 |
+
"lr": 0.0003,
|
| 10 |
+
"final_training": {
|
| 11 |
+
"step": 2441,
|
| 12 |
+
"loss": 19.480852127075195,
|
| 13 |
+
"l0": 40.0,
|
| 14 |
+
"fve": 0.8956661837197387,
|
| 15 |
+
"n_dead": 1,
|
| 16 |
+
"mse": 0.03790052607655525
|
| 17 |
+
},
|
| 18 |
+
"dead_pct_heldout": 1.025390625
|
| 19 |
+
}
|
standard_L3_hook_resid_post/history.json
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 200,
|
| 4 |
+
"loss": 34.482032775878906,
|
| 5 |
+
"l0": 40.0,
|
| 6 |
+
"fve": 0.8283586159312837,
|
| 7 |
+
"n_dead": 0,
|
| 8 |
+
"mse": 0.06734772026538849
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"step": 400,
|
| 12 |
+
"loss": 53.01472473144531,
|
| 13 |
+
"l0": 40.0,
|
| 14 |
+
"fve": 0.8312126634199706,
|
| 15 |
+
"n_dead": 1054,
|
| 16 |
+
"mse": 0.05626294016838074
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"step": 600,
|
| 20 |
+
"loss": 48.47029495239258,
|
| 21 |
+
"l0": 40.0,
|
| 22 |
+
"fve": 0.884058701037366,
|
| 23 |
+
"n_dead": 593,
|
| 24 |
+
"mse": 0.05088154226541519
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"step": 800,
|
| 28 |
+
"loss": 42.332054138183594,
|
| 29 |
+
"l0": 40.0,
|
| 30 |
+
"fve": 0.8804909955406929,
|
| 31 |
+
"n_dead": 205,
|
| 32 |
+
"mse": 0.04835565760731697
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"step": 1000,
|
| 36 |
+
"loss": 28.461124420166016,
|
| 37 |
+
"l0": 40.0,
|
| 38 |
+
"fve": 0.888121025084119,
|
| 39 |
+
"n_dead": 58,
|
| 40 |
+
"mse": 0.04564930498600006
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"step": 1200,
|
| 44 |
+
"loss": 24.47071647644043,
|
| 45 |
+
"l0": 40.0,
|
| 46 |
+
"fve": 0.8850747737111373,
|
| 47 |
+
"n_dead": 21,
|
| 48 |
+
"mse": 0.044211357831954956
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"step": 1400,
|
| 52 |
+
"loss": 23.19893455505371,
|
| 53 |
+
"l0": 40.0,
|
| 54 |
+
"fve": 0.8942765037624966,
|
| 55 |
+
"n_dead": 15,
|
| 56 |
+
"mse": 0.04282252490520477
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"step": 1600,
|
| 60 |
+
"loss": 21.993581771850586,
|
| 61 |
+
"l0": 40.0,
|
| 62 |
+
"fve": 0.8978253015716876,
|
| 63 |
+
"n_dead": 10,
|
| 64 |
+
"mse": 0.04135049134492874
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"step": 1800,
|
| 68 |
+
"loss": 21.293956756591797,
|
| 69 |
+
"l0": 40.0,
|
| 70 |
+
"fve": 0.8922899854658137,
|
| 71 |
+
"n_dead": 8,
|
| 72 |
+
"mse": 0.040335513651371
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"step": 2000,
|
| 76 |
+
"loss": 20.39199447631836,
|
| 77 |
+
"l0": 40.0,
|
| 78 |
+
"fve": 0.9074694820636785,
|
| 79 |
+
"n_dead": 4,
|
| 80 |
+
"mse": 0.03921680152416229
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"step": 2200,
|
| 84 |
+
"loss": 20.141448974609375,
|
| 85 |
+
"l0": 40.0,
|
| 86 |
+
"fve": 0.8943716286803387,
|
| 87 |
+
"n_dead": 3,
|
| 88 |
+
"mse": 0.038883697241544724
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"step": 2400,
|
| 92 |
+
"loss": 19.7071475982666,
|
| 93 |
+
"l0": 40.0,
|
| 94 |
+
"fve": 0.8941642474910445,
|
| 95 |
+
"n_dead": 1,
|
| 96 |
+
"mse": 0.03834077715873718
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"step": 2441,
|
| 100 |
+
"loss": 19.480852127075195,
|
| 101 |
+
"l0": 40.0,
|
| 102 |
+
"fve": 0.8956661837197387,
|
| 103 |
+
"n_dead": 1,
|
| 104 |
+
"mse": 0.03790052607655525
|
| 105 |
+
}
|
| 106 |
+
]
|
standard_L3_hook_resid_post/sae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b9dc2de61616a842a8ac0c32f8d1da82606b2fa17f57a91f7252d3d9dcaf215f
|
| 3 |
+
size 16798005
|
three_probes.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"probe_1_variant_split": {
|
| 3 |
+
"onsets_all": 12000,
|
| 4 |
+
"onsets_adhd": 8770,
|
| 5 |
+
"onsets_std": 3230,
|
| 6 |
+
"primary_mixed": 312,
|
| 7 |
+
"symmetry_mixed": 139,
|
| 8 |
+
"primary_adhd_only": 312,
|
| 9 |
+
"symmetry_std_only": 216,
|
| 10 |
+
"feat2504_rate_adhd_only_in_adhd_L2": 0.5907639861106873,
|
| 11 |
+
"feat2504_rate_adhd_only_in_std_L2": 0.00034207524731755257
|
| 12 |
+
},
|
| 13 |
+
"probe_2_zero_ablation": {
|
| 14 |
+
"adhd_baseline_reg_rate": 0.41327800829875516,
|
| 15 |
+
"adhd_zero_ablate_L2_step_onset_reg_rate": 0.4056761268781302,
|
| 16 |
+
"adhd_zero_ablate_L2_all_positions_reg_rate": 0.404344193817878,
|
| 17 |
+
"delta_step_onset": -0.007601881420624956,
|
| 18 |
+
"delta_all_positions": -0.008933814480877156
|
| 19 |
+
},
|
| 20 |
+
"probe_3_L1_steering": {
|
| 21 |
+
"baseline": {
|
| 22 |
+
"sep_rate": 0.08425135764158262,
|
| 23 |
+
"mean_step_count": 6.8125,
|
| 24 |
+
"spearman_rho": 0.5305203306664215,
|
| 25 |
+
"spearman_pval": 4.155845216990538e-07,
|
| 26 |
+
"regulation_rate": 0.09357798165137615,
|
| 27 |
+
"n_tokens_total": 6445,
|
| 28 |
+
"n_steps_total": 545,
|
| 29 |
+
"reached_end_rate": 0.975
|
| 30 |
+
},
|
| 31 |
+
"coef_+2.0": {
|
| 32 |
+
"sep_rate": 0.08416458852867831,
|
| 33 |
+
"mean_step_count": 6.775,
|
| 34 |
+
"spearman_rho": 0.5369945327780861,
|
| 35 |
+
"spearman_pval": 2.8189998198585957e-07,
|
| 36 |
+
"regulation_rate": 0.0940959409594096,
|
| 37 |
+
"n_tokens_total": 6416,
|
| 38 |
+
"n_steps_total": 542,
|
| 39 |
+
"reached_end_rate": 0.975
|
| 40 |
+
},
|
| 41 |
+
"coef_+5.0": {
|
| 42 |
+
"sep_rate": 0.08443188951663529,
|
| 43 |
+
"mean_step_count": 6.75,
|
| 44 |
+
"spearman_rho": 0.5421974917403188,
|
| 45 |
+
"spearman_pval": 2.051407247246999e-07,
|
| 46 |
+
"regulation_rate": 0.08703703703703704,
|
| 47 |
+
"n_tokens_total": 6372,
|
| 48 |
+
"n_steps_total": 540,
|
| 49 |
+
"reached_end_rate": 0.975
|
| 50 |
+
},
|
| 51 |
+
"coef_+10.0": {
|
| 52 |
+
"sep_rate": 0.08677685950413223,
|
| 53 |
+
"mean_step_count": 6.8125,
|
| 54 |
+
"spearman_rho": 0.33306527760157806,
|
| 55 |
+
"spearman_pval": 0.002537468671163372,
|
| 56 |
+
"regulation_rate": 0.08440366972477065,
|
| 57 |
+
"n_tokens_total": 6292,
|
| 58 |
+
"n_steps_total": 545,
|
| 59 |
+
"reached_end_rate": 0.975
|
| 60 |
+
}
|
| 61 |
+
}
|
| 62 |
+
}
|