han1823123123 committed
Commit e44369d · verified · 1 Parent(s): a27e009

Upload training_logs/txc_bare_antidead_t3_kpos20__seed1.json with huggingface_hub
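As the commit message notes, the file was pushed with huggingface_hub. A minimal sketch of the kind of call that produces such a commit is shown below; the repo_id is a placeholder, since the target repository is not named in this commit.

from huggingface_hub import HfApi

api = HfApi()
api.upload_file(
    path_or_fileobj="training_logs/txc_bare_antidead_t3_kpos20__seed1.json",
    path_in_repo="training_logs/txc_bare_antidead_t3_kpos20__seed1.json",
    repo_id="han1823123123/example-repo",  # placeholder: actual repo not named in the commit
    commit_message="Upload training_logs/txc_bare_antidead_t3_kpos20__seed1.json with huggingface_hub",
)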

training_logs/txc_bare_antidead_t3_kpos20__seed1.json ADDED
@@ -0,0 +1,106 @@
+ {
+ "arch_id": "txc_bare_antidead_t3_kpos20",
+ "src_class": "TXCBareAntidead",
+ "src_module": "src.architectures.txc_bare_antidead",
+ "T": 3,
+ "T_max": null,
+ "t_sample": null,
+ "k_win": 60,
+ "k_pos": 20,
+ "shifts": null,
+ "alpha": null,
+ "gamma": null,
+ "n_scales": null,
+ "n_layers": null,
+ "mlc_layers": null,
+ "d_in": 2304,
+ "d_sae": 18432,
+ "subject_model": "google/gemma-2-2b",
+ "anchor_layer": 12,
+ "hook_name": null,
+ "seed": 1,
+ "phase": "phase7_unification",
+ "group": 2,
+ "recipe": "TXCBareAntidead at k_pos=20 (W Phase 1 sweep)",
+ "purpose": "W Phase 1 sweep cell \u2014 sparsity-matched TXC",
+ "batch_size": 4096,
+ "lr": 0.0003,
+ "max_steps": 25000,
+ "elapsed_s": 1801.2035977840424,
+ "final_step": 4200,
+ "converged": true,
+ "plateau_last": 0.019798724405281475,
+ "loss": [
+ 24299.3515625,
+ 6538.9521484375,
+ 5756.046875,
+ 5459.55224609375,
+ 5268.6748046875,
+ 5079.5498046875,
+ 4991.13720703125,
+ 4949.1064453125,
+ 4857.50048828125,
+ 4806.00390625,
+ 4760.54296875,
+ 4740.56005859375,
+ 4636.8310546875,
+ 4667.8154296875,
+ 4634.9638671875,
+ 4666.92138671875,
+ 4597.71826171875,
+ 4586.5087890625,
+ 4524.369140625,
+ 4532.68896484375,
+ 4551.59765625,
+ 4549.6708984375
+ ],
+ "l0": [
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0,
+ 60.0
+ ],
+ "steps_logged": [
+ 0,
+ 200,
+ 400,
+ 600,
+ 800,
+ 1000,
+ 1200,
+ 1400,
+ 1600,
+ 1800,
+ 2000,
+ 2200,
+ 2400,
+ 2600,
+ 2800,
+ 3000,
+ 3200,
+ 3400,
+ 3600,
+ 3800,
+ 4000,
+ 4200
+ ],
+ "n_train_seqs": 24000
+ }
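
For anyone consuming this log, a minimal Python sketch along the following lines reads the JSON and reports the headline numbers; it assumes the file has been downloaded locally under its uploaded filename.

import json

# Load the training log (path assumes a local copy of the uploaded file).
with open("txc_bare_antidead_t3_kpos20__seed1.json") as f:
    log = json.load(f)

# Print a short summary of the run.
print(f"arch: {log['arch_id']}  seed: {log['seed']}")
print(f"converged: {log['converged']} at step {log['final_step']} "
      f"({log['elapsed_s'] / 60:.1f} min)")
print(f"final loss: {log['loss'][-1]:.1f}  mean L0: {sum(log['l0']) / len(log['l0']):.1f}")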