Upload training_logs/agentic_txc_02_kpos20__seed42.json with huggingface_hub

Browse files

Files changed (1) hide show

training_logs/agentic_txc_02_kpos20__seed42.json +95 -0

training_logs/agentic_txc_02_kpos20__seed42.json ADDED Viewed

	@@ -0,0 +1,95 @@

+{
+  "arch_id": "agentic_txc_02_kpos20",
+  "src_class": "MatryoshkaTXCDRContrastiveMultiscale",
+  "src_module": "src.architectures.matryoshka_txcdr_contrastive_multiscale",
+  "T": 5,
+  "T_max": null,
+  "t_sample": null,
+  "k_win": 100,
+  "k_pos": 20,
+  "shifts": [
+    1,
+    2,
+    3
+  ],
+  "alpha": 1.0,
+  "gamma": 0.5,
+  "n_scales": 3,
+  "n_layers": null,
+  "mlc_layers": null,
+  "d_in": 2304,
+  "d_sae": 18432,
+  "subject_model": "google/gemma-2-2b",
+  "anchor_layer": 12,
+  "hook_name": null,
+  "seed": 42,
+  "phase": "phase7_unification",
+  "group": "W_phase1_kpos20",
+  "recipe": "agentic_txc_02 recipe (TXC + multi-scale matryoshka InfoNCE) at k_pos=20, T=5",
+  "purpose": "W Phase 1 sweep cell E \u2014 matryoshka multiscale TXC at matched per-token sparsity",
+  "batch_size": 4096,
+  "lr": 0.0003,
+  "max_steps": 25000,
+  "elapsed_s": 5705.477766036987,
+  "final_step": 3200,
+  "converged": true,
+  "plateau_last": 0.017188159642111015,
+  "loss": [
+    60403.3984375,
+    19076.173828125,
+    16767.482421875,
+    15894.0458984375,
+    15516.6494140625,
+    15243.857421875,
+    15038.884765625,
+    14869.1669921875,
+    14917.818359375,
+    14687.623046875,
+    14624.447265625,
+    14589.9912109375,
+    14582.19140625,
+    14464.4052734375,
+    14496.875,
+    14398.384765625,
+    14480.611328125
+  ],
+  "l0": [
+    100.0,
+    100.0,
+    100.0,
+    100.0,
+    100.0,
+    100.0,
+    100.0,
+    100.0,
+    100.0,
+    100.0,
+    100.0,
+    100.0,
+    100.0,
+    100.0,
+    100.0,
+    100.0,
+    100.0
+  ],
+  "steps_logged": [
+    0,
+    200,
+    400,
+    600,
+    800,
+    1000,
+    1200,
+    1400,
+    1600,
+    1800,
+    2000,
+    2200,
+    2400,
+    2600,
+    2800,
+    3000,
+    3200
+  ],
+  "n_train_seqs": 24000
+}