han1823123123 commited on
Commit
b5a6003
·
verified ·
1 Parent(s): 24d7f82

Upload training_logs/agentic_txc_02_kpos20__seed42.json with huggingface_hub

Browse files
training_logs/agentic_txc_02_kpos20__seed42.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "arch_id": "agentic_txc_02_kpos20",
3
+ "src_class": "MatryoshkaTXCDRContrastiveMultiscale",
4
+ "src_module": "src.architectures.matryoshka_txcdr_contrastive_multiscale",
5
+ "T": 5,
6
+ "T_max": null,
7
+ "t_sample": null,
8
+ "k_win": 100,
9
+ "k_pos": 20,
10
+ "shifts": [
11
+ 1,
12
+ 2,
13
+ 3
14
+ ],
15
+ "alpha": 1.0,
16
+ "gamma": 0.5,
17
+ "n_scales": 3,
18
+ "n_layers": null,
19
+ "mlc_layers": null,
20
+ "d_in": 2304,
21
+ "d_sae": 18432,
22
+ "subject_model": "google/gemma-2-2b",
23
+ "anchor_layer": 12,
24
+ "hook_name": null,
25
+ "seed": 42,
26
+ "phase": "phase7_unification",
27
+ "group": "W_phase1_kpos20",
28
+ "recipe": "agentic_txc_02 recipe (TXC + multi-scale matryoshka InfoNCE) at k_pos=20, T=5",
29
+ "purpose": "W Phase 1 sweep cell E \u2014 matryoshka multiscale TXC at matched per-token sparsity",
30
+ "batch_size": 4096,
31
+ "lr": 0.0003,
32
+ "max_steps": 25000,
33
+ "elapsed_s": 5705.477766036987,
34
+ "final_step": 3200,
35
+ "converged": true,
36
+ "plateau_last": 0.017188159642111015,
37
+ "loss": [
38
+ 60403.3984375,
39
+ 19076.173828125,
40
+ 16767.482421875,
41
+ 15894.0458984375,
42
+ 15516.6494140625,
43
+ 15243.857421875,
44
+ 15038.884765625,
45
+ 14869.1669921875,
46
+ 14917.818359375,
47
+ 14687.623046875,
48
+ 14624.447265625,
49
+ 14589.9912109375,
50
+ 14582.19140625,
51
+ 14464.4052734375,
52
+ 14496.875,
53
+ 14398.384765625,
54
+ 14480.611328125
55
+ ],
56
+ "l0": [
57
+ 100.0,
58
+ 100.0,
59
+ 100.0,
60
+ 100.0,
61
+ 100.0,
62
+ 100.0,
63
+ 100.0,
64
+ 100.0,
65
+ 100.0,
66
+ 100.0,
67
+ 100.0,
68
+ 100.0,
69
+ 100.0,
70
+ 100.0,
71
+ 100.0,
72
+ 100.0,
73
+ 100.0
74
+ ],
75
+ "steps_logged": [
76
+ 0,
77
+ 200,
78
+ 400,
79
+ 600,
80
+ 800,
81
+ 1000,
82
+ 1200,
83
+ 1400,
84
+ 1600,
85
+ 1800,
86
+ 2000,
87
+ 2200,
88
+ 2400,
89
+ 2600,
90
+ 2800,
91
+ 3000,
92
+ 3200
93
+ ],
94
+ "n_train_seqs": 24000
95
+ }