han1823123123 commited on
Commit
54d8b7c
·
verified ·
1 Parent(s): cd9482b

Upload training_logs/spatial_matry_h8_t10_kpos20_shifts2_pref3686_9216_18432_sub1_5_10_nested_uniform_contr__seed42.json with huggingface_hub

Browse files
training_logs/spatial_matry_h8_t10_kpos20_shifts2_pref3686_9216_18432_sub1_5_10_nested_uniform_contr__seed42.json ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 85536.9453125,
4
+ 38466.1328125,
5
+ 35270.8046875,
6
+ 33850.078125,
7
+ 32935.3984375,
8
+ 32410.20703125,
9
+ 31999.58984375,
10
+ 31649.3984375,
11
+ 31516.763671875,
12
+ 31348.26171875,
13
+ 31197.630859375,
14
+ 30997.08984375,
15
+ 31125.05859375,
16
+ 30974.0,
17
+ 30723.126953125,
18
+ 30741.302734375
19
+ ],
20
+ "l0": [
21
+ 200.0,
22
+ 200.0,
23
+ 200.0,
24
+ 200.0,
25
+ 200.0,
26
+ 200.0,
27
+ 200.0,
28
+ 200.0,
29
+ 200.0,
30
+ 200.0,
31
+ 200.0,
32
+ 200.0,
33
+ 200.0,
34
+ 200.0,
35
+ 200.0,
36
+ 200.0
37
+ ],
38
+ "steps_logged": [
39
+ 0,
40
+ 200,
41
+ 400,
42
+ 600,
43
+ 800,
44
+ 1000,
45
+ 1200,
46
+ 1400,
47
+ 1600,
48
+ 1800,
49
+ 2000,
50
+ 2200,
51
+ 2400,
52
+ 2600,
53
+ 2800,
54
+ 3000
55
+ ],
56
+ "final_step": 3000,
57
+ "converged": true,
58
+ "plateau_last": 0.019979922317185896,
59
+ "elapsed_s": 4568.830448389053,
60
+ "shifts": [
61
+ 2
62
+ ],
63
+ "level_prefix_sizes": [
64
+ 3686,
65
+ 9216,
66
+ 18432
67
+ ],
68
+ "level_subset_sizes": [
69
+ 1,
70
+ 5,
71
+ 10
72
+ ],
73
+ "nested": true,
74
+ "subset_mode": "uniform",
75
+ "enable_contrastive": true,
76
+ "matryoshka_h_size": 3686,
77
+ "final_step_wall_s": 4593.298326253891,
78
+ "row": -1,
79
+ "arch_id": "spatial_matry_h8_t10_kpos20_shifts2_pref3686_9216_18432_sub1_5_10_nested_uniform_contr",
80
+ "arch": "spatial_matry_h8_t10_kpos20_shifts2_pref3686_9216_18432_sub1_5_10_nested_uniform_contr",
81
+ "group": "deadzone_escape_phase2",
82
+ "src_class": "SpatialMatryoshkaH8",
83
+ "src_module": "src.architectures.spatial_matryoshka_h8",
84
+ "T": 10,
85
+ "T_max": null,
86
+ "t_sample": null,
87
+ "n_layers": null,
88
+ "k_win": 200,
89
+ "k_pos": 20,
90
+ "sigma_range": null,
91
+ "n_gaussians": 1,
92
+ "alpha": 1.0,
93
+ "gamma": null,
94
+ "n_scales": null,
95
+ "seed": 42,
96
+ "d_in": 2304,
97
+ "d_sae": 18432,
98
+ "subject_model": "google/gemma-2-2b",
99
+ "anchor_layer": 12,
100
+ "mlc_layers": [
101
+ 10,
102
+ 11,
103
+ 12,
104
+ 13,
105
+ 14
106
+ ],
107
+ "phase": "phase7_unification",
108
+ "purpose": "Han's deadzone-escape: low-rank features must reconstruct any single position (position-flexible); deeper features add compositional cross-position info. Tests whether T=10/20 with this loss escapes the T=2-5 deadzone.",
109
+ "recipe": "SpatialMatryoshkaH8 \u2014 H8 stack at T=10 k_pos=20 with random-subset Matryoshka decoder loss (prefixes=(3686, 9216, 18432), subsets=(1, 5, 10), nested, sample=uniform, contrastive=True)",
110
+ "run_id": "spatial_matry_h8_t10_kpos20_shifts2_pref3686_9216_18432_sub1_5_10_nested_uniform_contr__seed42"
111
+ }