han1823123123 commited on
Commit
eadf1a1
·
verified ·
1 Parent(s): 5ae23c0

Upload training_logs/hill_z_shared_T20__seed42.json with huggingface_hub

Browse files
training_logs/hill_z_shared_T20__seed42.json ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 97536064.0,
4
+ 117491.5078125,
5
+ 101118.1875,
6
+ 97619.4140625,
7
+ 96506.7890625,
8
+ 95350.0625,
9
+ 94947.6953125,
10
+ 94305.328125,
11
+ 94223.5,
12
+ 94205.7734375,
13
+ 93646.6328125,
14
+ 95639.0078125,
15
+ 93721.515625,
16
+ 93406.5703125,
17
+ 93761.3828125,
18
+ 93166.7265625
19
+ ],
20
+ "l0": [
21
+ 500.0,
22
+ 241.929443359375,
23
+ 244.61865234375,
24
+ 251.415771484375,
25
+ 262.099365234375,
26
+ 266.942138671875,
27
+ 272.4658203125,
28
+ 277.576904296875,
29
+ 281.66064453125,
30
+ 283.83154296875,
31
+ 287.229248046875,
32
+ 289.229248046875,
33
+ 294.20556640625,
34
+ 297.845947265625,
35
+ 300.75634765625,
36
+ 304.318359375
37
+ ],
38
+ "steps_logged": [
39
+ 0,
40
+ 200,
41
+ 400,
42
+ 600,
43
+ 800,
44
+ 1000,
45
+ 1200,
46
+ 1400,
47
+ 1600,
48
+ 1800,
49
+ 2000,
50
+ 2200,
51
+ 2400,
52
+ 2600,
53
+ 2800,
54
+ 3000
55
+ ],
56
+ "final_step": 3000,
57
+ "converged": true,
58
+ "plateau_last": 0.003466213210343796,
59
+ "elapsed_s": 802.3623249530792,
60
+ "T_max": 20,
61
+ "shifts": null,
62
+ "matryoshka_h_size": 3686,
63
+ "alpha": 1.0,
64
+ "src_class": "SubseqSharedH8",
65
+ "n_seqs_used": 24000,
66
+ "ctx_used": 64,
67
+ "ctx_slice_direction": "last",
68
+ "row": 520,
69
+ "arch_id": "hill_z_shared_T20",
70
+ "arch": "hill_z_shared_T20",
71
+ "group": 99,
72
+ "src_module": "src.architectures.phase7_subseq_z_variants",
73
+ "T": null,
74
+ "t_sample": null,
75
+ "n_layers": null,
76
+ "k_win": 500,
77
+ "k_pos": null,
78
+ "gamma": null,
79
+ "n_scales": null,
80
+ "seed": 42,
81
+ "d_in": 2304,
82
+ "d_sae": 18432,
83
+ "subject_model": "google/gemma-2-2b",
84
+ "anchor_layer": 12,
85
+ "mlc_layers": [
86
+ 10,
87
+ 11,
88
+ 12,
89
+ 13,
90
+ 14
91
+ ],
92
+ "phase": "phase7_unification",
93
+ "run_id": "hill_z_shared_T20__seed42"
94
+ }