han1823123123 commited on
Commit
916715d
·
verified ·
1 Parent(s): d3a5a62

Upload training_logs/phase57_partB_h8_bare_multidistance_t3__seed42.json with huggingface_hub

Browse files
training_logs/phase57_partB_h8_bare_multidistance_t3__seed42.json ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 83048.0859375,
4
+ 19990.353515625,
5
+ 17844.322265625,
6
+ 16666.828125,
7
+ 16000.46484375,
8
+ 15332.2353515625,
9
+ 15093.251953125,
10
+ 14802.3876953125,
11
+ 14581.7060546875,
12
+ 14409.3466796875,
13
+ 14118.1943359375,
14
+ 14076.8818359375,
15
+ 13808.857421875,
16
+ 13853.79296875,
17
+ 13769.1025390625,
18
+ 13705.7763671875,
19
+ 13522.9287109375,
20
+ 13596.2265625,
21
+ 13438.6591796875,
22
+ 13464.921875,
23
+ 13481.96484375,
24
+ 13347.9755859375
25
+ ],
26
+ "l0": [
27
+ 500.0,
28
+ 498.59228515625,
29
+ 497.63134765625,
30
+ 497.25341796875,
31
+ 496.314453125,
32
+ 496.404052734375,
33
+ 496.162841796875,
34
+ 496.5146484375,
35
+ 496.245361328125,
36
+ 496.47412109375,
37
+ 495.576904296875,
38
+ 495.006591796875,
39
+ 495.041015625,
40
+ 494.9541015625,
41
+ 495.672607421875,
42
+ 496.245849609375,
43
+ 495.298828125,
44
+ 495.80712890625,
45
+ 494.984619140625,
46
+ 496.044921875,
47
+ 495.775390625,
48
+ 494.7958984375
49
+ ],
50
+ "steps_logged": [
51
+ 0,
52
+ 200,
53
+ 400,
54
+ 600,
55
+ 800,
56
+ 1000,
57
+ 1200,
58
+ 1400,
59
+ 1600,
60
+ 1800,
61
+ 2000,
62
+ 2200,
63
+ 2400,
64
+ 2600,
65
+ 2800,
66
+ 3000,
67
+ 3200,
68
+ 3400,
69
+ 3600,
70
+ 3800,
71
+ 4000,
72
+ 4200
73
+ ],
74
+ "final_step": 4200,
75
+ "converged": true,
76
+ "plateau_last": 0.019381023656819877,
77
+ "elapsed_s": 1337.4303081035614,
78
+ "shifts": [
79
+ 1
80
+ ],
81
+ "matryoshka_h_size": 3686,
82
+ "alpha": null,
83
+ "row": 30,
84
+ "arch_id": "phase57_partB_h8_bare_multidistance_t3",
85
+ "arch": "phase57_partB_h8_bare_multidistance_t3",
86
+ "group": 4,
87
+ "src_class": "TXCBareMultiDistanceContrastiveAntidead",
88
+ "src_module": "src.architectures.txc_bare_multidistance_contrastive_antidead",
89
+ "T": 3,
90
+ "T_max": null,
91
+ "t_sample": null,
92
+ "n_layers": null,
93
+ "k_win": 500,
94
+ "k_pos": 167,
95
+ "gamma": null,
96
+ "n_scales": null,
97
+ "seed": 42,
98
+ "d_in": 2304,
99
+ "d_sae": 18432,
100
+ "subject_model": "google/gemma-2-2b",
101
+ "anchor_layer": 12,
102
+ "mlc_layers": [
103
+ 10,
104
+ 11,
105
+ 12,
106
+ 13,
107
+ 14
108
+ ],
109
+ "phase": "phase7_unification",
110
+ "run_id": "phase57_partB_h8_bare_multidistance_t3__seed42"
111
+ }