han1823123123 commited on
Commit
e8f2be4
·
verified ·
1 Parent(s): 93bb0f4

Upload training_logs/phase57_partB_h8_bare_multidistance_t3__seed1.json with huggingface_hub

Browse files
training_logs/phase57_partB_h8_bare_multidistance_t3__seed1.json ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 81026.6875,
4
+ 20049.75,
5
+ 17770.591796875,
6
+ 16721.71484375,
7
+ 15854.0146484375,
8
+ 15304.7548828125,
9
+ 15011.404296875,
10
+ 14804.1513671875,
11
+ 14541.837890625,
12
+ 14240.3232421875,
13
+ 14219.7314453125,
14
+ 14054.931640625,
15
+ 13853.1943359375,
16
+ 13815.9404296875,
17
+ 13741.4990234375,
18
+ 13533.9208984375,
19
+ 13518.9638671875,
20
+ 13471.1416015625,
21
+ 13421.5625,
22
+ 13458.0595703125,
23
+ 13460.150390625,
24
+ 13369.78125
25
+ ],
26
+ "l0": [
27
+ 500.0,
28
+ 497.9169921875,
29
+ 497.3984375,
30
+ 497.33349609375,
31
+ 496.41357421875,
32
+ 496.36328125,
33
+ 495.83740234375,
34
+ 496.934814453125,
35
+ 496.689697265625,
36
+ 496.27783203125,
37
+ 496.7578125,
38
+ 496.390625,
39
+ 496.63623046875,
40
+ 496.7197265625,
41
+ 496.091796875,
42
+ 495.321533203125,
43
+ 496.3427734375,
44
+ 495.509033203125,
45
+ 495.74560546875,
46
+ 495.616455078125,
47
+ 495.8837890625,
48
+ 495.42578125
49
+ ],
50
+ "steps_logged": [
51
+ 0,
52
+ 200,
53
+ 400,
54
+ 600,
55
+ 800,
56
+ 1000,
57
+ 1200,
58
+ 1400,
59
+ 1600,
60
+ 1800,
61
+ 2000,
62
+ 2200,
63
+ 2400,
64
+ 2600,
65
+ 2800,
66
+ 3000,
67
+ 3200,
68
+ 3400,
69
+ 3600,
70
+ 3800,
71
+ 4000,
72
+ 4200
73
+ ],
74
+ "final_step": 4200,
75
+ "converged": true,
76
+ "plateau_last": 0.018737325648298355,
77
+ "elapsed_s": 1338.4662330150604,
78
+ "shifts": [
79
+ 1
80
+ ],
81
+ "matryoshka_h_size": 3686,
82
+ "alpha": null,
83
+ "row": 30,
84
+ "arch_id": "phase57_partB_h8_bare_multidistance_t3",
85
+ "arch": "phase57_partB_h8_bare_multidistance_t3",
86
+ "group": 4,
87
+ "src_class": "TXCBareMultiDistanceContrastiveAntidead",
88
+ "src_module": "src.architectures.txc_bare_multidistance_contrastive_antidead",
89
+ "T": 3,
90
+ "T_max": null,
91
+ "t_sample": null,
92
+ "n_layers": null,
93
+ "k_win": 500,
94
+ "k_pos": 167,
95
+ "gamma": null,
96
+ "n_scales": null,
97
+ "seed": 1,
98
+ "d_in": 2304,
99
+ "d_sae": 18432,
100
+ "subject_model": "google/gemma-2-2b",
101
+ "anchor_layer": 12,
102
+ "mlc_layers": [
103
+ 10,
104
+ 11,
105
+ 12,
106
+ 13,
107
+ 14
108
+ ],
109
+ "phase": "phase7_unification",
110
+ "run_id": "phase57_partB_h8_bare_multidistance_t3__seed1"
111
+ }