han1823123123 commited on
Commit
3aeb1fa
·
verified ·
1 Parent(s): 9bece02

Upload training_logs/phase57_partB_h8_bare_multidistance_t7__seed1.json with huggingface_hub

Browse files
training_logs/phase57_partB_h8_bare_multidistance_t7__seed1.json ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 102423.515625,
4
+ 36875.29296875,
5
+ 33404.2890625,
6
+ 31978.7890625,
7
+ 30758.66015625,
8
+ 30117.58203125,
9
+ 29693.828125,
10
+ 29147.90234375,
11
+ 29045.857421875,
12
+ 28883.203125,
13
+ 28585.37109375,
14
+ 28582.10546875,
15
+ 28524.521484375,
16
+ 28450.23828125,
17
+ 28135.734375,
18
+ 28080.806640625,
19
+ 27982.140625,
20
+ 28020.876953125,
21
+ 27976.21875
22
+ ],
23
+ "l0": [
24
+ 500.0,
25
+ 496.361328125,
26
+ 494.86962890625,
27
+ 494.8671875,
28
+ 492.7890625,
29
+ 493.0693359375,
30
+ 492.90869140625,
31
+ 493.23388671875,
32
+ 493.05615234375,
33
+ 493.57568359375,
34
+ 492.6591796875,
35
+ 493.228759765625,
36
+ 493.667236328125,
37
+ 493.692138671875,
38
+ 494.049072265625,
39
+ 491.560791015625,
40
+ 493.307373046875,
41
+ 492.43505859375,
42
+ 492.982421875
43
+ ],
44
+ "steps_logged": [
45
+ 0,
46
+ 200,
47
+ 400,
48
+ 600,
49
+ 800,
50
+ 1000,
51
+ 1200,
52
+ 1400,
53
+ 1600,
54
+ 1800,
55
+ 2000,
56
+ 2200,
57
+ 2400,
58
+ 2600,
59
+ 2800,
60
+ 3000,
61
+ 3200,
62
+ 3400,
63
+ 3600
64
+ ],
65
+ "final_step": 3600,
66
+ "converged": true,
67
+ "plateau_last": 0.019784327321031542,
68
+ "elapsed_s": 3795.054627418518,
69
+ "shifts": [
70
+ 1,
71
+ 3
72
+ ],
73
+ "matryoshka_h_size": 3686,
74
+ "alpha": null,
75
+ "row": 34,
76
+ "arch_id": "phase57_partB_h8_bare_multidistance_t7",
77
+ "arch": "phase57_partB_h8_bare_multidistance_t7",
78
+ "group": 4,
79
+ "src_class": "TXCBareMultiDistanceContrastiveAntidead",
80
+ "src_module": "src.architectures.txc_bare_multidistance_contrastive_antidead",
81
+ "T": 7,
82
+ "T_max": null,
83
+ "t_sample": null,
84
+ "n_layers": null,
85
+ "k_win": 500,
86
+ "k_pos": 71,
87
+ "gamma": null,
88
+ "n_scales": null,
89
+ "seed": 1,
90
+ "d_in": 2304,
91
+ "d_sae": 18432,
92
+ "subject_model": "google/gemma-2-2b",
93
+ "anchor_layer": 12,
94
+ "mlc_layers": [
95
+ 10,
96
+ 11,
97
+ 12,
98
+ 13,
99
+ 14
100
+ ],
101
+ "phase": "phase7_unification",
102
+ "run_id": "phase57_partB_h8_bare_multidistance_t7__seed1"
103
+ }