han1823123123 commited on
Commit
e9a2ca7
·
verified ·
1 Parent(s): b3b7d8c

Upload training_logs/phase57_partB_h8_bare_multidistance_t20_kpos100__seed1.json with huggingface_hub

Browse files
training_logs/phase57_partB_h8_bare_multidistance_t20_kpos100__seed1.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 119784.234375,
4
+ 56975.9140625,
5
+ 51753.40625,
6
+ 49978.28125,
7
+ 48053.6015625,
8
+ 47701.35546875,
9
+ 47039.38671875,
10
+ 46531.49609375,
11
+ 46129.5625,
12
+ 46964.25390625,
13
+ 45744.32421875,
14
+ 45892.984375,
15
+ 45480.4765625,
16
+ 45348.38671875,
17
+ 45839.36328125,
18
+ 44866.78515625,
19
+ 44752.23828125,
20
+ 44665.0078125,
21
+ 44356.98828125,
22
+ 44178.8359375,
23
+ 44674.6015625,
24
+ 44085.671875
25
+ ],
26
+ "l0": [
27
+ 2000.0,
28
+ 1978.4814453125,
29
+ 1979.98095703125,
30
+ 1975.941162109375,
31
+ 1971.893798828125,
32
+ 1976.93603515625,
33
+ 1971.63427734375,
34
+ 1973.780029296875,
35
+ 1976.515625,
36
+ 1974.138427734375,
37
+ 1969.6689453125,
38
+ 1973.668701171875,
39
+ 1974.801513671875,
40
+ 1974.387451171875,
41
+ 1967.912109375,
42
+ 1971.716796875,
43
+ 1975.186279296875,
44
+ 1972.721923828125,
45
+ 1975.2333984375,
46
+ 1971.415771484375,
47
+ 1964.80322265625,
48
+ 1968.521240234375
49
+ ],
50
+ "steps_logged": [
51
+ 0,
52
+ 200,
53
+ 400,
54
+ 600,
55
+ 800,
56
+ 1000,
57
+ 1200,
58
+ 1400,
59
+ 1600,
60
+ 1800,
61
+ 2000,
62
+ 2200,
63
+ 2400,
64
+ 2600,
65
+ 2800,
66
+ 3000,
67
+ 3200,
68
+ 3400,
69
+ 3600,
70
+ 3800,
71
+ 4000,
72
+ 4200
73
+ ],
74
+ "final_step": 4200,
75
+ "converged": true,
76
+ "plateau_last": 0.0191179332076818,
77
+ "elapsed_s": 15691.54605603218,
78
+ "shifts": [
79
+ 1,
80
+ 5,
81
+ 10
82
+ ],
83
+ "matryoshka_h_size": 3686,
84
+ "alpha": null,
85
+ "row": 47,
86
+ "arch_id": "phase57_partB_h8_bare_multidistance_t20_kpos100",
87
+ "arch": "phase57_partB_h8_bare_multidistance_t20_kpos100",
88
+ "group": 5,
89
+ "src_class": "TXCBareMultiDistanceContrastiveAntidead",
90
+ "src_module": "src.architectures.txc_bare_multidistance_contrastive_antidead",
91
+ "T": 20,
92
+ "T_max": null,
93
+ "t_sample": null,
94
+ "n_layers": null,
95
+ "k_win": 2000,
96
+ "k_pos": 100,
97
+ "gamma": null,
98
+ "n_scales": null,
99
+ "seed": 1,
100
+ "d_in": 2304,
101
+ "d_sae": 18432,
102
+ "subject_model": "google/gemma-2-2b",
103
+ "anchor_layer": 12,
104
+ "mlc_layers": [
105
+ 10,
106
+ 11,
107
+ 12,
108
+ 13,
109
+ 14
110
+ ],
111
+ "phase": "phase7_unification",
112
+ "run_id": "phase57_partB_h8_bare_multidistance_t20_kpos100__seed1"
113
+ }