han1823123123 committed on
Commit ef5a7cf · verified · 1 Parent(s): 98b7123

Upload training_logs/phase57_partB_h8_bare_multidistance_t9__seed42.json with huggingface_hub

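The commit message matches the default message that `huggingface_hub` generates for `HfApi.upload_file`, so the commit was most likely produced by a single call along these lines. This is a hypothetical reconstruction: the `repo_id` and token handling are placeholders, not taken from the page.

```python
# Hypothetical reconstruction of the upload call; repo_id is a placeholder.
from huggingface_hub import HfApi

api = HfApi()  # authenticates via HF_TOKEN or the cached login token
api.upload_file(
    path_or_fileobj="training_logs/phase57_partB_h8_bare_multidistance_t9__seed42.json",
    path_in_repo="training_logs/phase57_partB_h8_bare_multidistance_t9__seed42.json",
    repo_id="<user>/<repo>",  # placeholder: the target repo is not shown in the diff
)
# With no commit_message argument, huggingface_hub generates
# "Upload <path_in_repo> with huggingface_hub", which is what this commit shows.
```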
training_logs/phase57_partB_h8_bare_multidistance_t9__seed42.json ADDED
@@ -0,0 +1,101 @@
+{
+  "loss": [
+    126904.4296875,
+    51430.64453125,
+    47022.50390625,
+    44962.5,
+    43597.08203125,
+    42971.12890625,
+    42641.19140625,
+    42199.46875,
+    41431.29296875,
+    41358.40234375,
+    41064.09765625,
+    41134.94140625,
+    40698.94921875,
+    40528.8359375,
+    40633.19140625,
+    40375.20703125,
+    40111.76953125,
+    40203.0859375
+  ],
+  "l0": [
+    500.0,
+    495.06103515625,
+    493.483642578125,
+    493.888671875,
+    493.165771484375,
+    493.677734375,
+    494.17236328125,
+    495.0654296875,
+    493.17626953125,
+    493.29345703125,
+    493.232421875,
+    495.16015625,
+    492.48095703125,
+    492.49951171875,
+    493.813720703125,
+    492.6513671875,
+    492.574951171875,
+    492.665283203125
+  ],
+  "steps_logged": [
+    0,
+    200,
+    400,
+    600,
+    800,
+    1000,
+    1200,
+    1400,
+    1600,
+    1800,
+    2000,
+    2200,
+    2400,
+    2600,
+    2800,
+    3000,
+    3200,
+    3400
+  ],
+  "final_step": 3400,
+  "converged": true,
+  "plateau_last": 0.018647658833941747,
+  "elapsed_s": 5757.940834760666,
+  "shifts": [
+    1,
+    2,
+    4
+  ],
+  "matryoshka_h_size": 3686,
+  "alpha": null,
+  "row": 36,
+  "arch_id": "phase57_partB_h8_bare_multidistance_t9",
+  "arch": "phase57_partB_h8_bare_multidistance_t9",
+  "group": 4,
+  "src_class": "TXCBareMultiDistanceContrastiveAntidead",
+  "src_module": "src.architectures.txc_bare_multidistance_contrastive_antidead",
+  "T": 9,
+  "T_max": null,
+  "t_sample": null,
+  "n_layers": null,
+  "k_win": 500,
+  "k_pos": 56,
+  "gamma": null,
+  "n_scales": null,
+  "seed": 42,
+  "d_in": 2304,
+  "d_sae": 18432,
+  "subject_model": "google/gemma-2-2b",
+  "anchor_layer": 12,
+  "mlc_layers": [
+    10,
+    11,
+    12,
+    13,
+    14
+  ],
+  "phase": "phase7_unification",
+  "run_id": "phase57_partB_h8_bare_multidistance_t9__seed42"
+}
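The log is self-describing: `loss` and `l0` are sampled every 200 steps as listed in `steps_logged` (the `l0` series presumably tracks the mean number of active latents, since it hovers just under `k_win` = 500; that reading is an inference, not stated in the file). A minimal sketch of consuming the log once it has been fetched locally:

```python
# Sketch: load the uploaded log and plot the two logged training curves.
# Assumes the file has been downloaded to training_logs/ in the working directory.
import json

import matplotlib.pyplot as plt

with open("training_logs/phase57_partB_h8_bare_multidistance_t9__seed42.json") as f:
    log = json.load(f)

fig, (ax_loss, ax_l0) = plt.subplots(1, 2, figsize=(10, 4))
ax_loss.plot(log["steps_logged"], log["loss"])
ax_loss.set(xlabel="step", ylabel="loss", title=log["run_id"])
ax_l0.plot(log["steps_logged"], log["l0"])
ax_l0.set(xlabel="step", ylabel="l0 (mean active latents, inferred)")
fig.tight_layout()
plt.show()
```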