han1823123123 commited on
Commit
7d000f8
·
verified ·
1 Parent(s): 66bf08f

Upload training_logs/txc_bare_antidead_t6__seed42.json with huggingface_hub

Browse files
training_logs/txc_bare_antidead_t6__seed42.json ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 13036.1240234375,
4
+ 5839.79052734375,
5
+ 5169.66357421875,
6
+ 4787.41552734375,
7
+ 4576.44482421875,
8
+ 4405.8486328125,
9
+ 4273.591796875,
10
+ 4159.86865234375,
11
+ 4090.8046875,
12
+ 4027.442138671875,
13
+ 3998.333984375,
14
+ 3951.990234375,
15
+ 3904.77294921875,
16
+ 3890.715576171875,
17
+ 3875.372802734375,
18
+ 3821.909912109375,
19
+ 3804.688232421875,
20
+ 3804.86767578125,
21
+ 3801.357421875,
22
+ 3779.475341796875,
23
+ 3757.30126953125,
24
+ 3722.21484375,
25
+ 3729.3564453125,
26
+ 3717.6826171875,
27
+ 3712.84912109375
28
+ ],
29
+ "l0": [
30
+ 500.0,
31
+ 500.0,
32
+ 500.0,
33
+ 500.0,
34
+ 500.0,
35
+ 500.0,
36
+ 500.0,
37
+ 500.0,
38
+ 500.0,
39
+ 500.0,
40
+ 500.0,
41
+ 500.0,
42
+ 500.0,
43
+ 500.0,
44
+ 500.0,
45
+ 500.0,
46
+ 500.0,
47
+ 500.0,
48
+ 500.0,
49
+ 500.0,
50
+ 500.0,
51
+ 500.0,
52
+ 500.0,
53
+ 500.0,
54
+ 500.0
55
+ ],
56
+ "steps_logged": [
57
+ 0,
58
+ 200,
59
+ 400,
60
+ 600,
61
+ 800,
62
+ 1000,
63
+ 1200,
64
+ 1400,
65
+ 1600,
66
+ 1800,
67
+ 2000,
68
+ 2200,
69
+ 2400,
70
+ 2600,
71
+ 2800,
72
+ 3000,
73
+ 3200,
74
+ 3400,
75
+ 3600,
76
+ 3800,
77
+ 4000,
78
+ 4200,
79
+ 4400,
80
+ 4600,
81
+ 4800
82
+ ],
83
+ "final_step": 4800,
84
+ "converged": true,
85
+ "plateau_last": 0.01961331952904919,
86
+ "elapsed_s": 1407.0589573383331,
87
+ "T": 6,
88
+ "src_class": "TXCBareAntidead",
89
+ "n_seqs_used": 24000,
90
+ "ctx_used": 64,
91
+ "ctx_slice_direction": "last",
92
+ "row": 606,
93
+ "arch_id": "txc_bare_antidead_t6",
94
+ "arch": "txc_bare_antidead_t6",
95
+ "group": 99,
96
+ "src_module": "src.architectures.txc_bare_antidead",
97
+ "T_max": null,
98
+ "t_sample": null,
99
+ "n_layers": null,
100
+ "k_win": 500,
101
+ "k_pos": 83,
102
+ "shifts": null,
103
+ "alpha": null,
104
+ "gamma": null,
105
+ "n_scales": null,
106
+ "seed": 42,
107
+ "d_in": 2304,
108
+ "d_sae": 18432,
109
+ "subject_model": "google/gemma-2-2b",
110
+ "anchor_layer": 12,
111
+ "mlc_layers": [
112
+ 10,
113
+ 11,
114
+ 12,
115
+ 13,
116
+ 14
117
+ ],
118
+ "phase": "phase7_unification",
119
+ "run_id": "txc_bare_antidead_t6__seed42"
120
+ }