han1823123123 committed
Commit 391ef14 · verified · 1 Parent(s): 3e64920

Upload training_logs/txc_bare_antidead_t7__seed2.json with huggingface_hub

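The commit message indicates the file was pushed with huggingface_hub. Below is a minimal sketch of what such an upload call can look like; the `repo_id` is a placeholder, since the target repository is not named in this view.

```python
# Minimal sketch of an upload like the one in this commit, via the
# huggingface_hub client API. The repo_id is a placeholder (hypothetical);
# the actual target repository is not shown on this page.
from huggingface_hub import HfApi

api = HfApi()
api.upload_file(
    path_or_fileobj="training_logs/txc_bare_antidead_t7__seed2.json",
    path_in_repo="training_logs/txc_bare_antidead_t7__seed2.json",
    repo_id="user/repo",  # placeholder repo id
    commit_message="Upload training_logs/txc_bare_antidead_t7__seed2.json with huggingface_hub",
)
```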
training_logs/txc_bare_antidead_t7__seed2.json ADDED
@@ -0,0 +1,111 @@
+{
+  "loss": [
+    13044.13671875,
+    6023.318359375,
+    5353.54443359375,
+    5016.18115234375,
+    4764.8408203125,
+    4595.6279296875,
+    4475.71875,
+    4379.71484375,
+    4304.25830078125,
+    4227.61865234375,
+    4158.75390625,
+    4138.01318359375,
+    4093.712890625,
+    4076.626953125,
+    4084.19091796875,
+    4027.330078125,
+    4021.2919921875,
+    4008.130859375,
+    4004.975830078125,
+    3975.064453125,
+    3972.56103515625,
+    3946.59228515625
+  ],
+  "l0": [
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0,
+    500.0
+  ],
+  "steps_logged": [
+    0,
+    200,
+    400,
+    600,
+    800,
+    1000,
+    1200,
+    1400,
+    1600,
+    1800,
+    2000,
+    2200,
+    2400,
+    2600,
+    2800,
+    3000,
+    3200,
+    3400,
+    3600,
+    3800,
+    4000,
+    4200
+  ],
+  "final_step": 4200,
+  "converged": true,
+  "plateau_last": 0.019495906493702225,
+  "elapsed_s": 1454.086974143982,
+  "T": 7,
+  "src_class": "TXCBareAntidead",
+  "n_seqs_used": 24000,
+  "ctx_used": 64,
+  "ctx_slice_direction": "last",
+  "row": 607,
+  "arch_id": "txc_bare_antidead_t7",
+  "arch": "txc_bare_antidead_t7",
+  "group": 99,
+  "src_module": "src.architectures.txc_bare_antidead",
+  "T_max": null,
+  "t_sample": null,
+  "n_layers": null,
+  "k_win": 500,
+  "k_pos": 71,
+  "shifts": null,
+  "alpha": null,
+  "gamma": null,
+  "n_scales": null,
+  "seed": 2,
+  "d_in": 2304,
+  "d_sae": 18432,
+  "subject_model": "google/gemma-2-2b",
+  "anchor_layer": 12,
+  "mlc_layers": [
+    10,
+    11,
+    12,
+    13,
+    14
+  ],
+  "phase": "phase7_unification",
+  "run_id": "txc_bare_antidead_t7__seed2"
+}
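The uploaded log is plain JSON keyed as above. A small sketch for reading it back and plotting the loss curve, assuming the file is available locally (e.g. fetched with huggingface_hub.hf_hub_download):

```python
# Minimal sketch for inspecting the uploaded training log; field names
# match the JSON shown in the diff above. Assumes a local copy of the file.
import json

import matplotlib.pyplot as plt

with open("training_logs/txc_bare_antidead_t7__seed2.json") as f:
    log = json.load(f)

plt.plot(log["steps_logged"], log["loss"])
plt.xlabel("training step")
plt.ylabel("loss")
plt.title(log["run_id"])  # "txc_bare_antidead_t7__seed2"
plt.show()
```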