han1823123123 commited on
Commit
e92cb47
·
verified ·
1 Parent(s): 49c0daf

Upload training_logs/it_txcdr_t5__seed42.json with huggingface_hub

Browse files
training_logs/it_txcdr_t5__seed42.json ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 91069.1171875,
4
+ 10548.923828125,
5
+ 9195.5654296875,
6
+ 8541.21875,
7
+ 8101.53759765625,
8
+ 7824.19873046875,
9
+ 7624.8095703125,
10
+ 7498.95556640625,
11
+ 7347.88232421875,
12
+ 7244.47265625,
13
+ 7197.34130859375,
14
+ 7113.96337890625,
15
+ 7059.77734375,
16
+ 6956.88818359375,
17
+ 6919.7587890625,
18
+ 6908.16552734375,
19
+ 6866.046875,
20
+ 6851.07666015625,
21
+ 6790.3798828125,
22
+ 6751.21337890625,
23
+ 6702.0361328125,
24
+ 6708.22998046875,
25
+ 6681.625,
26
+ 6668.82373046875,
27
+ 6671.85009765625,
28
+ 6613.18603515625
29
+ ],
30
+ "l0": [
31
+ 500.0,
32
+ 500.0,
33
+ 500.0,
34
+ 500.0,
35
+ 500.0,
36
+ 500.0,
37
+ 500.0,
38
+ 500.0,
39
+ 500.0,
40
+ 500.0,
41
+ 500.0,
42
+ 500.0,
43
+ 500.0,
44
+ 500.0,
45
+ 500.0,
46
+ 500.0,
47
+ 500.0,
48
+ 500.0,
49
+ 500.0,
50
+ 500.0,
51
+ 500.0,
52
+ 500.0,
53
+ 500.0,
54
+ 500.0,
55
+ 500.0,
56
+ 500.0
57
+ ],
58
+ "steps_logged": [
59
+ 0,
60
+ 200,
61
+ 400,
62
+ 600,
63
+ 800,
64
+ 1000,
65
+ 1200,
66
+ 1400,
67
+ 1600,
68
+ 1800,
69
+ 2000,
70
+ 2200,
71
+ 2400,
72
+ 2600,
73
+ 2800,
74
+ 3000,
75
+ 3200,
76
+ 3400,
77
+ 3600,
78
+ 3800,
79
+ 4000,
80
+ 4200,
81
+ 4400,
82
+ 4600,
83
+ 4800,
84
+ 5000
85
+ ],
86
+ "final_step": 5000,
87
+ "converged": true,
88
+ "plateau_last": 0.01816915211553231,
89
+ "elapsed_s": 2391.3861033916473,
90
+ "row": 16,
91
+ "arch_id": "txcdr_t5",
92
+ "arch": "txcdr_t5",
93
+ "group": 3,
94
+ "src_class": "TemporalCrosscoder",
95
+ "src_module": "src.architectures.crosscoder",
96
+ "T": 5,
97
+ "T_max": null,
98
+ "t_sample": null,
99
+ "n_layers": null,
100
+ "k_win": 500,
101
+ "k_pos": 100,
102
+ "shifts": null,
103
+ "alpha": null,
104
+ "gamma": null,
105
+ "n_scales": null,
106
+ "seed": 42,
107
+ "d_in": 2304,
108
+ "d_sae": 18432,
109
+ "subject_model": "google/gemma-2-2b-it",
110
+ "anchor_layer": 13,
111
+ "mlc_layers": [
112
+ 11,
113
+ 12,
114
+ 13,
115
+ 14,
116
+ 15
117
+ ],
118
+ "phase": "phase7_unification",
119
+ "run_id": "it_txcdr_t5__seed42"
120
+ }