han1823123123 commited on
Commit
22c2e57
·
verified ·
1 Parent(s): 305e22c

Upload training_logs/txcdr_t6__seed2.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_logs/txcdr_t6__seed2.json +123 -0
training_logs/txcdr_t6__seed2.json ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 54171.03125,
4
+ 6650.0341796875,
5
+ 5778.8916015625,
6
+ 5370.9873046875,
7
+ 5066.1904296875,
8
+ 4947.537109375,
9
+ 4783.05224609375,
10
+ 4727.25146484375,
11
+ 4608.16357421875,
12
+ 4525.3857421875,
13
+ 4464.26513671875,
14
+ 4405.87451171875,
15
+ 4409.7841796875,
16
+ 4379.986328125,
17
+ 4358.74853515625,
18
+ 4317.25537109375,
19
+ 4294.2431640625,
20
+ 4237.53125,
21
+ 4268.90673828125,
22
+ 4221.75,
23
+ 4226.3876953125,
24
+ 4176.494140625,
25
+ 4168.09716796875,
26
+ 4161.02099609375,
27
+ 4171.30712890625,
28
+ 4111.2939453125,
29
+ 4131.61328125
30
+ ],
31
+ "l0": [
32
+ 500.0,
33
+ 496.3466796875,
34
+ 495.33447265625,
35
+ 494.26806640625,
36
+ 493.890625,
37
+ 494.245361328125,
38
+ 493.401611328125,
39
+ 493.9443359375,
40
+ 492.8857421875,
41
+ 493.117431640625,
42
+ 491.75732421875,
43
+ 492.416259765625,
44
+ 492.255126953125,
45
+ 493.2607421875,
46
+ 493.692138671875,
47
+ 492.292236328125,
48
+ 492.390869140625,
49
+ 491.463623046875,
50
+ 492.8193359375,
51
+ 493.078369140625,
52
+ 492.216796875,
53
+ 491.498779296875,
54
+ 491.876953125,
55
+ 492.4609375,
56
+ 492.950927734375,
57
+ 491.66552734375,
58
+ 491.526611328125
59
+ ],
60
+ "steps_logged": [
61
+ 0,
62
+ 200,
63
+ 400,
64
+ 600,
65
+ 800,
66
+ 1000,
67
+ 1200,
68
+ 1400,
69
+ 1600,
70
+ 1800,
71
+ 2000,
72
+ 2200,
73
+ 2400,
74
+ 2600,
75
+ 2800,
76
+ 3000,
77
+ 3200,
78
+ 3400,
79
+ 3600,
80
+ 3800,
81
+ 4000,
82
+ 4200,
83
+ 4400,
84
+ 4600,
85
+ 4800,
86
+ 5000,
87
+ 5200
88
+ ],
89
+ "final_step": 5200,
90
+ "converged": true,
91
+ "plateau_last": 0.018349156380293843,
92
+ "elapsed_s": 1233.7482249736786,
93
+ "row": 17,
94
+ "arch_id": "txcdr_t6",
95
+ "arch": "txcdr_t6",
96
+ "group": 3,
97
+ "src_class": "TemporalCrosscoder",
98
+ "src_module": "src.architectures.crosscoder",
99
+ "T": 6,
100
+ "T_max": null,
101
+ "t_sample": null,
102
+ "n_layers": null,
103
+ "k_win": 500,
104
+ "k_pos": 83,
105
+ "shifts": null,
106
+ "alpha": null,
107
+ "gamma": null,
108
+ "n_scales": null,
109
+ "seed": 2,
110
+ "d_in": 2304,
111
+ "d_sae": 18432,
112
+ "subject_model": "google/gemma-2-2b",
113
+ "anchor_layer": 12,
114
+ "mlc_layers": [
115
+ 10,
116
+ 11,
117
+ 12,
118
+ 13,
119
+ 14
120
+ ],
121
+ "phase": "phase7_unification",
122
+ "run_id": "txcdr_t6__seed2"
123
+ }