han1823123123 commited on
Commit
9f3d7d3
·
verified ·
1 Parent(s): ef56153

Upload training_logs/tfa_big__seed42.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_logs/tfa_big__seed42.json +119 -0
training_logs/tfa_big__seed42.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 2623.37548828125,
4
+ 1125.829345703125,
5
+ 1073.6173095703125,
6
+ 1067.659912109375,
7
+ 1026.404296875,
8
+ 1048.281005859375,
9
+ 1022.142333984375,
10
+ 1024.8359375,
11
+ 994.5186767578125,
12
+ 993.80908203125,
13
+ 987.5577392578125,
14
+ 987.12646484375,
15
+ 973.5612182617188,
16
+ 970.4193725585938,
17
+ 978.142822265625,
18
+ 960.9208374023438,
19
+ 952.0487060546875,
20
+ 956.1536865234375,
21
+ 957.6717529296875,
22
+ 934.4334106445312,
23
+ 948.057373046875,
24
+ 934.8899536132812,
25
+ 935.3495483398438,
26
+ 938.99267578125,
27
+ 926.7506103515625
28
+ ],
29
+ "l0": [
30
+ 500.0,
31
+ 500.0,
32
+ 500.0,
33
+ 500.0,
34
+ 500.0,
35
+ 500.0,
36
+ 500.0,
37
+ 500.0,
38
+ 500.0,
39
+ 500.0,
40
+ 500.0,
41
+ 500.0,
42
+ 500.0,
43
+ 500.0,
44
+ 500.0,
45
+ 500.0,
46
+ 500.0,
47
+ 500.0,
48
+ 500.0,
49
+ 500.0,
50
+ 500.0,
51
+ 500.0,
52
+ 500.0,
53
+ 500.0,
54
+ 500.0
55
+ ],
56
+ "steps_logged": [
57
+ 0,
58
+ 200,
59
+ 400,
60
+ 600,
61
+ 800,
62
+ 1000,
63
+ 1200,
64
+ 1400,
65
+ 1600,
66
+ 1800,
67
+ 2000,
68
+ 2200,
69
+ 2400,
70
+ 2600,
71
+ 2800,
72
+ 3000,
73
+ 3200,
74
+ 3400,
75
+ 3600,
76
+ 3800,
77
+ 4000,
78
+ 4200,
79
+ 4400,
80
+ 4600,
81
+ 4800
82
+ ],
83
+ "final_step": 4800,
84
+ "converged": true,
85
+ "plateau_last": 0.016211831494234736,
86
+ "elapsed_s": 2582.1489868164062,
87
+ "scaling_factor": 0.2944906023536945,
88
+ "skipped_steps": 0,
89
+ "row": 7,
90
+ "arch_id": "tfa_big",
91
+ "arch": "tfa_big",
92
+ "group": 1,
93
+ "src_class": "TemporalSAE",
94
+ "src_module": "src.architectures._tfa_module",
95
+ "T": null,
96
+ "T_max": null,
97
+ "t_sample": null,
98
+ "n_layers": null,
99
+ "k_win": 500,
100
+ "k_pos": null,
101
+ "shifts": null,
102
+ "alpha": null,
103
+ "gamma": null,
104
+ "n_scales": null,
105
+ "seed": 42,
106
+ "d_in": 2304,
107
+ "d_sae": 18432,
108
+ "subject_model": "google/gemma-2-2b",
109
+ "anchor_layer": 12,
110
+ "mlc_layers": [
111
+ 10,
112
+ 11,
113
+ 12,
114
+ 13,
115
+ 14
116
+ ],
117
+ "phase": "phase7_unification",
118
+ "run_id": "tfa_big__seed42"
119
+ }