anshdadhich commited on
Commit
9a3a029
·
verified ·
1 Parent(s): 0927acb

Add v5 results

Browse files
Files changed (1) hide show
  1. results_v5.json +351 -0
results_v5.json ADDED
@@ -0,0 +1,351 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "main_tasks": {
3
+ "Complex Fn (4D)": {
4
+ "Vanilla": {
5
+ "mean": 0.05014639471968015,
6
+ "std": 0.00929074178664744,
7
+ "scores": [
8
+ 0.0560552217066288,
9
+ 0.057355206459760666,
10
+ 0.037028755992650986
11
+ ],
12
+ "params": 4993,
13
+ "hidden": 48
14
+ },
15
+ "RichV1": {
16
+ "mean": 0.17897194375594458,
17
+ "std": 0.05929861375505353,
18
+ "scores": [
19
+ 0.11175825446844101,
20
+ 0.2560095191001892,
21
+ 0.1691480576992035
22
+ ],
23
+ "params": 4951,
24
+ "hidden": 33
25
+ },
26
+ "SinGLU": {
27
+ "mean": 0.012662170765300592,
28
+ "std": 0.009025980600835806,
29
+ "scores": [
30
+ 0.02520008198916912,
31
+ 0.008467600680887699,
32
+ 0.0043188296258449554
33
+ ],
34
+ "params": 4811,
35
+ "hidden": 31
36
+ },
37
+ "Hybrid": {
38
+ "mean": 0.07436376934250195,
39
+ "std": 0.020922922410045546,
40
+ "scores": [
41
+ 0.05885079875588417,
42
+ 0.1039414331316948,
43
+ 0.06029907613992691
44
+ ],
45
+ "params": 4959,
46
+ "hidden": 34
47
+ }
48
+ },
49
+ "Nested Fn (2D)": {
50
+ "Vanilla": {
51
+ "mean": 0.04167063037554423,
52
+ "std": 0.0013826140176434484,
53
+ "scores": [
54
+ 0.04034700244665146,
55
+ 0.04357881471514702,
56
+ 0.04108607396483421
57
+ ],
58
+ "params": 2961,
59
+ "hidden": 37
60
+ },
61
+ "RichV1": {
62
+ "mean": 0.001206373757061859,
63
+ "std": 0.0010383498294791104,
64
+ "scores": [
65
+ 0.00035041512455791235,
66
+ 0.0006010299548506737,
67
+ 0.002667676191776991
68
+ ],
69
+ "params": 3069,
70
+ "hidden": 26
71
+ },
72
+ "SinGLU": {
73
+ "mean": 0.000256501533537327,
74
+ "std": 0.00014266684270291342,
75
+ "scores": [
76
+ 7.845076470403001e-05,
77
+ 0.0002633430704008788,
78
+ 0.0004277107655070722
79
+ ],
80
+ "params": 2993,
81
+ "hidden": 24
82
+ },
83
+ "Hybrid": {
84
+ "mean": 0.0009293731806489328,
85
+ "std": 0.00015638037345160952,
86
+ "scores": [
87
+ 0.0010697855614125729,
88
+ 0.0007111956947483122,
89
+ 0.0010071382857859135
90
+ ],
91
+ "params": 2959,
92
+ "hidden": 26
93
+ }
94
+ },
95
+ "Spiral": {
96
+ "Vanilla": {
97
+ "mean": 0.9022222359975179,
98
+ "std": 0.12650085789091728,
99
+ "scores": [
100
+ 0.9933333396911621,
101
+ 0.7233333587646484,
102
+ 0.9900000095367432
103
+ ],
104
+ "params": 2999,
105
+ "hidden": 37
106
+ },
107
+ "RichV1": {
108
+ "mean": 0.998888889948527,
109
+ "std": 0.0015713469040821585,
110
+ "scores": [
111
+ 1.0,
112
+ 0.996666669845581,
113
+ 1.0
114
+ ],
115
+ "params": 3096,
116
+ "hidden": 26
117
+ },
118
+ "SinGLU": {
119
+ "mean": 0.4444444378217061,
120
+ "std": 0.021829873123329135,
121
+ "scores": [
122
+ 0.4699999988079071,
123
+ 0.4166666567325592,
124
+ 0.4466666579246521
125
+ ],
126
+ "params": 3018,
127
+ "hidden": 24
128
+ },
129
+ "Hybrid": {
130
+ "mean": 1.0,
131
+ "std": 0.0,
132
+ "scores": [
133
+ 1.0,
134
+ 1.0,
135
+ 1.0
136
+ ],
137
+ "params": 2986,
138
+ "hidden": 26
139
+ }
140
+ },
141
+ "Checkerboard": {
142
+ "Vanilla": {
143
+ "mean": 0.6088888843854269,
144
+ "std": 0.037548314598774495,
145
+ "scores": [
146
+ 0.6433333158493042,
147
+ 0.5566666722297668,
148
+ 0.6266666650772095
149
+ ],
150
+ "params": 2999,
151
+ "hidden": 37
152
+ },
153
+ "RichV1": {
154
+ "mean": 0.9044444362322489,
155
+ "std": 0.008748917050117025,
156
+ "scores": [
157
+ 0.9166666865348816,
158
+ 0.8966666460037231,
159
+ 0.8999999761581421
160
+ ],
161
+ "params": 3096,
162
+ "hidden": 26
163
+ },
164
+ "SinGLU": {
165
+ "mean": 0.9366666674613953,
166
+ "std": 0.015153520767335832,
167
+ "scores": [
168
+ 0.9166666865348816,
169
+ 0.9399999976158142,
170
+ 0.95333331823349
171
+ ],
172
+ "params": 3018,
173
+ "hidden": 24
174
+ },
175
+ "Hybrid": {
176
+ "mean": 0.8955555558204651,
177
+ "std": 0.013966457328736363,
178
+ "scores": [
179
+ 0.8999999761581421,
180
+ 0.8766666650772095,
181
+ 0.9100000262260437
182
+ ],
183
+ "params": 2986,
184
+ "hidden": 26
185
+ }
186
+ },
187
+ "High-Freq": {
188
+ "Vanilla": {
189
+ "mean": 1.1838690439860027,
190
+ "std": 0.028954010436214626,
191
+ "scores": [
192
+ 1.1441768407821655,
193
+ 1.2124271392822266,
194
+ 1.1950031518936157
195
+ ],
196
+ "params": 7999,
197
+ "hidden": 62
198
+ },
199
+ "RichV1": {
200
+ "mean": 1.6797597805658977,
201
+ "std": 0.18753213617719802,
202
+ "scores": [
203
+ 1.5120912790298462,
204
+ 1.9415488243103027,
205
+ 1.585639238357544
206
+ ],
207
+ "params": 7913,
208
+ "hidden": 43
209
+ },
210
+ "SinGLU": {
211
+ "mean": 1.463480571905772,
212
+ "std": 0.6593260909943727,
213
+ "scores": [
214
+ 2.395895481109619,
215
+ 1.0015356540679932,
216
+ 0.9930105805397034
217
+ ],
218
+ "params": 8214,
219
+ "hidden": 41
220
+ },
221
+ "Hybrid": {
222
+ "mean": 1.4304812749226887,
223
+ "std": 0.28564243680425555,
224
+ "scores": [
225
+ 1.0885547399520874,
226
+ 1.4151595830917358,
227
+ 1.7877295017242432
228
+ ],
229
+ "params": 7997,
230
+ "hidden": 44
231
+ }
232
+ },
233
+ "Memorization": {
234
+ "Vanilla": {
235
+ "mean": 0.023636028170585632,
236
+ "std": 0.005544727067146954,
237
+ "scores": [
238
+ 0.028810137882828712,
239
+ 0.01594628393650055,
240
+ 0.026151662692427635
241
+ ],
242
+ "params": 4926,
243
+ "hidden": 46
244
+ },
245
+ "RichV1": {
246
+ "mean": 5.6470338132695034e-08,
247
+ "std": 7.940899452098823e-08,
248
+ "scores": [
249
+ 6.393849338870439e-10,
250
+ 6.198520971674448e-13,
251
+ 1.687710096121009e-07
252
+ ],
253
+ "params": 5028,
254
+ "hidden": 32
255
+ },
256
+ "SinGLU": {
257
+ "mean": 1.4922602130836477e-08,
258
+ "std": 1.9137241711772547e-08,
259
+ "scores": [
260
+ 2.659182207764843e-09,
261
+ 1.6035532623970283e-10,
262
+ 4.194826885850489e-08
263
+ ],
264
+ "params": 5067,
265
+ "hidden": 31
266
+ },
267
+ "Hybrid": {
268
+ "mean": 3.557413054873967e-08,
269
+ "std": 4.040383113636077e-08,
270
+ "scores": [
271
+ 1.3963182610154945e-08,
272
+ 9.218822327738962e-08,
273
+ 5.709857586744249e-10
274
+ ],
275
+ "params": 4807,
276
+ "hidden": 32
277
+ }
278
+ }
279
+ },
280
+ "ood": {
281
+ "Vanilla": {
282
+ "id_mean": 0.19710678855578104,
283
+ "id_std": 0.01372835167636716,
284
+ "ood_mean": 1.340779185295105,
285
+ "ood_std": 0.07496366555450877,
286
+ "params": 5097,
287
+ "degradation": 6.80229836384182
288
+ },
289
+ "RichV1": {
290
+ "id_mean": 0.0030629046571751437,
291
+ "id_std": 0.0007370664671714653,
292
+ "ood_mean": 5.920439720153809,
293
+ "ood_std": 1.6229635781605876,
294
+ "params": 5101,
295
+ "degradation": 1932.949400264425
296
+ },
297
+ "SinGLU": {
298
+ "id_mean": 0.25248077015082043,
299
+ "id_std": 0.018526770338375,
300
+ "ood_mean": 6.372718969980876,
301
+ "ood_std": 0.8959624824918172,
302
+ "params": 5109,
303
+ "degradation": 25.24041322503138
304
+ },
305
+ "Hybrid": {
306
+ "id_mean": 0.0026089861057698727,
307
+ "id_std": 0.00042023361449633836,
308
+ "ood_mean": 4.857578992843628,
309
+ "ood_std": 1.0302407877868605,
310
+ "params": 4819,
311
+ "degradation": 1861.864646232077
312
+ }
313
+ },
314
+ "gradient_norms": {
315
+ "Vanilla": [
316
+ 0.639647362423343,
317
+ 0.3283495173433725,
318
+ 0.23426363702089711,
319
+ 0.27894396879219774,
320
+ 0.16422125111024313
321
+ ],
322
+ "RichV1": [
323
+ 15.839934002021176,
324
+ 6.613218660117891,
325
+ 3.4827730708357985,
326
+ 1.6238777871843217,
327
+ 0.17907922274133237
328
+ ],
329
+ "SinGLU": [
330
+ 19.53187113650771,
331
+ 14.939248550547868,
332
+ 5.134043674832624,
333
+ 1.3400197622788466,
334
+ 0.4012023661781001
335
+ ],
336
+ "Shared(S2)": [
337
+ 1159.4268234957435,
338
+ 884.1079387319159,
339
+ 904.1129815185649,
340
+ 714.1639373805058,
341
+ 174.37114304372977
342
+ ],
343
+ "Hybrid": [
344
+ 13.887360774140875,
345
+ 5.165627499528781,
346
+ 4.734903446652124,
347
+ 2.255145877088462,
348
+ 0.5620669780578126
349
+ ]
350
+ }
351
+ }