dancinlife commited on
Commit
ce75607
·
verified ·
1 Parent(s): a8bd371

feat(hexad): v4-py-hexad-tension-d768x12L-cycle1-2026-05-17 — result.json

Browse files
Files changed (1) hide show
  1. result.json +603 -0
result.json ADDED
@@ -0,0 +1,603 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "substrate": "PYTHON / PyTorch \u2014 interim LM-scale executor; NOT a hexa-native fire",
3
+ "fire_kind": "cycle 5 \u2014 DD155 Step+Tension hybrid LR overlay",
4
+ "honest_framing": "DD155 Law 187 hybrid LR: lr_step = (tension/EMA) \u00d7 base_cosine_lr, tension = grad_norm L2 (PROXY for hexa spine \u03a8-deviation). Formula is closed-form (B-TT-5 + B-FIRE-CYCLE5-2 sympy verified). OUTCOME = empirical (B-FIRE-CYCLE5-NOTE / B-D-NOTE family). PyTorch substrate, not hexa-native; corpus v3 carry from cycle 4.",
5
+ "arch": "ConsciousDecoderV2 (ready/models/conscious_decoder.py)",
6
+ "arch_features": "RoPE + SwiGLU + RMSNorm + GQA + PureFieldFFN + cross-attn + tied head",
7
+ "from_scratch": true,
8
+ "base_ckpt": null,
9
+ "dd155_hybrid_lr": {
10
+ "tension_ema_beta": 0.99,
11
+ "hybrid_clip_lo": 0.5,
12
+ "hybrid_clip_hi": 2.0,
13
+ "tension_proxy": "grad_norm L2 (post clip_grad_norm_)",
14
+ "law_anchor": "DD155 Law 187 Pareto optimal lr = (tension/EMA) \u00d7 base_lr",
15
+ "final_tension_ema": 0.046574,
16
+ "mult_distribution": {
17
+ "lt_0_75": 1599,
18
+ "0_75_to_1_25": 686,
19
+ "gt_1_25": 215
20
+ }
21
+ },
22
+ "config": {
23
+ "d_model": 768,
24
+ "n_head": 12,
25
+ "n_kv_head": 4,
26
+ "n_layer": 12,
27
+ "block_size": 128,
28
+ "lr": 0.0003,
29
+ "bsz": 32,
30
+ "steps": 2500,
31
+ "warmup": 125,
32
+ "seed": 1337,
33
+ "log_every": 62,
34
+ "corpus": "corpus_v3.jsonl",
35
+ "out_dir": "out_main",
36
+ "tension_ema_beta": 0.99,
37
+ "hybrid_clip_lo": 0.5,
38
+ "hybrid_clip_hi": 2.0
39
+ },
40
+ "n_params": 283722336,
41
+ "n_params_M": 283.72,
42
+ "gpu": "NVIDIA A100-SXM4-40GB",
43
+ "device": "cuda",
44
+ "init_ce": 5.640663,
45
+ "final_ce": 0.007762,
46
+ "final_gn2": 0.001495,
47
+ "final_tension": 0.038659,
48
+ "final_ppl": 1.0078,
49
+ "ce_descent": 5.632901,
50
+ "steps": 2500,
51
+ "wall_s": 321.3,
52
+ "peak_gpu_mem_gb": 9.685,
53
+ "trajectory": [
54
+ {
55
+ "step": 1,
56
+ "ce": 5.640663,
57
+ "gn2": 30.418763,
58
+ "tension": 5.515321,
59
+ "tension_ema": 5.515321,
60
+ "hybrid_mult": 1.0,
61
+ "ppl": 281.6494,
62
+ "base_lr": 2.4e-06,
63
+ "lr": 2.4e-06,
64
+ "wall_s": 0.41,
65
+ "gpu_mem_gb": 7.455
66
+ },
67
+ {
68
+ "step": 62,
69
+ "ce": 2.32522,
70
+ "gn2": 4.459097,
71
+ "tension": 2.111657,
72
+ "tension_ema": 4.051757,
73
+ "hybrid_mult": 0.5187,
74
+ "ppl": 10.2289,
75
+ "base_lr": 0.0001488,
76
+ "lr": 7.718e-05,
77
+ "wall_s": 8.24,
78
+ "gpu_mem_gb": 9.685
79
+ },
80
+ {
81
+ "step": 124,
82
+ "ce": 1.425178,
83
+ "gn2": 3.213177,
84
+ "tension": 1.792534,
85
+ "tension_ema": 3.156928,
86
+ "hybrid_mult": 0.5653,
87
+ "ppl": 4.1586,
88
+ "base_lr": 0.0002976,
89
+ "lr": 0.00016825,
90
+ "wall_s": 16.2,
91
+ "gpu_mem_gb": 9.685
92
+ },
93
+ {
94
+ "step": 186,
95
+ "ce": 0.367892,
96
+ "gn2": 2.70603,
97
+ "tension": 1.645002,
98
+ "tension_ema": 2.595913,
99
+ "hybrid_mult": 0.6314,
100
+ "ppl": 1.4447,
101
+ "base_lr": 0.00029958,
102
+ "lr": 0.00018914,
103
+ "wall_s": 24.15,
104
+ "gpu_mem_gb": 9.685
105
+ },
106
+ {
107
+ "step": 248,
108
+ "ce": 0.092343,
109
+ "gn2": 0.343442,
110
+ "tension": 0.586039,
111
+ "tension_ema": 1.84945,
112
+ "hybrid_mult": 0.5,
113
+ "ppl": 1.0967,
114
+ "base_lr": 0.00029825,
115
+ "lr": 0.00014912,
116
+ "wall_s": 32.11,
117
+ "gpu_mem_gb": 9.685
118
+ },
119
+ {
120
+ "step": 310,
121
+ "ce": 0.051104,
122
+ "gn2": 0.155324,
123
+ "tension": 0.394111,
124
+ "tension_ema": 1.720276,
125
+ "hybrid_mult": 0.5,
126
+ "ppl": 1.0524,
127
+ "base_lr": 0.00029602,
128
+ "lr": 0.00014801,
129
+ "wall_s": 40.07,
130
+ "gpu_mem_gb": 9.685
131
+ },
132
+ {
133
+ "step": 372,
134
+ "ce": 0.032159,
135
+ "gn2": 0.096184,
136
+ "tension": 0.310136,
137
+ "tension_ema": 1.107521,
138
+ "hybrid_mult": 0.5,
139
+ "ppl": 1.0327,
140
+ "base_lr": 0.00029292,
141
+ "lr": 0.00014646,
142
+ "wall_s": 48.03,
143
+ "gpu_mem_gb": 9.685
144
+ },
145
+ {
146
+ "step": 434,
147
+ "ce": 0.073192,
148
+ "gn2": 0.189307,
149
+ "tension": 0.435094,
150
+ "tension_ema": 1.509156,
151
+ "hybrid_mult": 0.5,
152
+ "ppl": 1.0759,
153
+ "base_lr": 0.00028895,
154
+ "lr": 0.00014448,
155
+ "wall_s": 55.99,
156
+ "gpu_mem_gb": 9.685
157
+ },
158
+ {
159
+ "step": 496,
160
+ "ce": 0.025241,
161
+ "gn2": 0.03459,
162
+ "tension": 0.185985,
163
+ "tension_ema": 0.9298,
164
+ "hybrid_mult": 0.5,
165
+ "ppl": 1.0256,
166
+ "base_lr": 0.00028415,
167
+ "lr": 0.00014208,
168
+ "wall_s": 64.0,
169
+ "gpu_mem_gb": 9.685
170
+ },
171
+ {
172
+ "step": 558,
173
+ "ce": 0.026985,
174
+ "gn2": 0.053764,
175
+ "tension": 0.23187,
176
+ "tension_ema": 0.637487,
177
+ "hybrid_mult": 0.5,
178
+ "ppl": 1.0274,
179
+ "base_lr": 0.00027855,
180
+ "lr": 0.00013928,
181
+ "wall_s": 71.95,
182
+ "gpu_mem_gb": 9.685
183
+ },
184
+ {
185
+ "step": 620,
186
+ "ce": 0.044962,
187
+ "gn2": 0.082591,
188
+ "tension": 0.287387,
189
+ "tension_ema": 0.93649,
190
+ "hybrid_mult": 0.5,
191
+ "ppl": 1.046,
192
+ "base_lr": 0.00027219,
193
+ "lr": 0.00013609,
194
+ "wall_s": 79.91,
195
+ "gpu_mem_gb": 9.685
196
+ },
197
+ {
198
+ "step": 682,
199
+ "ce": 0.020919,
200
+ "gn2": 0.055819,
201
+ "tension": 0.236261,
202
+ "tension_ema": 0.587089,
203
+ "hybrid_mult": 0.5,
204
+ "ppl": 1.0211,
205
+ "base_lr": 0.00026511,
206
+ "lr": 0.00013255,
207
+ "wall_s": 87.87,
208
+ "gpu_mem_gb": 9.685
209
+ },
210
+ {
211
+ "step": 744,
212
+ "ce": 0.019956,
213
+ "gn2": 0.032276,
214
+ "tension": 0.179656,
215
+ "tension_ema": 0.40721,
216
+ "hybrid_mult": 0.5,
217
+ "ppl": 1.0202,
218
+ "base_lr": 0.00025735,
219
+ "lr": 0.00012867,
220
+ "wall_s": 95.83,
221
+ "gpu_mem_gb": 9.685
222
+ },
223
+ {
224
+ "step": 806,
225
+ "ce": 0.023102,
226
+ "gn2": 0.010866,
227
+ "tension": 0.10424,
228
+ "tension_ema": 0.508826,
229
+ "hybrid_mult": 0.5,
230
+ "ppl": 1.0234,
231
+ "base_lr": 0.00024897,
232
+ "lr": 0.00012449,
233
+ "wall_s": 103.79,
234
+ "gpu_mem_gb": 9.685
235
+ },
236
+ {
237
+ "step": 868,
238
+ "ce": 0.019404,
239
+ "gn2": 0.214611,
240
+ "tension": 0.463261,
241
+ "tension_ema": 0.333574,
242
+ "hybrid_mult": 1.3943,
243
+ "ppl": 1.0196,
244
+ "base_lr": 0.00024003,
245
+ "lr": 0.00033466,
246
+ "wall_s": 111.75,
247
+ "gpu_mem_gb": 9.685
248
+ },
249
+ {
250
+ "step": 930,
251
+ "ce": 0.01847,
252
+ "gn2": 0.006393,
253
+ "tension": 0.079957,
254
+ "tension_ema": 0.384131,
255
+ "hybrid_mult": 0.5,
256
+ "ppl": 1.0186,
257
+ "base_lr": 0.00023058,
258
+ "lr": 0.00011529,
259
+ "wall_s": 119.7,
260
+ "gpu_mem_gb": 9.685
261
+ },
262
+ {
263
+ "step": 992,
264
+ "ce": 0.019321,
265
+ "gn2": 0.016822,
266
+ "tension": 0.1297,
267
+ "tension_ema": 0.261188,
268
+ "hybrid_mult": 0.5,
269
+ "ppl": 1.0195,
270
+ "base_lr": 0.0002207,
271
+ "lr": 0.00011035,
272
+ "wall_s": 127.66,
273
+ "gpu_mem_gb": 9.685
274
+ },
275
+ {
276
+ "step": 1054,
277
+ "ce": 0.133722,
278
+ "gn2": 2.679645,
279
+ "tension": 1.636962,
280
+ "tension_ema": 0.254753,
281
+ "hybrid_mult": 2.0,
282
+ "ppl": 1.1431,
283
+ "base_lr": 0.00021044,
284
+ "lr": 0.00042087,
285
+ "wall_s": 135.62,
286
+ "gpu_mem_gb": 9.685
287
+ },
288
+ {
289
+ "step": 1116,
290
+ "ce": 0.016094,
291
+ "gn2": 0.019727,
292
+ "tension": 0.140452,
293
+ "tension_ema": 0.278774,
294
+ "hybrid_mult": 0.5013,
295
+ "ppl": 1.0162,
296
+ "base_lr": 0.00019987,
297
+ "lr": 0.0001002,
298
+ "wall_s": 143.58,
299
+ "gpu_mem_gb": 9.685
300
+ },
301
+ {
302
+ "step": 1178,
303
+ "ce": 0.014073,
304
+ "gn2": 0.005173,
305
+ "tension": 0.071925,
306
+ "tension_ema": 0.187478,
307
+ "hybrid_mult": 0.5,
308
+ "ppl": 1.0142,
309
+ "base_lr": 0.00018907,
310
+ "lr": 9.453e-05,
311
+ "wall_s": 151.54,
312
+ "gpu_mem_gb": 9.685
313
+ },
314
+ {
315
+ "step": 1240,
316
+ "ce": 0.02907,
317
+ "gn2": 0.039467,
318
+ "tension": 0.198663,
319
+ "tension_ema": 0.157182,
320
+ "hybrid_mult": 1.2673,
321
+ "ppl": 1.0295,
322
+ "base_lr": 0.0001781,
323
+ "lr": 0.00022571,
324
+ "wall_s": 159.49,
325
+ "gpu_mem_gb": 9.685
326
+ },
327
+ {
328
+ "step": 1302,
329
+ "ce": 0.014461,
330
+ "gn2": 0.008681,
331
+ "tension": 0.093172,
332
+ "tension_ema": 0.224678,
333
+ "hybrid_mult": 0.5,
334
+ "ppl": 1.0146,
335
+ "base_lr": 0.00016705,
336
+ "lr": 8.353e-05,
337
+ "wall_s": 167.44,
338
+ "gpu_mem_gb": 9.685
339
+ },
340
+ {
341
+ "step": 1364,
342
+ "ce": 0.015863,
343
+ "gn2": 0.007843,
344
+ "tension": 0.088562,
345
+ "tension_ema": 0.176058,
346
+ "hybrid_mult": 0.5005,
347
+ "ppl": 1.016,
348
+ "base_lr": 0.00015599,
349
+ "lr": 7.807e-05,
350
+ "wall_s": 175.4,
351
+ "gpu_mem_gb": 9.685
352
+ },
353
+ {
354
+ "step": 1426,
355
+ "ce": 0.016053,
356
+ "gn2": 0.272274,
357
+ "tension": 0.521799,
358
+ "tension_ema": 0.287108,
359
+ "hybrid_mult": 1.8326,
360
+ "ppl": 1.0162,
361
+ "base_lr": 0.00014498,
362
+ "lr": 0.00026569,
363
+ "wall_s": 183.36,
364
+ "gpu_mem_gb": 9.685
365
+ },
366
+ {
367
+ "step": 1488,
368
+ "ce": 0.011513,
369
+ "gn2": 0.003074,
370
+ "tension": 0.055445,
371
+ "tension_ema": 0.1871,
372
+ "hybrid_mult": 0.5,
373
+ "ppl": 1.0116,
374
+ "base_lr": 0.00013411,
375
+ "lr": 6.706e-05,
376
+ "wall_s": 191.38,
377
+ "gpu_mem_gb": 9.685
378
+ },
379
+ {
380
+ "step": 1550,
381
+ "ce": 0.014431,
382
+ "gn2": 0.007639,
383
+ "tension": 0.087404,
384
+ "tension_ema": 0.131829,
385
+ "hybrid_mult": 0.6608,
386
+ "ppl": 1.0145,
387
+ "base_lr": 0.00012345,
388
+ "lr": 8.157e-05,
389
+ "wall_s": 199.34,
390
+ "gpu_mem_gb": 9.685
391
+ },
392
+ {
393
+ "step": 1612,
394
+ "ce": 0.015038,
395
+ "gn2": 0.069711,
396
+ "tension": 0.264028,
397
+ "tension_ema": 0.11471,
398
+ "hybrid_mult": 2.0,
399
+ "ppl": 1.0152,
400
+ "base_lr": 0.00011307,
401
+ "lr": 0.00022614,
402
+ "wall_s": 207.31,
403
+ "gpu_mem_gb": 9.685
404
+ },
405
+ {
406
+ "step": 1674,
407
+ "ce": 0.011891,
408
+ "gn2": 0.003348,
409
+ "tension": 0.057862,
410
+ "tension_ema": 0.101351,
411
+ "hybrid_mult": 0.5684,
412
+ "ppl": 1.012,
413
+ "base_lr": 0.00010304,
414
+ "lr": 5.857e-05,
415
+ "wall_s": 215.26,
416
+ "gpu_mem_gb": 9.685
417
+ },
418
+ {
419
+ "step": 1736,
420
+ "ce": 0.010649,
421
+ "gn2": 0.00499,
422
+ "tension": 0.070638,
423
+ "tension_ema": 0.083665,
424
+ "hybrid_mult": 0.843,
425
+ "ppl": 1.0107,
426
+ "base_lr": 9.342e-05,
427
+ "lr": 7.875e-05,
428
+ "wall_s": 223.22,
429
+ "gpu_mem_gb": 9.685
430
+ },
431
+ {
432
+ "step": 1798,
433
+ "ce": 0.006095,
434
+ "gn2": 0.001609,
435
+ "tension": 0.040115,
436
+ "tension_ema": 0.072206,
437
+ "hybrid_mult": 0.5531,
438
+ "ppl": 1.0061,
439
+ "base_lr": 8.428e-05,
440
+ "lr": 4.662e-05,
441
+ "wall_s": 231.17,
442
+ "gpu_mem_gb": 9.685
443
+ },
444
+ {
445
+ "step": 1860,
446
+ "ce": 0.011065,
447
+ "gn2": 0.006058,
448
+ "tension": 0.07783,
449
+ "tension_ema": 0.077525,
450
+ "hybrid_mult": 1.004,
451
+ "ppl": 1.0111,
452
+ "base_lr": 7.569e-05,
453
+ "lr": 7.599e-05,
454
+ "wall_s": 239.13,
455
+ "gpu_mem_gb": 9.685
456
+ },
457
+ {
458
+ "step": 1922,
459
+ "ce": 0.009454,
460
+ "gn2": 0.007259,
461
+ "tension": 0.0852,
462
+ "tension_ema": 0.07025,
463
+ "hybrid_mult": 1.2154,
464
+ "ppl": 1.0095,
465
+ "base_lr": 6.77e-05,
466
+ "lr": 8.228e-05,
467
+ "wall_s": 247.09,
468
+ "gpu_mem_gb": 9.685
469
+ },
470
+ {
471
+ "step": 1984,
472
+ "ce": 0.008228,
473
+ "gn2": 0.001878,
474
+ "tension": 0.04334,
475
+ "tension_ema": 0.063311,
476
+ "hybrid_mult": 0.6824,
477
+ "ppl": 1.0083,
478
+ "base_lr": 6.036e-05,
479
+ "lr": 4.119e-05,
480
+ "wall_s": 255.05,
481
+ "gpu_mem_gb": 9.685
482
+ },
483
+ {
484
+ "step": 2046,
485
+ "ce": 0.008526,
486
+ "gn2": 0.001148,
487
+ "tension": 0.033876,
488
+ "tension_ema": 0.057639,
489
+ "hybrid_mult": 0.5853,
490
+ "ppl": 1.0086,
491
+ "base_lr": 5.372e-05,
492
+ "lr": 3.144e-05,
493
+ "wall_s": 263.01,
494
+ "gpu_mem_gb": 9.685
495
+ },
496
+ {
497
+ "step": 2108,
498
+ "ce": 0.010443,
499
+ "gn2": 0.00357,
500
+ "tension": 0.059749,
501
+ "tension_ema": 0.054624,
502
+ "hybrid_mult": 1.0949,
503
+ "ppl": 1.0105,
504
+ "base_lr": 4.783e-05,
505
+ "lr": 5.237e-05,
506
+ "wall_s": 270.96,
507
+ "gpu_mem_gb": 9.685
508
+ },
509
+ {
510
+ "step": 2170,
511
+ "ce": 0.009984,
512
+ "gn2": 0.004114,
513
+ "tension": 0.064138,
514
+ "tension_ema": 0.052662,
515
+ "hybrid_mult": 1.2206,
516
+ "ppl": 1.01,
517
+ "base_lr": 4.273e-05,
518
+ "lr": 5.216e-05,
519
+ "wall_s": 278.92,
520
+ "gpu_mem_gb": 9.685
521
+ },
522
+ {
523
+ "step": 2232,
524
+ "ce": 0.008678,
525
+ "gn2": 0.001758,
526
+ "tension": 0.041926,
527
+ "tension_ema": 0.049852,
528
+ "hybrid_mult": 0.8397,
529
+ "ppl": 1.0087,
530
+ "base_lr": 3.846e-05,
531
+ "lr": 3.229e-05,
532
+ "wall_s": 286.84,
533
+ "gpu_mem_gb": 9.685
534
+ },
535
+ {
536
+ "step": 2294,
537
+ "ce": 0.008668,
538
+ "gn2": 0.001581,
539
+ "tension": 0.039767,
540
+ "tension_ema": 0.048133,
541
+ "hybrid_mult": 0.8247,
542
+ "ppl": 1.0087,
543
+ "base_lr": 3.503e-05,
544
+ "lr": 2.889e-05,
545
+ "wall_s": 294.8,
546
+ "gpu_mem_gb": 9.685
547
+ },
548
+ {
549
+ "step": 2356,
550
+ "ce": 0.008016,
551
+ "gn2": 0.00296,
552
+ "tension": 0.054409,
553
+ "tension_ema": 0.047827,
554
+ "hybrid_mult": 1.1392,
555
+ "ppl": 1.008,
556
+ "base_lr": 3.248e-05,
557
+ "lr": 3.7e-05,
558
+ "wall_s": 302.76,
559
+ "gpu_mem_gb": 9.685
560
+ },
561
+ {
562
+ "step": 2418,
563
+ "ce": 0.012541,
564
+ "gn2": 0.002424,
565
+ "tension": 0.049238,
566
+ "tension_ema": 0.047811,
567
+ "hybrid_mult": 1.0302,
568
+ "ppl": 1.0126,
569
+ "base_lr": 3.081e-05,
570
+ "lr": 3.174e-05,
571
+ "wall_s": 310.72,
572
+ "gpu_mem_gb": 9.685
573
+ },
574
+ {
575
+ "step": 2480,
576
+ "ce": 0.009382,
577
+ "gn2": 0.002626,
578
+ "tension": 0.051242,
579
+ "tension_ema": 0.04711,
580
+ "hybrid_mult": 1.0887,
581
+ "ppl": 1.0094,
582
+ "base_lr": 3.005e-05,
583
+ "lr": 3.272e-05,
584
+ "wall_s": 318.73,
585
+ "gpu_mem_gb": 9.685
586
+ },
587
+ {
588
+ "step": 2500,
589
+ "ce": 0.007762,
590
+ "gn2": 0.001495,
591
+ "tension": 0.038659,
592
+ "tension_ema": 0.046574,
593
+ "hybrid_mult": 0.8286,
594
+ "ppl": 1.0078,
595
+ "base_lr": 3e-05,
596
+ "lr": 2.486e-05,
597
+ "wall_s": 321.3,
598
+ "gpu_mem_gb": 9.685
599
+ }
600
+ ],
601
+ "corpus": "corpus_v3.jsonl",
602
+ "corpus_bytes": 6223023
603
+ }