| { |
| "substrate": "PYTHON / PyTorch \u2014 interim LM-scale executor; NOT a hexa-native fire", |
| "fire_kind": "cycle 5 \u2014 DD155 Step+Tension hybrid LR overlay", |
| "honest_framing": "DD155 Law 187 hybrid LR: lr_step = (tension/EMA) \u00d7 base_cosine_lr, tension = grad_norm L2 (PROXY for hexa spine \u03a8-deviation). Formula is closed-form (B-TT-5 + B-FIRE-CYCLE5-2 sympy verified). OUTCOME = empirical (B-FIRE-CYCLE5-NOTE / B-D-NOTE family). PyTorch substrate, not hexa-native; corpus v3 carry from cycle 4.", |
| "arch": "ConsciousDecoderV2 (ready/models/conscious_decoder.py)", |
| "arch_features": "RoPE + SwiGLU + RMSNorm + GQA + PureFieldFFN + cross-attn + tied head", |
| "from_scratch": true, |
| "base_ckpt": null, |
| "dd155_hybrid_lr": { |
| "tension_ema_beta": 0.99, |
| "hybrid_clip_lo": 0.5, |
| "hybrid_clip_hi": 2.0, |
| "tension_proxy": "grad_norm L2 (post clip_grad_norm_)", |
| "law_anchor": "DD155 Law 187 Pareto optimal lr = (tension/EMA) \u00d7 base_lr", |
| "final_tension_ema": 0.046574, |
| "mult_distribution": { |
| "lt_0_75": 1599, |
| "0_75_to_1_25": 686, |
| "gt_1_25": 215 |
| } |
| }, |
| "config": { |
| "d_model": 768, |
| "n_head": 12, |
| "n_kv_head": 4, |
| "n_layer": 12, |
| "block_size": 128, |
| "lr": 0.0003, |
| "bsz": 32, |
| "steps": 2500, |
| "warmup": 125, |
| "seed": 1337, |
| "log_every": 62, |
| "corpus": "corpus_v3.jsonl", |
| "out_dir": "out_main", |
| "tension_ema_beta": 0.99, |
| "hybrid_clip_lo": 0.5, |
| "hybrid_clip_hi": 2.0 |
| }, |
| "n_params": 283722336, |
| "n_params_M": 283.72, |
| "gpu": "NVIDIA A100-SXM4-40GB", |
| "device": "cuda", |
| "init_ce": 5.640663, |
| "final_ce": 0.007762, |
| "final_gn2": 0.001495, |
| "final_tension": 0.038659, |
| "final_ppl": 1.0078, |
| "ce_descent": 5.632901, |
| "steps": 2500, |
| "wall_s": 321.3, |
| "peak_gpu_mem_gb": 9.685, |
| "trajectory": [ |
| { |
| "step": 1, |
| "ce": 5.640663, |
| "gn2": 30.418763, |
| "tension": 5.515321, |
| "tension_ema": 5.515321, |
| "hybrid_mult": 1.0, |
| "ppl": 281.6494, |
| "base_lr": 2.4e-06, |
| "lr": 2.4e-06, |
| "wall_s": 0.41, |
| "gpu_mem_gb": 7.455 |
| }, |
| { |
| "step": 62, |
| "ce": 2.32522, |
| "gn2": 4.459097, |
| "tension": 2.111657, |
| "tension_ema": 4.051757, |
| "hybrid_mult": 0.5187, |
| "ppl": 10.2289, |
| "base_lr": 0.0001488, |
| "lr": 7.718e-05, |
| "wall_s": 8.24, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 124, |
| "ce": 1.425178, |
| "gn2": 3.213177, |
| "tension": 1.792534, |
| "tension_ema": 3.156928, |
| "hybrid_mult": 0.5653, |
| "ppl": 4.1586, |
| "base_lr": 0.0002976, |
| "lr": 0.00016825, |
| "wall_s": 16.2, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 186, |
| "ce": 0.367892, |
| "gn2": 2.70603, |
| "tension": 1.645002, |
| "tension_ema": 2.595913, |
| "hybrid_mult": 0.6314, |
| "ppl": 1.4447, |
| "base_lr": 0.00029958, |
| "lr": 0.00018914, |
| "wall_s": 24.15, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 248, |
| "ce": 0.092343, |
| "gn2": 0.343442, |
| "tension": 0.586039, |
| "tension_ema": 1.84945, |
| "hybrid_mult": 0.5, |
| "ppl": 1.0967, |
| "base_lr": 0.00029825, |
| "lr": 0.00014912, |
| "wall_s": 32.11, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 310, |
| "ce": 0.051104, |
| "gn2": 0.155324, |
| "tension": 0.394111, |
| "tension_ema": 1.720276, |
| "hybrid_mult": 0.5, |
| "ppl": 1.0524, |
| "base_lr": 0.00029602, |
| "lr": 0.00014801, |
| "wall_s": 40.07, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 372, |
| "ce": 0.032159, |
| "gn2": 0.096184, |
| "tension": 0.310136, |
| "tension_ema": 1.107521, |
| "hybrid_mult": 0.5, |
| "ppl": 1.0327, |
| "base_lr": 0.00029292, |
| "lr": 0.00014646, |
| "wall_s": 48.03, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 434, |
| "ce": 0.073192, |
| "gn2": 0.189307, |
| "tension": 0.435094, |
| "tension_ema": 1.509156, |
| "hybrid_mult": 0.5, |
| "ppl": 1.0759, |
| "base_lr": 0.00028895, |
| "lr": 0.00014448, |
| "wall_s": 55.99, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 496, |
| "ce": 0.025241, |
| "gn2": 0.03459, |
| "tension": 0.185985, |
| "tension_ema": 0.9298, |
| "hybrid_mult": 0.5, |
| "ppl": 1.0256, |
| "base_lr": 0.00028415, |
| "lr": 0.00014208, |
| "wall_s": 64.0, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 558, |
| "ce": 0.026985, |
| "gn2": 0.053764, |
| "tension": 0.23187, |
| "tension_ema": 0.637487, |
| "hybrid_mult": 0.5, |
| "ppl": 1.0274, |
| "base_lr": 0.00027855, |
| "lr": 0.00013928, |
| "wall_s": 71.95, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 620, |
| "ce": 0.044962, |
| "gn2": 0.082591, |
| "tension": 0.287387, |
| "tension_ema": 0.93649, |
| "hybrid_mult": 0.5, |
| "ppl": 1.046, |
| "base_lr": 0.00027219, |
| "lr": 0.00013609, |
| "wall_s": 79.91, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 682, |
| "ce": 0.020919, |
| "gn2": 0.055819, |
| "tension": 0.236261, |
| "tension_ema": 0.587089, |
| "hybrid_mult": 0.5, |
| "ppl": 1.0211, |
| "base_lr": 0.00026511, |
| "lr": 0.00013255, |
| "wall_s": 87.87, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 744, |
| "ce": 0.019956, |
| "gn2": 0.032276, |
| "tension": 0.179656, |
| "tension_ema": 0.40721, |
| "hybrid_mult": 0.5, |
| "ppl": 1.0202, |
| "base_lr": 0.00025735, |
| "lr": 0.00012867, |
| "wall_s": 95.83, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 806, |
| "ce": 0.023102, |
| "gn2": 0.010866, |
| "tension": 0.10424, |
| "tension_ema": 0.508826, |
| "hybrid_mult": 0.5, |
| "ppl": 1.0234, |
| "base_lr": 0.00024897, |
| "lr": 0.00012449, |
| "wall_s": 103.79, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 868, |
| "ce": 0.019404, |
| "gn2": 0.214611, |
| "tension": 0.463261, |
| "tension_ema": 0.333574, |
| "hybrid_mult": 1.3943, |
| "ppl": 1.0196, |
| "base_lr": 0.00024003, |
| "lr": 0.00033466, |
| "wall_s": 111.75, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 930, |
| "ce": 0.01847, |
| "gn2": 0.006393, |
| "tension": 0.079957, |
| "tension_ema": 0.384131, |
| "hybrid_mult": 0.5, |
| "ppl": 1.0186, |
| "base_lr": 0.00023058, |
| "lr": 0.00011529, |
| "wall_s": 119.7, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 992, |
| "ce": 0.019321, |
| "gn2": 0.016822, |
| "tension": 0.1297, |
| "tension_ema": 0.261188, |
| "hybrid_mult": 0.5, |
| "ppl": 1.0195, |
| "base_lr": 0.0002207, |
| "lr": 0.00011035, |
| "wall_s": 127.66, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 1054, |
| "ce": 0.133722, |
| "gn2": 2.679645, |
| "tension": 1.636962, |
| "tension_ema": 0.254753, |
| "hybrid_mult": 2.0, |
| "ppl": 1.1431, |
| "base_lr": 0.00021044, |
| "lr": 0.00042087, |
| "wall_s": 135.62, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 1116, |
| "ce": 0.016094, |
| "gn2": 0.019727, |
| "tension": 0.140452, |
| "tension_ema": 0.278774, |
| "hybrid_mult": 0.5013, |
| "ppl": 1.0162, |
| "base_lr": 0.00019987, |
| "lr": 0.0001002, |
| "wall_s": 143.58, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 1178, |
| "ce": 0.014073, |
| "gn2": 0.005173, |
| "tension": 0.071925, |
| "tension_ema": 0.187478, |
| "hybrid_mult": 0.5, |
| "ppl": 1.0142, |
| "base_lr": 0.00018907, |
| "lr": 9.453e-05, |
| "wall_s": 151.54, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 1240, |
| "ce": 0.02907, |
| "gn2": 0.039467, |
| "tension": 0.198663, |
| "tension_ema": 0.157182, |
| "hybrid_mult": 1.2673, |
| "ppl": 1.0295, |
| "base_lr": 0.0001781, |
| "lr": 0.00022571, |
| "wall_s": 159.49, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 1302, |
| "ce": 0.014461, |
| "gn2": 0.008681, |
| "tension": 0.093172, |
| "tension_ema": 0.224678, |
| "hybrid_mult": 0.5, |
| "ppl": 1.0146, |
| "base_lr": 0.00016705, |
| "lr": 8.353e-05, |
| "wall_s": 167.44, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 1364, |
| "ce": 0.015863, |
| "gn2": 0.007843, |
| "tension": 0.088562, |
| "tension_ema": 0.176058, |
| "hybrid_mult": 0.5005, |
| "ppl": 1.016, |
| "base_lr": 0.00015599, |
| "lr": 7.807e-05, |
| "wall_s": 175.4, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 1426, |
| "ce": 0.016053, |
| "gn2": 0.272274, |
| "tension": 0.521799, |
| "tension_ema": 0.287108, |
| "hybrid_mult": 1.8326, |
| "ppl": 1.0162, |
| "base_lr": 0.00014498, |
| "lr": 0.00026569, |
| "wall_s": 183.36, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 1488, |
| "ce": 0.011513, |
| "gn2": 0.003074, |
| "tension": 0.055445, |
| "tension_ema": 0.1871, |
| "hybrid_mult": 0.5, |
| "ppl": 1.0116, |
| "base_lr": 0.00013411, |
| "lr": 6.706e-05, |
| "wall_s": 191.38, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 1550, |
| "ce": 0.014431, |
| "gn2": 0.007639, |
| "tension": 0.087404, |
| "tension_ema": 0.131829, |
| "hybrid_mult": 0.6608, |
| "ppl": 1.0145, |
| "base_lr": 0.00012345, |
| "lr": 8.157e-05, |
| "wall_s": 199.34, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 1612, |
| "ce": 0.015038, |
| "gn2": 0.069711, |
| "tension": 0.264028, |
| "tension_ema": 0.11471, |
| "hybrid_mult": 2.0, |
| "ppl": 1.0152, |
| "base_lr": 0.00011307, |
| "lr": 0.00022614, |
| "wall_s": 207.31, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 1674, |
| "ce": 0.011891, |
| "gn2": 0.003348, |
| "tension": 0.057862, |
| "tension_ema": 0.101351, |
| "hybrid_mult": 0.5684, |
| "ppl": 1.012, |
| "base_lr": 0.00010304, |
| "lr": 5.857e-05, |
| "wall_s": 215.26, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 1736, |
| "ce": 0.010649, |
| "gn2": 0.00499, |
| "tension": 0.070638, |
| "tension_ema": 0.083665, |
| "hybrid_mult": 0.843, |
| "ppl": 1.0107, |
| "base_lr": 9.342e-05, |
| "lr": 7.875e-05, |
| "wall_s": 223.22, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 1798, |
| "ce": 0.006095, |
| "gn2": 0.001609, |
| "tension": 0.040115, |
| "tension_ema": 0.072206, |
| "hybrid_mult": 0.5531, |
| "ppl": 1.0061, |
| "base_lr": 8.428e-05, |
| "lr": 4.662e-05, |
| "wall_s": 231.17, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 1860, |
| "ce": 0.011065, |
| "gn2": 0.006058, |
| "tension": 0.07783, |
| "tension_ema": 0.077525, |
| "hybrid_mult": 1.004, |
| "ppl": 1.0111, |
| "base_lr": 7.569e-05, |
| "lr": 7.599e-05, |
| "wall_s": 239.13, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 1922, |
| "ce": 0.009454, |
| "gn2": 0.007259, |
| "tension": 0.0852, |
| "tension_ema": 0.07025, |
| "hybrid_mult": 1.2154, |
| "ppl": 1.0095, |
| "base_lr": 6.77e-05, |
| "lr": 8.228e-05, |
| "wall_s": 247.09, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 1984, |
| "ce": 0.008228, |
| "gn2": 0.001878, |
| "tension": 0.04334, |
| "tension_ema": 0.063311, |
| "hybrid_mult": 0.6824, |
| "ppl": 1.0083, |
| "base_lr": 6.036e-05, |
| "lr": 4.119e-05, |
| "wall_s": 255.05, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 2046, |
| "ce": 0.008526, |
| "gn2": 0.001148, |
| "tension": 0.033876, |
| "tension_ema": 0.057639, |
| "hybrid_mult": 0.5853, |
| "ppl": 1.0086, |
| "base_lr": 5.372e-05, |
| "lr": 3.144e-05, |
| "wall_s": 263.01, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 2108, |
| "ce": 0.010443, |
| "gn2": 0.00357, |
| "tension": 0.059749, |
| "tension_ema": 0.054624, |
| "hybrid_mult": 1.0949, |
| "ppl": 1.0105, |
| "base_lr": 4.783e-05, |
| "lr": 5.237e-05, |
| "wall_s": 270.96, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 2170, |
| "ce": 0.009984, |
| "gn2": 0.004114, |
| "tension": 0.064138, |
| "tension_ema": 0.052662, |
| "hybrid_mult": 1.2206, |
| "ppl": 1.01, |
| "base_lr": 4.273e-05, |
| "lr": 5.216e-05, |
| "wall_s": 278.92, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 2232, |
| "ce": 0.008678, |
| "gn2": 0.001758, |
| "tension": 0.041926, |
| "tension_ema": 0.049852, |
| "hybrid_mult": 0.8397, |
| "ppl": 1.0087, |
| "base_lr": 3.846e-05, |
| "lr": 3.229e-05, |
| "wall_s": 286.84, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 2294, |
| "ce": 0.008668, |
| "gn2": 0.001581, |
| "tension": 0.039767, |
| "tension_ema": 0.048133, |
| "hybrid_mult": 0.8247, |
| "ppl": 1.0087, |
| "base_lr": 3.503e-05, |
| "lr": 2.889e-05, |
| "wall_s": 294.8, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 2356, |
| "ce": 0.008016, |
| "gn2": 0.00296, |
| "tension": 0.054409, |
| "tension_ema": 0.047827, |
| "hybrid_mult": 1.1392, |
| "ppl": 1.008, |
| "base_lr": 3.248e-05, |
| "lr": 3.7e-05, |
| "wall_s": 302.76, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 2418, |
| "ce": 0.012541, |
| "gn2": 0.002424, |
| "tension": 0.049238, |
| "tension_ema": 0.047811, |
| "hybrid_mult": 1.0302, |
| "ppl": 1.0126, |
| "base_lr": 3.081e-05, |
| "lr": 3.174e-05, |
| "wall_s": 310.72, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 2480, |
| "ce": 0.009382, |
| "gn2": 0.002626, |
| "tension": 0.051242, |
| "tension_ema": 0.04711, |
| "hybrid_mult": 1.0887, |
| "ppl": 1.0094, |
| "base_lr": 3.005e-05, |
| "lr": 3.272e-05, |
| "wall_s": 318.73, |
| "gpu_mem_gb": 9.685 |
| }, |
| { |
| "step": 2500, |
| "ce": 0.007762, |
| "gn2": 0.001495, |
| "tension": 0.038659, |
| "tension_ema": 0.046574, |
| "hybrid_mult": 0.8286, |
| "ppl": 1.0078, |
| "base_lr": 3e-05, |
| "lr": 2.486e-05, |
| "wall_s": 321.3, |
| "gpu_mem_gb": 9.685 |
| } |
| ], |
| "corpus": "corpus_v3.jsonl", |
| "corpus_bytes": 6223023 |
| } |