{ "summary": { "max_step": 17336, "loss_initial": 2.7233519554138184, "loss_final": 2.3416449626286826, "loss_min": 2.2302972316741942, "loss_min_step": 17260, "ppl_final": 10.398327361926201, "avg_tokens_per_sec": 1960335.6302571176, "rankme_initial": 414.6168212890625, "rankme_min": 414.6168212890625, "rankme_min_step": 0, "rankme_final": 431.12603759765625, "rankme_rebound_ratio": 1.0398180089685358, "ww_alpha_initial": 7.489332379498283, "ww_alpha_final": 7.571896228633995, "ww_healthy_frac_final": 0.15736040609137056, "twonn_id_final": 6.818349838256836, "tokens_B": 36.358324224 }, "dynamics": { "max_step": 17336, "landmarks": { "pct_10": 1733, "pct_25": 4334, "pct_50": 8668, "pct_75": 13002, "pct_90": 15602 }, "metrics": { "train/loss": { "n_points": 1735, "slopes_at_landmarks": { "pct_10": { "w100": -4.28877743807706e-05, "w500": -1.9821559681611903e-05, "w1000": -2.0593762133095024e-05, "w5000": -3.688523080177231e-05 }, "pct_25": { "w100": 0.0001107275558240486, "w500": -3.8268886694387226e-05, "w1000": -1.0966062831907276e-05, "w5000": -9.462775826338291e-06 }, "pct_50": { "w100": 0.00019825918024236506, "w500": -3.840362209947456e-06, "w1000": -1.657888490875455e-05, "w5000": -4.913578465691426e-06 }, "pct_75": { "w100": -2.04078500921076e-05, "w500": -6.083978467485627e-06, "w1000": -4.660763942261841e-06, "w5000": -1.4281992576798431e-06 }, "pct_90": { "w100": -0.0003515260002886256, "w500": -3.884735576817397e-05, "w1000": -1.6693517605964394e-05, "w5000": -1.4644461233839928e-05 } }, "inflection_points": [ { "step": 1760, "direction": "positive\u2192negative" }, { "step": 1890, "direction": "negative\u2192positive" }, { "step": 2210, "direction": "positive\u2192negative" }, { "step": 2380, "direction": "negative\u2192positive" }, { "step": 2590, "direction": "positive\u2192negative" }, { "step": 2970, "direction": "negative\u2192positive" }, { "step": 3260, "direction": "positive\u2192negative" }, { "step": 3400, "direction": "negative\u2192positive" }, { "step": 3620, "direction": "positive\u2192negative" }, { "step": 4000, "direction": "negative\u2192positive" }, { "step": 4290, "direction": "positive\u2192negative" }, { "step": 4500, "direction": "negative\u2192positive" }, { "step": 5300, "direction": "positive\u2192negative" }, { "step": 5880, "direction": "negative\u2192positive" }, { "step": 6120, "direction": "positive\u2192negative" }, { "step": 6280, "direction": "negative\u2192positive" }, { "step": 6570, "direction": "positive\u2192negative" }, { "step": 6920, "direction": "negative\u2192positive" }, { "step": 7250, "direction": "positive\u2192negative" }, { "step": 7320, "direction": "negative\u2192positive" }, { "step": 7810, "direction": "positive\u2192negative" }, { "step": 8050, "direction": "negative\u2192positive" }, { "step": 8190, "direction": "positive\u2192negative" }, { "step": 8600, "direction": "negative\u2192positive" }, { "step": 9000, "direction": "positive\u2192negative" }, { "step": 9070, "direction": "negative\u2192positive" }, { "step": 9310, "direction": "positive\u2192negative" }, { "step": 9670, "direction": "negative\u2192positive" }, { "step": 10040, "direction": "positive\u2192negative" }, { "step": 10730, "direction": "negative\u2192positive" }, { "step": 11280, "direction": "positive\u2192negative" }, { "step": 11960, "direction": "negative\u2192positive" }, { "step": 12240, "direction": "positive\u2192negative" }, { "step": 12700, "direction": "negative\u2192positive" }, { "step": 13530, "direction": "positive\u2192negative" }, { "step": 14200, "direction": "negative\u2192positive" }, { "step": 14540, "direction": "positive\u2192negative" }, { "step": 14870, "direction": "negative\u2192positive" }, { "step": 14980, "direction": "positive\u2192negative" }, { "step": 15280, "direction": "negative\u2192positive" }, { "step": 15450, "direction": "positive\u2192negative" }, { "step": 16410, "direction": "negative\u2192positive" } ], "rolling_stats": [ { "step_center": 250, "mean": 2.5536492648124693, "std": 0.06794927528669344, "p25": 2.494672417640686, "p75": 2.6106740951538088, "n": 50 }, { "step_center": 750, "mean": 2.448444580078125, "std": 0.02509056922621886, "p25": 2.4327359437942504, "p75": 2.465353083610535, "n": 50 }, { "step_center": 1250, "mean": 2.4215744848251344, "std": 0.026759712703853474, "p25": 2.3993499994277956, "p75": 2.437948441505432, "n": 50 }, { "step_center": 1750, "mean": 2.4059697308540344, "std": 0.025170641630145284, "p25": 2.3834667682647703, "p75": 2.424547815322876, "n": 50 }, { "step_center": 2250, "mean": 2.3959087138175965, "std": 0.023488664878493968, "p25": 2.376794672012329, "p75": 2.4120164632797243, "n": 50 }, { "step_center": 2750, "mean": 2.393886389732361, "std": 0.025725386553165204, "p25": 2.373078489303589, "p75": 2.4160205841064455, "n": 50 }, { "step_center": 3250, "mean": 2.383705304145813, "std": 0.02064650608402684, "p25": 2.368594765663147, "p75": 2.3968960523605345, "n": 50 }, { "step_center": 3750, "mean": 2.379629012107849, "std": 0.025284434826176497, "p25": 2.36467387676239, "p75": 2.400493335723877, "n": 50 }, { "step_center": 4250, "mean": 2.370320144176483, "std": 0.023835743914342983, "p25": 2.3525022745132445, "p75": 2.3869575023651124, "n": 50 }, { "step_center": 4750, "mean": 2.367443947792053, "std": 0.025451306105213456, "p25": 2.3461528539657595, "p75": 2.382859945297241, "n": 50 }, { "step_center": 5250, "mean": 2.3662837538719175, "std": 0.023101883335323745, "p25": 2.3492135047912597, "p75": 2.3814218521118162, "n": 50 }, { "step_center": 5750, "mean": 2.3646195211410523, "std": 0.023650718565722668, "p25": 2.346358561515808, "p75": 2.381751036643982, "n": 50 }, { "step_center": 6250, "mean": 2.3603261704444884, "std": 0.025939884185818045, "p25": 2.341030740737915, "p75": 2.380238080024719, "n": 50 }, { "step_center": 6750, "mean": 2.3573404712677, "std": 0.018924405171920438, "p25": 2.3400394916534424, "p75": 2.3729241132736205, "n": 50 }, { "step_center": 7250, "mean": 2.346787754058838, "std": 0.028397110004011492, "p25": 2.3257726430892944, "p75": 2.366376209259033, "n": 50 }, { "step_center": 7750, "mean": 2.3498280205726623, "std": 0.0222118732939722, "p25": 2.335556411743164, "p75": 2.361265182495117, "n": 50 }, { "step_center": 8250, "mean": 2.348032169342041, "std": 0.028050880889420815, "p25": 2.3264118671417235, "p75": 2.3686971426010133, "n": 50 }, { "step_center": 8750, "mean": 2.34533736371994, "std": 0.02437305733889354, "p25": 2.3267060041427614, "p75": 2.3611632585525513, "n": 50 }, { "step_center": 9250, "mean": 2.3430724515914916, "std": 0.02720907854868419, "p25": 2.32452507019043, "p75": 2.3617564916610716, "n": 50 }, { "step_center": 9750, "mean": 2.3414637241363523, "std": 0.026057814236921148, "p25": 2.3215897560119627, "p75": 2.357621693611145, "n": 50 }, { "step_center": 10250, "mean": 2.3405535078048705, "std": 0.02665497429150158, "p25": 2.321121668815613, "p75": 2.35941686630249, "n": 50 }, { "step_center": 10750, "mean": 2.3323154163360598, "std": 0.02170759677431937, "p25": 2.3169103145599363, "p75": 2.345703053474426, "n": 50 }, { "step_center": 11250, "mean": 2.334129483699799, "std": 0.023022957389190198, "p25": 2.319528651237488, "p75": 2.3511791706085203, "n": 50 }, { "step_center": 11750, "mean": 2.3361384110450745, "std": 0.02471085499602499, "p25": 2.3174596309661863, "p75": 2.350165772438049, "n": 50 }, { "step_center": 12250, "mean": 2.3365752921104432, "std": 0.027231387794148867, "p25": 2.313059759140015, "p75": 2.356953167915344, "n": 50 }, { "step_center": 12750, "mean": 2.331771935462952, "std": 0.025773844962216148, "p25": 2.3146870851516725, "p75": 2.3490057706832888, "n": 50 }, { "step_center": 13250, "mean": 2.3304363389015195, "std": 0.022590550447677177, "p25": 2.314772891998291, "p75": 2.350356388092041, "n": 50 }, { "step_center": 13750, "mean": 2.332354657649994, "std": 0.0278746048668042, "p25": 2.3138842582702637, "p75": 2.353069567680359, "n": 50 }, { "step_center": 14250, "mean": 2.328071443080902, "std": 0.027385776869959595, "p25": 2.3080037593841554, "p75": 2.350447750091553, "n": 50 }, { "step_center": 14750, "mean": 2.3318208928108213, "std": 0.024377556097741115, "p25": 2.312630367279053, "p75": 2.3485876083374024, "n": 50 }, { "step_center": 15250, "mean": 2.3255881023406983, "std": 0.02616183998425422, "p25": 2.30555522441864, "p75": 2.344960618019104, "n": 50 }, { "step_center": 15750, "mean": 2.3202085700035098, "std": 0.023800025651637833, "p25": 2.3002145290374756, "p75": 2.3358023881912233, "n": 50 }, { "step_center": 16250, "mean": 2.2996851239204408, "std": 0.026246680909362278, "p25": 2.2763946056365967, "p75": 2.3184911012649536, "n": 50 }, { "step_center": 16750, "mean": 2.2773553647994995, "std": 0.02097443365212286, "p25": 2.2597983360290526, "p75": 2.2906126737594605, "n": 50 }, { "step_center": 17250, "mean": 2.282346619424366, "std": 0.028653366820936898, "p25": 2.2540221214294434, "p75": 2.305117154121399, "n": 35 } ], "stability_score": 0.8442793520944198, "plateaus": [], "jumps": [ { "step": 7300, "delta": 0.11637275218963605, "sigma": 3.41 }, { "step": 9290, "delta": 0.11292421817779541, "sigma": 3.25 }, { "step": 9920, "delta": -0.10988917350769034, "sigma": 3.1 } ], "slope_sign_changes": [ { "step": 4890, "direction": "negative\u2192positive" }, { "step": 4900, "direction": "positive\u2192negative" }, { "step": 5090, "direction": "negative\u2192positive" }, { "step": 5100, "direction": "positive\u2192negative" }, { "step": 5140, "direction": "negative\u2192positive" }, { "step": 5150, "direction": "positive\u2192negative" }, { "step": 5160, "direction": "negative\u2192positive" }, { "step": 5260, "direction": "positive\u2192negative" }, { "step": 5300, "direction": "negative\u2192positive" }, { "step": 5460, "direction": "positive\u2192negative" }, { "step": 5480, "direction": "negative\u2192positive" }, { "step": 5510, "direction": "positive\u2192negative" }, { "step": 7490, "direction": "negative\u2192positive" }, { "step": 7510, "direction": "positive\u2192negative" }, { "step": 7530, "direction": "negative\u2192positive" }, { "step": 7540, "direction": "positive\u2192negative" }, { "step": 7670, "direction": "negative\u2192positive" }, { "step": 7790, "direction": "positive\u2192negative" }, { "step": 7830, "direction": "negative\u2192positive" }, { "step": 7850, "direction": "positive\u2192negative" }, { "step": 7860, "direction": "negative\u2192positive" }, { "step": 7940, "direction": "positive\u2192negative" }, { "step": 8030, "direction": "negative\u2192positive" }, { "step": 8100, "direction": "positive\u2192negative" }, { "step": 8170, "direction": "negative\u2192positive" }, { "step": 8290, "direction": "positive\u2192negative" }, { "step": 8300, "direction": "negative\u2192positive" }, { "step": 8310, "direction": "positive\u2192negative" }, { "step": 8390, "direction": "negative\u2192positive" }, { "step": 8400, "direction": "positive\u2192negative" }, { "step": 8890, "direction": "negative\u2192positive" }, { "step": 8920, "direction": "positive\u2192negative" }, { "step": 9230, "direction": "negative\u2192positive" }, { "step": 9250, "direction": "positive\u2192negative" }, { "step": 9280, "direction": "negative\u2192positive" }, { "step": 9290, "direction": "positive\u2192negative" }, { "step": 9350, "direction": "negative\u2192positive" }, { "step": 9360, "direction": "positive\u2192negative" }, { "step": 9370, "direction": "negative\u2192positive" }, { "step": 9380, "direction": "positive\u2192negative" }, { "step": 9410, "direction": "negative\u2192positive" }, { "step": 9420, "direction": "positive\u2192negative" }, { "step": 9900, "direction": "negative\u2192positive" }, { "step": 10080, "direction": "positive\u2192negative" }, { "step": 10100, "direction": "negative\u2192positive" }, { "step": 10220, "direction": "positive\u2192negative" }, { "step": 10250, "direction": "negative\u2192positive" }, { "step": 10270, "direction": "positive\u2192negative" }, { "step": 10290, "direction": "negative\u2192positive" }, { "step": 10320, "direction": "positive\u2192negative" }, { "step": 10960, "direction": "negative\u2192positive" }, { "step": 11560, "direction": "positive\u2192negative" }, { "step": 11600, "direction": "negative\u2192positive" }, { "step": 11750, "direction": "positive\u2192negative" }, { "step": 11820, "direction": "negative\u2192positive" }, { "step": 11830, "direction": "positive\u2192negative" }, { "step": 12130, "direction": "negative\u2192positive" }, { "step": 12150, "direction": "positive\u2192negative" }, { "step": 12240, "direction": "negative\u2192positive" }, { "step": 12290, "direction": "positive\u2192negative" }, { "step": 12340, "direction": "negative\u2192positive" }, { "step": 12360, "direction": "positive\u2192negative" }, { "step": 13110, "direction": "negative\u2192positive" }, { "step": 13120, "direction": "positive\u2192negative" }, { "step": 13130, "direction": "negative\u2192positive" }, { "step": 13240, "direction": "positive\u2192negative" }, { "step": 13250, "direction": "negative\u2192positive" }, { "step": 13750, "direction": "positive\u2192negative" }, { "step": 13780, "direction": "negative\u2192positive" }, { "step": 13790, "direction": "positive\u2192negative" }, { "step": 13860, "direction": "negative\u2192positive" }, { "step": 13870, "direction": "positive\u2192negative" }, { "step": 13950, "direction": "negative\u2192positive" }, { "step": 13990, "direction": "positive\u2192negative" }, { "step": 14010, "direction": "negative\u2192positive" }, { "step": 14030, "direction": "positive\u2192negative" }, { "step": 14360, "direction": "negative\u2192positive" }, { "step": 14370, "direction": "positive\u2192negative" }, { "step": 14380, "direction": "negative\u2192positive" }, { "step": 14390, "direction": "positive\u2192negative" }, { "step": 14400, "direction": "negative\u2192positive" }, { "step": 14410, "direction": "positive\u2192negative" }, { "step": 14430, "direction": "negative\u2192positive" }, { "step": 14550, "direction": "positive\u2192negative" }, { "step": 14630, "direction": "negative\u2192positive" }, { "step": 14710, "direction": "positive\u2192negative" }, { "step": 14720, "direction": "negative\u2192positive" }, { "step": 14730, "direction": "positive\u2192negative" }, { "step": 14760, "direction": "negative\u2192positive" }, { "step": 14770, "direction": "positive\u2192negative" }, { "step": 14880, "direction": "negative\u2192positive" }, { "step": 14890, "direction": "positive\u2192negative" }, { "step": 14900, "direction": "negative\u2192positive" }, { "step": 14910, "direction": "positive\u2192negative" }, { "step": 14940, "direction": "negative\u2192positive" }, { "step": 14950, "direction": "positive\u2192negative" }, { "step": 14970, "direction": "negative\u2192positive" }, { "step": 15010, "direction": "positive\u2192negative" }, { "step": 15080, "direction": "negative\u2192positive" }, { "step": 15090, "direction": "positive\u2192negative" }, { "step": 16890, "direction": "negative\u2192positive" } ] }, "geo/rankme_last": { "n_points": 232, "slopes_at_landmarks": { "pct_10": { "w100": null, "w500": 0.0033722650437127974, "w1000": 0.0013678783374828298, "w5000": 0.0019266211783584095 }, "pct_25": { "w100": null, "w500": 0.0008363414946056547, "w1000": 0.000515038347069597, "w5000": 0.0009746530627070197 }, "pct_50": { "w100": null, "w500": 0.004231329055059524, "w1000": 0.0006157128190819597, "w5000": 0.00034299754947128433 }, "pct_75": { "w100": null, "w500": 0.0007408040364583334, "w1000": 0.00018919467576694138, "w5000": 0.00010008815155809676 }, "pct_90": { "w100": null, "w500": -0.0008631388346354167, "w1000": 0.00013333386990613553, "w5000": 0.00033015691526263127 } }, "inflection_points": [], "rolling_stats": [ { "step_center": 250, "mean": 418.8371276855469, "std": 1.9706179295918487, "p25": 414.6168212890625, "p75": 419.4991760253906, "n": 7 }, { "step_center": 750, "mean": 420.7526070731027, "std": 1.1694264542259036, "p25": 419.0769348144531, "p75": 422.18658447265625, "n": 7 }, { "step_center": 1250, "mean": 422.3680369059245, "std": 0.612363257463856, "p25": 421.7069091796875, "p75": 422.3238830566406, "n": 6 }, { "step_center": 1750, "mean": 423.146728515625, "std": 0.7143769249048134, "p25": 422.30780029296875, "p75": 423.8494873046875, "n": 7 }, { "step_center": 2250, "mean": 423.8157217843192, "std": 0.6644047915811648, "p25": 422.3829040527344, "p75": 424.26641845703125, "n": 7 }, { "step_center": 2750, "mean": 424.7442321777344, "std": 0.38605224992275294, "p25": 424.3502197265625, "p75": 424.84075927734375, "n": 6 }, { "step_center": 3250, "mean": 425.2377493722098, "std": 0.9928863556386898, "p25": 424.3253173828125, "p75": 426.3678894042969, "n": 7 }, { "step_center": 3750, "mean": 426.2940150669643, "std": 0.36511910276686976, "p25": 425.8962707519531, "p75": 426.6614074707031, "n": 7 }, { "step_center": 4250, "mean": 426.9090983072917, "std": 0.5799380755235445, "p25": 426.3441162109375, "p75": 427.4007263183594, "n": 6 }, { "step_center": 4750, "mean": 426.70751517159596, "std": 0.3777274089993139, "p25": 426.2264404296875, "p75": 426.9776611328125, "n": 7 }, { "step_center": 5250, "mean": 427.1932460239955, "std": 0.3474182400524559, "p25": 426.6791076660156, "p75": 427.4221496582031, "n": 7 }, { "step_center": 5750, "mean": 428.02931722005206, "std": 0.7422348443206302, "p25": 427.381591796875, "p75": 428.8571472167969, "n": 6 }, { "step_center": 6250, "mean": 427.67706298828125, "std": 0.5068596593637932, "p25": 427.1493225097656, "p75": 428.06072998046875, "n": 7 }, { "step_center": 6750, "mean": 428.2369689941406, "std": 0.37135674083717257, "p25": 427.6800537109375, "p75": 428.4487609863281, "n": 7 }, { "step_center": 7250, "mean": 428.5103810628255, "std": 0.5867578912524614, "p25": 427.70166015625, "p75": 428.73895263671875, "n": 6 }, { "step_center": 7750, "mean": 428.0627746582031, "std": 0.5193927369314885, "p25": 427.2740173339844, "p75": 428.5312805175781, "n": 7 }, { "step_center": 8250, "mean": 428.3086591448103, "std": 0.5933176949414425, "p25": 427.58868408203125, "p75": 428.940185546875, "n": 7 }, { "step_center": 8750, "mean": 428.5880533854167, "std": 0.551078102138243, "p25": 427.8064880371094, "p75": 429.1161193847656, "n": 6 }, { "step_center": 9250, "mean": 428.8888636997768, "std": 0.5694960752056711, "p25": 428.0375061035156, "p75": 429.12469482421875, "n": 7 }, { "step_center": 9750, "mean": 429.2879638671875, "std": 0.7433667524753128, "p25": 428.4444580078125, "p75": 430.04833984375, "n": 7 }, { "step_center": 10250, "mean": 429.48259989420575, "std": 0.4522335608488025, "p25": 429.08953857421875, "p75": 429.6427001953125, "n": 6 }, { "step_center": 10750, "mean": 429.14767020089283, "std": 0.2757999472749072, "p25": 428.6318664550781, "p75": 429.35455322265625, "n": 7 }, { "step_center": 11250, "mean": 429.68481881277904, "std": 0.33605217382389435, "p25": 429.0997314453125, "p75": 429.8418884277344, "n": 7 }, { "step_center": 11750, "mean": 429.19440205891925, "std": 0.5124843902568214, "p25": 428.5133361816406, "p75": 429.4604187011719, "n": 6 }, { "step_center": 12250, "mean": 429.8018711635045, "std": 0.497040027800742, "p25": 428.8563232421875, "p75": 430.1318054199219, "n": 7 }, { "step_center": 12750, "mean": 429.53719220842635, "std": 0.6863248219684418, "p25": 428.7851867675781, "p75": 429.690673828125, "n": 7 }, { "step_center": 13250, "mean": 429.95374552408856, "std": 0.7975096227032695, "p25": 428.9168701171875, "p75": 430.7697448730469, "n": 6 }, { "step_center": 13750, "mean": 429.67769949776783, "std": 0.6044692586774199, "p25": 428.8565368652344, "p75": 430.0766296386719, "n": 7 }, { "step_center": 14250, "mean": 429.861807686942, "std": 0.7339570078253661, "p25": 428.733642578125, "p75": 430.5356140136719, "n": 7 }, { "step_center": 14750, "mean": 429.5887451171875, "std": 0.9283298841866231, "p25": 428.0323791503906, "p75": 430.1398010253906, "n": 6 }, { "step_center": 15250, "mean": 429.84422084263394, "std": 0.412175806266064, "p25": 429.30645751953125, "p75": 430.1964111328125, "n": 7 }, { "step_center": 15750, "mean": 430.33884974888394, "std": 0.47739178059533427, "p25": 429.5009765625, "p75": 430.82623291015625, "n": 7 }, { "step_center": 16250, "mean": 430.0373840332031, "std": 0.5542415663214534, "p25": 429.2351989746094, "p75": 430.5093688964844, "n": 6 }, { "step_center": 16750, "mean": 430.9304722377232, "std": 0.27740500209971125, "p25": 430.6304931640625, "p75": 431.13427734375, "n": 7 }, { "step_center": 17250, "mean": 430.9951477050781, "std": 0.11213697889472371, "p25": 430.8769836425781, "p75": 431.0931701660156, "n": 5 } ], "stability_score": 0.633204481136617, "plateaus": [], "jumps": [], "slope_sign_changes": [ { "step": 4575, "direction": "positive\u2192negative" }, { "step": 4875, "direction": "negative\u2192positive" }, { "step": 6000, "direction": "positive\u2192negative" }, { "step": 6375, "direction": "negative\u2192positive" }, { "step": 7125, "direction": "positive\u2192negative" }, { "step": 7650, "direction": "negative\u2192positive" }, { "step": 8175, "direction": "positive\u2192negative" }, { "step": 8325, "direction": "negative\u2192positive" }, { "step": 9675, "direction": "positive\u2192negative" }, { "step": 9750, "direction": "negative\u2192positive" }, { "step": 10125, "direction": "positive\u2192negative" }, { "step": 10200, "direction": "negative\u2192positive" }, { "step": 10275, "direction": "positive\u2192negative" }, { "step": 10725, "direction": "negative\u2192positive" }, { "step": 11325, "direction": "positive\u2192negative" }, { "step": 11775, "direction": "negative\u2192positive" }, { "step": 12300, "direction": "positive\u2192negative" }, { "step": 12825, "direction": "negative\u2192positive" }, { "step": 13425, "direction": "positive\u2192negative" }, { "step": 13800, "direction": "negative\u2192positive" }, { "step": 13950, "direction": "positive\u2192negative" }, { "step": 14400, "direction": "negative\u2192positive" }, { "step": 14475, "direction": "positive\u2192negative" }, { "step": 14700, "direction": "negative\u2192positive" }, { "step": 15750, "direction": "positive\u2192negative" }, { "step": 16050, "direction": "negative\u2192positive" } ] } } }, "geometric_health": { "layers": [ 0, 7, 14, 21, 27 ], "landmarks": { "early": 1725, "quarter": 4350, "mid": 8700, "three_quarter": 12975, "late": 15600, "final": 17325 }, "profiles": { "early": { "step": 1725, "layer_0": { "stable_rank_q_proj": 20.133892059326172, "stable_rank_k_proj": 17.006507873535156, "stable_rank_o_proj": 46.15428161621094, "stable_rank_gate_proj": 129.89878845214844, "stable_rank_down_proj": 55.68263244628906, "attn_entropy_mean": 6.25703239440918, "attn_entropy_std": 0.4080713391304016, "anisotropy": 0.06490539014339447, "dead_units": 0.0 }, "layer_7": { "stable_rank_q_proj": 42.706878662109375, "stable_rank_k_proj": 40.13029861450195, "stable_rank_o_proj": 89.30697631835938, "stable_rank_gate_proj": 79.12543487548828, "stable_rank_down_proj": 143.33609008789062, "attn_entropy_mean": 4.700146675109863, "attn_entropy_std": 0.7736154198646545, "anisotropy": 0.4299015402793884, "dead_units": 0.0 }, "layer_14": { "stable_rank_q_proj": 50.8188362121582, "stable_rank_k_proj": 41.04155349731445, "stable_rank_o_proj": 42.97220993041992, "stable_rank_gate_proj": 71.69290161132812, "stable_rank_down_proj": 126.66923522949219, "attn_entropy_mean": 5.53626823425293, "attn_entropy_std": 0.4032261073589325, "anisotropy": 0.39128610491752625, "dead_units": 0.0 }, "layer_21": { "stable_rank_q_proj": 40.08066940307617, "stable_rank_k_proj": 29.80875587463379, "stable_rank_o_proj": 67.41558837890625, "stable_rank_gate_proj": 63.583194732666016, "stable_rank_down_proj": 49.89845275878906, "attn_entropy_mean": 5.884044647216797, "attn_entropy_std": 0.31702759861946106, "anisotropy": 0.1451684832572937, "dead_units": 0.0 }, "layer_27": { "stable_rank_q_proj": 44.02388381958008, "stable_rank_k_proj": 31.312538146972656, "stable_rank_o_proj": 114.83377838134766, "stable_rank_gate_proj": 75.68281555175781, "stable_rank_down_proj": 127.66222381591797, "attn_entropy_mean": 4.35567045211792, "attn_entropy_std": 0.6677613854408264, "anisotropy": 0.10006999224424362, "dead_units": 0.0 } }, "quarter": { "step": 4350, "layer_0": { "stable_rank_q_proj": 20.62217140197754, "stable_rank_k_proj": 17.198904037475586, "stable_rank_o_proj": 45.66370391845703, "stable_rank_gate_proj": 130.02139282226562, "stable_rank_down_proj": 56.49225616455078, "attn_entropy_mean": 6.258755207061768, "attn_entropy_std": 0.4235442876815796, "anisotropy": 0.06752505153417587, "dead_units": 0.0 }, "layer_7": { "stable_rank_q_proj": 42.567344665527344, "stable_rank_k_proj": 39.40324783325195, "stable_rank_o_proj": 89.7774887084961, "stable_rank_gate_proj": 79.37095642089844, "stable_rank_down_proj": 143.08059692382812, "attn_entropy_mean": 4.703330039978027, "attn_entropy_std": 0.7717511653900146, "anisotropy": 0.4199843406677246, "dead_units": 0.0 }, "layer_14": { "stable_rank_q_proj": 51.64799880981445, "stable_rank_k_proj": 41.63627243041992, "stable_rank_o_proj": 42.701908111572266, "stable_rank_gate_proj": 71.93598937988281, "stable_rank_down_proj": 127.28962707519531, "attn_entropy_mean": 5.524231910705566, "attn_entropy_std": 0.43370646238327026, "anisotropy": 0.3828665316104889, "dead_units": 0.0 }, "layer_21": { "stable_rank_q_proj": 39.59270095825195, "stable_rank_k_proj": 29.09538459777832, "stable_rank_o_proj": 66.52333068847656, "stable_rank_gate_proj": 62.608497619628906, "stable_rank_down_proj": 49.814796447753906, "attn_entropy_mean": 5.853401184082031, "attn_entropy_std": 0.3115076422691345, "anisotropy": 0.1433832198381424, "dead_units": 0.0 }, "layer_27": { "stable_rank_q_proj": 43.69539260864258, "stable_rank_k_proj": 30.684505462646484, "stable_rank_o_proj": 110.17560577392578, "stable_rank_gate_proj": 73.6478271484375, "stable_rank_down_proj": 127.23016357421875, "attn_entropy_mean": 4.3218841552734375, "attn_entropy_std": 0.6490551233291626, "anisotropy": 0.10033217072486877, "dead_units": 0.0 } }, "mid": { "step": 8700, "layer_0": { "stable_rank_q_proj": 20.74656105041504, "stable_rank_k_proj": 17.162168502807617, "stable_rank_o_proj": 44.727996826171875, "stable_rank_gate_proj": 127.23369598388672, "stable_rank_down_proj": 56.846107482910156, "attn_entropy_mean": 6.235823631286621, "attn_entropy_std": 0.43743276596069336, "anisotropy": 0.06675643473863602, "dead_units": 0.0 }, "layer_7": { "stable_rank_q_proj": 42.293479919433594, "stable_rank_k_proj": 38.610774993896484, "stable_rank_o_proj": 89.17684936523438, "stable_rank_gate_proj": 79.0400161743164, "stable_rank_down_proj": 144.4713134765625, "attn_entropy_mean": 4.706167697906494, "attn_entropy_std": 0.7591027617454529, "anisotropy": 0.4047275185585022, "dead_units": 0.0 }, "layer_14": { "stable_rank_q_proj": 51.58811569213867, "stable_rank_k_proj": 42.609825134277344, "stable_rank_o_proj": 42.335105895996094, "stable_rank_gate_proj": 71.9311752319336, "stable_rank_down_proj": 126.46110534667969, "attn_entropy_mean": 5.540119171142578, "attn_entropy_std": 0.45732536911964417, "anisotropy": 0.37118563055992126, "dead_units": 0.0 }, "layer_21": { "stable_rank_q_proj": 39.304080963134766, "stable_rank_k_proj": 28.829919815063477, "stable_rank_o_proj": 65.58588409423828, "stable_rank_gate_proj": 61.5308837890625, "stable_rank_down_proj": 49.71337127685547, "attn_entropy_mean": 5.866855621337891, "attn_entropy_std": 0.3209395408630371, "anisotropy": 0.13976767659187317, "dead_units": 0.0 }, "layer_27": { "stable_rank_q_proj": 44.39973449707031, "stable_rank_k_proj": 30.10407829284668, "stable_rank_o_proj": 107.94586181640625, "stable_rank_gate_proj": 71.3941879272461, "stable_rank_down_proj": 129.004150390625, "attn_entropy_mean": 4.340854644775391, "attn_entropy_std": 0.6970290541648865, "anisotropy": 0.09302495419979095, "dead_units": 0.0 } }, "three_quarter": { "step": 12975, "layer_0": { "stable_rank_q_proj": 20.623004913330078, "stable_rank_k_proj": 16.95781898498535, "stable_rank_o_proj": 43.821876525878906, "stable_rank_gate_proj": 125.1052474975586, "stable_rank_down_proj": 57.683834075927734, "attn_entropy_mean": 6.233590126037598, "attn_entropy_std": 0.45708325505256653, "anisotropy": 0.06800613552331924, "dead_units": 0.0 }, "layer_7": { "stable_rank_q_proj": 41.80685043334961, "stable_rank_k_proj": 38.90849685668945, "stable_rank_o_proj": 88.35002136230469, "stable_rank_gate_proj": 78.26079559326172, "stable_rank_down_proj": 144.59579467773438, "attn_entropy_mean": 4.734737396240234, "attn_entropy_std": 0.7458856701850891, "anisotropy": 0.40183892846107483, "dead_units": 0.0 }, "layer_14": { "stable_rank_q_proj": 51.84466552734375, "stable_rank_k_proj": 43.42570114135742, "stable_rank_o_proj": 42.2808723449707, "stable_rank_gate_proj": 71.79308319091797, "stable_rank_down_proj": 127.38919067382812, "attn_entropy_mean": 5.545279026031494, "attn_entropy_std": 0.4711344242095947, "anisotropy": 0.37494924664497375, "dead_units": 0.0 }, "layer_21": { "stable_rank_q_proj": 38.30750274658203, "stable_rank_k_proj": 28.626514434814453, "stable_rank_o_proj": 65.13455963134766, "stable_rank_gate_proj": 59.88165283203125, "stable_rank_down_proj": 48.73686981201172, "attn_entropy_mean": 5.843373775482178, "attn_entropy_std": 0.332601934671402, "anisotropy": 0.1376378834247589, "dead_units": 0.0 }, "layer_27": { "stable_rank_q_proj": 44.75471878051758, "stable_rank_k_proj": 30.430049896240234, "stable_rank_o_proj": 106.74622344970703, "stable_rank_gate_proj": 69.8398666381836, "stable_rank_down_proj": 130.29200744628906, "attn_entropy_mean": 4.317837715148926, "attn_entropy_std": 0.69074946641922, "anisotropy": 0.10297069698572159, "dead_units": 0.0 } }, "late": { "step": 15600, "layer_0": { "stable_rank_q_proj": 20.5914363861084, "stable_rank_k_proj": 16.745973587036133, "stable_rank_o_proj": 43.73870849609375, "stable_rank_gate_proj": 123.3493423461914, "stable_rank_down_proj": 57.89291000366211, "attn_entropy_mean": 6.228974342346191, "attn_entropy_std": 0.46670883893966675, "anisotropy": 0.06984122097492218, "dead_units": 0.0 }, "layer_7": { "stable_rank_q_proj": 41.609771728515625, "stable_rank_k_proj": 38.61107635498047, "stable_rank_o_proj": 87.21759796142578, "stable_rank_gate_proj": 77.33723449707031, "stable_rank_down_proj": 143.90965270996094, "attn_entropy_mean": 4.746259689331055, "attn_entropy_std": 0.7679674029350281, "anisotropy": 0.38015806674957275, "dead_units": 0.0 }, "layer_14": { "stable_rank_q_proj": 51.66292190551758, "stable_rank_k_proj": 44.319820404052734, "stable_rank_o_proj": 42.175537109375, "stable_rank_gate_proj": 71.88668823242188, "stable_rank_down_proj": 126.96123504638672, "attn_entropy_mean": 5.53586483001709, "attn_entropy_std": 0.45710787177085876, "anisotropy": 0.36756598949432373, "dead_units": 0.0 }, "layer_21": { "stable_rank_q_proj": 38.205936431884766, "stable_rank_k_proj": 28.410799026489258, "stable_rank_o_proj": 64.53252410888672, "stable_rank_gate_proj": 59.53862380981445, "stable_rank_down_proj": 48.487876892089844, "attn_entropy_mean": 5.86893367767334, "attn_entropy_std": 0.3390011489391327, "anisotropy": 0.1381273865699768, "dead_units": 0.0 }, "layer_27": { "stable_rank_q_proj": 45.488624572753906, "stable_rank_k_proj": 30.428009033203125, "stable_rank_o_proj": 106.52822875976562, "stable_rank_gate_proj": 68.88688659667969, "stable_rank_down_proj": 129.5972442626953, "attn_entropy_mean": 4.31506872177124, "attn_entropy_std": 0.6959003210067749, "anisotropy": 0.09733413904905319, "dead_units": 0.0 } }, "final": { "step": 17325, "layer_0": { "stable_rank_q_proj": 20.46058464050293, "stable_rank_k_proj": 16.590166091918945, "stable_rank_o_proj": 43.638336181640625, "stable_rank_gate_proj": 122.99588775634766, "stable_rank_down_proj": 57.99134063720703, "attn_entropy_mean": 6.221095561981201, "attn_entropy_std": 0.4666074216365814, "anisotropy": 0.06611216813325882, "dead_units": 0.0 }, "layer_7": { "stable_rank_q_proj": 41.592491149902344, "stable_rank_k_proj": 38.8099479675293, "stable_rank_o_proj": 86.76081848144531, "stable_rank_gate_proj": 76.83895874023438, "stable_rank_down_proj": 144.66598510742188, "attn_entropy_mean": 4.743325710296631, "attn_entropy_std": 0.7405821681022644, "anisotropy": 0.3939518332481384, "dead_units": 0.0 }, "layer_14": { "stable_rank_q_proj": 51.51805114746094, "stable_rank_k_proj": 44.543739318847656, "stable_rank_o_proj": 42.110252380371094, "stable_rank_gate_proj": 71.66548156738281, "stable_rank_down_proj": 126.4752426147461, "attn_entropy_mean": 5.5013909339904785, "attn_entropy_std": 0.4690777361392975, "anisotropy": 0.3837595582008362, "dead_units": 0.0 }, "layer_21": { "stable_rank_q_proj": 38.228172302246094, "stable_rank_k_proj": 28.443443298339844, "stable_rank_o_proj": 64.04421997070312, "stable_rank_gate_proj": 59.131065368652344, "stable_rank_down_proj": 48.718719482421875, "attn_entropy_mean": 5.846819877624512, "attn_entropy_std": 0.3328602910041809, "anisotropy": 0.1341758519411087, "dead_units": 0.0 }, "layer_27": { "stable_rank_q_proj": 45.467472076416016, "stable_rank_k_proj": 30.417682647705078, "stable_rank_o_proj": 106.4880599975586, "stable_rank_gate_proj": 68.29488372802734, "stable_rank_down_proj": 129.311279296875, "attn_entropy_mean": 4.277439594268799, "attn_entropy_std": 0.7137026190757751, "anisotropy": 0.09188564866781235, "dead_units": 0.0 } } }, "depth_gradient": { "layers": { "first": 0, "mid": 14, "last": 27 }, "attn_entropy_mean": { "first": 6.221095561981201, "mid": 5.5013909339904785, "last": 4.277439594268799 }, "anisotropy": { "first": 0.06611216813325882, "mid": 0.3837595582008362, "last": 0.09188564866781235 }, "stable_rank_q_proj": { "first": 20.46058464050293, "mid": 51.51805114746094, "last": 45.467472076416016 } }, "rankme_stability": 0.28062874758414175 } }