| { |
| "summary": { |
| "max_step": 17336, |
| "loss_initial": 2.7233519554138184, |
| "loss_final": 2.3416449626286826, |
| "loss_min": 2.2302972316741942, |
| "loss_min_step": 17260, |
| "ppl_final": 10.398327361926201, |
| "avg_tokens_per_sec": 1960335.6302571176, |
| "rankme_initial": 414.6168212890625, |
| "rankme_min": 414.6168212890625, |
| "rankme_min_step": 0, |
| "rankme_final": 431.12603759765625, |
| "rankme_rebound_ratio": 1.0398180089685358, |
| "ww_alpha_initial": 7.489332379498283, |
| "ww_alpha_final": 7.571896228633995, |
| "ww_healthy_frac_final": 0.15736040609137056, |
| "twonn_id_final": 6.818349838256836, |
| "tokens_B": 36.358324224 |
| }, |
| "dynamics": { |
| "max_step": 17336, |
| "landmarks": { |
| "pct_10": 1733, |
| "pct_25": 4334, |
| "pct_50": 8668, |
| "pct_75": 13002, |
| "pct_90": 15602 |
| }, |
| "metrics": { |
| "train/loss": { |
| "n_points": 1735, |
| "slopes_at_landmarks": { |
| "pct_10": { |
| "w100": -4.28877743807706e-05, |
| "w500": -1.9821559681611903e-05, |
| "w1000": -2.0593762133095024e-05, |
| "w5000": -3.688523080177231e-05 |
| }, |
| "pct_25": { |
| "w100": 0.0001107275558240486, |
| "w500": -3.8268886694387226e-05, |
| "w1000": -1.0966062831907276e-05, |
| "w5000": -9.462775826338291e-06 |
| }, |
| "pct_50": { |
| "w100": 0.00019825918024236506, |
| "w500": -3.840362209947456e-06, |
| "w1000": -1.657888490875455e-05, |
| "w5000": -4.913578465691426e-06 |
| }, |
| "pct_75": { |
| "w100": -2.04078500921076e-05, |
| "w500": -6.083978467485627e-06, |
| "w1000": -4.660763942261841e-06, |
| "w5000": -1.4281992576798431e-06 |
| }, |
| "pct_90": { |
| "w100": -0.0003515260002886256, |
| "w500": -3.884735576817397e-05, |
| "w1000": -1.6693517605964394e-05, |
| "w5000": -1.4644461233839928e-05 |
| } |
| }, |
| "inflection_points": [ |
| { |
| "step": 1760, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 1890, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 2210, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 2380, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 2590, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 2970, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 3260, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 3400, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 3620, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 4000, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 4290, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 4500, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 5300, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 5880, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 6120, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 6280, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 6570, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 6920, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 7250, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 7320, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 7810, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 8050, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 8190, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 8600, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 9000, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 9070, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 9310, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 9670, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 10040, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 10730, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 11280, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 11960, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 12240, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 12700, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 13530, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 14200, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 14540, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 14870, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 14980, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 15280, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 15450, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 16410, |
| "direction": "negative\u2192positive" |
| } |
| ], |
| "rolling_stats": [ |
| { |
| "step_center": 250, |
| "mean": 2.5536492648124693, |
| "std": 0.06794927528669344, |
| "p25": 2.494672417640686, |
| "p75": 2.6106740951538088, |
| "n": 50 |
| }, |
| { |
| "step_center": 750, |
| "mean": 2.448444580078125, |
| "std": 0.02509056922621886, |
| "p25": 2.4327359437942504, |
| "p75": 2.465353083610535, |
| "n": 50 |
| }, |
| { |
| "step_center": 1250, |
| "mean": 2.4215744848251344, |
| "std": 0.026759712703853474, |
| "p25": 2.3993499994277956, |
| "p75": 2.437948441505432, |
| "n": 50 |
| }, |
| { |
| "step_center": 1750, |
| "mean": 2.4059697308540344, |
| "std": 0.025170641630145284, |
| "p25": 2.3834667682647703, |
| "p75": 2.424547815322876, |
| "n": 50 |
| }, |
| { |
| "step_center": 2250, |
| "mean": 2.3959087138175965, |
| "std": 0.023488664878493968, |
| "p25": 2.376794672012329, |
| "p75": 2.4120164632797243, |
| "n": 50 |
| }, |
| { |
| "step_center": 2750, |
| "mean": 2.393886389732361, |
| "std": 0.025725386553165204, |
| "p25": 2.373078489303589, |
| "p75": 2.4160205841064455, |
| "n": 50 |
| }, |
| { |
| "step_center": 3250, |
| "mean": 2.383705304145813, |
| "std": 0.02064650608402684, |
| "p25": 2.368594765663147, |
| "p75": 2.3968960523605345, |
| "n": 50 |
| }, |
| { |
| "step_center": 3750, |
| "mean": 2.379629012107849, |
| "std": 0.025284434826176497, |
| "p25": 2.36467387676239, |
| "p75": 2.400493335723877, |
| "n": 50 |
| }, |
| { |
| "step_center": 4250, |
| "mean": 2.370320144176483, |
| "std": 0.023835743914342983, |
| "p25": 2.3525022745132445, |
| "p75": 2.3869575023651124, |
| "n": 50 |
| }, |
| { |
| "step_center": 4750, |
| "mean": 2.367443947792053, |
| "std": 0.025451306105213456, |
| "p25": 2.3461528539657595, |
| "p75": 2.382859945297241, |
| "n": 50 |
| }, |
| { |
| "step_center": 5250, |
| "mean": 2.3662837538719175, |
| "std": 0.023101883335323745, |
| "p25": 2.3492135047912597, |
| "p75": 2.3814218521118162, |
| "n": 50 |
| }, |
| { |
| "step_center": 5750, |
| "mean": 2.3646195211410523, |
| "std": 0.023650718565722668, |
| "p25": 2.346358561515808, |
| "p75": 2.381751036643982, |
| "n": 50 |
| }, |
| { |
| "step_center": 6250, |
| "mean": 2.3603261704444884, |
| "std": 0.025939884185818045, |
| "p25": 2.341030740737915, |
| "p75": 2.380238080024719, |
| "n": 50 |
| }, |
| { |
| "step_center": 6750, |
| "mean": 2.3573404712677, |
| "std": 0.018924405171920438, |
| "p25": 2.3400394916534424, |
| "p75": 2.3729241132736205, |
| "n": 50 |
| }, |
| { |
| "step_center": 7250, |
| "mean": 2.346787754058838, |
| "std": 0.028397110004011492, |
| "p25": 2.3257726430892944, |
| "p75": 2.366376209259033, |
| "n": 50 |
| }, |
| { |
| "step_center": 7750, |
| "mean": 2.3498280205726623, |
| "std": 0.0222118732939722, |
| "p25": 2.335556411743164, |
| "p75": 2.361265182495117, |
| "n": 50 |
| }, |
| { |
| "step_center": 8250, |
| "mean": 2.348032169342041, |
| "std": 0.028050880889420815, |
| "p25": 2.3264118671417235, |
| "p75": 2.3686971426010133, |
| "n": 50 |
| }, |
| { |
| "step_center": 8750, |
| "mean": 2.34533736371994, |
| "std": 0.02437305733889354, |
| "p25": 2.3267060041427614, |
| "p75": 2.3611632585525513, |
| "n": 50 |
| }, |
| { |
| "step_center": 9250, |
| "mean": 2.3430724515914916, |
| "std": 0.02720907854868419, |
| "p25": 2.32452507019043, |
| "p75": 2.3617564916610716, |
| "n": 50 |
| }, |
| { |
| "step_center": 9750, |
| "mean": 2.3414637241363523, |
| "std": 0.026057814236921148, |
| "p25": 2.3215897560119627, |
| "p75": 2.357621693611145, |
| "n": 50 |
| }, |
| { |
| "step_center": 10250, |
| "mean": 2.3405535078048705, |
| "std": 0.02665497429150158, |
| "p25": 2.321121668815613, |
| "p75": 2.35941686630249, |
| "n": 50 |
| }, |
| { |
| "step_center": 10750, |
| "mean": 2.3323154163360598, |
| "std": 0.02170759677431937, |
| "p25": 2.3169103145599363, |
| "p75": 2.345703053474426, |
| "n": 50 |
| }, |
| { |
| "step_center": 11250, |
| "mean": 2.334129483699799, |
| "std": 0.023022957389190198, |
| "p25": 2.319528651237488, |
| "p75": 2.3511791706085203, |
| "n": 50 |
| }, |
| { |
| "step_center": 11750, |
| "mean": 2.3361384110450745, |
| "std": 0.02471085499602499, |
| "p25": 2.3174596309661863, |
| "p75": 2.350165772438049, |
| "n": 50 |
| }, |
| { |
| "step_center": 12250, |
| "mean": 2.3365752921104432, |
| "std": 0.027231387794148867, |
| "p25": 2.313059759140015, |
| "p75": 2.356953167915344, |
| "n": 50 |
| }, |
| { |
| "step_center": 12750, |
| "mean": 2.331771935462952, |
| "std": 0.025773844962216148, |
| "p25": 2.3146870851516725, |
| "p75": 2.3490057706832888, |
| "n": 50 |
| }, |
| { |
| "step_center": 13250, |
| "mean": 2.3304363389015195, |
| "std": 0.022590550447677177, |
| "p25": 2.314772891998291, |
| "p75": 2.350356388092041, |
| "n": 50 |
| }, |
| { |
| "step_center": 13750, |
| "mean": 2.332354657649994, |
| "std": 0.0278746048668042, |
| "p25": 2.3138842582702637, |
| "p75": 2.353069567680359, |
| "n": 50 |
| }, |
| { |
| "step_center": 14250, |
| "mean": 2.328071443080902, |
| "std": 0.027385776869959595, |
| "p25": 2.3080037593841554, |
| "p75": 2.350447750091553, |
| "n": 50 |
| }, |
| { |
| "step_center": 14750, |
| "mean": 2.3318208928108213, |
| "std": 0.024377556097741115, |
| "p25": 2.312630367279053, |
| "p75": 2.3485876083374024, |
| "n": 50 |
| }, |
| { |
| "step_center": 15250, |
| "mean": 2.3255881023406983, |
| "std": 0.02616183998425422, |
| "p25": 2.30555522441864, |
| "p75": 2.344960618019104, |
| "n": 50 |
| }, |
| { |
| "step_center": 15750, |
| "mean": 2.3202085700035098, |
| "std": 0.023800025651637833, |
| "p25": 2.3002145290374756, |
| "p75": 2.3358023881912233, |
| "n": 50 |
| }, |
| { |
| "step_center": 16250, |
| "mean": 2.2996851239204408, |
| "std": 0.026246680909362278, |
| "p25": 2.2763946056365967, |
| "p75": 2.3184911012649536, |
| "n": 50 |
| }, |
| { |
| "step_center": 16750, |
| "mean": 2.2773553647994995, |
| "std": 0.02097443365212286, |
| "p25": 2.2597983360290526, |
| "p75": 2.2906126737594605, |
| "n": 50 |
| }, |
| { |
| "step_center": 17250, |
| "mean": 2.282346619424366, |
| "std": 0.028653366820936898, |
| "p25": 2.2540221214294434, |
| "p75": 2.305117154121399, |
| "n": 35 |
| } |
| ], |
| "stability_score": 0.8442793520944198, |
| "plateaus": [], |
| "jumps": [ |
| { |
| "step": 7300, |
| "delta": 0.11637275218963605, |
| "sigma": 3.41 |
| }, |
| { |
| "step": 9290, |
| "delta": 0.11292421817779541, |
| "sigma": 3.25 |
| }, |
| { |
| "step": 9920, |
| "delta": -0.10988917350769034, |
| "sigma": 3.1 |
| } |
| ], |
| "slope_sign_changes": [ |
| { |
| "step": 4890, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 4900, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 5090, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 5100, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 5140, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 5150, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 5160, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 5260, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 5300, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 5460, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 5480, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 5510, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 7490, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 7510, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 7530, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 7540, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 7670, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 7790, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 7830, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 7850, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 7860, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 7940, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 8030, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 8100, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 8170, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 8290, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 8300, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 8310, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 8390, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 8400, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 8890, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 8920, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 9230, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 9250, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 9280, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 9290, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 9350, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 9360, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 9370, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 9380, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 9410, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 9420, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 9900, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 10080, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 10100, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 10220, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 10250, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 10270, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 10290, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 10320, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 10960, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 11560, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 11600, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 11750, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 11820, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 11830, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 12130, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 12150, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 12240, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 12290, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 12340, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 12360, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 13110, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 13120, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 13130, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 13240, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 13250, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 13750, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 13780, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 13790, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 13860, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 13870, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 13950, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 13990, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 14010, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 14030, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 14360, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 14370, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 14380, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 14390, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 14400, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 14410, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 14430, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 14550, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 14630, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 14710, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 14720, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 14730, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 14760, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 14770, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 14880, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 14890, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 14900, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 14910, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 14940, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 14950, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 14970, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 15010, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 15080, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 15090, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 16890, |
| "direction": "negative\u2192positive" |
| } |
| ] |
| }, |
| "geo/rankme_last": { |
| "n_points": 232, |
| "slopes_at_landmarks": { |
| "pct_10": { |
| "w100": null, |
| "w500": 0.0033722650437127974, |
| "w1000": 0.0013678783374828298, |
| "w5000": 0.0019266211783584095 |
| }, |
| "pct_25": { |
| "w100": null, |
| "w500": 0.0008363414946056547, |
| "w1000": 0.000515038347069597, |
| "w5000": 0.0009746530627070197 |
| }, |
| "pct_50": { |
| "w100": null, |
| "w500": 0.004231329055059524, |
| "w1000": 0.0006157128190819597, |
| "w5000": 0.00034299754947128433 |
| }, |
| "pct_75": { |
| "w100": null, |
| "w500": 0.0007408040364583334, |
| "w1000": 0.00018919467576694138, |
| "w5000": 0.00010008815155809676 |
| }, |
| "pct_90": { |
| "w100": null, |
| "w500": -0.0008631388346354167, |
| "w1000": 0.00013333386990613553, |
| "w5000": 0.00033015691526263127 |
| } |
| }, |
| "inflection_points": [], |
| "rolling_stats": [ |
| { |
| "step_center": 250, |
| "mean": 418.8371276855469, |
| "std": 1.9706179295918487, |
| "p25": 414.6168212890625, |
| "p75": 419.4991760253906, |
| "n": 7 |
| }, |
| { |
| "step_center": 750, |
| "mean": 420.7526070731027, |
| "std": 1.1694264542259036, |
| "p25": 419.0769348144531, |
| "p75": 422.18658447265625, |
| "n": 7 |
| }, |
| { |
| "step_center": 1250, |
| "mean": 422.3680369059245, |
| "std": 0.612363257463856, |
| "p25": 421.7069091796875, |
| "p75": 422.3238830566406, |
| "n": 6 |
| }, |
| { |
| "step_center": 1750, |
| "mean": 423.146728515625, |
| "std": 0.7143769249048134, |
| "p25": 422.30780029296875, |
| "p75": 423.8494873046875, |
| "n": 7 |
| }, |
| { |
| "step_center": 2250, |
| "mean": 423.8157217843192, |
| "std": 0.6644047915811648, |
| "p25": 422.3829040527344, |
| "p75": 424.26641845703125, |
| "n": 7 |
| }, |
| { |
| "step_center": 2750, |
| "mean": 424.7442321777344, |
| "std": 0.38605224992275294, |
| "p25": 424.3502197265625, |
| "p75": 424.84075927734375, |
| "n": 6 |
| }, |
| { |
| "step_center": 3250, |
| "mean": 425.2377493722098, |
| "std": 0.9928863556386898, |
| "p25": 424.3253173828125, |
| "p75": 426.3678894042969, |
| "n": 7 |
| }, |
| { |
| "step_center": 3750, |
| "mean": 426.2940150669643, |
| "std": 0.36511910276686976, |
| "p25": 425.8962707519531, |
| "p75": 426.6614074707031, |
| "n": 7 |
| }, |
| { |
| "step_center": 4250, |
| "mean": 426.9090983072917, |
| "std": 0.5799380755235445, |
| "p25": 426.3441162109375, |
| "p75": 427.4007263183594, |
| "n": 6 |
| }, |
| { |
| "step_center": 4750, |
| "mean": 426.70751517159596, |
| "std": 0.3777274089993139, |
| "p25": 426.2264404296875, |
| "p75": 426.9776611328125, |
| "n": 7 |
| }, |
| { |
| "step_center": 5250, |
| "mean": 427.1932460239955, |
| "std": 0.3474182400524559, |
| "p25": 426.6791076660156, |
| "p75": 427.4221496582031, |
| "n": 7 |
| }, |
| { |
| "step_center": 5750, |
| "mean": 428.02931722005206, |
| "std": 0.7422348443206302, |
| "p25": 427.381591796875, |
| "p75": 428.8571472167969, |
| "n": 6 |
| }, |
| { |
| "step_center": 6250, |
| "mean": 427.67706298828125, |
| "std": 0.5068596593637932, |
| "p25": 427.1493225097656, |
| "p75": 428.06072998046875, |
| "n": 7 |
| }, |
| { |
| "step_center": 6750, |
| "mean": 428.2369689941406, |
| "std": 0.37135674083717257, |
| "p25": 427.6800537109375, |
| "p75": 428.4487609863281, |
| "n": 7 |
| }, |
| { |
| "step_center": 7250, |
| "mean": 428.5103810628255, |
| "std": 0.5867578912524614, |
| "p25": 427.70166015625, |
| "p75": 428.73895263671875, |
| "n": 6 |
| }, |
| { |
| "step_center": 7750, |
| "mean": 428.0627746582031, |
| "std": 0.5193927369314885, |
| "p25": 427.2740173339844, |
| "p75": 428.5312805175781, |
| "n": 7 |
| }, |
| { |
| "step_center": 8250, |
| "mean": 428.3086591448103, |
| "std": 0.5933176949414425, |
| "p25": 427.58868408203125, |
| "p75": 428.940185546875, |
| "n": 7 |
| }, |
| { |
| "step_center": 8750, |
| "mean": 428.5880533854167, |
| "std": 0.551078102138243, |
| "p25": 427.8064880371094, |
| "p75": 429.1161193847656, |
| "n": 6 |
| }, |
| { |
| "step_center": 9250, |
| "mean": 428.8888636997768, |
| "std": 0.5694960752056711, |
| "p25": 428.0375061035156, |
| "p75": 429.12469482421875, |
| "n": 7 |
| }, |
| { |
| "step_center": 9750, |
| "mean": 429.2879638671875, |
| "std": 0.7433667524753128, |
| "p25": 428.4444580078125, |
| "p75": 430.04833984375, |
| "n": 7 |
| }, |
| { |
| "step_center": 10250, |
| "mean": 429.48259989420575, |
| "std": 0.4522335608488025, |
| "p25": 429.08953857421875, |
| "p75": 429.6427001953125, |
| "n": 6 |
| }, |
| { |
| "step_center": 10750, |
| "mean": 429.14767020089283, |
| "std": 0.2757999472749072, |
| "p25": 428.6318664550781, |
| "p75": 429.35455322265625, |
| "n": 7 |
| }, |
| { |
| "step_center": 11250, |
| "mean": 429.68481881277904, |
| "std": 0.33605217382389435, |
| "p25": 429.0997314453125, |
| "p75": 429.8418884277344, |
| "n": 7 |
| }, |
| { |
| "step_center": 11750, |
| "mean": 429.19440205891925, |
| "std": 0.5124843902568214, |
| "p25": 428.5133361816406, |
| "p75": 429.4604187011719, |
| "n": 6 |
| }, |
| { |
| "step_center": 12250, |
| "mean": 429.8018711635045, |
| "std": 0.497040027800742, |
| "p25": 428.8563232421875, |
| "p75": 430.1318054199219, |
| "n": 7 |
| }, |
| { |
| "step_center": 12750, |
| "mean": 429.53719220842635, |
| "std": 0.6863248219684418, |
| "p25": 428.7851867675781, |
| "p75": 429.690673828125, |
| "n": 7 |
| }, |
| { |
| "step_center": 13250, |
| "mean": 429.95374552408856, |
| "std": 0.7975096227032695, |
| "p25": 428.9168701171875, |
| "p75": 430.7697448730469, |
| "n": 6 |
| }, |
| { |
| "step_center": 13750, |
| "mean": 429.67769949776783, |
| "std": 0.6044692586774199, |
| "p25": 428.8565368652344, |
| "p75": 430.0766296386719, |
| "n": 7 |
| }, |
| { |
| "step_center": 14250, |
| "mean": 429.861807686942, |
| "std": 0.7339570078253661, |
| "p25": 428.733642578125, |
| "p75": 430.5356140136719, |
| "n": 7 |
| }, |
| { |
| "step_center": 14750, |
| "mean": 429.5887451171875, |
| "std": 0.9283298841866231, |
| "p25": 428.0323791503906, |
| "p75": 430.1398010253906, |
| "n": 6 |
| }, |
| { |
| "step_center": 15250, |
| "mean": 429.84422084263394, |
| "std": 0.412175806266064, |
| "p25": 429.30645751953125, |
| "p75": 430.1964111328125, |
| "n": 7 |
| }, |
| { |
| "step_center": 15750, |
| "mean": 430.33884974888394, |
| "std": 0.47739178059533427, |
| "p25": 429.5009765625, |
| "p75": 430.82623291015625, |
| "n": 7 |
| }, |
| { |
| "step_center": 16250, |
| "mean": 430.0373840332031, |
| "std": 0.5542415663214534, |
| "p25": 429.2351989746094, |
| "p75": 430.5093688964844, |
| "n": 6 |
| }, |
| { |
| "step_center": 16750, |
| "mean": 430.9304722377232, |
| "std": 0.27740500209971125, |
| "p25": 430.6304931640625, |
| "p75": 431.13427734375, |
| "n": 7 |
| }, |
| { |
| "step_center": 17250, |
| "mean": 430.9951477050781, |
| "std": 0.11213697889472371, |
| "p25": 430.8769836425781, |
| "p75": 431.0931701660156, |
| "n": 5 |
| } |
| ], |
| "stability_score": 0.633204481136617, |
| "plateaus": [], |
| "jumps": [], |
| "slope_sign_changes": [ |
| { |
| "step": 4575, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 4875, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 6000, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 6375, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 7125, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 7650, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 8175, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 8325, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 9675, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 9750, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 10125, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 10200, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 10275, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 10725, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 11325, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 11775, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 12300, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 12825, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 13425, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 13800, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 13950, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 14400, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 14475, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 14700, |
| "direction": "negative\u2192positive" |
| }, |
| { |
| "step": 15750, |
| "direction": "positive\u2192negative" |
| }, |
| { |
| "step": 16050, |
| "direction": "negative\u2192positive" |
| } |
| ] |
| } |
| } |
| }, |
| "geometric_health": { |
| "layers": [ |
| 0, |
| 7, |
| 14, |
| 21, |
| 27 |
| ], |
| "landmarks": { |
| "early": 1725, |
| "quarter": 4350, |
| "mid": 8700, |
| "three_quarter": 12975, |
| "late": 15600, |
| "final": 17325 |
| }, |
| "profiles": { |
| "early": { |
| "step": 1725, |
| "layer_0": { |
| "stable_rank_q_proj": 20.133892059326172, |
| "stable_rank_k_proj": 17.006507873535156, |
| "stable_rank_o_proj": 46.15428161621094, |
| "stable_rank_gate_proj": 129.89878845214844, |
| "stable_rank_down_proj": 55.68263244628906, |
| "attn_entropy_mean": 6.25703239440918, |
| "attn_entropy_std": 0.4080713391304016, |
| "anisotropy": 0.06490539014339447, |
| "dead_units": 0.0 |
| }, |
| "layer_7": { |
| "stable_rank_q_proj": 42.706878662109375, |
| "stable_rank_k_proj": 40.13029861450195, |
| "stable_rank_o_proj": 89.30697631835938, |
| "stable_rank_gate_proj": 79.12543487548828, |
| "stable_rank_down_proj": 143.33609008789062, |
| "attn_entropy_mean": 4.700146675109863, |
| "attn_entropy_std": 0.7736154198646545, |
| "anisotropy": 0.4299015402793884, |
| "dead_units": 0.0 |
| }, |
| "layer_14": { |
| "stable_rank_q_proj": 50.8188362121582, |
| "stable_rank_k_proj": 41.04155349731445, |
| "stable_rank_o_proj": 42.97220993041992, |
| "stable_rank_gate_proj": 71.69290161132812, |
| "stable_rank_down_proj": 126.66923522949219, |
| "attn_entropy_mean": 5.53626823425293, |
| "attn_entropy_std": 0.4032261073589325, |
| "anisotropy": 0.39128610491752625, |
| "dead_units": 0.0 |
| }, |
| "layer_21": { |
| "stable_rank_q_proj": 40.08066940307617, |
| "stable_rank_k_proj": 29.80875587463379, |
| "stable_rank_o_proj": 67.41558837890625, |
| "stable_rank_gate_proj": 63.583194732666016, |
| "stable_rank_down_proj": 49.89845275878906, |
| "attn_entropy_mean": 5.884044647216797, |
| "attn_entropy_std": 0.31702759861946106, |
| "anisotropy": 0.1451684832572937, |
| "dead_units": 0.0 |
| }, |
| "layer_27": { |
| "stable_rank_q_proj": 44.02388381958008, |
| "stable_rank_k_proj": 31.312538146972656, |
| "stable_rank_o_proj": 114.83377838134766, |
| "stable_rank_gate_proj": 75.68281555175781, |
| "stable_rank_down_proj": 127.66222381591797, |
| "attn_entropy_mean": 4.35567045211792, |
| "attn_entropy_std": 0.6677613854408264, |
| "anisotropy": 0.10006999224424362, |
| "dead_units": 0.0 |
| } |
| }, |
| "quarter": { |
| "step": 4350, |
| "layer_0": { |
| "stable_rank_q_proj": 20.62217140197754, |
| "stable_rank_k_proj": 17.198904037475586, |
| "stable_rank_o_proj": 45.66370391845703, |
| "stable_rank_gate_proj": 130.02139282226562, |
| "stable_rank_down_proj": 56.49225616455078, |
| "attn_entropy_mean": 6.258755207061768, |
| "attn_entropy_std": 0.4235442876815796, |
| "anisotropy": 0.06752505153417587, |
| "dead_units": 0.0 |
| }, |
| "layer_7": { |
| "stable_rank_q_proj": 42.567344665527344, |
| "stable_rank_k_proj": 39.40324783325195, |
| "stable_rank_o_proj": 89.7774887084961, |
| "stable_rank_gate_proj": 79.37095642089844, |
| "stable_rank_down_proj": 143.08059692382812, |
| "attn_entropy_mean": 4.703330039978027, |
| "attn_entropy_std": 0.7717511653900146, |
| "anisotropy": 0.4199843406677246, |
| "dead_units": 0.0 |
| }, |
| "layer_14": { |
| "stable_rank_q_proj": 51.64799880981445, |
| "stable_rank_k_proj": 41.63627243041992, |
| "stable_rank_o_proj": 42.701908111572266, |
| "stable_rank_gate_proj": 71.93598937988281, |
| "stable_rank_down_proj": 127.28962707519531, |
| "attn_entropy_mean": 5.524231910705566, |
| "attn_entropy_std": 0.43370646238327026, |
| "anisotropy": 0.3828665316104889, |
| "dead_units": 0.0 |
| }, |
| "layer_21": { |
| "stable_rank_q_proj": 39.59270095825195, |
| "stable_rank_k_proj": 29.09538459777832, |
| "stable_rank_o_proj": 66.52333068847656, |
| "stable_rank_gate_proj": 62.608497619628906, |
| "stable_rank_down_proj": 49.814796447753906, |
| "attn_entropy_mean": 5.853401184082031, |
| "attn_entropy_std": 0.3115076422691345, |
| "anisotropy": 0.1433832198381424, |
| "dead_units": 0.0 |
| }, |
| "layer_27": { |
| "stable_rank_q_proj": 43.69539260864258, |
| "stable_rank_k_proj": 30.684505462646484, |
| "stable_rank_o_proj": 110.17560577392578, |
| "stable_rank_gate_proj": 73.6478271484375, |
| "stable_rank_down_proj": 127.23016357421875, |
| "attn_entropy_mean": 4.3218841552734375, |
| "attn_entropy_std": 0.6490551233291626, |
| "anisotropy": 0.10033217072486877, |
| "dead_units": 0.0 |
| } |
| }, |
| "mid": { |
| "step": 8700, |
| "layer_0": { |
| "stable_rank_q_proj": 20.74656105041504, |
| "stable_rank_k_proj": 17.162168502807617, |
| "stable_rank_o_proj": 44.727996826171875, |
| "stable_rank_gate_proj": 127.23369598388672, |
| "stable_rank_down_proj": 56.846107482910156, |
| "attn_entropy_mean": 6.235823631286621, |
| "attn_entropy_std": 0.43743276596069336, |
| "anisotropy": 0.06675643473863602, |
| "dead_units": 0.0 |
| }, |
| "layer_7": { |
| "stable_rank_q_proj": 42.293479919433594, |
| "stable_rank_k_proj": 38.610774993896484, |
| "stable_rank_o_proj": 89.17684936523438, |
| "stable_rank_gate_proj": 79.0400161743164, |
| "stable_rank_down_proj": 144.4713134765625, |
| "attn_entropy_mean": 4.706167697906494, |
| "attn_entropy_std": 0.7591027617454529, |
| "anisotropy": 0.4047275185585022, |
| "dead_units": 0.0 |
| }, |
| "layer_14": { |
| "stable_rank_q_proj": 51.58811569213867, |
| "stable_rank_k_proj": 42.609825134277344, |
| "stable_rank_o_proj": 42.335105895996094, |
| "stable_rank_gate_proj": 71.9311752319336, |
| "stable_rank_down_proj": 126.46110534667969, |
| "attn_entropy_mean": 5.540119171142578, |
| "attn_entropy_std": 0.45732536911964417, |
| "anisotropy": 0.37118563055992126, |
| "dead_units": 0.0 |
| }, |
| "layer_21": { |
| "stable_rank_q_proj": 39.304080963134766, |
| "stable_rank_k_proj": 28.829919815063477, |
| "stable_rank_o_proj": 65.58588409423828, |
| "stable_rank_gate_proj": 61.5308837890625, |
| "stable_rank_down_proj": 49.71337127685547, |
| "attn_entropy_mean": 5.866855621337891, |
| "attn_entropy_std": 0.3209395408630371, |
| "anisotropy": 0.13976767659187317, |
| "dead_units": 0.0 |
| }, |
| "layer_27": { |
| "stable_rank_q_proj": 44.39973449707031, |
| "stable_rank_k_proj": 30.10407829284668, |
| "stable_rank_o_proj": 107.94586181640625, |
| "stable_rank_gate_proj": 71.3941879272461, |
| "stable_rank_down_proj": 129.004150390625, |
| "attn_entropy_mean": 4.340854644775391, |
| "attn_entropy_std": 0.6970290541648865, |
| "anisotropy": 0.09302495419979095, |
| "dead_units": 0.0 |
| } |
| }, |
| "three_quarter": { |
| "step": 12975, |
| "layer_0": { |
| "stable_rank_q_proj": 20.623004913330078, |
| "stable_rank_k_proj": 16.95781898498535, |
| "stable_rank_o_proj": 43.821876525878906, |
| "stable_rank_gate_proj": 125.1052474975586, |
| "stable_rank_down_proj": 57.683834075927734, |
| "attn_entropy_mean": 6.233590126037598, |
| "attn_entropy_std": 0.45708325505256653, |
| "anisotropy": 0.06800613552331924, |
| "dead_units": 0.0 |
| }, |
| "layer_7": { |
| "stable_rank_q_proj": 41.80685043334961, |
| "stable_rank_k_proj": 38.90849685668945, |
| "stable_rank_o_proj": 88.35002136230469, |
| "stable_rank_gate_proj": 78.26079559326172, |
| "stable_rank_down_proj": 144.59579467773438, |
| "attn_entropy_mean": 4.734737396240234, |
| "attn_entropy_std": 0.7458856701850891, |
| "anisotropy": 0.40183892846107483, |
| "dead_units": 0.0 |
| }, |
| "layer_14": { |
| "stable_rank_q_proj": 51.84466552734375, |
| "stable_rank_k_proj": 43.42570114135742, |
| "stable_rank_o_proj": 42.2808723449707, |
| "stable_rank_gate_proj": 71.79308319091797, |
| "stable_rank_down_proj": 127.38919067382812, |
| "attn_entropy_mean": 5.545279026031494, |
| "attn_entropy_std": 0.4711344242095947, |
| "anisotropy": 0.37494924664497375, |
| "dead_units": 0.0 |
| }, |
| "layer_21": { |
| "stable_rank_q_proj": 38.30750274658203, |
| "stable_rank_k_proj": 28.626514434814453, |
| "stable_rank_o_proj": 65.13455963134766, |
| "stable_rank_gate_proj": 59.88165283203125, |
| "stable_rank_down_proj": 48.73686981201172, |
| "attn_entropy_mean": 5.843373775482178, |
| "attn_entropy_std": 0.332601934671402, |
| "anisotropy": 0.1376378834247589, |
| "dead_units": 0.0 |
| }, |
| "layer_27": { |
| "stable_rank_q_proj": 44.75471878051758, |
| "stable_rank_k_proj": 30.430049896240234, |
| "stable_rank_o_proj": 106.74622344970703, |
| "stable_rank_gate_proj": 69.8398666381836, |
| "stable_rank_down_proj": 130.29200744628906, |
| "attn_entropy_mean": 4.317837715148926, |
| "attn_entropy_std": 0.69074946641922, |
| "anisotropy": 0.10297069698572159, |
| "dead_units": 0.0 |
| } |
| }, |
| "late": { |
| "step": 15600, |
| "layer_0": { |
| "stable_rank_q_proj": 20.5914363861084, |
| "stable_rank_k_proj": 16.745973587036133, |
| "stable_rank_o_proj": 43.73870849609375, |
| "stable_rank_gate_proj": 123.3493423461914, |
| "stable_rank_down_proj": 57.89291000366211, |
| "attn_entropy_mean": 6.228974342346191, |
| "attn_entropy_std": 0.46670883893966675, |
| "anisotropy": 0.06984122097492218, |
| "dead_units": 0.0 |
| }, |
| "layer_7": { |
| "stable_rank_q_proj": 41.609771728515625, |
| "stable_rank_k_proj": 38.61107635498047, |
| "stable_rank_o_proj": 87.21759796142578, |
| "stable_rank_gate_proj": 77.33723449707031, |
| "stable_rank_down_proj": 143.90965270996094, |
| "attn_entropy_mean": 4.746259689331055, |
| "attn_entropy_std": 0.7679674029350281, |
| "anisotropy": 0.38015806674957275, |
| "dead_units": 0.0 |
| }, |
| "layer_14": { |
| "stable_rank_q_proj": 51.66292190551758, |
| "stable_rank_k_proj": 44.319820404052734, |
| "stable_rank_o_proj": 42.175537109375, |
| "stable_rank_gate_proj": 71.88668823242188, |
| "stable_rank_down_proj": 126.96123504638672, |
| "attn_entropy_mean": 5.53586483001709, |
| "attn_entropy_std": 0.45710787177085876, |
| "anisotropy": 0.36756598949432373, |
| "dead_units": 0.0 |
| }, |
| "layer_21": { |
| "stable_rank_q_proj": 38.205936431884766, |
| "stable_rank_k_proj": 28.410799026489258, |
| "stable_rank_o_proj": 64.53252410888672, |
| "stable_rank_gate_proj": 59.53862380981445, |
| "stable_rank_down_proj": 48.487876892089844, |
| "attn_entropy_mean": 5.86893367767334, |
| "attn_entropy_std": 0.3390011489391327, |
| "anisotropy": 0.1381273865699768, |
| "dead_units": 0.0 |
| }, |
| "layer_27": { |
| "stable_rank_q_proj": 45.488624572753906, |
| "stable_rank_k_proj": 30.428009033203125, |
| "stable_rank_o_proj": 106.52822875976562, |
| "stable_rank_gate_proj": 68.88688659667969, |
| "stable_rank_down_proj": 129.5972442626953, |
| "attn_entropy_mean": 4.31506872177124, |
| "attn_entropy_std": 0.6959003210067749, |
| "anisotropy": 0.09733413904905319, |
| "dead_units": 0.0 |
| } |
| }, |
| "final": { |
| "step": 17325, |
| "layer_0": { |
| "stable_rank_q_proj": 20.46058464050293, |
| "stable_rank_k_proj": 16.590166091918945, |
| "stable_rank_o_proj": 43.638336181640625, |
| "stable_rank_gate_proj": 122.99588775634766, |
| "stable_rank_down_proj": 57.99134063720703, |
| "attn_entropy_mean": 6.221095561981201, |
| "attn_entropy_std": 0.4666074216365814, |
| "anisotropy": 0.06611216813325882, |
| "dead_units": 0.0 |
| }, |
| "layer_7": { |
| "stable_rank_q_proj": 41.592491149902344, |
| "stable_rank_k_proj": 38.8099479675293, |
| "stable_rank_o_proj": 86.76081848144531, |
| "stable_rank_gate_proj": 76.83895874023438, |
| "stable_rank_down_proj": 144.66598510742188, |
| "attn_entropy_mean": 4.743325710296631, |
| "attn_entropy_std": 0.7405821681022644, |
| "anisotropy": 0.3939518332481384, |
| "dead_units": 0.0 |
| }, |
| "layer_14": { |
| "stable_rank_q_proj": 51.51805114746094, |
| "stable_rank_k_proj": 44.543739318847656, |
| "stable_rank_o_proj": 42.110252380371094, |
| "stable_rank_gate_proj": 71.66548156738281, |
| "stable_rank_down_proj": 126.4752426147461, |
| "attn_entropy_mean": 5.5013909339904785, |
| "attn_entropy_std": 0.4690777361392975, |
| "anisotropy": 0.3837595582008362, |
| "dead_units": 0.0 |
| }, |
| "layer_21": { |
| "stable_rank_q_proj": 38.228172302246094, |
| "stable_rank_k_proj": 28.443443298339844, |
| "stable_rank_o_proj": 64.04421997070312, |
| "stable_rank_gate_proj": 59.131065368652344, |
| "stable_rank_down_proj": 48.718719482421875, |
| "attn_entropy_mean": 5.846819877624512, |
| "attn_entropy_std": 0.3328602910041809, |
| "anisotropy": 0.1341758519411087, |
| "dead_units": 0.0 |
| }, |
| "layer_27": { |
| "stable_rank_q_proj": 45.467472076416016, |
| "stable_rank_k_proj": 30.417682647705078, |
| "stable_rank_o_proj": 106.4880599975586, |
| "stable_rank_gate_proj": 68.29488372802734, |
| "stable_rank_down_proj": 129.311279296875, |
| "attn_entropy_mean": 4.277439594268799, |
| "attn_entropy_std": 0.7137026190757751, |
| "anisotropy": 0.09188564866781235, |
| "dead_units": 0.0 |
| } |
| } |
| }, |
| "depth_gradient": { |
| "layers": { |
| "first": 0, |
| "mid": 14, |
| "last": 27 |
| }, |
| "attn_entropy_mean": { |
| "first": 6.221095561981201, |
| "mid": 5.5013909339904785, |
| "last": 4.277439594268799 |
| }, |
| "anisotropy": { |
| "first": 0.06611216813325882, |
| "mid": 0.3837595582008362, |
| "last": 0.09188564866781235 |
| }, |
| "stable_rank_q_proj": { |
| "first": 20.46058464050293, |
| "mid": 51.51805114746094, |
| "last": 45.467472076416016 |
| } |
| }, |
| "rankme_stability": 0.28062874758414175 |
| } |
| } |