kotodama-108m-base / bcpt-analysis /training_summary.json
LuxiaSL's picture
Upload folder using huggingface_hub
085670e verified
{
"summary": {
"max_step": 17336,
"loss_initial": 2.7233519554138184,
"loss_final": 2.3416449626286826,
"loss_min": 2.2302972316741942,
"loss_min_step": 17260,
"ppl_final": 10.398327361926201,
"avg_tokens_per_sec": 1960335.6302571176,
"rankme_initial": 414.6168212890625,
"rankme_min": 414.6168212890625,
"rankme_min_step": 0,
"rankme_final": 431.12603759765625,
"rankme_rebound_ratio": 1.0398180089685358,
"ww_alpha_initial": 7.489332379498283,
"ww_alpha_final": 7.571896228633995,
"ww_healthy_frac_final": 0.15736040609137056,
"twonn_id_final": 6.818349838256836,
"tokens_B": 36.358324224
},
"dynamics": {
"max_step": 17336,
"landmarks": {
"pct_10": 1733,
"pct_25": 4334,
"pct_50": 8668,
"pct_75": 13002,
"pct_90": 15602
},
"metrics": {
"train/loss": {
"n_points": 1735,
"slopes_at_landmarks": {
"pct_10": {
"w100": -4.28877743807706e-05,
"w500": -1.9821559681611903e-05,
"w1000": -2.0593762133095024e-05,
"w5000": -3.688523080177231e-05
},
"pct_25": {
"w100": 0.0001107275558240486,
"w500": -3.8268886694387226e-05,
"w1000": -1.0966062831907276e-05,
"w5000": -9.462775826338291e-06
},
"pct_50": {
"w100": 0.00019825918024236506,
"w500": -3.840362209947456e-06,
"w1000": -1.657888490875455e-05,
"w5000": -4.913578465691426e-06
},
"pct_75": {
"w100": -2.04078500921076e-05,
"w500": -6.083978467485627e-06,
"w1000": -4.660763942261841e-06,
"w5000": -1.4281992576798431e-06
},
"pct_90": {
"w100": -0.0003515260002886256,
"w500": -3.884735576817397e-05,
"w1000": -1.6693517605964394e-05,
"w5000": -1.4644461233839928e-05
}
},
"inflection_points": [
{
"step": 1760,
"direction": "positive\u2192negative"
},
{
"step": 1890,
"direction": "negative\u2192positive"
},
{
"step": 2210,
"direction": "positive\u2192negative"
},
{
"step": 2380,
"direction": "negative\u2192positive"
},
{
"step": 2590,
"direction": "positive\u2192negative"
},
{
"step": 2970,
"direction": "negative\u2192positive"
},
{
"step": 3260,
"direction": "positive\u2192negative"
},
{
"step": 3400,
"direction": "negative\u2192positive"
},
{
"step": 3620,
"direction": "positive\u2192negative"
},
{
"step": 4000,
"direction": "negative\u2192positive"
},
{
"step": 4290,
"direction": "positive\u2192negative"
},
{
"step": 4500,
"direction": "negative\u2192positive"
},
{
"step": 5300,
"direction": "positive\u2192negative"
},
{
"step": 5880,
"direction": "negative\u2192positive"
},
{
"step": 6120,
"direction": "positive\u2192negative"
},
{
"step": 6280,
"direction": "negative\u2192positive"
},
{
"step": 6570,
"direction": "positive\u2192negative"
},
{
"step": 6920,
"direction": "negative\u2192positive"
},
{
"step": 7250,
"direction": "positive\u2192negative"
},
{
"step": 7320,
"direction": "negative\u2192positive"
},
{
"step": 7810,
"direction": "positive\u2192negative"
},
{
"step": 8050,
"direction": "negative\u2192positive"
},
{
"step": 8190,
"direction": "positive\u2192negative"
},
{
"step": 8600,
"direction": "negative\u2192positive"
},
{
"step": 9000,
"direction": "positive\u2192negative"
},
{
"step": 9070,
"direction": "negative\u2192positive"
},
{
"step": 9310,
"direction": "positive\u2192negative"
},
{
"step": 9670,
"direction": "negative\u2192positive"
},
{
"step": 10040,
"direction": "positive\u2192negative"
},
{
"step": 10730,
"direction": "negative\u2192positive"
},
{
"step": 11280,
"direction": "positive\u2192negative"
},
{
"step": 11960,
"direction": "negative\u2192positive"
},
{
"step": 12240,
"direction": "positive\u2192negative"
},
{
"step": 12700,
"direction": "negative\u2192positive"
},
{
"step": 13530,
"direction": "positive\u2192negative"
},
{
"step": 14200,
"direction": "negative\u2192positive"
},
{
"step": 14540,
"direction": "positive\u2192negative"
},
{
"step": 14870,
"direction": "negative\u2192positive"
},
{
"step": 14980,
"direction": "positive\u2192negative"
},
{
"step": 15280,
"direction": "negative\u2192positive"
},
{
"step": 15450,
"direction": "positive\u2192negative"
},
{
"step": 16410,
"direction": "negative\u2192positive"
}
],
"rolling_stats": [
{
"step_center": 250,
"mean": 2.5536492648124693,
"std": 0.06794927528669344,
"p25": 2.494672417640686,
"p75": 2.6106740951538088,
"n": 50
},
{
"step_center": 750,
"mean": 2.448444580078125,
"std": 0.02509056922621886,
"p25": 2.4327359437942504,
"p75": 2.465353083610535,
"n": 50
},
{
"step_center": 1250,
"mean": 2.4215744848251344,
"std": 0.026759712703853474,
"p25": 2.3993499994277956,
"p75": 2.437948441505432,
"n": 50
},
{
"step_center": 1750,
"mean": 2.4059697308540344,
"std": 0.025170641630145284,
"p25": 2.3834667682647703,
"p75": 2.424547815322876,
"n": 50
},
{
"step_center": 2250,
"mean": 2.3959087138175965,
"std": 0.023488664878493968,
"p25": 2.376794672012329,
"p75": 2.4120164632797243,
"n": 50
},
{
"step_center": 2750,
"mean": 2.393886389732361,
"std": 0.025725386553165204,
"p25": 2.373078489303589,
"p75": 2.4160205841064455,
"n": 50
},
{
"step_center": 3250,
"mean": 2.383705304145813,
"std": 0.02064650608402684,
"p25": 2.368594765663147,
"p75": 2.3968960523605345,
"n": 50
},
{
"step_center": 3750,
"mean": 2.379629012107849,
"std": 0.025284434826176497,
"p25": 2.36467387676239,
"p75": 2.400493335723877,
"n": 50
},
{
"step_center": 4250,
"mean": 2.370320144176483,
"std": 0.023835743914342983,
"p25": 2.3525022745132445,
"p75": 2.3869575023651124,
"n": 50
},
{
"step_center": 4750,
"mean": 2.367443947792053,
"std": 0.025451306105213456,
"p25": 2.3461528539657595,
"p75": 2.382859945297241,
"n": 50
},
{
"step_center": 5250,
"mean": 2.3662837538719175,
"std": 0.023101883335323745,
"p25": 2.3492135047912597,
"p75": 2.3814218521118162,
"n": 50
},
{
"step_center": 5750,
"mean": 2.3646195211410523,
"std": 0.023650718565722668,
"p25": 2.346358561515808,
"p75": 2.381751036643982,
"n": 50
},
{
"step_center": 6250,
"mean": 2.3603261704444884,
"std": 0.025939884185818045,
"p25": 2.341030740737915,
"p75": 2.380238080024719,
"n": 50
},
{
"step_center": 6750,
"mean": 2.3573404712677,
"std": 0.018924405171920438,
"p25": 2.3400394916534424,
"p75": 2.3729241132736205,
"n": 50
},
{
"step_center": 7250,
"mean": 2.346787754058838,
"std": 0.028397110004011492,
"p25": 2.3257726430892944,
"p75": 2.366376209259033,
"n": 50
},
{
"step_center": 7750,
"mean": 2.3498280205726623,
"std": 0.0222118732939722,
"p25": 2.335556411743164,
"p75": 2.361265182495117,
"n": 50
},
{
"step_center": 8250,
"mean": 2.348032169342041,
"std": 0.028050880889420815,
"p25": 2.3264118671417235,
"p75": 2.3686971426010133,
"n": 50
},
{
"step_center": 8750,
"mean": 2.34533736371994,
"std": 0.02437305733889354,
"p25": 2.3267060041427614,
"p75": 2.3611632585525513,
"n": 50
},
{
"step_center": 9250,
"mean": 2.3430724515914916,
"std": 0.02720907854868419,
"p25": 2.32452507019043,
"p75": 2.3617564916610716,
"n": 50
},
{
"step_center": 9750,
"mean": 2.3414637241363523,
"std": 0.026057814236921148,
"p25": 2.3215897560119627,
"p75": 2.357621693611145,
"n": 50
},
{
"step_center": 10250,
"mean": 2.3405535078048705,
"std": 0.02665497429150158,
"p25": 2.321121668815613,
"p75": 2.35941686630249,
"n": 50
},
{
"step_center": 10750,
"mean": 2.3323154163360598,
"std": 0.02170759677431937,
"p25": 2.3169103145599363,
"p75": 2.345703053474426,
"n": 50
},
{
"step_center": 11250,
"mean": 2.334129483699799,
"std": 0.023022957389190198,
"p25": 2.319528651237488,
"p75": 2.3511791706085203,
"n": 50
},
{
"step_center": 11750,
"mean": 2.3361384110450745,
"std": 0.02471085499602499,
"p25": 2.3174596309661863,
"p75": 2.350165772438049,
"n": 50
},
{
"step_center": 12250,
"mean": 2.3365752921104432,
"std": 0.027231387794148867,
"p25": 2.313059759140015,
"p75": 2.356953167915344,
"n": 50
},
{
"step_center": 12750,
"mean": 2.331771935462952,
"std": 0.025773844962216148,
"p25": 2.3146870851516725,
"p75": 2.3490057706832888,
"n": 50
},
{
"step_center": 13250,
"mean": 2.3304363389015195,
"std": 0.022590550447677177,
"p25": 2.314772891998291,
"p75": 2.350356388092041,
"n": 50
},
{
"step_center": 13750,
"mean": 2.332354657649994,
"std": 0.0278746048668042,
"p25": 2.3138842582702637,
"p75": 2.353069567680359,
"n": 50
},
{
"step_center": 14250,
"mean": 2.328071443080902,
"std": 0.027385776869959595,
"p25": 2.3080037593841554,
"p75": 2.350447750091553,
"n": 50
},
{
"step_center": 14750,
"mean": 2.3318208928108213,
"std": 0.024377556097741115,
"p25": 2.312630367279053,
"p75": 2.3485876083374024,
"n": 50
},
{
"step_center": 15250,
"mean": 2.3255881023406983,
"std": 0.02616183998425422,
"p25": 2.30555522441864,
"p75": 2.344960618019104,
"n": 50
},
{
"step_center": 15750,
"mean": 2.3202085700035098,
"std": 0.023800025651637833,
"p25": 2.3002145290374756,
"p75": 2.3358023881912233,
"n": 50
},
{
"step_center": 16250,
"mean": 2.2996851239204408,
"std": 0.026246680909362278,
"p25": 2.2763946056365967,
"p75": 2.3184911012649536,
"n": 50
},
{
"step_center": 16750,
"mean": 2.2773553647994995,
"std": 0.02097443365212286,
"p25": 2.2597983360290526,
"p75": 2.2906126737594605,
"n": 50
},
{
"step_center": 17250,
"mean": 2.282346619424366,
"std": 0.028653366820936898,
"p25": 2.2540221214294434,
"p75": 2.305117154121399,
"n": 35
}
],
"stability_score": 0.8442793520944198,
"plateaus": [],
"jumps": [
{
"step": 7300,
"delta": 0.11637275218963605,
"sigma": 3.41
},
{
"step": 9290,
"delta": 0.11292421817779541,
"sigma": 3.25
},
{
"step": 9920,
"delta": -0.10988917350769034,
"sigma": 3.1
}
],
"slope_sign_changes": [
{
"step": 4890,
"direction": "negative\u2192positive"
},
{
"step": 4900,
"direction": "positive\u2192negative"
},
{
"step": 5090,
"direction": "negative\u2192positive"
},
{
"step": 5100,
"direction": "positive\u2192negative"
},
{
"step": 5140,
"direction": "negative\u2192positive"
},
{
"step": 5150,
"direction": "positive\u2192negative"
},
{
"step": 5160,
"direction": "negative\u2192positive"
},
{
"step": 5260,
"direction": "positive\u2192negative"
},
{
"step": 5300,
"direction": "negative\u2192positive"
},
{
"step": 5460,
"direction": "positive\u2192negative"
},
{
"step": 5480,
"direction": "negative\u2192positive"
},
{
"step": 5510,
"direction": "positive\u2192negative"
},
{
"step": 7490,
"direction": "negative\u2192positive"
},
{
"step": 7510,
"direction": "positive\u2192negative"
},
{
"step": 7530,
"direction": "negative\u2192positive"
},
{
"step": 7540,
"direction": "positive\u2192negative"
},
{
"step": 7670,
"direction": "negative\u2192positive"
},
{
"step": 7790,
"direction": "positive\u2192negative"
},
{
"step": 7830,
"direction": "negative\u2192positive"
},
{
"step": 7850,
"direction": "positive\u2192negative"
},
{
"step": 7860,
"direction": "negative\u2192positive"
},
{
"step": 7940,
"direction": "positive\u2192negative"
},
{
"step": 8030,
"direction": "negative\u2192positive"
},
{
"step": 8100,
"direction": "positive\u2192negative"
},
{
"step": 8170,
"direction": "negative\u2192positive"
},
{
"step": 8290,
"direction": "positive\u2192negative"
},
{
"step": 8300,
"direction": "negative\u2192positive"
},
{
"step": 8310,
"direction": "positive\u2192negative"
},
{
"step": 8390,
"direction": "negative\u2192positive"
},
{
"step": 8400,
"direction": "positive\u2192negative"
},
{
"step": 8890,
"direction": "negative\u2192positive"
},
{
"step": 8920,
"direction": "positive\u2192negative"
},
{
"step": 9230,
"direction": "negative\u2192positive"
},
{
"step": 9250,
"direction": "positive\u2192negative"
},
{
"step": 9280,
"direction": "negative\u2192positive"
},
{
"step": 9290,
"direction": "positive\u2192negative"
},
{
"step": 9350,
"direction": "negative\u2192positive"
},
{
"step": 9360,
"direction": "positive\u2192negative"
},
{
"step": 9370,
"direction": "negative\u2192positive"
},
{
"step": 9380,
"direction": "positive\u2192negative"
},
{
"step": 9410,
"direction": "negative\u2192positive"
},
{
"step": 9420,
"direction": "positive\u2192negative"
},
{
"step": 9900,
"direction": "negative\u2192positive"
},
{
"step": 10080,
"direction": "positive\u2192negative"
},
{
"step": 10100,
"direction": "negative\u2192positive"
},
{
"step": 10220,
"direction": "positive\u2192negative"
},
{
"step": 10250,
"direction": "negative\u2192positive"
},
{
"step": 10270,
"direction": "positive\u2192negative"
},
{
"step": 10290,
"direction": "negative\u2192positive"
},
{
"step": 10320,
"direction": "positive\u2192negative"
},
{
"step": 10960,
"direction": "negative\u2192positive"
},
{
"step": 11560,
"direction": "positive\u2192negative"
},
{
"step": 11600,
"direction": "negative\u2192positive"
},
{
"step": 11750,
"direction": "positive\u2192negative"
},
{
"step": 11820,
"direction": "negative\u2192positive"
},
{
"step": 11830,
"direction": "positive\u2192negative"
},
{
"step": 12130,
"direction": "negative\u2192positive"
},
{
"step": 12150,
"direction": "positive\u2192negative"
},
{
"step": 12240,
"direction": "negative\u2192positive"
},
{
"step": 12290,
"direction": "positive\u2192negative"
},
{
"step": 12340,
"direction": "negative\u2192positive"
},
{
"step": 12360,
"direction": "positive\u2192negative"
},
{
"step": 13110,
"direction": "negative\u2192positive"
},
{
"step": 13120,
"direction": "positive\u2192negative"
},
{
"step": 13130,
"direction": "negative\u2192positive"
},
{
"step": 13240,
"direction": "positive\u2192negative"
},
{
"step": 13250,
"direction": "negative\u2192positive"
},
{
"step": 13750,
"direction": "positive\u2192negative"
},
{
"step": 13780,
"direction": "negative\u2192positive"
},
{
"step": 13790,
"direction": "positive\u2192negative"
},
{
"step": 13860,
"direction": "negative\u2192positive"
},
{
"step": 13870,
"direction": "positive\u2192negative"
},
{
"step": 13950,
"direction": "negative\u2192positive"
},
{
"step": 13990,
"direction": "positive\u2192negative"
},
{
"step": 14010,
"direction": "negative\u2192positive"
},
{
"step": 14030,
"direction": "positive\u2192negative"
},
{
"step": 14360,
"direction": "negative\u2192positive"
},
{
"step": 14370,
"direction": "positive\u2192negative"
},
{
"step": 14380,
"direction": "negative\u2192positive"
},
{
"step": 14390,
"direction": "positive\u2192negative"
},
{
"step": 14400,
"direction": "negative\u2192positive"
},
{
"step": 14410,
"direction": "positive\u2192negative"
},
{
"step": 14430,
"direction": "negative\u2192positive"
},
{
"step": 14550,
"direction": "positive\u2192negative"
},
{
"step": 14630,
"direction": "negative\u2192positive"
},
{
"step": 14710,
"direction": "positive\u2192negative"
},
{
"step": 14720,
"direction": "negative\u2192positive"
},
{
"step": 14730,
"direction": "positive\u2192negative"
},
{
"step": 14760,
"direction": "negative\u2192positive"
},
{
"step": 14770,
"direction": "positive\u2192negative"
},
{
"step": 14880,
"direction": "negative\u2192positive"
},
{
"step": 14890,
"direction": "positive\u2192negative"
},
{
"step": 14900,
"direction": "negative\u2192positive"
},
{
"step": 14910,
"direction": "positive\u2192negative"
},
{
"step": 14940,
"direction": "negative\u2192positive"
},
{
"step": 14950,
"direction": "positive\u2192negative"
},
{
"step": 14970,
"direction": "negative\u2192positive"
},
{
"step": 15010,
"direction": "positive\u2192negative"
},
{
"step": 15080,
"direction": "negative\u2192positive"
},
{
"step": 15090,
"direction": "positive\u2192negative"
},
{
"step": 16890,
"direction": "negative\u2192positive"
}
]
},
"geo/rankme_last": {
"n_points": 232,
"slopes_at_landmarks": {
"pct_10": {
"w100": null,
"w500": 0.0033722650437127974,
"w1000": 0.0013678783374828298,
"w5000": 0.0019266211783584095
},
"pct_25": {
"w100": null,
"w500": 0.0008363414946056547,
"w1000": 0.000515038347069597,
"w5000": 0.0009746530627070197
},
"pct_50": {
"w100": null,
"w500": 0.004231329055059524,
"w1000": 0.0006157128190819597,
"w5000": 0.00034299754947128433
},
"pct_75": {
"w100": null,
"w500": 0.0007408040364583334,
"w1000": 0.00018919467576694138,
"w5000": 0.00010008815155809676
},
"pct_90": {
"w100": null,
"w500": -0.0008631388346354167,
"w1000": 0.00013333386990613553,
"w5000": 0.00033015691526263127
}
},
"inflection_points": [],
"rolling_stats": [
{
"step_center": 250,
"mean": 418.8371276855469,
"std": 1.9706179295918487,
"p25": 414.6168212890625,
"p75": 419.4991760253906,
"n": 7
},
{
"step_center": 750,
"mean": 420.7526070731027,
"std": 1.1694264542259036,
"p25": 419.0769348144531,
"p75": 422.18658447265625,
"n": 7
},
{
"step_center": 1250,
"mean": 422.3680369059245,
"std": 0.612363257463856,
"p25": 421.7069091796875,
"p75": 422.3238830566406,
"n": 6
},
{
"step_center": 1750,
"mean": 423.146728515625,
"std": 0.7143769249048134,
"p25": 422.30780029296875,
"p75": 423.8494873046875,
"n": 7
},
{
"step_center": 2250,
"mean": 423.8157217843192,
"std": 0.6644047915811648,
"p25": 422.3829040527344,
"p75": 424.26641845703125,
"n": 7
},
{
"step_center": 2750,
"mean": 424.7442321777344,
"std": 0.38605224992275294,
"p25": 424.3502197265625,
"p75": 424.84075927734375,
"n": 6
},
{
"step_center": 3250,
"mean": 425.2377493722098,
"std": 0.9928863556386898,
"p25": 424.3253173828125,
"p75": 426.3678894042969,
"n": 7
},
{
"step_center": 3750,
"mean": 426.2940150669643,
"std": 0.36511910276686976,
"p25": 425.8962707519531,
"p75": 426.6614074707031,
"n": 7
},
{
"step_center": 4250,
"mean": 426.9090983072917,
"std": 0.5799380755235445,
"p25": 426.3441162109375,
"p75": 427.4007263183594,
"n": 6
},
{
"step_center": 4750,
"mean": 426.70751517159596,
"std": 0.3777274089993139,
"p25": 426.2264404296875,
"p75": 426.9776611328125,
"n": 7
},
{
"step_center": 5250,
"mean": 427.1932460239955,
"std": 0.3474182400524559,
"p25": 426.6791076660156,
"p75": 427.4221496582031,
"n": 7
},
{
"step_center": 5750,
"mean": 428.02931722005206,
"std": 0.7422348443206302,
"p25": 427.381591796875,
"p75": 428.8571472167969,
"n": 6
},
{
"step_center": 6250,
"mean": 427.67706298828125,
"std": 0.5068596593637932,
"p25": 427.1493225097656,
"p75": 428.06072998046875,
"n": 7
},
{
"step_center": 6750,
"mean": 428.2369689941406,
"std": 0.37135674083717257,
"p25": 427.6800537109375,
"p75": 428.4487609863281,
"n": 7
},
{
"step_center": 7250,
"mean": 428.5103810628255,
"std": 0.5867578912524614,
"p25": 427.70166015625,
"p75": 428.73895263671875,
"n": 6
},
{
"step_center": 7750,
"mean": 428.0627746582031,
"std": 0.5193927369314885,
"p25": 427.2740173339844,
"p75": 428.5312805175781,
"n": 7
},
{
"step_center": 8250,
"mean": 428.3086591448103,
"std": 0.5933176949414425,
"p25": 427.58868408203125,
"p75": 428.940185546875,
"n": 7
},
{
"step_center": 8750,
"mean": 428.5880533854167,
"std": 0.551078102138243,
"p25": 427.8064880371094,
"p75": 429.1161193847656,
"n": 6
},
{
"step_center": 9250,
"mean": 428.8888636997768,
"std": 0.5694960752056711,
"p25": 428.0375061035156,
"p75": 429.12469482421875,
"n": 7
},
{
"step_center": 9750,
"mean": 429.2879638671875,
"std": 0.7433667524753128,
"p25": 428.4444580078125,
"p75": 430.04833984375,
"n": 7
},
{
"step_center": 10250,
"mean": 429.48259989420575,
"std": 0.4522335608488025,
"p25": 429.08953857421875,
"p75": 429.6427001953125,
"n": 6
},
{
"step_center": 10750,
"mean": 429.14767020089283,
"std": 0.2757999472749072,
"p25": 428.6318664550781,
"p75": 429.35455322265625,
"n": 7
},
{
"step_center": 11250,
"mean": 429.68481881277904,
"std": 0.33605217382389435,
"p25": 429.0997314453125,
"p75": 429.8418884277344,
"n": 7
},
{
"step_center": 11750,
"mean": 429.19440205891925,
"std": 0.5124843902568214,
"p25": 428.5133361816406,
"p75": 429.4604187011719,
"n": 6
},
{
"step_center": 12250,
"mean": 429.8018711635045,
"std": 0.497040027800742,
"p25": 428.8563232421875,
"p75": 430.1318054199219,
"n": 7
},
{
"step_center": 12750,
"mean": 429.53719220842635,
"std": 0.6863248219684418,
"p25": 428.7851867675781,
"p75": 429.690673828125,
"n": 7
},
{
"step_center": 13250,
"mean": 429.95374552408856,
"std": 0.7975096227032695,
"p25": 428.9168701171875,
"p75": 430.7697448730469,
"n": 6
},
{
"step_center": 13750,
"mean": 429.67769949776783,
"std": 0.6044692586774199,
"p25": 428.8565368652344,
"p75": 430.0766296386719,
"n": 7
},
{
"step_center": 14250,
"mean": 429.861807686942,
"std": 0.7339570078253661,
"p25": 428.733642578125,
"p75": 430.5356140136719,
"n": 7
},
{
"step_center": 14750,
"mean": 429.5887451171875,
"std": 0.9283298841866231,
"p25": 428.0323791503906,
"p75": 430.1398010253906,
"n": 6
},
{
"step_center": 15250,
"mean": 429.84422084263394,
"std": 0.412175806266064,
"p25": 429.30645751953125,
"p75": 430.1964111328125,
"n": 7
},
{
"step_center": 15750,
"mean": 430.33884974888394,
"std": 0.47739178059533427,
"p25": 429.5009765625,
"p75": 430.82623291015625,
"n": 7
},
{
"step_center": 16250,
"mean": 430.0373840332031,
"std": 0.5542415663214534,
"p25": 429.2351989746094,
"p75": 430.5093688964844,
"n": 6
},
{
"step_center": 16750,
"mean": 430.9304722377232,
"std": 0.27740500209971125,
"p25": 430.6304931640625,
"p75": 431.13427734375,
"n": 7
},
{
"step_center": 17250,
"mean": 430.9951477050781,
"std": 0.11213697889472371,
"p25": 430.8769836425781,
"p75": 431.0931701660156,
"n": 5
}
],
"stability_score": 0.633204481136617,
"plateaus": [],
"jumps": [],
"slope_sign_changes": [
{
"step": 4575,
"direction": "positive\u2192negative"
},
{
"step": 4875,
"direction": "negative\u2192positive"
},
{
"step": 6000,
"direction": "positive\u2192negative"
},
{
"step": 6375,
"direction": "negative\u2192positive"
},
{
"step": 7125,
"direction": "positive\u2192negative"
},
{
"step": 7650,
"direction": "negative\u2192positive"
},
{
"step": 8175,
"direction": "positive\u2192negative"
},
{
"step": 8325,
"direction": "negative\u2192positive"
},
{
"step": 9675,
"direction": "positive\u2192negative"
},
{
"step": 9750,
"direction": "negative\u2192positive"
},
{
"step": 10125,
"direction": "positive\u2192negative"
},
{
"step": 10200,
"direction": "negative\u2192positive"
},
{
"step": 10275,
"direction": "positive\u2192negative"
},
{
"step": 10725,
"direction": "negative\u2192positive"
},
{
"step": 11325,
"direction": "positive\u2192negative"
},
{
"step": 11775,
"direction": "negative\u2192positive"
},
{
"step": 12300,
"direction": "positive\u2192negative"
},
{
"step": 12825,
"direction": "negative\u2192positive"
},
{
"step": 13425,
"direction": "positive\u2192negative"
},
{
"step": 13800,
"direction": "negative\u2192positive"
},
{
"step": 13950,
"direction": "positive\u2192negative"
},
{
"step": 14400,
"direction": "negative\u2192positive"
},
{
"step": 14475,
"direction": "positive\u2192negative"
},
{
"step": 14700,
"direction": "negative\u2192positive"
},
{
"step": 15750,
"direction": "positive\u2192negative"
},
{
"step": 16050,
"direction": "negative\u2192positive"
}
]
}
}
},
"geometric_health": {
"layers": [
0,
7,
14,
21,
27
],
"landmarks": {
"early": 1725,
"quarter": 4350,
"mid": 8700,
"three_quarter": 12975,
"late": 15600,
"final": 17325
},
"profiles": {
"early": {
"step": 1725,
"layer_0": {
"stable_rank_q_proj": 20.133892059326172,
"stable_rank_k_proj": 17.006507873535156,
"stable_rank_o_proj": 46.15428161621094,
"stable_rank_gate_proj": 129.89878845214844,
"stable_rank_down_proj": 55.68263244628906,
"attn_entropy_mean": 6.25703239440918,
"attn_entropy_std": 0.4080713391304016,
"anisotropy": 0.06490539014339447,
"dead_units": 0.0
},
"layer_7": {
"stable_rank_q_proj": 42.706878662109375,
"stable_rank_k_proj": 40.13029861450195,
"stable_rank_o_proj": 89.30697631835938,
"stable_rank_gate_proj": 79.12543487548828,
"stable_rank_down_proj": 143.33609008789062,
"attn_entropy_mean": 4.700146675109863,
"attn_entropy_std": 0.7736154198646545,
"anisotropy": 0.4299015402793884,
"dead_units": 0.0
},
"layer_14": {
"stable_rank_q_proj": 50.8188362121582,
"stable_rank_k_proj": 41.04155349731445,
"stable_rank_o_proj": 42.97220993041992,
"stable_rank_gate_proj": 71.69290161132812,
"stable_rank_down_proj": 126.66923522949219,
"attn_entropy_mean": 5.53626823425293,
"attn_entropy_std": 0.4032261073589325,
"anisotropy": 0.39128610491752625,
"dead_units": 0.0
},
"layer_21": {
"stable_rank_q_proj": 40.08066940307617,
"stable_rank_k_proj": 29.80875587463379,
"stable_rank_o_proj": 67.41558837890625,
"stable_rank_gate_proj": 63.583194732666016,
"stable_rank_down_proj": 49.89845275878906,
"attn_entropy_mean": 5.884044647216797,
"attn_entropy_std": 0.31702759861946106,
"anisotropy": 0.1451684832572937,
"dead_units": 0.0
},
"layer_27": {
"stable_rank_q_proj": 44.02388381958008,
"stable_rank_k_proj": 31.312538146972656,
"stable_rank_o_proj": 114.83377838134766,
"stable_rank_gate_proj": 75.68281555175781,
"stable_rank_down_proj": 127.66222381591797,
"attn_entropy_mean": 4.35567045211792,
"attn_entropy_std": 0.6677613854408264,
"anisotropy": 0.10006999224424362,
"dead_units": 0.0
}
},
"quarter": {
"step": 4350,
"layer_0": {
"stable_rank_q_proj": 20.62217140197754,
"stable_rank_k_proj": 17.198904037475586,
"stable_rank_o_proj": 45.66370391845703,
"stable_rank_gate_proj": 130.02139282226562,
"stable_rank_down_proj": 56.49225616455078,
"attn_entropy_mean": 6.258755207061768,
"attn_entropy_std": 0.4235442876815796,
"anisotropy": 0.06752505153417587,
"dead_units": 0.0
},
"layer_7": {
"stable_rank_q_proj": 42.567344665527344,
"stable_rank_k_proj": 39.40324783325195,
"stable_rank_o_proj": 89.7774887084961,
"stable_rank_gate_proj": 79.37095642089844,
"stable_rank_down_proj": 143.08059692382812,
"attn_entropy_mean": 4.703330039978027,
"attn_entropy_std": 0.7717511653900146,
"anisotropy": 0.4199843406677246,
"dead_units": 0.0
},
"layer_14": {
"stable_rank_q_proj": 51.64799880981445,
"stable_rank_k_proj": 41.63627243041992,
"stable_rank_o_proj": 42.701908111572266,
"stable_rank_gate_proj": 71.93598937988281,
"stable_rank_down_proj": 127.28962707519531,
"attn_entropy_mean": 5.524231910705566,
"attn_entropy_std": 0.43370646238327026,
"anisotropy": 0.3828665316104889,
"dead_units": 0.0
},
"layer_21": {
"stable_rank_q_proj": 39.59270095825195,
"stable_rank_k_proj": 29.09538459777832,
"stable_rank_o_proj": 66.52333068847656,
"stable_rank_gate_proj": 62.608497619628906,
"stable_rank_down_proj": 49.814796447753906,
"attn_entropy_mean": 5.853401184082031,
"attn_entropy_std": 0.3115076422691345,
"anisotropy": 0.1433832198381424,
"dead_units": 0.0
},
"layer_27": {
"stable_rank_q_proj": 43.69539260864258,
"stable_rank_k_proj": 30.684505462646484,
"stable_rank_o_proj": 110.17560577392578,
"stable_rank_gate_proj": 73.6478271484375,
"stable_rank_down_proj": 127.23016357421875,
"attn_entropy_mean": 4.3218841552734375,
"attn_entropy_std": 0.6490551233291626,
"anisotropy": 0.10033217072486877,
"dead_units": 0.0
}
},
"mid": {
"step": 8700,
"layer_0": {
"stable_rank_q_proj": 20.74656105041504,
"stable_rank_k_proj": 17.162168502807617,
"stable_rank_o_proj": 44.727996826171875,
"stable_rank_gate_proj": 127.23369598388672,
"stable_rank_down_proj": 56.846107482910156,
"attn_entropy_mean": 6.235823631286621,
"attn_entropy_std": 0.43743276596069336,
"anisotropy": 0.06675643473863602,
"dead_units": 0.0
},
"layer_7": {
"stable_rank_q_proj": 42.293479919433594,
"stable_rank_k_proj": 38.610774993896484,
"stable_rank_o_proj": 89.17684936523438,
"stable_rank_gate_proj": 79.0400161743164,
"stable_rank_down_proj": 144.4713134765625,
"attn_entropy_mean": 4.706167697906494,
"attn_entropy_std": 0.7591027617454529,
"anisotropy": 0.4047275185585022,
"dead_units": 0.0
},
"layer_14": {
"stable_rank_q_proj": 51.58811569213867,
"stable_rank_k_proj": 42.609825134277344,
"stable_rank_o_proj": 42.335105895996094,
"stable_rank_gate_proj": 71.9311752319336,
"stable_rank_down_proj": 126.46110534667969,
"attn_entropy_mean": 5.540119171142578,
"attn_entropy_std": 0.45732536911964417,
"anisotropy": 0.37118563055992126,
"dead_units": 0.0
},
"layer_21": {
"stable_rank_q_proj": 39.304080963134766,
"stable_rank_k_proj": 28.829919815063477,
"stable_rank_o_proj": 65.58588409423828,
"stable_rank_gate_proj": 61.5308837890625,
"stable_rank_down_proj": 49.71337127685547,
"attn_entropy_mean": 5.866855621337891,
"attn_entropy_std": 0.3209395408630371,
"anisotropy": 0.13976767659187317,
"dead_units": 0.0
},
"layer_27": {
"stable_rank_q_proj": 44.39973449707031,
"stable_rank_k_proj": 30.10407829284668,
"stable_rank_o_proj": 107.94586181640625,
"stable_rank_gate_proj": 71.3941879272461,
"stable_rank_down_proj": 129.004150390625,
"attn_entropy_mean": 4.340854644775391,
"attn_entropy_std": 0.6970290541648865,
"anisotropy": 0.09302495419979095,
"dead_units": 0.0
}
},
"three_quarter": {
"step": 12975,
"layer_0": {
"stable_rank_q_proj": 20.623004913330078,
"stable_rank_k_proj": 16.95781898498535,
"stable_rank_o_proj": 43.821876525878906,
"stable_rank_gate_proj": 125.1052474975586,
"stable_rank_down_proj": 57.683834075927734,
"attn_entropy_mean": 6.233590126037598,
"attn_entropy_std": 0.45708325505256653,
"anisotropy": 0.06800613552331924,
"dead_units": 0.0
},
"layer_7": {
"stable_rank_q_proj": 41.80685043334961,
"stable_rank_k_proj": 38.90849685668945,
"stable_rank_o_proj": 88.35002136230469,
"stable_rank_gate_proj": 78.26079559326172,
"stable_rank_down_proj": 144.59579467773438,
"attn_entropy_mean": 4.734737396240234,
"attn_entropy_std": 0.7458856701850891,
"anisotropy": 0.40183892846107483,
"dead_units": 0.0
},
"layer_14": {
"stable_rank_q_proj": 51.84466552734375,
"stable_rank_k_proj": 43.42570114135742,
"stable_rank_o_proj": 42.2808723449707,
"stable_rank_gate_proj": 71.79308319091797,
"stable_rank_down_proj": 127.38919067382812,
"attn_entropy_mean": 5.545279026031494,
"attn_entropy_std": 0.4711344242095947,
"anisotropy": 0.37494924664497375,
"dead_units": 0.0
},
"layer_21": {
"stable_rank_q_proj": 38.30750274658203,
"stable_rank_k_proj": 28.626514434814453,
"stable_rank_o_proj": 65.13455963134766,
"stable_rank_gate_proj": 59.88165283203125,
"stable_rank_down_proj": 48.73686981201172,
"attn_entropy_mean": 5.843373775482178,
"attn_entropy_std": 0.332601934671402,
"anisotropy": 0.1376378834247589,
"dead_units": 0.0
},
"layer_27": {
"stable_rank_q_proj": 44.75471878051758,
"stable_rank_k_proj": 30.430049896240234,
"stable_rank_o_proj": 106.74622344970703,
"stable_rank_gate_proj": 69.8398666381836,
"stable_rank_down_proj": 130.29200744628906,
"attn_entropy_mean": 4.317837715148926,
"attn_entropy_std": 0.69074946641922,
"anisotropy": 0.10297069698572159,
"dead_units": 0.0
}
},
"late": {
"step": 15600,
"layer_0": {
"stable_rank_q_proj": 20.5914363861084,
"stable_rank_k_proj": 16.745973587036133,
"stable_rank_o_proj": 43.73870849609375,
"stable_rank_gate_proj": 123.3493423461914,
"stable_rank_down_proj": 57.89291000366211,
"attn_entropy_mean": 6.228974342346191,
"attn_entropy_std": 0.46670883893966675,
"anisotropy": 0.06984122097492218,
"dead_units": 0.0
},
"layer_7": {
"stable_rank_q_proj": 41.609771728515625,
"stable_rank_k_proj": 38.61107635498047,
"stable_rank_o_proj": 87.21759796142578,
"stable_rank_gate_proj": 77.33723449707031,
"stable_rank_down_proj": 143.90965270996094,
"attn_entropy_mean": 4.746259689331055,
"attn_entropy_std": 0.7679674029350281,
"anisotropy": 0.38015806674957275,
"dead_units": 0.0
},
"layer_14": {
"stable_rank_q_proj": 51.66292190551758,
"stable_rank_k_proj": 44.319820404052734,
"stable_rank_o_proj": 42.175537109375,
"stable_rank_gate_proj": 71.88668823242188,
"stable_rank_down_proj": 126.96123504638672,
"attn_entropy_mean": 5.53586483001709,
"attn_entropy_std": 0.45710787177085876,
"anisotropy": 0.36756598949432373,
"dead_units": 0.0
},
"layer_21": {
"stable_rank_q_proj": 38.205936431884766,
"stable_rank_k_proj": 28.410799026489258,
"stable_rank_o_proj": 64.53252410888672,
"stable_rank_gate_proj": 59.53862380981445,
"stable_rank_down_proj": 48.487876892089844,
"attn_entropy_mean": 5.86893367767334,
"attn_entropy_std": 0.3390011489391327,
"anisotropy": 0.1381273865699768,
"dead_units": 0.0
},
"layer_27": {
"stable_rank_q_proj": 45.488624572753906,
"stable_rank_k_proj": 30.428009033203125,
"stable_rank_o_proj": 106.52822875976562,
"stable_rank_gate_proj": 68.88688659667969,
"stable_rank_down_proj": 129.5972442626953,
"attn_entropy_mean": 4.31506872177124,
"attn_entropy_std": 0.6959003210067749,
"anisotropy": 0.09733413904905319,
"dead_units": 0.0
}
},
"final": {
"step": 17325,
"layer_0": {
"stable_rank_q_proj": 20.46058464050293,
"stable_rank_k_proj": 16.590166091918945,
"stable_rank_o_proj": 43.638336181640625,
"stable_rank_gate_proj": 122.99588775634766,
"stable_rank_down_proj": 57.99134063720703,
"attn_entropy_mean": 6.221095561981201,
"attn_entropy_std": 0.4666074216365814,
"anisotropy": 0.06611216813325882,
"dead_units": 0.0
},
"layer_7": {
"stable_rank_q_proj": 41.592491149902344,
"stable_rank_k_proj": 38.8099479675293,
"stable_rank_o_proj": 86.76081848144531,
"stable_rank_gate_proj": 76.83895874023438,
"stable_rank_down_proj": 144.66598510742188,
"attn_entropy_mean": 4.743325710296631,
"attn_entropy_std": 0.7405821681022644,
"anisotropy": 0.3939518332481384,
"dead_units": 0.0
},
"layer_14": {
"stable_rank_q_proj": 51.51805114746094,
"stable_rank_k_proj": 44.543739318847656,
"stable_rank_o_proj": 42.110252380371094,
"stable_rank_gate_proj": 71.66548156738281,
"stable_rank_down_proj": 126.4752426147461,
"attn_entropy_mean": 5.5013909339904785,
"attn_entropy_std": 0.4690777361392975,
"anisotropy": 0.3837595582008362,
"dead_units": 0.0
},
"layer_21": {
"stable_rank_q_proj": 38.228172302246094,
"stable_rank_k_proj": 28.443443298339844,
"stable_rank_o_proj": 64.04421997070312,
"stable_rank_gate_proj": 59.131065368652344,
"stable_rank_down_proj": 48.718719482421875,
"attn_entropy_mean": 5.846819877624512,
"attn_entropy_std": 0.3328602910041809,
"anisotropy": 0.1341758519411087,
"dead_units": 0.0
},
"layer_27": {
"stable_rank_q_proj": 45.467472076416016,
"stable_rank_k_proj": 30.417682647705078,
"stable_rank_o_proj": 106.4880599975586,
"stable_rank_gate_proj": 68.29488372802734,
"stable_rank_down_proj": 129.311279296875,
"attn_entropy_mean": 4.277439594268799,
"attn_entropy_std": 0.7137026190757751,
"anisotropy": 0.09188564866781235,
"dead_units": 0.0
}
}
},
"depth_gradient": {
"layers": {
"first": 0,
"mid": 14,
"last": 27
},
"attn_entropy_mean": {
"first": 6.221095561981201,
"mid": 5.5013909339904785,
"last": 4.277439594268799
},
"anisotropy": {
"first": 0.06611216813325882,
"mid": 0.3837595582008362,
"last": 0.09188564866781235
},
"stable_rank_q_proj": {
"first": 20.46058464050293,
"mid": 51.51805114746094,
"last": 45.467472076416016
}
},
"rankme_stability": 0.28062874758414175
}
}