| {"ts": "2025-12-27T19:40:44", "step": 5, "epoch": 0.02, "grpo_mean_advantage": -1.3560057254835556e-07, "grpo_std_advantage": 3.0318567496578908e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5922331809997559} |
| {"ts": "2025-12-27T19:46:55", "step": 5, "epoch": 0.02, "grpo_mean_advantage": -1.3560057254835556e-07, "grpo_std_advantage": 3.0318567496578908e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5922331809997559} |
| {"ts": "2025-12-27T19:48:18", "step": 10, "epoch": 0.04, "grpo_mean_advantage": 3.6619603633880615e-06, "grpo_std_advantage": 1.6246918676188216e-05, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5561589002609253} |
| {"ts": "2025-12-27T19:49:40", "step": 15, "epoch": 0.0601, "grpo_mean_advantage": -1.0654330395709621e-07, "grpo_std_advantage": 5.399440965447866e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5759152173995972} |
| {"ts": "2025-12-27T19:51:01", "step": 20, "epoch": 0.0801, "grpo_mean_advantage": -5.871057737749652e-07, "grpo_std_advantage": 2.6951597646984737e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5127314329147339} |
| {"ts": "2025-12-27T19:52:23", "step": 25, "epoch": 0.1001, "grpo_mean_advantage": 6.370246410369873e-07, "grpo_std_advantage": 2.8908377771585947e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.539706826210022} |
| {"ts": "2025-12-27T19:53:48", "step": 30, "epoch": 0.1201, "grpo_mean_advantage": 6.705522359595761e-09, "grpo_std_advantage": 6.189450800775376e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5812538862228394} |
| {"ts": "2025-12-27T19:55:13", "step": 35, "epoch": 0.1401, "grpo_mean_advantage": 3.859400692363124e-07, "grpo_std_advantage": 1.6833292875162442e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5909844636917114} |
| {"ts": "2025-12-27T19:56:38", "step": 40, "epoch": 0.1602, "grpo_mean_advantage": 2.600252742013254e-07, "grpo_std_advantage": 1.4095899132371414e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5630953907966614} |
| {"ts": "2025-12-27T19:58:00", "step": 45, "epoch": 0.1802, "grpo_mean_advantage": -1.2591480924584175e-07, "grpo_std_advantage": 1.0309080380466185e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5604403614997864} |
| {"ts": "2025-12-27T19:59:27", "step": 50, "epoch": 0.2002, "grpo_mean_advantage": -2.808868941883702e-07, "grpo_std_advantage": 1.5696078889959608e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5971035957336426} |
| {"ts": "2025-12-27T20:00:53", "step": 55, "epoch": 0.2202, "grpo_mean_advantage": 2.6822089438383045e-08, "grpo_std_advantage": 3.7878271541558206e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5892971754074097} |
| {"ts": "2025-12-27T20:02:17", "step": 60, "epoch": 0.2402, "grpo_mean_advantage": -5.662441182607836e-08, "grpo_std_advantage": 6.128998393251095e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.564322292804718} |
| {"ts": "2025-12-27T20:03:45", "step": 65, "epoch": 0.2603, "grpo_mean_advantage": -1.5944242193199898e-07, "grpo_std_advantage": 1.6374274309782777e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.562497615814209} |
| {"ts": "2025-12-27T20:05:10", "step": 70, "epoch": 0.2803, "grpo_mean_advantage": 1.6838312433264946e-07, "grpo_std_advantage": 8.536571272088622e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5904761552810669} |
| {"ts": "2025-12-27T20:06:36", "step": 75, "epoch": 0.3003, "grpo_mean_advantage": 1.1175870895385742e-07, "grpo_std_advantage": 6.451961667153228e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5765624046325684} |
| {"ts": "2025-12-27T20:08:03", "step": 80, "epoch": 0.3203, "grpo_mean_advantage": -1.4603138254187797e-07, "grpo_std_advantage": 1.1309343790344428e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5858271718025208} |
| {"ts": "2025-12-27T20:09:31", "step": 85, "epoch": 0.3403, "grpo_mean_advantage": -1.817941665649414e-06, "grpo_std_advantage": 1.1141768482048064e-05, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5871662497520447} |
| {"ts": "2025-12-27T20:10:55", "step": 90, "epoch": 0.3604, "grpo_mean_advantage": 1.8179416372277046e-07, "grpo_std_advantage": 6.210335072864837e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5330992937088013} |
| {"ts": "2025-12-27T20:12:19", "step": 95, "epoch": 0.3804, "grpo_mean_advantage": -2.972781771859445e-07, "grpo_std_advantage": 3.1582342217006953e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5265295505523682} |
| {"ts": "2025-12-27T20:13:46", "step": 100, "epoch": 0.4004, "grpo_mean_advantage": -7.033348197182931e-07, "grpo_std_advantage": 4.245831405569334e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5660771131515503} |
| {"ts": "2025-12-27T20:15:15", "step": 105, "epoch": 0.4204, "grpo_mean_advantage": 1.1920928955078125e-07, "grpo_std_advantage": 3.2809634831210133e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.57631915807724} |
| {"ts": "2025-12-27T20:16:39", "step": 110, "epoch": 0.4404, "grpo_mean_advantage": -4.0978193283081055e-07, "grpo_std_advantage": 6.0397578636184335e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.546563982963562} |
| {"ts": "2025-12-27T20:18:01", "step": 115, "epoch": 0.4605, "grpo_mean_advantage": -1.467764434437413e-07, "grpo_std_advantage": 2.2689375782647403e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5519219636917114} |
| {"ts": "2025-12-27T20:19:25", "step": 120, "epoch": 0.4805, "grpo_mean_advantage": -5.215406329028838e-09, "grpo_std_advantage": 7.929010621410271e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5490407943725586} |
| {"ts": "2025-12-27T20:20:48", "step": 125, "epoch": 0.5005, "grpo_mean_advantage": -5.7369469175228005e-08, "grpo_std_advantage": 1.2823379620385822e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5646580457687378} |
| {"ts": "2025-12-27T20:22:09", "step": 130, "epoch": 0.5205, "grpo_mean_advantage": 2.9876827056796174e-07, "grpo_std_advantage": 1.0496698905626545e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.6111599802970886} |
| {"ts": "2025-12-27T20:23:32", "step": 135, "epoch": 0.5405, "grpo_mean_advantage": 1.5869736103013565e-07, "grpo_std_advantage": 1.2748531617035042e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5619662404060364} |
| {"ts": "2025-12-27T20:24:57", "step": 140, "epoch": 0.5606, "grpo_mean_advantage": 3.0100346748440643e-07, "grpo_std_advantage": 2.4499684059264837e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5795454978942871} |
| {"ts": "2025-12-27T20:26:19", "step": 145, "epoch": 0.5806, "grpo_mean_advantage": -3.233552092751779e-07, "grpo_std_advantage": 1.248456669600273e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5804953575134277} |
| {"ts": "2025-12-27T20:27:43", "step": 150, "epoch": 0.6006, "grpo_mean_advantage": 3.2261013416245987e-07, "grpo_std_advantage": 1.4773489738217904e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5628539323806763} |
| {"ts": "2025-12-27T20:29:07", "step": 155, "epoch": 0.6206, "grpo_mean_advantage": -2.5331974029541016e-07, "grpo_std_advantage": 1.5092309695319273e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5727725625038147} |
| {"ts": "2025-12-27T20:30:30", "step": 160, "epoch": 0.6406, "grpo_mean_advantage": -6.780028627417778e-08, "grpo_std_advantage": 8.550978805033083e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5833909511566162} |
| {"ts": "2025-12-27T20:31:51", "step": 165, "epoch": 0.6607, "grpo_mean_advantage": -5.587935447692871e-08, "grpo_std_advantage": 3.564579174053506e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5742615461349487} |
| {"ts": "2025-12-27T20:33:17", "step": 170, "epoch": 0.6807, "grpo_mean_advantage": -5.327165126800537e-07, "grpo_std_advantage": 2.309018327650847e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5758188962936401} |
| {"ts": "2025-12-27T20:34:40", "step": 175, "epoch": 0.7007, "grpo_mean_advantage": 5.863606702405377e-07, "grpo_std_advantage": 2.4449204829579685e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5767683982849121} |
| {"ts": "2025-12-27T20:36:09", "step": 180, "epoch": 0.7207, "grpo_mean_advantage": 3.2186508747145126e-07, "grpo_std_advantage": 2.293551688126172e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.586772084236145} |
| {"ts": "2025-12-27T20:37:33", "step": 185, "epoch": 0.7407, "grpo_mean_advantage": -4.470348358154297e-08, "grpo_std_advantage": 3.7067667335577426e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.549396276473999} |
| {"ts": "2025-12-27T20:38:56", "step": 190, "epoch": 0.7608, "grpo_mean_advantage": -2.1010637851759384e-07, "grpo_std_advantage": 1.1695076409523608e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5798425078392029} |
| {"ts": "2025-12-27T20:40:21", "step": 195, "epoch": 0.7808, "grpo_mean_advantage": 1.765787658314366e-07, "grpo_std_advantage": 2.429934738756856e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5584167838096619} |
| {"ts": "2025-12-27T20:41:46", "step": 200, "epoch": 0.8008, "grpo_mean_advantage": 1.6540289493605087e-07, "grpo_std_advantage": 2.6342788714828203e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5676193237304688} |
| {"ts": "2025-12-27T20:43:13", "step": 205, "epoch": 0.8208, "grpo_mean_advantage": -1.0944902442133753e-06, "grpo_std_advantage": 5.346942998585291e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5669739842414856} |
| {"ts": "2025-12-27T20:44:38", "step": 210, "epoch": 0.8408, "grpo_mean_advantage": 2.4065374759629776e-07, "grpo_std_advantage": 1.6327536513927043e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5922158360481262} |
| {"ts": "2025-12-27T20:46:02", "step": 215, "epoch": 0.8609, "grpo_mean_advantage": -5.21540641784668e-08, "grpo_std_advantage": 5.847922466273303e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5473950505256653} |
| {"ts": "2025-12-27T20:47:28", "step": 220, "epoch": 0.8809, "grpo_mean_advantage": 6.541609991472797e-07, "grpo_std_advantage": 4.072162937518442e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5880032777786255} |
| {"ts": "2025-12-27T20:48:53", "step": 225, "epoch": 0.9009, "grpo_mean_advantage": -1.2218951894737984e-07, "grpo_std_advantage": 4.386006935419573e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5835092663764954} |
| {"ts": "2025-12-27T20:50:20", "step": 230, "epoch": 0.9209, "grpo_mean_advantage": 1.7605722177904681e-06, "grpo_std_advantage": 8.007580618141219e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5394966006278992} |
| {"ts": "2025-12-27T20:51:41", "step": 235, "epoch": 0.9409, "grpo_mean_advantage": -3.3080578987210174e-07, "grpo_std_advantage": 1.551636614749441e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5687432289123535} |
| {"ts": "2025-12-27T20:53:06", "step": 240, "epoch": 0.961, "grpo_mean_advantage": 2.712011450967111e-07, "grpo_std_advantage": 1.4400844747797237e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5550583600997925} |
| {"ts": "2025-12-27T20:54:27", "step": 245, "epoch": 0.981, "grpo_mean_advantage": -3.2857059295565705e-07, "grpo_std_advantage": 2.105091425619321e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.558111310005188} |
| {"ts": "2025-12-27T20:55:45", "step": 250, "epoch": 1.0, "grpo_mean_advantage": 4.470348358154297e-08, "grpo_std_advantage": 5.315724820320611e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.6196198463439941} |
| {"ts": "2025-12-27T20:57:06", "step": 255, "epoch": 1.02, "grpo_mean_advantage": 9.290873776990338e-07, "grpo_std_advantage": 4.219644324621186e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.582168459892273} |
| {"ts": "2025-12-27T20:58:26", "step": 260, "epoch": 1.04, "grpo_mean_advantage": 2.533197474008375e-08, "grpo_std_advantage": 1.6600588992332632e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5551307797431946} |
| {"ts": "2025-12-27T20:59:48", "step": 265, "epoch": 1.0601, "grpo_mean_advantage": -5.662441182607836e-08, "grpo_std_advantage": 1.0909400316450046e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.535040020942688} |
| {"ts": "2025-12-27T21:01:15", "step": 270, "epoch": 1.0801, "grpo_mean_advantage": -9.536743306171047e-08, "grpo_std_advantage": 5.838213610331877e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5673571825027466} |
| {"ts": "2025-12-27T21:02:37", "step": 275, "epoch": 1.1001, "grpo_mean_advantage": 3.278255533700758e-08, "grpo_std_advantage": 9.317170679423725e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5874732732772827} |
| {"ts": "2025-12-27T21:03:59", "step": 280, "epoch": 1.1201, "grpo_mean_advantage": -1.206994113545079e-07, "grpo_std_advantage": 6.201085511747806e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5569106340408325} |
| {"ts": "2025-12-27T21:05:19", "step": 285, "epoch": 1.1401, "grpo_mean_advantage": 4.470348358154297e-08, "grpo_std_advantage": 6.115651558502577e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5578873157501221} |
| {"ts": "2025-12-27T21:06:43", "step": 290, "epoch": 1.1602, "grpo_mean_advantage": -3.3453108017056365e-07, "grpo_std_advantage": 3.5326345368957845e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5735999345779419} |
| {"ts": "2025-12-27T21:08:10", "step": 295, "epoch": 1.1802, "grpo_mean_advantage": -1.110136480519941e-07, "grpo_std_advantage": 4.731904823529476e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5626259446144104} |
| {"ts": "2025-12-27T21:09:34", "step": 300, "epoch": 1.2002, "grpo_mean_advantage": -5.08874677507265e-07, "grpo_std_advantage": 1.840126174101897e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5463050603866577} |
| {"ts": "2025-12-27T21:10:59", "step": 305, "epoch": 1.2202, "grpo_mean_advantage": 1.01327898960335e-07, "grpo_std_advantage": 7.798533943059738e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5352144241333008} |
| {"ts": "2025-12-27T21:12:25", "step": 310, "epoch": 1.2402, "grpo_mean_advantage": 1.341104507446289e-07, "grpo_std_advantage": 7.821902840987605e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5547868013381958} |
| {"ts": "2025-12-27T21:13:52", "step": 315, "epoch": 1.2603, "grpo_mean_advantage": 9.015202806494926e-08, "grpo_std_advantage": 1.0693488547985908e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5859472751617432} |
| {"ts": "2025-12-27T21:15:21", "step": 320, "epoch": 1.2803, "grpo_mean_advantage": -2.443790378947597e-07, "grpo_std_advantage": 1.183122208203713e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5751550793647766} |
| {"ts": "2025-12-27T21:16:47", "step": 325, "epoch": 1.3003, "grpo_mean_advantage": -6.705522537231445e-08, "grpo_std_advantage": 6.109748937888071e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5497723817825317} |
| {"ts": "2025-12-27T21:18:13", "step": 330, "epoch": 1.3203, "grpo_mean_advantage": -1.639127766850379e-08, "grpo_std_advantage": 5.529495297196263e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.55989670753479} |
| {"ts": "2025-12-27T21:19:37", "step": 335, "epoch": 1.3403, "grpo_mean_advantage": 4.418194237132411e-07, "grpo_std_advantage": 2.9275292945385445e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5809233784675598} |
| {"ts": "2025-12-27T21:20:59", "step": 340, "epoch": 1.3604, "grpo_mean_advantage": 9.685754776000977e-08, "grpo_std_advantage": 3.754235251562932e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5568087100982666} |
| {"ts": "2025-12-27T21:22:27", "step": 345, "epoch": 1.3804, "grpo_mean_advantage": -2.384185791015625e-07, "grpo_std_advantage": 6.821086913078034e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5655568838119507} |
| {"ts": "2025-12-27T21:23:49", "step": 350, "epoch": 1.4004, "grpo_mean_advantage": -8.717179156292332e-08, "grpo_std_advantage": 2.500940354366321e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.6089578866958618} |
| {"ts": "2025-12-27T21:25:13", "step": 355, "epoch": 1.4204, "grpo_mean_advantage": 2.1606683731079102e-07, "grpo_std_advantage": 1.4568390724889468e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.6129671335220337} |
| {"ts": "2025-12-27T21:26:36", "step": 360, "epoch": 1.4404, "grpo_mean_advantage": -3.725290298461914e-09, "grpo_std_advantage": 2.965894054796081e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5562310814857483} |
| {"ts": "2025-12-27T21:27:58", "step": 365, "epoch": 1.4605, "grpo_mean_advantage": 4.313886279305734e-07, "grpo_std_advantage": 1.9621948013082147e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5884170532226562} |
| {"ts": "2025-12-27T21:29:21", "step": 370, "epoch": 1.4805, "grpo_mean_advantage": 2.0489096641540527e-07, "grpo_std_advantage": 1.0235522722723545e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5795440673828125} |
| {"ts": "2025-12-27T21:30:45", "step": 375, "epoch": 1.5005, "grpo_mean_advantage": -1.4185905001795618e-06, "grpo_std_advantage": 1.0947338523692451e-05, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5607603788375854} |
| {"ts": "2025-12-27T21:32:13", "step": 380, "epoch": 1.5205, "grpo_mean_advantage": -1.758337049295733e-07, "grpo_std_advantage": 9.663675655247062e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5381432771682739} |
| {"ts": "2025-12-27T21:33:36", "step": 385, "epoch": 1.5405, "grpo_mean_advantage": -6.973743325033865e-07, "grpo_std_advantage": 4.341973180999048e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5528443455696106} |
| {"ts": "2025-12-27T21:34:59", "step": 390, "epoch": 1.5606, "grpo_mean_advantage": 1.7881394143159923e-08, "grpo_std_advantage": 1.3004198251564958e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.6091476678848267} |
| {"ts": "2025-12-27T21:36:21", "step": 395, "epoch": 1.5806, "grpo_mean_advantage": 3.4868716625169327e-07, "grpo_std_advantage": 2.059372718576924e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5397372245788574} |
| {"ts": "2025-12-27T21:37:48", "step": 400, "epoch": 1.6006, "grpo_mean_advantage": -2.1636485598719446e-06, "grpo_std_advantage": 9.725940799398813e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5873125195503235} |
| {"ts": "2025-12-27T21:39:11", "step": 405, "epoch": 1.6206, "grpo_mean_advantage": -5.960464477539063e-08, "grpo_std_advantage": 3.460792754594877e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5601426362991333} |
| {"ts": "2025-12-27T21:40:38", "step": 410, "epoch": 1.6406, "grpo_mean_advantage": 2.6226044269606064e-07, "grpo_std_advantage": 7.928817922220333e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.578656792640686} |
| {"ts": "2025-12-27T21:42:02", "step": 415, "epoch": 1.6607, "grpo_mean_advantage": 2.3558736756967846e-06, "grpo_std_advantage": 1.4469559573626611e-05, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5885810852050781} |
| {"ts": "2025-12-27T21:43:27", "step": 420, "epoch": 1.6807, "grpo_mean_advantage": 1.639127766850379e-08, "grpo_std_advantage": 9.352411325380672e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5805023312568665} |
| {"ts": "2025-12-27T21:44:52", "step": 425, "epoch": 1.7007, "grpo_mean_advantage": 3.2387674764322583e-06, "grpo_std_advantage": 1.999079904635437e-05, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5655918121337891} |
| {"ts": "2025-12-27T21:46:17", "step": 430, "epoch": 1.7207, "grpo_mean_advantage": 3.5464762504489045e-07, "grpo_std_advantage": 1.7663603557593888e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5474504232406616} |
| {"ts": "2025-12-27T21:47:43", "step": 435, "epoch": 1.7407, "grpo_mean_advantage": 3.6135315895080566e-07, "grpo_std_advantage": 2.356920958845876e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5874254703521729} |
| {"ts": "2025-12-27T21:49:08", "step": 440, "epoch": 1.7608, "grpo_mean_advantage": 2.7567148563889532e-08, "grpo_std_advantage": 9.97340521280421e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5815118551254272} |
| {"ts": "2025-12-27T21:50:35", "step": 445, "epoch": 1.7808, "grpo_mean_advantage": -8.34465012644614e-08, "grpo_std_advantage": 5.558832185670326e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5611211061477661} |
| {"ts": "2025-12-27T21:52:06", "step": 450, "epoch": 1.8008, "grpo_mean_advantage": -1.9818544672034477e-07, "grpo_std_advantage": 6.800727305744658e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.590424656867981} |
| {"ts": "2025-12-27T21:53:32", "step": 455, "epoch": 1.8208, "grpo_mean_advantage": -1.9371508841459217e-08, "grpo_std_advantage": 3.142378943721269e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5670351982116699} |
| {"ts": "2025-12-27T21:55:00", "step": 460, "epoch": 1.8408, "grpo_mean_advantage": 2.3692845729783585e-07, "grpo_std_advantage": 1.682946731307311e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5640432834625244} |
| {"ts": "2025-12-27T21:56:27", "step": 465, "epoch": 1.8609, "grpo_mean_advantage": 1.110136480519941e-07, "grpo_std_advantage": 8.930008448260196e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5584251284599304} |
| {"ts": "2025-12-27T21:57:50", "step": 470, "epoch": 1.8809, "grpo_mean_advantage": 2.5406478698641877e-07, "grpo_std_advantage": 9.93092498902115e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5392154455184937} |
| {"ts": "2025-12-27T21:59:15", "step": 475, "epoch": 1.9009, "grpo_mean_advantage": -8.940697071579962e-09, "grpo_std_advantage": 5.835169645251881e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5686308741569519} |
| {"ts": "2025-12-27T22:00:36", "step": 480, "epoch": 1.9209, "grpo_mean_advantage": 4.0605664253234863e-07, "grpo_std_advantage": 2.3210795916384086e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5842767357826233} |
| {"ts": "2025-12-27T22:01:58", "step": 485, "epoch": 1.9409, "grpo_mean_advantage": 1.341104507446289e-07, "grpo_std_advantage": 1.507950400991831e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5495311617851257} |
| {"ts": "2025-12-27T22:03:20", "step": 490, "epoch": 1.961, "grpo_mean_advantage": 2.689659481802664e-07, "grpo_std_advantage": 8.491958851664094e-07, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.549436628818512} |
| {"ts": "2025-12-27T22:04:42", "step": 495, "epoch": 1.981, "grpo_mean_advantage": 8.195638656616211e-08, "grpo_std_advantage": 3.802849732892355e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.544632077217102} |
| {"ts": "2025-12-27T22:06:01", "step": 500, "epoch": 2.0, "grpo_mean_advantage": 7.552536089860951e-07, "grpo_std_advantage": 4.143997102801222e-06, "grpo_mean_kl_div": 0.0, "grpo_mean_group_score": 0.5968535542488098} |
|
|