| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 600, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.025, |
| "grad_norm": 198.0, |
| "learning_rate": 1.747190540874941e-05, |
| "loss": 0.5751, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 186.0, |
| "learning_rate": 3.931178716968617e-05, |
| "loss": 0.4175, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.075, |
| "grad_norm": 270.0, |
| "learning_rate": 6.115166893062294e-05, |
| "loss": 0.3509, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 110.5, |
| "learning_rate": 8.29915506915597e-05, |
| "loss": 0.3508, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 111.0, |
| "learning_rate": 0.00010483143245249646, |
| "loss": 0.4222, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 83.5, |
| "learning_rate": 0.00012667131421343323, |
| "loss": 0.6498, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.175, |
| "grad_norm": 65.0, |
| "learning_rate": 0.00014851119597437, |
| "loss": 0.8894, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 104.5, |
| "learning_rate": 0.0001528649930352754, |
| "loss": 0.6775, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.225, |
| "grad_norm": 29.5, |
| "learning_rate": 0.00015280740168512177, |
| "loss": 0.6998, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 17.0, |
| "learning_rate": 0.00015270555654763282, |
| "loss": 0.6402, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.275, |
| "grad_norm": 16.75, |
| "learning_rate": 0.00015255953633738878, |
| "loss": 0.4757, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 11.75, |
| "learning_rate": 0.0001523694539112214, |
| "loss": 0.3831, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.325, |
| "grad_norm": 6.5625, |
| "learning_rate": 0.00015213545618098876, |
| "loss": 0.3437, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 6.03125, |
| "learning_rate": 0.00015185772400002907, |
| "loss": 0.2986, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 4.46875, |
| "learning_rate": 0.00015153647202338207, |
| "loss": 0.2774, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 7.6875, |
| "learning_rate": 0.00015117194854188525, |
| "loss": 0.2703, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.425, |
| "grad_norm": 5.65625, |
| "learning_rate": 0.00015076443529027353, |
| "loss": 0.2501, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 26.375, |
| "learning_rate": 0.00015031424722943083, |
| "loss": 0.3738, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.475, |
| "grad_norm": 8.1875, |
| "learning_rate": 0.00014982173230296148, |
| "loss": 0.2765, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 7.125, |
| "learning_rate": 0.00014928727116826976, |
| "loss": 0.2929, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.525, |
| "grad_norm": 7.8125, |
| "learning_rate": 0.00014871127690235564, |
| "loss": 0.2699, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 6.125, |
| "learning_rate": 0.00014809419468255356, |
| "loss": 0.269, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.575, |
| "grad_norm": 4.1875, |
| "learning_rate": 0.00014743650144246167, |
| "loss": 0.2615, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 3.828125, |
| "learning_rate": 0.00014673870550332703, |
| "loss": 0.2325, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 5.375, |
| "learning_rate": 0.00014600134618117166, |
| "loss": 0.2295, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 3.953125, |
| "learning_rate": 0.0001452249933699633, |
| "loss": 0.2407, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.675, |
| "grad_norm": 2.09375, |
| "learning_rate": 0.0001444102471011529, |
| "loss": 0.2192, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.8515625, |
| "learning_rate": 0.00014355773707991926, |
| "loss": 0.2133, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.725, |
| "grad_norm": 2.078125, |
| "learning_rate": 0.00014266812219847945, |
| "loss": 0.2014, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 2.0625, |
| "learning_rate": 0.00014174209002684087, |
| "loss": 0.191, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.775, |
| "grad_norm": 2.828125, |
| "learning_rate": 0.00014078035628138847, |
| "loss": 0.2024, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.00013978366427171864, |
| "loss": 0.1871, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.825, |
| "grad_norm": 1.90625, |
| "learning_rate": 0.00013875278432614612, |
| "loss": 0.1751, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.1171875, |
| "learning_rate": 0.00013768851319632887, |
| "loss": 0.1679, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.875, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.00013659167344147067, |
| "loss": 0.1818, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.8671875, |
| "learning_rate": 0.0001354631127925774, |
| "loss": 0.1692, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.925, |
| "grad_norm": 1.5078125, |
| "learning_rate": 0.0001343037034972584, |
| "loss": 0.1619, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.6328125, |
| "learning_rate": 0.0001331143416455796, |
| "loss": 0.1617, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.975, |
| "grad_norm": 1.6953125, |
| "learning_rate": 0.00013189594647748868, |
| "loss": 0.1615, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.5, |
| "learning_rate": 0.00013064945967234835, |
| "loss": 0.1689, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.16072207689285278, |
| "eval_runtime": 0.5972, |
| "eval_samples_per_second": 43.533, |
| "eval_steps_per_second": 43.533, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.025, |
| "grad_norm": 1.828125, |
| "learning_rate": 0.0001293758446211266, |
| "loss": 0.1629, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 1.59375, |
| "learning_rate": 0.00012807608568180618, |
| "loss": 0.1624, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.075, |
| "grad_norm": 1.0859375, |
| "learning_rate": 0.00012675118741858906, |
| "loss": 0.1614, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 1.265625, |
| "learning_rate": 0.00012540217382548384, |
| "loss": 0.1636, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.125, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.0001240300875348761, |
| "loss": 0.1567, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 2.609375, |
| "learning_rate": 0.0001226359890116935, |
| "loss": 0.1663, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.175, |
| "grad_norm": 1.9140625, |
| "learning_rate": 0.00012122095573378837, |
| "loss": 0.1774, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 3.015625, |
| "learning_rate": 0.00011978608135917105, |
| "loss": 0.1701, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.225, |
| "grad_norm": 2.59375, |
| "learning_rate": 0.00011833247488073823, |
| "loss": 0.1853, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 2.453125, |
| "learning_rate": 0.00011686125976914878, |
| "loss": 0.1948, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.275, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.00011537357310451031, |
| "loss": 0.1733, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 1.890625, |
| "learning_rate": 0.00011387056469754679, |
| "loss": 0.1624, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.325, |
| "grad_norm": 2.390625, |
| "learning_rate": 0.00011235339620092721, |
| "loss": 0.1684, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 2.390625, |
| "learning_rate": 0.0001108232402114416, |
| "loss": 0.169, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.375, |
| "grad_norm": 2.65625, |
| "learning_rate": 0.0001092812793637186, |
| "loss": 0.1789, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 1.6875, |
| "learning_rate": 0.0001077287054161847, |
| "loss": 0.1695, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.425, |
| "grad_norm": 1.4375, |
| "learning_rate": 0.00010616671832997237, |
| "loss": 0.1671, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 1.0390625, |
| "learning_rate": 0.00010459652534148764, |
| "loss": 0.1675, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.475, |
| "grad_norm": 0.97265625, |
| "learning_rate": 0.00010301934002935564, |
| "loss": 0.159, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 1.7109375, |
| "learning_rate": 0.00010143638137646338, |
| "loss": 0.1578, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.525, |
| "grad_norm": 1.171875, |
| "learning_rate": 9.984887282782665e-05, |
| "loss": 0.1588, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 1.0546875, |
| "learning_rate": 9.825804134500727e-05, |
| "loss": 0.1535, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.575, |
| "grad_norm": 1.296875, |
| "learning_rate": 9.666511645781328e-05, |
| "loss": 0.1587, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 1.109375, |
| "learning_rate": 9.507132931401333e-05, |
| "loss": 0.1514, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.625, |
| "grad_norm": 1.21875, |
| "learning_rate": 9.347791172780155e-05, |
| "loss": 0.1486, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 1.0703125, |
| "learning_rate": 9.188609522774628e-05, |
| "loss": 0.149, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.675, |
| "grad_norm": 0.87109375, |
| "learning_rate": 9.029711010496061e-05, |
| "loss": 0.1467, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 0.8984375, |
| "learning_rate": 8.871218446222844e-05, |
| "loss": 0.1456, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.725, |
| "grad_norm": 0.7734375, |
| "learning_rate": 8.713254326482237e-05, |
| "loss": 0.1439, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 1.0625, |
| "learning_rate": 8.555940739374653e-05, |
| "loss": 0.1426, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.775, |
| "grad_norm": 0.9453125, |
| "learning_rate": 8.399399270213575e-05, |
| "loss": 0.1399, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.953125, |
| "learning_rate": 8.243750907554097e-05, |
| "loss": 0.1436, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.825, |
| "grad_norm": 0.7421875, |
| "learning_rate": 8.089115949682696e-05, |
| "loss": 0.1375, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 0.6640625, |
| "learning_rate": 7.935613911640464e-05, |
| "loss": 0.1384, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 0.828125, |
| "learning_rate": 7.783363432851746e-05, |
| "loss": 0.1366, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.859375, |
| "learning_rate": 7.632482185429501e-05, |
| "loss": 0.1374, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.925, |
| "grad_norm": 0.87890625, |
| "learning_rate": 7.483086783228284e-05, |
| "loss": 0.1413, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 1.1015625, |
| "learning_rate": 7.335292691715154e-05, |
| "loss": 0.1371, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.975, |
| "grad_norm": 0.8359375, |
| "learning_rate": 7.189214138728142e-05, |
| "loss": 0.1413, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.8359375, |
| "learning_rate": 7.044964026191261e-05, |
| "loss": 0.1378, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.13336719572544098, |
| "eval_runtime": 0.5924, |
| "eval_samples_per_second": 43.887, |
| "eval_steps_per_second": 43.887, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.025, |
| "grad_norm": 0.8125, |
| "learning_rate": 6.902653842854314e-05, |
| "loss": 0.1382, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.05, |
| "grad_norm": 0.6640625, |
| "learning_rate": 6.762393578124894e-05, |
| "loss": 0.1273, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.075, |
| "grad_norm": 0.6640625, |
| "learning_rate": 6.624291637059237e-05, |
| "loss": 0.1319, |
| "step": 415 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 0.6796875, |
| "learning_rate": 6.48845475657757e-05, |
| "loss": 0.1296, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.125, |
| "grad_norm": 0.68359375, |
| "learning_rate": 6.354987922968741e-05, |
| "loss": 0.136, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.15, |
| "grad_norm": 0.8203125, |
| "learning_rate": 6.223994290747898e-05, |
| "loss": 0.1332, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.175, |
| "grad_norm": 0.79296875, |
| "learning_rate": 6.095575102929907e-05, |
| "loss": 0.1337, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.7421875, |
| "learning_rate": 5.969829612780141e-05, |
| "loss": 0.1295, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.225, |
| "grad_norm": 0.85546875, |
| "learning_rate": 5.8468550071031296e-05, |
| "loss": 0.1291, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 1.0390625, |
| "learning_rate": 5.726746331128316e-05, |
| "loss": 0.1364, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.275, |
| "grad_norm": 1.0, |
| "learning_rate": 5.609596415051039e-05, |
| "loss": 0.1365, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 1.0390625, |
| "learning_rate": 5.495495802285465e-05, |
| "loss": 0.1317, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.325, |
| "grad_norm": 0.7890625, |
| "learning_rate": 5.384532679484933e-05, |
| "loss": 0.1296, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.35, |
| "grad_norm": 0.66796875, |
| "learning_rate": 5.276792808383817e-05, |
| "loss": 0.1355, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.375, |
| "grad_norm": 0.875, |
| "learning_rate": 5.1723594595135666e-05, |
| "loss": 0.1324, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.77734375, |
| "learning_rate": 5.07131334784416e-05, |
| "loss": 0.1315, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.425, |
| "grad_norm": 0.8359375, |
| "learning_rate": 4.973732570400718e-05, |
| "loss": 0.1284, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.45, |
| "grad_norm": 0.79296875, |
| "learning_rate": 4.879692545903476e-05, |
| "loss": 0.1345, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.475, |
| "grad_norm": 0.875, |
| "learning_rate": 4.789265956477791e-05, |
| "loss": 0.126, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.953125, |
| "learning_rate": 4.702522691479217e-05, |
| "loss": 0.1345, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.5, |
| "eval_loss": 0.12545974552631378, |
| "eval_runtime": 0.5917, |
| "eval_samples_per_second": 43.943, |
| "eval_steps_per_second": 43.943, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.525, |
| "grad_norm": 0.8125, |
| "learning_rate": 4.619529793477068e-05, |
| "loss": 0.1291, |
| "step": 505 |
| }, |
| { |
| "epoch": 2.55, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.540351406438219e-05, |
| "loss": 0.1301, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.575, |
| "grad_norm": 0.921875, |
| "learning_rate": 4.465048726151201e-05, |
| "loss": 0.128, |
| "step": 515 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 1.1875, |
| "learning_rate": 4.393679952928885e-05, |
| "loss": 0.1288, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.625, |
| "grad_norm": 1.203125, |
| "learning_rate": 4.3263002466263436e-05, |
| "loss": 0.1281, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.65, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.262961684008613e-05, |
| "loss": 0.1305, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.675, |
| "grad_norm": 0.88671875, |
| "learning_rate": 4.203713218501353e-05, |
| "loss": 0.1258, |
| "step": 535 |
| }, |
| { |
| "epoch": 2.7, |
| "grad_norm": 0.9921875, |
| "learning_rate": 4.1486006423554745e-05, |
| "loss": 0.1284, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.725, |
| "grad_norm": 0.84765625, |
| "learning_rate": 4.097666551254989e-05, |
| "loss": 0.1269, |
| "step": 545 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 0.92578125, |
| "learning_rate": 4.0509503113954545e-05, |
| "loss": 0.128, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.775, |
| "grad_norm": 0.90234375, |
| "learning_rate": 4.008488029058422e-05, |
| "loss": 0.1293, |
| "step": 555 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.85546875, |
| "learning_rate": 3.9703125227054376e-05, |
| "loss": 0.132, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.825, |
| "grad_norm": 0.859375, |
| "learning_rate": 3.9364532976131475e-05, |
| "loss": 0.1247, |
| "step": 565 |
| }, |
| { |
| "epoch": 2.85, |
| "grad_norm": 1.6796875, |
| "learning_rate": 3.906936523069101e-05, |
| "loss": 0.1246, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.875, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.8817850121459174e-05, |
| "loss": 0.1246, |
| "step": 575 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.861018204069391e-05, |
| "loss": 0.1322, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.925, |
| "grad_norm": 0.859375, |
| "learning_rate": 3.8446521491942034e-05, |
| "loss": 0.1277, |
| "step": 585 |
| }, |
| { |
| "epoch": 2.95, |
| "grad_norm": 0.984375, |
| "learning_rate": 3.832699496598859e-05, |
| "loss": 0.1264, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.975, |
| "grad_norm": 0.81640625, |
| "learning_rate": 3.8251694843093894e-05, |
| "loss": 0.1277, |
| "step": 595 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.78515625, |
| "learning_rate": 3.8220679321594226e-05, |
| "loss": 0.1288, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.1242036521434784, |
| "eval_runtime": 0.5964, |
| "eval_samples_per_second": 43.593, |
| "eval_steps_per_second": 43.593, |
| "step": 600 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 600, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.19491360014336e+16, |
| "train_batch_size": 46, |
| "trial_name": null, |
| "trial_params": null |
| } |