| { |
| "best_global_step": 2000, |
| "best_metric": 0.6596935206968907, |
| "best_model_checkpoint": "/workspace/output/resnet50/checkpoint-2000", |
| "epoch": 0.28388928317955997, |
| "eval_steps": 500, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0014194464158978, |
| "grad_norm": 3.2342276573181152, |
| "learning_rate": 9.999872249822569e-05, |
| "loss": 5.98863525390625, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0028388928317956, |
| "grad_norm": 3.3994972705841064, |
| "learning_rate": 9.99973030518098e-05, |
| "loss": 5.97633056640625, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0042583392476933995, |
| "grad_norm": 3.3180341720581055, |
| "learning_rate": 9.99958836053939e-05, |
| "loss": 5.97711181640625, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0056777856635912, |
| "grad_norm": 2.9379143714904785, |
| "learning_rate": 9.999446415897801e-05, |
| "loss": 5.9991455078125, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.007097232079488999, |
| "grad_norm": 2.2698018550872803, |
| "learning_rate": 9.99930447125621e-05, |
| "loss": 5.96363525390625, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.008516678495386799, |
| "grad_norm": 2.0626659393310547, |
| "learning_rate": 9.99916252661462e-05, |
| "loss": 5.96995849609375, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0099361249112846, |
| "grad_norm": 2.814460277557373, |
| "learning_rate": 9.999020581973031e-05, |
| "loss": 5.9493408203125, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0113555713271824, |
| "grad_norm": 2.871051788330078, |
| "learning_rate": 9.998878637331441e-05, |
| "loss": 5.9510498046875, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0127750177430802, |
| "grad_norm": 2.3897151947021484, |
| "learning_rate": 9.998736692689852e-05, |
| "loss": 5.94254150390625, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.014194464158977998, |
| "grad_norm": 2.9910531044006348, |
| "learning_rate": 9.99859474804826e-05, |
| "loss": 5.9062255859375, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.015613910574875798, |
| "grad_norm": 3.137518882751465, |
| "learning_rate": 9.998452803406672e-05, |
| "loss": 5.9070068359375, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.017033356990773598, |
| "grad_norm": 3.021024703979492, |
| "learning_rate": 9.998310858765082e-05, |
| "loss": 5.87197265625, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.018452803406671398, |
| "grad_norm": 3.499450445175171, |
| "learning_rate": 9.998168914123493e-05, |
| "loss": 5.8237548828125, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0198722498225692, |
| "grad_norm": 3.87576961517334, |
| "learning_rate": 9.998026969481902e-05, |
| "loss": 5.754150390625, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.021291696238467, |
| "grad_norm": 3.9846458435058594, |
| "learning_rate": 9.997885024840313e-05, |
| "loss": 5.697198486328125, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0227111426543648, |
| "grad_norm": 4.339130878448486, |
| "learning_rate": 9.997743080198723e-05, |
| "loss": 5.63760986328125, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0241305890702626, |
| "grad_norm": 4.891483783721924, |
| "learning_rate": 9.997601135557133e-05, |
| "loss": 5.5271728515625, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0255500354861604, |
| "grad_norm": 5.147222995758057, |
| "learning_rate": 9.997459190915544e-05, |
| "loss": 5.45938720703125, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0269694819020582, |
| "grad_norm": 5.365755558013916, |
| "learning_rate": 9.997317246273954e-05, |
| "loss": 5.355255126953125, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.028388928317955996, |
| "grad_norm": 5.888001918792725, |
| "learning_rate": 9.997175301632365e-05, |
| "loss": 5.1554931640625, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.029808374733853796, |
| "grad_norm": 6.100172996520996, |
| "learning_rate": 9.997033356990773e-05, |
| "loss": 5.035284423828125, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.031227821149751596, |
| "grad_norm": 6.491486549377441, |
| "learning_rate": 9.996891412349184e-05, |
| "loss": 4.899530029296875, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.032647267565649396, |
| "grad_norm": 6.916806697845459, |
| "learning_rate": 9.996749467707594e-05, |
| "loss": 4.851350402832031, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.034066713981547196, |
| "grad_norm": 6.837950706481934, |
| "learning_rate": 9.996607523066005e-05, |
| "loss": 4.726431274414063, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.035486160397444996, |
| "grad_norm": 7.554074287414551, |
| "learning_rate": 9.996465578424415e-05, |
| "loss": 4.4839630126953125, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.036905606813342796, |
| "grad_norm": 7.574995994567871, |
| "learning_rate": 9.996323633782825e-05, |
| "loss": 4.506732177734375, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.0383250532292406, |
| "grad_norm": 7.498238563537598, |
| "learning_rate": 9.996181689141236e-05, |
| "loss": 4.319998168945313, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.0397444996451384, |
| "grad_norm": 7.978142261505127, |
| "learning_rate": 9.996039744499645e-05, |
| "loss": 4.214613342285157, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.0411639460610362, |
| "grad_norm": 8.194511413574219, |
| "learning_rate": 9.995897799858057e-05, |
| "loss": 4.212762451171875, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.042583392476934, |
| "grad_norm": 8.136639595031738, |
| "learning_rate": 9.995755855216466e-05, |
| "loss": 4.009028625488281, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0440028388928318, |
| "grad_norm": 8.684012413024902, |
| "learning_rate": 9.995613910574876e-05, |
| "loss": 3.9817459106445314, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.0454222853087296, |
| "grad_norm": 8.888952255249023, |
| "learning_rate": 9.995471965933286e-05, |
| "loss": 3.94019775390625, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.0468417317246274, |
| "grad_norm": 8.79919719696045, |
| "learning_rate": 9.995330021291697e-05, |
| "loss": 3.9265777587890627, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0482611781405252, |
| "grad_norm": 8.571785926818848, |
| "learning_rate": 9.995188076650107e-05, |
| "loss": 3.7262115478515625, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.049680624556423, |
| "grad_norm": 8.640142440795898, |
| "learning_rate": 9.995046132008518e-05, |
| "loss": 3.644915771484375, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.0511000709723208, |
| "grad_norm": 9.322779655456543, |
| "learning_rate": 9.994904187366927e-05, |
| "loss": 3.644049072265625, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.0525195173882186, |
| "grad_norm": 8.790424346923828, |
| "learning_rate": 9.994762242725337e-05, |
| "loss": 3.4869285583496095, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.0539389638041164, |
| "grad_norm": 9.344154357910156, |
| "learning_rate": 9.994620298083748e-05, |
| "loss": 3.55142822265625, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.05535841022001419, |
| "grad_norm": 8.807840347290039, |
| "learning_rate": 9.994478353442158e-05, |
| "loss": 3.4293190002441407, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.05677785663591199, |
| "grad_norm": 9.36971378326416, |
| "learning_rate": 9.994336408800569e-05, |
| "loss": 3.429082489013672, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.05819730305180979, |
| "grad_norm": 9.73521900177002, |
| "learning_rate": 9.994194464158977e-05, |
| "loss": 3.408639907836914, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.05961674946770759, |
| "grad_norm": 9.646844863891602, |
| "learning_rate": 9.994052519517389e-05, |
| "loss": 3.1950119018554686, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.06103619588360539, |
| "grad_norm": 9.722207069396973, |
| "learning_rate": 9.993910574875798e-05, |
| "loss": 3.4140243530273438, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.06245564229950319, |
| "grad_norm": 10.609601020812988, |
| "learning_rate": 9.99376863023421e-05, |
| "loss": 3.320109558105469, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.063875088715401, |
| "grad_norm": 10.271575927734375, |
| "learning_rate": 9.993626685592619e-05, |
| "loss": 3.232251739501953, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.06529453513129879, |
| "grad_norm": 9.766585350036621, |
| "learning_rate": 9.993484740951029e-05, |
| "loss": 3.149517059326172, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.0667139815471966, |
| "grad_norm": 10.358244895935059, |
| "learning_rate": 9.99334279630944e-05, |
| "loss": 3.1863967895507814, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.06813342796309439, |
| "grad_norm": 10.473136901855469, |
| "learning_rate": 9.99320085166785e-05, |
| "loss": 3.222390365600586, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.0695528743789922, |
| "grad_norm": 9.905110359191895, |
| "learning_rate": 9.993058907026261e-05, |
| "loss": 3.1823768615722656, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.07097232079488999, |
| "grad_norm": 9.858973503112793, |
| "learning_rate": 9.99291696238467e-05, |
| "loss": 2.9202560424804687, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.07097232079488999, |
| "eval_accuracy": 0.1867489031601704, |
| "eval_loss": 3.0744524002075195, |
| "eval_runtime": 31.2289, |
| "eval_samples_per_second": 503.605, |
| "eval_steps_per_second": 15.755, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0723917672107878, |
| "grad_norm": 10.224215507507324, |
| "learning_rate": 9.992775017743082e-05, |
| "loss": 3.0410499572753906, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.07381121362668559, |
| "grad_norm": 9.867650032043457, |
| "learning_rate": 9.99263307310149e-05, |
| "loss": 3.116912078857422, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.07523066004258339, |
| "grad_norm": 10.343064308166504, |
| "learning_rate": 9.992491128459901e-05, |
| "loss": 3.06390266418457, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.0766501064584812, |
| "grad_norm": 10.38116455078125, |
| "learning_rate": 9.992349183818311e-05, |
| "loss": 2.973680114746094, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.07806955287437899, |
| "grad_norm": 10.979643821716309, |
| "learning_rate": 9.992207239176722e-05, |
| "loss": 3.0906436920166014, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.0794889992902768, |
| "grad_norm": 10.06657886505127, |
| "learning_rate": 9.992065294535132e-05, |
| "loss": 3.0091484069824217, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.08090844570617459, |
| "grad_norm": 10.663322448730469, |
| "learning_rate": 9.991923349893541e-05, |
| "loss": 2.862255859375, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.0823278921220724, |
| "grad_norm": 9.277785301208496, |
| "learning_rate": 9.991781405251952e-05, |
| "loss": 2.8638259887695314, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.08374733853797019, |
| "grad_norm": 10.807332038879395, |
| "learning_rate": 9.991639460610362e-05, |
| "loss": 2.732352066040039, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.085166784953868, |
| "grad_norm": 9.970373153686523, |
| "learning_rate": 9.991497515968773e-05, |
| "loss": 2.736968231201172, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.08658623136976579, |
| "grad_norm": 11.008269309997559, |
| "learning_rate": 9.991355571327183e-05, |
| "loss": 2.7735246658325194, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.0880056777856636, |
| "grad_norm": 8.758193969726562, |
| "learning_rate": 9.991213626685593e-05, |
| "loss": 2.5436214447021483, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.08942512420156139, |
| "grad_norm": 11.253259658813477, |
| "learning_rate": 9.991071682044003e-05, |
| "loss": 2.748835563659668, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.0908445706174592, |
| "grad_norm": 10.979547500610352, |
| "learning_rate": 9.990929737402414e-05, |
| "loss": 2.7314834594726562, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.09226401703335699, |
| "grad_norm": 11.182887077331543, |
| "learning_rate": 9.990787792760823e-05, |
| "loss": 2.645678901672363, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.0936834634492548, |
| "grad_norm": 10.636208534240723, |
| "learning_rate": 9.990645848119234e-05, |
| "loss": 2.5704013824462892, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.09510290986515259, |
| "grad_norm": 10.351170539855957, |
| "learning_rate": 9.990503903477644e-05, |
| "loss": 2.5628406524658205, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.0965223562810504, |
| "grad_norm": 9.914809226989746, |
| "learning_rate": 9.990361958836054e-05, |
| "loss": 2.5872230529785156, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.09794180269694819, |
| "grad_norm": 10.839837074279785, |
| "learning_rate": 9.990220014194465e-05, |
| "loss": 2.490940475463867, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.099361249112846, |
| "grad_norm": 11.259613990783691, |
| "learning_rate": 9.990078069552875e-05, |
| "loss": 2.64483585357666, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.10078069552874379, |
| "grad_norm": 11.213078498840332, |
| "learning_rate": 9.989936124911286e-05, |
| "loss": 2.5397150039672853, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.1022001419446416, |
| "grad_norm": 10.366206169128418, |
| "learning_rate": 9.989794180269694e-05, |
| "loss": 2.457781219482422, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.10361958836053939, |
| "grad_norm": 11.44458293914795, |
| "learning_rate": 9.989652235628105e-05, |
| "loss": 2.5090484619140625, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.1050390347764372, |
| "grad_norm": 11.689805030822754, |
| "learning_rate": 9.989510290986515e-05, |
| "loss": 2.409171485900879, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.10645848119233499, |
| "grad_norm": 10.568279266357422, |
| "learning_rate": 9.989368346344926e-05, |
| "loss": 2.3308380126953123, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.1078779276082328, |
| "grad_norm": 11.917696952819824, |
| "learning_rate": 9.989226401703337e-05, |
| "loss": 2.3733493804931642, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.10929737402413059, |
| "grad_norm": 9.960722923278809, |
| "learning_rate": 9.989098651525906e-05, |
| "loss": 2.4058095932006838, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.11071682044002838, |
| "grad_norm": 11.068999290466309, |
| "learning_rate": 9.988956706884315e-05, |
| "loss": 2.4371658325195313, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.11213626685592619, |
| "grad_norm": 10.340009689331055, |
| "learning_rate": 9.988814762242725e-05, |
| "loss": 2.2587520599365236, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.11355571327182398, |
| "grad_norm": 9.941303253173828, |
| "learning_rate": 9.988672817601136e-05, |
| "loss": 2.268446350097656, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.11497515968772179, |
| "grad_norm": 11.490272521972656, |
| "learning_rate": 9.988530872959546e-05, |
| "loss": 2.471067428588867, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.11639460610361958, |
| "grad_norm": 10.67241382598877, |
| "learning_rate": 9.988388928317957e-05, |
| "loss": 2.3497791290283203, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.11781405251951739, |
| "grad_norm": 10.710894584655762, |
| "learning_rate": 9.988246983676367e-05, |
| "loss": 2.1724626541137697, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.11923349893541518, |
| "grad_norm": 10.985452651977539, |
| "learning_rate": 9.988105039034778e-05, |
| "loss": 2.1848114013671873, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.12065294535131299, |
| "grad_norm": 10.063145637512207, |
| "learning_rate": 9.987963094393186e-05, |
| "loss": 2.180558776855469, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.12207239176721078, |
| "grad_norm": 11.236614227294922, |
| "learning_rate": 9.987821149751597e-05, |
| "loss": 2.282668876647949, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.12349183818310859, |
| "grad_norm": 10.98898983001709, |
| "learning_rate": 9.987679205110007e-05, |
| "loss": 2.235186767578125, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.12491128459900638, |
| "grad_norm": 11.805492401123047, |
| "learning_rate": 9.987537260468418e-05, |
| "loss": 2.2264921188354494, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.1263307310149042, |
| "grad_norm": 10.717041015625, |
| "learning_rate": 9.987395315826828e-05, |
| "loss": 2.1385255813598634, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.127750177430802, |
| "grad_norm": 9.613192558288574, |
| "learning_rate": 9.987253371185238e-05, |
| "loss": 2.1964336395263673, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.12916962384669978, |
| "grad_norm": 10.594833374023438, |
| "learning_rate": 9.987111426543649e-05, |
| "loss": 2.050688362121582, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.13058907026259758, |
| "grad_norm": 11.596671104431152, |
| "learning_rate": 9.986969481902059e-05, |
| "loss": 2.077385139465332, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.1320085166784954, |
| "grad_norm": 10.779032707214355, |
| "learning_rate": 9.98682753726047e-05, |
| "loss": 2.0280479431152343, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.1334279630943932, |
| "grad_norm": 10.522924423217773, |
| "learning_rate": 9.98668559261888e-05, |
| "loss": 1.9384689331054688, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.13484740951029098, |
| "grad_norm": 9.86844539642334, |
| "learning_rate": 9.986543647977289e-05, |
| "loss": 2.0612548828125, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.13626685592618878, |
| "grad_norm": 12.521405220031738, |
| "learning_rate": 9.986401703335699e-05, |
| "loss": 2.139466094970703, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.1376863023420866, |
| "grad_norm": 11.292656898498535, |
| "learning_rate": 9.98625975869411e-05, |
| "loss": 2.077956199645996, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.1391057487579844, |
| "grad_norm": 11.186986923217773, |
| "learning_rate": 9.98611781405252e-05, |
| "loss": 2.028730010986328, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.14052519517388218, |
| "grad_norm": 10.553022384643555, |
| "learning_rate": 9.985975869410931e-05, |
| "loss": 1.9375551223754883, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.14194464158977999, |
| "grad_norm": 11.089204788208008, |
| "learning_rate": 9.98583392476934e-05, |
| "loss": 2.0689823150634767, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.14194464158977999, |
| "eval_accuracy": 0.42239460799898265, |
| "eval_loss": 1.9010688066482544, |
| "eval_runtime": 31.4593, |
| "eval_samples_per_second": 499.916, |
| "eval_steps_per_second": 15.639, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.1433640880056778, |
| "grad_norm": 10.988676071166992, |
| "learning_rate": 9.98569198012775e-05, |
| "loss": 1.9830604553222657, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.1447835344215756, |
| "grad_norm": 11.2459077835083, |
| "learning_rate": 9.985550035486161e-05, |
| "loss": 1.9190074920654296, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.14620298083747338, |
| "grad_norm": 10.437894821166992, |
| "learning_rate": 9.985408090844571e-05, |
| "loss": 1.8999460220336915, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.14762242725337119, |
| "grad_norm": 10.94793701171875, |
| "learning_rate": 9.985266146202982e-05, |
| "loss": 1.8579456329345703, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.149041873669269, |
| "grad_norm": 11.168233871459961, |
| "learning_rate": 9.98512420156139e-05, |
| "loss": 1.8979732513427734, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.15046132008516677, |
| "grad_norm": 10.14195728302002, |
| "learning_rate": 9.984982256919802e-05, |
| "loss": 1.7833553314208985, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.15188076650106458, |
| "grad_norm": 9.160737991333008, |
| "learning_rate": 9.984840312278211e-05, |
| "loss": 1.8624576568603515, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.1533002129169624, |
| "grad_norm": 11.151049613952637, |
| "learning_rate": 9.984698367636623e-05, |
| "loss": 1.8210905075073243, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.1547196593328602, |
| "grad_norm": 10.053725242614746, |
| "learning_rate": 9.984556422995032e-05, |
| "loss": 1.7738643646240235, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.15613910574875797, |
| "grad_norm": 10.97727108001709, |
| "learning_rate": 9.984414478353442e-05, |
| "loss": 1.866429328918457, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.15755855216465578, |
| "grad_norm": 12.384384155273438, |
| "learning_rate": 9.984272533711853e-05, |
| "loss": 1.8680984497070312, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.1589779985805536, |
| "grad_norm": 11.387879371643066, |
| "learning_rate": 9.984130589070263e-05, |
| "loss": 1.8034194946289062, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.1603974449964514, |
| "grad_norm": 10.6587495803833, |
| "learning_rate": 9.983988644428674e-05, |
| "loss": 1.772690773010254, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.16181689141234917, |
| "grad_norm": 12.721858024597168, |
| "learning_rate": 9.983846699787084e-05, |
| "loss": 1.7724496841430664, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.16323633782824698, |
| "grad_norm": 11.116838455200195, |
| "learning_rate": 9.983704755145493e-05, |
| "loss": 1.7527042388916017, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.1646557842441448, |
| "grad_norm": 10.033406257629395, |
| "learning_rate": 9.983562810503903e-05, |
| "loss": 1.674898338317871, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.1660752306600426, |
| "grad_norm": 11.121773719787598, |
| "learning_rate": 9.983420865862314e-05, |
| "loss": 1.741505241394043, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.16749467707594037, |
| "grad_norm": 11.052094459533691, |
| "learning_rate": 9.983278921220724e-05, |
| "loss": 1.7749841690063477, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.16891412349183818, |
| "grad_norm": 10.183452606201172, |
| "learning_rate": 9.983136976579135e-05, |
| "loss": 1.6881484985351562, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.170333569907736, |
| "grad_norm": 11.106999397277832, |
| "learning_rate": 9.982995031937545e-05, |
| "loss": 1.814961051940918, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.1717530163236338, |
| "grad_norm": 12.08647632598877, |
| "learning_rate": 9.982853087295955e-05, |
| "loss": 1.682515525817871, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.17317246273953157, |
| "grad_norm": 13.744584083557129, |
| "learning_rate": 9.982711142654366e-05, |
| "loss": 1.6713733673095703, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.17459190915542938, |
| "grad_norm": 9.970173835754395, |
| "learning_rate": 9.982569198012775e-05, |
| "loss": 1.711156463623047, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.1760113555713272, |
| "grad_norm": 11.027495384216309, |
| "learning_rate": 9.982427253371186e-05, |
| "loss": 1.759619140625, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.177430801987225, |
| "grad_norm": 10.876315116882324, |
| "learning_rate": 9.982285308729596e-05, |
| "loss": 1.618482780456543, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.17885024840312277, |
| "grad_norm": 10.26490592956543, |
| "learning_rate": 9.982143364088006e-05, |
| "loss": 1.6674427032470702, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.18026969481902058, |
| "grad_norm": 11.872292518615723, |
| "learning_rate": 9.982001419446416e-05, |
| "loss": 1.6325908660888673, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.1816891412349184, |
| "grad_norm": 9.946234703063965, |
| "learning_rate": 9.981859474804827e-05, |
| "loss": 1.5453743934631348, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.18310858765081617, |
| "grad_norm": 11.03128719329834, |
| "learning_rate": 9.981717530163236e-05, |
| "loss": 1.658684539794922, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.18452803406671398, |
| "grad_norm": 12.145915031433105, |
| "learning_rate": 9.981575585521648e-05, |
| "loss": 1.5792274475097656, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.18594748048261178, |
| "grad_norm": 11.820379257202148, |
| "learning_rate": 9.981433640880057e-05, |
| "loss": 1.5301803588867187, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.1873669268985096, |
| "grad_norm": 11.046746253967285, |
| "learning_rate": 9.981291696238467e-05, |
| "loss": 1.6124080657958983, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.18878637331440737, |
| "grad_norm": 9.545868873596191, |
| "learning_rate": 9.981149751596878e-05, |
| "loss": 1.5502593994140625, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.19020581973030518, |
| "grad_norm": 11.999979019165039, |
| "learning_rate": 9.981007806955288e-05, |
| "loss": 1.5360203742980958, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.19162526614620298, |
| "grad_norm": 9.949675559997559, |
| "learning_rate": 9.980865862313699e-05, |
| "loss": 1.353858470916748, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.1930447125621008, |
| "grad_norm": 11.573400497436523, |
| "learning_rate": 9.980723917672107e-05, |
| "loss": 1.3946660995483398, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.19446415897799857, |
| "grad_norm": 10.249485969543457, |
| "learning_rate": 9.980581973030518e-05, |
| "loss": 1.518262004852295, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.19588360539389638, |
| "grad_norm": 10.011629104614258, |
| "learning_rate": 9.980440028388928e-05, |
| "loss": 1.5000194549560546, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.19730305180979418, |
| "grad_norm": 12.186440467834473, |
| "learning_rate": 9.980298083747339e-05, |
| "loss": 1.554741382598877, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.198722498225692, |
| "grad_norm": 11.845844268798828, |
| "learning_rate": 9.980156139105749e-05, |
| "loss": 1.4599843978881837, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.20014194464158977, |
| "grad_norm": 10.98592472076416, |
| "learning_rate": 9.980014194464159e-05, |
| "loss": 1.4062080383300781, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.20156139105748758, |
| "grad_norm": 11.54171371459961, |
| "learning_rate": 9.97987224982257e-05, |
| "loss": 1.5128003120422364, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.20298083747338538, |
| "grad_norm": 10.248682022094727, |
| "learning_rate": 9.97973030518098e-05, |
| "loss": 1.5022719383239747, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.2044002838892832, |
| "grad_norm": 8.78536319732666, |
| "learning_rate": 9.97958836053939e-05, |
| "loss": 1.4118841171264649, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.20581973030518097, |
| "grad_norm": 9.993626594543457, |
| "learning_rate": 9.9794464158978e-05, |
| "loss": 1.3945957183837892, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.20723917672107878, |
| "grad_norm": 11.31412124633789, |
| "learning_rate": 9.97930447125621e-05, |
| "loss": 1.26229887008667, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.20865862313697658, |
| "grad_norm": 11.182840347290039, |
| "learning_rate": 9.97916252661462e-05, |
| "loss": 1.3171740531921388, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.2100780695528744, |
| "grad_norm": 12.25224781036377, |
| "learning_rate": 9.979020581973031e-05, |
| "loss": 1.3310781478881837, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.21149751596877217, |
| "grad_norm": 11.81201457977295, |
| "learning_rate": 9.978878637331441e-05, |
| "loss": 1.3043070793151856, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.21291696238466998, |
| "grad_norm": 10.484480857849121, |
| "learning_rate": 9.978736692689852e-05, |
| "loss": 1.2629288673400878, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.21291696238466998, |
| "eval_accuracy": 0.5395180263241559, |
| "eval_loss": 1.438815712928772, |
| "eval_runtime": 32.1456, |
| "eval_samples_per_second": 489.242, |
| "eval_steps_per_second": 15.305, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.21433640880056778, |
| "grad_norm": 10.796157836914062, |
| "learning_rate": 9.978594748048262e-05, |
| "loss": 1.3752121925354004, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.2157558552164656, |
| "grad_norm": 10.1256742477417, |
| "learning_rate": 9.978452803406671e-05, |
| "loss": 1.3005435943603516, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.21717530163236337, |
| "grad_norm": 11.182530403137207, |
| "learning_rate": 9.978310858765082e-05, |
| "loss": 1.3048934936523438, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.21859474804826118, |
| "grad_norm": 10.190278053283691, |
| "learning_rate": 9.978168914123492e-05, |
| "loss": 1.3993605613708495, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.22001419446415899, |
| "grad_norm": 10.497735977172852, |
| "learning_rate": 9.978026969481903e-05, |
| "loss": 1.303945541381836, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.22143364088005676, |
| "grad_norm": 10.535606384277344, |
| "learning_rate": 9.977885024840313e-05, |
| "loss": 1.2210904121398927, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.22285308729595457, |
| "grad_norm": 11.385029792785645, |
| "learning_rate": 9.977743080198723e-05, |
| "loss": 1.3508376121520995, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.22427253371185238, |
| "grad_norm": 9.528643608093262, |
| "learning_rate": 9.977601135557132e-05, |
| "loss": 1.2278815269470216, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.22569198012775019, |
| "grad_norm": 13.161009788513184, |
| "learning_rate": 9.977459190915544e-05, |
| "loss": 1.254448413848877, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.22711142654364797, |
| "grad_norm": 11.288809776306152, |
| "learning_rate": 9.977317246273953e-05, |
| "loss": 1.271047878265381, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.22853087295954577, |
| "grad_norm": 11.30105209350586, |
| "learning_rate": 9.977175301632364e-05, |
| "loss": 1.3242988586425781, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.22995031937544358, |
| "grad_norm": 10.600774765014648, |
| "learning_rate": 9.977033356990774e-05, |
| "loss": 1.3170942306518554, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.2313697657913414, |
| "grad_norm": 10.652543067932129, |
| "learning_rate": 9.976891412349184e-05, |
| "loss": 1.3998719215393067, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.23278921220723917, |
| "grad_norm": 11.354793548583984, |
| "learning_rate": 9.976749467707595e-05, |
| "loss": 1.270443820953369, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.23420865862313697, |
| "grad_norm": 9.926568031311035, |
| "learning_rate": 9.976607523066005e-05, |
| "loss": 1.117215347290039, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.23562810503903478, |
| "grad_norm": 11.167335510253906, |
| "learning_rate": 9.976465578424416e-05, |
| "loss": 1.348717212677002, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.2370475514549326, |
| "grad_norm": 11.364425659179688, |
| "learning_rate": 9.976323633782824e-05, |
| "loss": 1.2113998413085938, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.23846699787083037, |
| "grad_norm": 10.315034866333008, |
| "learning_rate": 9.976181689141235e-05, |
| "loss": 1.2621678352355956, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.23988644428672817, |
| "grad_norm": 11.332146644592285, |
| "learning_rate": 9.976039744499645e-05, |
| "loss": 1.2919418334960937, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.24130589070262598, |
| "grad_norm": 9.863037109375, |
| "learning_rate": 9.975897799858056e-05, |
| "loss": 1.262222957611084, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.2427253371185238, |
| "grad_norm": 13.898163795471191, |
| "learning_rate": 9.975755855216467e-05, |
| "loss": 1.349098300933838, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.24414478353442157, |
| "grad_norm": 9.008386611938477, |
| "learning_rate": 9.975613910574876e-05, |
| "loss": 1.1653017044067382, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.24556422995031937, |
| "grad_norm": 9.755669593811035, |
| "learning_rate": 9.975471965933287e-05, |
| "loss": 1.304057788848877, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.24698367636621718, |
| "grad_norm": 10.742278099060059, |
| "learning_rate": 9.975330021291696e-05, |
| "loss": 1.1656038284301757, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.248403122782115, |
| "grad_norm": 11.937880516052246, |
| "learning_rate": 9.975188076650107e-05, |
| "loss": 1.2565963745117188, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.24982256919801277, |
| "grad_norm": 9.80545711517334, |
| "learning_rate": 9.975046132008517e-05, |
| "loss": 1.1316876411437988, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.2512420156139106, |
| "grad_norm": 11.162557601928711, |
| "learning_rate": 9.974904187366927e-05, |
| "loss": 1.2094581604003907, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.2526614620298084, |
| "grad_norm": 12.278450965881348, |
| "learning_rate": 9.974762242725337e-05, |
| "loss": 1.2499947547912598, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.2540809084457062, |
| "grad_norm": 10.95953369140625, |
| "learning_rate": 9.974620298083748e-05, |
| "loss": 1.1540046691894532, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.255500354861604, |
| "grad_norm": 7.865696430206299, |
| "learning_rate": 9.974478353442159e-05, |
| "loss": 1.1665989875793457, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.25691980127750175, |
| "grad_norm": 12.1609468460083, |
| "learning_rate": 9.974336408800569e-05, |
| "loss": 1.120746898651123, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.25833924769339955, |
| "grad_norm": 9.554359436035156, |
| "learning_rate": 9.974194464158978e-05, |
| "loss": 1.3381189346313476, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.25975869410929736, |
| "grad_norm": 9.497129440307617, |
| "learning_rate": 9.974052519517388e-05, |
| "loss": 1.1758546829223633, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.26117814052519517, |
| "grad_norm": 10.584992408752441, |
| "learning_rate": 9.973910574875799e-05, |
| "loss": 1.0787659645080567, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.262597586941093, |
| "grad_norm": 9.558980941772461, |
| "learning_rate": 9.973768630234209e-05, |
| "loss": 0.9334567070007325, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.2640170333569908, |
| "grad_norm": 9.41112995147705, |
| "learning_rate": 9.97362668559262e-05, |
| "loss": 1.1376053810119628, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.2654364797728886, |
| "grad_norm": 11.666831970214844, |
| "learning_rate": 9.973484740951028e-05, |
| "loss": 1.207914447784424, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.2668559261887864, |
| "grad_norm": 11.217955589294434, |
| "learning_rate": 9.97334279630944e-05, |
| "loss": 1.052849578857422, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.26827537260468415, |
| "grad_norm": 8.3615083694458, |
| "learning_rate": 9.97320085166785e-05, |
| "loss": 0.9782976150512696, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.26969481902058196, |
| "grad_norm": 10.69944953918457, |
| "learning_rate": 9.97305890702626e-05, |
| "loss": 0.9639101982116699, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.27111426543647976, |
| "grad_norm": 11.15194034576416, |
| "learning_rate": 9.972916962384671e-05, |
| "loss": 1.0744239807128906, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.27253371185237757, |
| "grad_norm": 10.363690376281738, |
| "learning_rate": 9.972775017743081e-05, |
| "loss": 1.1180108070373536, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.2739531582682754, |
| "grad_norm": 10.816513061523438, |
| "learning_rate": 9.972633073101491e-05, |
| "loss": 1.118791103363037, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.2753726046841732, |
| "grad_norm": 8.64388656616211, |
| "learning_rate": 9.9724911284599e-05, |
| "loss": 1.1368459701538085, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.276792051100071, |
| "grad_norm": 9.002252578735352, |
| "learning_rate": 9.972349183818312e-05, |
| "loss": 1.1344121932983398, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.2782114975159688, |
| "grad_norm": 11.083386421203613, |
| "learning_rate": 9.972207239176721e-05, |
| "loss": 1.1827295303344727, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.27963094393186655, |
| "grad_norm": 8.360145568847656, |
| "learning_rate": 9.972065294535133e-05, |
| "loss": 0.9954969406127929, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.28105039034776436, |
| "grad_norm": 12.982026100158691, |
| "learning_rate": 9.971923349893542e-05, |
| "loss": 0.9865982055664062, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.28246983676366216, |
| "grad_norm": 9.3854341506958, |
| "learning_rate": 9.971781405251952e-05, |
| "loss": 0.9238475799560547, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.28388928317955997, |
| "grad_norm": 10.693597793579102, |
| "learning_rate": 9.971639460610363e-05, |
| "loss": 0.9660484313964843, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.28388928317955997, |
| "eval_accuracy": 0.6596935206968907, |
| "eval_loss": 1.0827350616455078, |
| "eval_runtime": 31.44, |
| "eval_samples_per_second": 500.222, |
| "eval_steps_per_second": 15.649, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 704500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|