| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 231, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004329004329004329, |
| "grad_norm": 6.30618953704834, |
| "learning_rate": 0.0, |
| "loss": 0.4668, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.008658008658008658, |
| "grad_norm": 3.96525502204895, |
| "learning_rate": 1.4285714285714285e-05, |
| "loss": 0.3887, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.012987012987012988, |
| "grad_norm": 4.628964424133301, |
| "learning_rate": 2.857142857142857e-05, |
| "loss": 0.4199, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.017316017316017316, |
| "grad_norm": 4.340306282043457, |
| "learning_rate": 4.2857142857142856e-05, |
| "loss": 0.3379, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.021645021645021644, |
| "grad_norm": 10.413896560668945, |
| "learning_rate": 5.714285714285714e-05, |
| "loss": 0.4785, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.025974025974025976, |
| "grad_norm": 3.5524611473083496, |
| "learning_rate": 7.142857142857143e-05, |
| "loss": 0.3965, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.030303030303030304, |
| "grad_norm": 3.9681403636932373, |
| "learning_rate": 8.571428571428571e-05, |
| "loss": 0.334, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.03463203463203463, |
| "grad_norm": 6.837435722351074, |
| "learning_rate": 0.0001, |
| "loss": 0.4609, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.03896103896103896, |
| "grad_norm": 6.86609411239624, |
| "learning_rate": 9.999508258797877e-05, |
| "loss": 0.4297, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.04329004329004329, |
| "grad_norm": 5.7420973777771, |
| "learning_rate": 9.998033131915266e-05, |
| "loss": 0.3691, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.047619047619047616, |
| "grad_norm": 5.456467151641846, |
| "learning_rate": 9.995574909504435e-05, |
| "loss": 0.4023, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.05194805194805195, |
| "grad_norm": 6.083012104034424, |
| "learning_rate": 9.992134075089084e-05, |
| "loss": 0.498, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.05627705627705628, |
| "grad_norm": 24.94365692138672, |
| "learning_rate": 9.987711305469231e-05, |
| "loss": 0.4551, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.06060606060606061, |
| "grad_norm": 9.905355453491211, |
| "learning_rate": 9.982307470588098e-05, |
| "loss": 0.4453, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.06493506493506493, |
| "grad_norm": 6.808415412902832, |
| "learning_rate": 9.975923633360985e-05, |
| "loss": 0.4434, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.06926406926406926, |
| "grad_norm": 8.532634735107422, |
| "learning_rate": 9.968561049466214e-05, |
| "loss": 0.4043, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0735930735930736, |
| "grad_norm": 7.474434852600098, |
| "learning_rate": 9.960221167098124e-05, |
| "loss": 0.4473, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.07792207792207792, |
| "grad_norm": 31.124326705932617, |
| "learning_rate": 9.950905626682228e-05, |
| "loss": 0.3809, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.08225108225108226, |
| "grad_norm": 7.993480205535889, |
| "learning_rate": 9.940616260552544e-05, |
| "loss": 0.4863, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.08658008658008658, |
| "grad_norm": 6.659813404083252, |
| "learning_rate": 9.92935509259118e-05, |
| "loss": 0.3477, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09090909090909091, |
| "grad_norm": 8.14820671081543, |
| "learning_rate": 9.917124337830243e-05, |
| "loss": 0.3809, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.09523809523809523, |
| "grad_norm": 6.370754718780518, |
| "learning_rate": 9.903926402016153e-05, |
| "loss": 0.4727, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.09956709956709957, |
| "grad_norm": 10.538797378540039, |
| "learning_rate": 9.889763881136439e-05, |
| "loss": 0.5156, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.1038961038961039, |
| "grad_norm": 8.41638469696045, |
| "learning_rate": 9.874639560909117e-05, |
| "loss": 0.4727, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.10822510822510822, |
| "grad_norm": 11.677297592163086, |
| "learning_rate": 9.858556416234755e-05, |
| "loss": 0.457, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.11255411255411256, |
| "grad_norm": 14.953546524047852, |
| "learning_rate": 9.841517610611309e-05, |
| "loss": 0.4336, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.11688311688311688, |
| "grad_norm": 7.516232013702393, |
| "learning_rate": 9.82352649551188e-05, |
| "loss": 0.3516, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.12121212121212122, |
| "grad_norm": 7.574983596801758, |
| "learning_rate": 9.804586609725499e-05, |
| "loss": 0.3926, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.12554112554112554, |
| "grad_norm": 17.778465270996094, |
| "learning_rate": 9.784701678661045e-05, |
| "loss": 0.4082, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.12987012987012986, |
| "grad_norm": 22.582393646240234, |
| "learning_rate": 9.763875613614482e-05, |
| "loss": 0.4688, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.1341991341991342, |
| "grad_norm": 156.63107299804688, |
| "learning_rate": 9.742112510999515e-05, |
| "loss": 0.4902, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.13852813852813853, |
| "grad_norm": 22.96047592163086, |
| "learning_rate": 9.719416651541839e-05, |
| "loss": 0.4336, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 10.698134422302246, |
| "learning_rate": 9.69579249943714e-05, |
| "loss": 0.4414, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.1471861471861472, |
| "grad_norm": 7.732542514801025, |
| "learning_rate": 9.671244701472999e-05, |
| "loss": 0.3789, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.15151515151515152, |
| "grad_norm": 20.447450637817383, |
| "learning_rate": 9.645778086114892e-05, |
| "loss": 0.4551, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.15584415584415584, |
| "grad_norm": 7.715920925140381, |
| "learning_rate": 9.619397662556435e-05, |
| "loss": 0.4863, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.16017316017316016, |
| "grad_norm": 11.215670585632324, |
| "learning_rate": 9.592108619734106e-05, |
| "loss": 0.4746, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.1645021645021645, |
| "grad_norm": 14.483991622924805, |
| "learning_rate": 9.563916325306594e-05, |
| "loss": 0.4062, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.16883116883116883, |
| "grad_norm": 9.34101390838623, |
| "learning_rate": 9.534826324599003e-05, |
| "loss": 0.4941, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.17316017316017315, |
| "grad_norm": 9.180691719055176, |
| "learning_rate": 9.504844339512095e-05, |
| "loss": 0.4707, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1774891774891775, |
| "grad_norm": 5.87382173538208, |
| "learning_rate": 9.473976267396831e-05, |
| "loss": 0.5078, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.18181818181818182, |
| "grad_norm": 7.854617118835449, |
| "learning_rate": 9.442228179894362e-05, |
| "loss": 0.4746, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.18614718614718614, |
| "grad_norm": 5.667612075805664, |
| "learning_rate": 9.409606321741775e-05, |
| "loss": 0.5156, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.19047619047619047, |
| "grad_norm": 6.341343402862549, |
| "learning_rate": 9.376117109543769e-05, |
| "loss": 0.498, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.19480519480519481, |
| "grad_norm": 6.829994201660156, |
| "learning_rate": 9.341767130510528e-05, |
| "loss": 0.3711, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.19913419913419914, |
| "grad_norm": 11.078237533569336, |
| "learning_rate": 9.306563141162046e-05, |
| "loss": 0.4434, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.20346320346320346, |
| "grad_norm": 9.45587158203125, |
| "learning_rate": 9.270512065999137e-05, |
| "loss": 0.5234, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.2077922077922078, |
| "grad_norm": 6.764471530914307, |
| "learning_rate": 9.233620996141421e-05, |
| "loss": 0.4219, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.21212121212121213, |
| "grad_norm": 23.972339630126953, |
| "learning_rate": 9.195897187932512e-05, |
| "loss": 0.3965, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.21645021645021645, |
| "grad_norm": 5.304466724395752, |
| "learning_rate": 9.157348061512727e-05, |
| "loss": 0.418, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.22077922077922077, |
| "grad_norm": 5.903518199920654, |
| "learning_rate": 9.117981199359574e-05, |
| "loss": 0.2969, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.22510822510822512, |
| "grad_norm": 6.200258731842041, |
| "learning_rate": 9.077804344796302e-05, |
| "loss": 0.4551, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.22943722943722944, |
| "grad_norm": 7.089105606079102, |
| "learning_rate": 9.036825400468812e-05, |
| "loss": 0.457, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.23376623376623376, |
| "grad_norm": 34.66611862182617, |
| "learning_rate": 8.995052426791247e-05, |
| "loss": 0.418, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.23809523809523808, |
| "grad_norm": 5.267077922821045, |
| "learning_rate": 8.952493640360517e-05, |
| "loss": 0.4082, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.24242424242424243, |
| "grad_norm": 6.587023735046387, |
| "learning_rate": 8.90915741234015e-05, |
| "loss": 0.4512, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.24675324675324675, |
| "grad_norm": 7.729217529296875, |
| "learning_rate": 8.865052266813685e-05, |
| "loss": 0.4258, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.2510822510822511, |
| "grad_norm": 6.812266826629639, |
| "learning_rate": 8.820186879108038e-05, |
| "loss": 0.4512, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.2554112554112554, |
| "grad_norm": 11.491703033447266, |
| "learning_rate": 8.77457007408708e-05, |
| "loss": 0.2832, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.2597402597402597, |
| "grad_norm": 5.020540714263916, |
| "learning_rate": 8.728210824415827e-05, |
| "loss": 0.3496, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.26406926406926406, |
| "grad_norm": 5.021598815917969, |
| "learning_rate": 8.681118248795547e-05, |
| "loss": 0.5, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.2683982683982684, |
| "grad_norm": 8.810498237609863, |
| "learning_rate": 8.633301610170135e-05, |
| "loss": 0.4551, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.2727272727272727, |
| "grad_norm": 7.0777411460876465, |
| "learning_rate": 8.584770313904137e-05, |
| "loss": 0.3926, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.27705627705627706, |
| "grad_norm": 5.617433547973633, |
| "learning_rate": 8.535533905932738e-05, |
| "loss": 0.3223, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.2813852813852814, |
| "grad_norm": 5.157428741455078, |
| "learning_rate": 8.485602070884117e-05, |
| "loss": 0.373, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 6.726741790771484, |
| "learning_rate": 8.434984630174509e-05, |
| "loss": 0.4219, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.29004329004329005, |
| "grad_norm": 4.791531085968018, |
| "learning_rate": 8.383691540076371e-05, |
| "loss": 0.4199, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.2943722943722944, |
| "grad_norm": 5.5883026123046875, |
| "learning_rate": 8.33173288976002e-05, |
| "loss": 0.459, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.2987012987012987, |
| "grad_norm": 7.0759663581848145, |
| "learning_rate": 8.279118899309122e-05, |
| "loss": 0.4629, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.30303030303030304, |
| "grad_norm": 7.941741466522217, |
| "learning_rate": 8.225859917710439e-05, |
| "loss": 0.4316, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.30735930735930733, |
| "grad_norm": 4.9386396408081055, |
| "learning_rate": 8.171966420818228e-05, |
| "loss": 0.3359, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.3116883116883117, |
| "grad_norm": 4.561506748199463, |
| "learning_rate": 8.117449009293668e-05, |
| "loss": 0.373, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.31601731601731603, |
| "grad_norm": 6.921361446380615, |
| "learning_rate": 8.062318406519751e-05, |
| "loss": 0.3809, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.3203463203463203, |
| "grad_norm": 6.0779876708984375, |
| "learning_rate": 8.006585456492029e-05, |
| "loss": 0.416, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.3246753246753247, |
| "grad_norm": 14.405391693115234, |
| "learning_rate": 7.950261121685641e-05, |
| "loss": 0.3711, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.329004329004329, |
| "grad_norm": 12.624444961547852, |
| "learning_rate": 7.89335648089903e-05, |
| "loss": 0.3691, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 8.761910438537598, |
| "learning_rate": 7.835882727074779e-05, |
| "loss": 0.4707, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.33766233766233766, |
| "grad_norm": 6.742737770080566, |
| "learning_rate": 7.777851165098012e-05, |
| "loss": 0.2949, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.341991341991342, |
| "grad_norm": 13.422913551330566, |
| "learning_rate": 7.719273209572744e-05, |
| "loss": 0.377, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.3463203463203463, |
| "grad_norm": 8.455282211303711, |
| "learning_rate": 7.660160382576683e-05, |
| "loss": 0.4707, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.35064935064935066, |
| "grad_norm": 7.551178455352783, |
| "learning_rate": 7.600524311394873e-05, |
| "loss": 0.4688, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.354978354978355, |
| "grad_norm": 5.217031002044678, |
| "learning_rate": 7.540376726232648e-05, |
| "loss": 0.4492, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.3593073593073593, |
| "grad_norm": 7.432611465454102, |
| "learning_rate": 7.47972945790834e-05, |
| "loss": 0.4121, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 10.472357749938965, |
| "learning_rate": 7.4185944355262e-05, |
| "loss": 0.4199, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.36796536796536794, |
| "grad_norm": 8.361776351928711, |
| "learning_rate": 7.35698368412999e-05, |
| "loss": 0.3496, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.3722943722943723, |
| "grad_norm": 8.236878395080566, |
| "learning_rate": 7.294909322337689e-05, |
| "loss": 0.3906, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.37662337662337664, |
| "grad_norm": 6.491207122802734, |
| "learning_rate": 7.232383559957814e-05, |
| "loss": 0.4277, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.38095238095238093, |
| "grad_norm": 9.313511848449707, |
| "learning_rate": 7.169418695587791e-05, |
| "loss": 0.3867, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.3852813852813853, |
| "grad_norm": 13.22208309173584, |
| "learning_rate": 7.106027114194855e-05, |
| "loss": 0.4824, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.38961038961038963, |
| "grad_norm": 7.9978718757629395, |
| "learning_rate": 7.042221284679982e-05, |
| "loss": 0.4238, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.3939393939393939, |
| "grad_norm": 14.92418384552002, |
| "learning_rate": 6.978013757425295e-05, |
| "loss": 0.4941, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.39826839826839827, |
| "grad_norm": 8.810619354248047, |
| "learning_rate": 6.91341716182545e-05, |
| "loss": 0.3691, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.4025974025974026, |
| "grad_norm": 7.104898452758789, |
| "learning_rate": 6.848444203803476e-05, |
| "loss": 0.4609, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.4069264069264069, |
| "grad_norm": 30.174938201904297, |
| "learning_rate": 6.783107663311565e-05, |
| "loss": 0.375, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.41125541125541126, |
| "grad_norm": 6.395003318786621, |
| "learning_rate": 6.717420391817306e-05, |
| "loss": 0.4043, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.4155844155844156, |
| "grad_norm": 17.24394416809082, |
| "learning_rate": 6.651395309775837e-05, |
| "loss": 0.4434, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.4199134199134199, |
| "grad_norm": 6.928684711456299, |
| "learning_rate": 6.585045404088441e-05, |
| "loss": 0.4141, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.42424242424242425, |
| "grad_norm": 14.082657814025879, |
| "learning_rate": 6.518383725548074e-05, |
| "loss": 0.4414, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 21.20639991760254, |
| "learning_rate": 6.451423386272312e-05, |
| "loss": 0.4121, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.4329004329004329, |
| "grad_norm": 12.427319526672363, |
| "learning_rate": 6.384177557124247e-05, |
| "loss": 0.4258, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.43722943722943725, |
| "grad_norm": 10.986278533935547, |
| "learning_rate": 6.316659465121824e-05, |
| "loss": 0.3965, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.44155844155844154, |
| "grad_norm": 12.830971717834473, |
| "learning_rate": 6.248882390836135e-05, |
| "loss": 0.4375, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.4458874458874459, |
| "grad_norm": 14.790221214294434, |
| "learning_rate": 6.180859665779172e-05, |
| "loss": 0.4492, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.45021645021645024, |
| "grad_norm": 44.620018005371094, |
| "learning_rate": 6.112604669781572e-05, |
| "loss": 0.5703, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 13.396041870117188, |
| "learning_rate": 6.04413082836085e-05, |
| "loss": 0.4805, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.4588744588744589, |
| "grad_norm": 13.93975830078125, |
| "learning_rate": 5.9754516100806423e-05, |
| "loss": 0.3809, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.46320346320346323, |
| "grad_norm": 18.1014404296875, |
| "learning_rate": 5.9065805239014923e-05, |
| "loss": 0.4707, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.4675324675324675, |
| "grad_norm": 7.800663948059082, |
| "learning_rate": 5.837531116523682e-05, |
| "loss": 0.4121, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.47186147186147187, |
| "grad_norm": 9.331329345703125, |
| "learning_rate": 5.76831696972265e-05, |
| "loss": 0.3926, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.47619047619047616, |
| "grad_norm": 8.944900512695312, |
| "learning_rate": 5.698951697677498e-05, |
| "loss": 0.3262, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4805194805194805, |
| "grad_norm": 20.515888214111328, |
| "learning_rate": 5.629448944293127e-05, |
| "loss": 0.4648, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.48484848484848486, |
| "grad_norm": 9.60425853729248, |
| "learning_rate": 5.559822380516539e-05, |
| "loss": 0.4023, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.48917748917748916, |
| "grad_norm": 9.976903915405273, |
| "learning_rate": 5.490085701647805e-05, |
| "loss": 0.3438, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.4935064935064935, |
| "grad_norm": 19.132675170898438, |
| "learning_rate": 5.420252624646238e-05, |
| "loss": 0.3965, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.49783549783549785, |
| "grad_norm": 12.469053268432617, |
| "learning_rate": 5.3503368854323366e-05, |
| "loss": 0.3848, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.5021645021645021, |
| "grad_norm": 14.417160034179688, |
| "learning_rate": 5.2803522361859594e-05, |
| "loss": 0.459, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.5064935064935064, |
| "grad_norm": 8.167932510375977, |
| "learning_rate": 5.2103124426413264e-05, |
| "loss": 0.3711, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.5108225108225108, |
| "grad_norm": 9.628777503967285, |
| "learning_rate": 5.140231281379345e-05, |
| "loss": 0.2695, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.5151515151515151, |
| "grad_norm": 13.074538230895996, |
| "learning_rate": 5.070122537117812e-05, |
| "loss": 0.3633, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.5194805194805194, |
| "grad_norm": 8.415633201599121, |
| "learning_rate": 5e-05, |
| "loss": 0.4395, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5238095238095238, |
| "grad_norm": 16.27584457397461, |
| "learning_rate": 4.929877462882189e-05, |
| "loss": 0.416, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.5281385281385281, |
| "grad_norm": 5.963536262512207, |
| "learning_rate": 4.859768718620656e-05, |
| "loss": 0.3633, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.5324675324675324, |
| "grad_norm": 17.088741302490234, |
| "learning_rate": 4.7896875573586755e-05, |
| "loss": 0.3672, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.5367965367965368, |
| "grad_norm": 16.568267822265625, |
| "learning_rate": 4.7196477638140404e-05, |
| "loss": 0.4121, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.5411255411255411, |
| "grad_norm": 15.208264350891113, |
| "learning_rate": 4.649663114567663e-05, |
| "loss": 0.4082, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.5454545454545454, |
| "grad_norm": 11.11156177520752, |
| "learning_rate": 4.579747375353763e-05, |
| "loss": 0.375, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.5497835497835498, |
| "grad_norm": 15.04819107055664, |
| "learning_rate": 4.509914298352197e-05, |
| "loss": 0.3594, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.5541125541125541, |
| "grad_norm": 6.2651286125183105, |
| "learning_rate": 4.4401776194834613e-05, |
| "loss": 0.2793, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.5584415584415584, |
| "grad_norm": 9.082189559936523, |
| "learning_rate": 4.370551055706874e-05, |
| "loss": 0.3906, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.5627705627705628, |
| "grad_norm": 7.258723258972168, |
| "learning_rate": 4.3010483023225045e-05, |
| "loss": 0.3086, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5670995670995671, |
| "grad_norm": 9.980514526367188, |
| "learning_rate": 4.231683030277349e-05, |
| "loss": 0.4043, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 13.367783546447754, |
| "learning_rate": 4.162468883476319e-05, |
| "loss": 0.3223, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.5757575757575758, |
| "grad_norm": 5.13516902923584, |
| "learning_rate": 4.093419476098509e-05, |
| "loss": 0.3027, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.5800865800865801, |
| "grad_norm": 6.814506530761719, |
| "learning_rate": 4.0245483899193595e-05, |
| "loss": 0.4395, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.5844155844155844, |
| "grad_norm": 9.574424743652344, |
| "learning_rate": 3.955869171639152e-05, |
| "loss": 0.3027, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.5887445887445888, |
| "grad_norm": 6.566686630249023, |
| "learning_rate": 3.887395330218429e-05, |
| "loss": 0.3262, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.5930735930735931, |
| "grad_norm": 14.991905212402344, |
| "learning_rate": 3.81914033422083e-05, |
| "loss": 0.3926, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.5974025974025974, |
| "grad_norm": 6.989403247833252, |
| "learning_rate": 3.7511176091638653e-05, |
| "loss": 0.416, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.6017316017316018, |
| "grad_norm": 99.71111297607422, |
| "learning_rate": 3.683340534878176e-05, |
| "loss": 0.3555, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "grad_norm": 7.185469150543213, |
| "learning_rate": 3.6158224428757535e-05, |
| "loss": 0.3555, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6103896103896104, |
| "grad_norm": 48.649051666259766, |
| "learning_rate": 3.5485766137276894e-05, |
| "loss": 0.3906, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.6147186147186147, |
| "grad_norm": 8.177955627441406, |
| "learning_rate": 3.4816162744519263e-05, |
| "loss": 0.4043, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.6190476190476191, |
| "grad_norm": 7.434495449066162, |
| "learning_rate": 3.4149545959115605e-05, |
| "loss": 0.3867, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.6233766233766234, |
| "grad_norm": 10.178008079528809, |
| "learning_rate": 3.3486046902241664e-05, |
| "loss": 0.4141, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.6277056277056277, |
| "grad_norm": 6.520373821258545, |
| "learning_rate": 3.282579608182694e-05, |
| "loss": 0.3301, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.6320346320346321, |
| "grad_norm": 15.099567413330078, |
| "learning_rate": 3.216892336688435e-05, |
| "loss": 0.3496, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.6363636363636364, |
| "grad_norm": 14.876191139221191, |
| "learning_rate": 3.151555796196525e-05, |
| "loss": 0.3691, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.6406926406926406, |
| "grad_norm": 10.065027236938477, |
| "learning_rate": 3.086582838174551e-05, |
| "loss": 0.3379, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.645021645021645, |
| "grad_norm": 7.905645370483398, |
| "learning_rate": 3.021986242574707e-05, |
| "loss": 0.4004, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.6493506493506493, |
| "grad_norm": 10.962891578674316, |
| "learning_rate": 2.9577787153200197e-05, |
| "loss": 0.4141, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6536796536796536, |
| "grad_norm": 6.561282157897949, |
| "learning_rate": 2.893972885805148e-05, |
| "loss": 0.4844, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.658008658008658, |
| "grad_norm": 15.016473770141602, |
| "learning_rate": 2.8305813044122097e-05, |
| "loss": 0.3418, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.6623376623376623, |
| "grad_norm": 8.82343864440918, |
| "learning_rate": 2.7676164400421862e-05, |
| "loss": 0.3438, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 6.84242582321167, |
| "learning_rate": 2.705090677662311e-05, |
| "loss": 0.3438, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.670995670995671, |
| "grad_norm": 7.3810930252075195, |
| "learning_rate": 2.6430163158700115e-05, |
| "loss": 0.3906, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.6753246753246753, |
| "grad_norm": 8.569120407104492, |
| "learning_rate": 2.581405564473801e-05, |
| "loss": 0.4004, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.6796536796536796, |
| "grad_norm": 13.025605201721191, |
| "learning_rate": 2.5202705420916627e-05, |
| "loss": 0.373, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.683982683982684, |
| "grad_norm": 8.868095397949219, |
| "learning_rate": 2.459623273767354e-05, |
| "loss": 0.4199, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.6883116883116883, |
| "grad_norm": 6.385418891906738, |
| "learning_rate": 2.3994756886051268e-05, |
| "loss": 0.3203, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.6926406926406926, |
| "grad_norm": 8.916970252990723, |
| "learning_rate": 2.3398396174233178e-05, |
| "loss": 0.2949, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.696969696969697, |
| "grad_norm": 9.499614715576172, |
| "learning_rate": 2.280726790427258e-05, |
| "loss": 0.2832, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.7012987012987013, |
| "grad_norm": 14.341413497924805, |
| "learning_rate": 2.2221488349019903e-05, |
| "loss": 0.3125, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.7056277056277056, |
| "grad_norm": 6.781824111938477, |
| "learning_rate": 2.164117272925221e-05, |
| "loss": 0.332, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.70995670995671, |
| "grad_norm": 44.00325393676758, |
| "learning_rate": 2.1066435191009715e-05, |
| "loss": 0.4141, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 13.864439010620117, |
| "learning_rate": 2.0497388783143602e-05, |
| "loss": 0.4199, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.7186147186147186, |
| "grad_norm": 8.958086967468262, |
| "learning_rate": 1.9934145435079702e-05, |
| "loss": 0.3535, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.7229437229437229, |
| "grad_norm": 4.822065830230713, |
| "learning_rate": 1.9376815934802496e-05, |
| "loss": 0.3828, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 8.322967529296875, |
| "learning_rate": 1.8825509907063327e-05, |
| "loss": 0.4258, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.7316017316017316, |
| "grad_norm": 13.969364166259766, |
| "learning_rate": 1.8280335791817733e-05, |
| "loss": 0.3672, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.7359307359307359, |
| "grad_norm": 6.8569722175598145, |
| "learning_rate": 1.774140082289563e-05, |
| "loss": 0.3789, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7402597402597403, |
| "grad_norm": 6.253945827484131, |
| "learning_rate": 1.7208811006908798e-05, |
| "loss": 0.3379, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.7445887445887446, |
| "grad_norm": 5.97335958480835, |
| "learning_rate": 1.6682671102399805e-05, |
| "loss": 0.3672, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.7489177489177489, |
| "grad_norm": 21.618064880371094, |
| "learning_rate": 1.6163084599236278e-05, |
| "loss": 0.4297, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.7532467532467533, |
| "grad_norm": 6.355106353759766, |
| "learning_rate": 1.5650153698254916e-05, |
| "loss": 0.2969, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.7575757575757576, |
| "grad_norm": 11.884471893310547, |
| "learning_rate": 1.5143979291158838e-05, |
| "loss": 0.4531, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.7619047619047619, |
| "grad_norm": 8.2210054397583, |
| "learning_rate": 1.4644660940672627e-05, |
| "loss": 0.3535, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.7662337662337663, |
| "grad_norm": 5.43289852142334, |
| "learning_rate": 1.4152296860958642e-05, |
| "loss": 0.3281, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.7705627705627706, |
| "grad_norm": 13.447378158569336, |
| "learning_rate": 1.3666983898298657e-05, |
| "loss": 0.4336, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.7748917748917749, |
| "grad_norm": 9.235896110534668, |
| "learning_rate": 1.3188817512044544e-05, |
| "loss": 0.3516, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.7792207792207793, |
| "grad_norm": 6.7336602210998535, |
| "learning_rate": 1.2717891755841722e-05, |
| "loss": 0.3223, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7835497835497836, |
| "grad_norm": 6.973309516906738, |
| "learning_rate": 1.225429925912921e-05, |
| "loss": 0.3301, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.7878787878787878, |
| "grad_norm": 30.55255699157715, |
| "learning_rate": 1.1798131208919627e-05, |
| "loss": 0.3027, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.7922077922077922, |
| "grad_norm": 10.4804048538208, |
| "learning_rate": 1.134947733186315e-05, |
| "loss": 0.3848, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.7965367965367965, |
| "grad_norm": 5.046676158905029, |
| "learning_rate": 1.090842587659851e-05, |
| "loss": 0.4805, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.8008658008658008, |
| "grad_norm": 8.961450576782227, |
| "learning_rate": 1.047506359639483e-05, |
| "loss": 0.3398, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.8051948051948052, |
| "grad_norm": 10.69762134552002, |
| "learning_rate": 1.004947573208756e-05, |
| "loss": 0.3945, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.8095238095238095, |
| "grad_norm": 9.194847106933594, |
| "learning_rate": 9.63174599531188e-06, |
| "loss": 0.3379, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.8138528138528138, |
| "grad_norm": 7.735859394073486, |
| "learning_rate": 9.221956552036992e-06, |
| "loss": 0.373, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.8181818181818182, |
| "grad_norm": 7.634536266326904, |
| "learning_rate": 8.820188006404268e-06, |
| "loss": 0.3418, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.8225108225108225, |
| "grad_norm": 12.410811424255371, |
| "learning_rate": 8.426519384872733e-06, |
| "loss": 0.3125, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8268398268398268, |
| "grad_norm": 44.61256408691406, |
| "learning_rate": 8.041028120674893e-06, |
| "loss": 0.3418, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.8311688311688312, |
| "grad_norm": 8.871509552001953, |
| "learning_rate": 7.663790038585793e-06, |
| "loss": 0.4141, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.8354978354978355, |
| "grad_norm": 7.508159160614014, |
| "learning_rate": 7.2948793400086315e-06, |
| "loss": 0.4004, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.8398268398268398, |
| "grad_norm": 33.30258560180664, |
| "learning_rate": 6.934368588379553e-06, |
| "loss": 0.3574, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.8441558441558441, |
| "grad_norm": 21.46922492980957, |
| "learning_rate": 6.582328694894729e-06, |
| "loss": 0.4023, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.8484848484848485, |
| "grad_norm": 31.504789352416992, |
| "learning_rate": 6.238828904562316e-06, |
| "loss": 0.377, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.8528138528138528, |
| "grad_norm": 9.979594230651855, |
| "learning_rate": 5.903936782582253e-06, |
| "loss": 0.4668, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 7.933041095733643, |
| "learning_rate": 5.577718201056392e-06, |
| "loss": 0.4102, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.8614718614718615, |
| "grad_norm": 5.786583423614502, |
| "learning_rate": 5.260237326031697e-06, |
| "loss": 0.4414, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.8658008658008658, |
| "grad_norm": 5.894907474517822, |
| "learning_rate": 4.951556604879048e-06, |
| "loss": 0.3848, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8701298701298701, |
| "grad_norm": 32.59992599487305, |
| "learning_rate": 4.651736754009972e-06, |
| "loss": 0.3672, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.8744588744588745, |
| "grad_norm": 10.215508460998535, |
| "learning_rate": 4.360836746934055e-06, |
| "loss": 0.3496, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.8787878787878788, |
| "grad_norm": 10.185182571411133, |
| "learning_rate": 4.078913802658946e-06, |
| "loss": 0.4062, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.8831168831168831, |
| "grad_norm": 5.9914021492004395, |
| "learning_rate": 3.8060233744356633e-06, |
| "loss": 0.3633, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.8874458874458875, |
| "grad_norm": 18.541730880737305, |
| "learning_rate": 3.542219138851094e-06, |
| "loss": 0.3691, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.8917748917748918, |
| "grad_norm": 5.432623863220215, |
| "learning_rate": 3.2875529852700147e-06, |
| "loss": 0.4297, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.8961038961038961, |
| "grad_norm": 15.46761417388916, |
| "learning_rate": 3.0420750056286195e-06, |
| "loss": 0.3379, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.9004329004329005, |
| "grad_norm": 6.300500392913818, |
| "learning_rate": 2.8058334845816213e-06, |
| "loss": 0.2754, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.9047619047619048, |
| "grad_norm": 8.409278869628906, |
| "learning_rate": 2.5788748900048676e-06, |
| "loss": 0.293, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 12.198765754699707, |
| "learning_rate": 2.361243863855184e-06, |
| "loss": 0.3789, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9134199134199135, |
| "grad_norm": 6.002485275268555, |
| "learning_rate": 2.152983213389559e-06, |
| "loss": 0.3652, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.9177489177489178, |
| "grad_norm": 5.1724090576171875, |
| "learning_rate": 1.9541339027450256e-06, |
| "loss": 0.3125, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.922077922077922, |
| "grad_norm": 19.717180252075195, |
| "learning_rate": 1.7647350448812106e-06, |
| "loss": 0.3516, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.9264069264069265, |
| "grad_norm": 17.20496368408203, |
| "learning_rate": 1.584823893886933e-06, |
| "loss": 0.3789, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.9307359307359307, |
| "grad_norm": 6.270045280456543, |
| "learning_rate": 1.4144358376524503e-06, |
| "loss": 0.3672, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.935064935064935, |
| "grad_norm": 10.227100372314453, |
| "learning_rate": 1.2536043909088191e-06, |
| "loss": 0.4395, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.9393939393939394, |
| "grad_norm": 8.13603401184082, |
| "learning_rate": 1.10236118863562e-06, |
| "loss": 0.3418, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.9437229437229437, |
| "grad_norm": 7.209720611572266, |
| "learning_rate": 9.607359798384785e-07, |
| "loss": 0.3945, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.948051948051948, |
| "grad_norm": 8.675386428833008, |
| "learning_rate": 8.287566216975795e-07, |
| "loss": 0.3242, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.9523809523809523, |
| "grad_norm": 13.124395370483398, |
| "learning_rate": 7.064490740882057e-07, |
| "loss": 0.3398, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.9567099567099567, |
| "grad_norm": 5.071322917938232, |
| "learning_rate": 5.938373944745612e-07, |
| "loss": 0.2812, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.961038961038961, |
| "grad_norm": 6.736144542694092, |
| "learning_rate": 4.909437331777179e-07, |
| "loss": 0.3906, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.9653679653679653, |
| "grad_norm": 6.963472366333008, |
| "learning_rate": 3.9778832901876675e-07, |
| "loss": 0.3945, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.9696969696969697, |
| "grad_norm": 11.631999015808105, |
| "learning_rate": 3.143895053378698e-07, |
| "loss": 0.418, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.974025974025974, |
| "grad_norm": 6.837718963623047, |
| "learning_rate": 2.407636663901591e-07, |
| "loss": 0.334, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.9783549783549783, |
| "grad_norm": 71.07975006103516, |
| "learning_rate": 1.7692529411904578e-07, |
| "loss": 0.3926, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.9826839826839827, |
| "grad_norm": 9.980896949768066, |
| "learning_rate": 1.228869453076986e-07, |
| "loss": 0.3398, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.987012987012987, |
| "grad_norm": 34.9681282043457, |
| "learning_rate": 7.865924910916977e-08, |
| "loss": 0.4414, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.9913419913419913, |
| "grad_norm": 23.342512130737305, |
| "learning_rate": 4.4250904955656095e-08, |
| "loss": 0.4336, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.9956709956709957, |
| "grad_norm": 15.746844291687012, |
| "learning_rate": 1.9668680847356735e-08, |
| "loss": 0.375, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 19.062803268432617, |
| "learning_rate": 4.917412021249179e-09, |
| "loss": 0.3711, |
| "step": 231 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 231, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 116, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.922842866902368e+18, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|