| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 26790, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0011198208286674132, |
| "grad_norm": 4.2997212409973145, |
| "learning_rate": 4.9983202687569994e-05, |
| "loss": 3.1991, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0022396416573348264, |
| "grad_norm": 3.1073286533355713, |
| "learning_rate": 4.9964539007092206e-05, |
| "loss": 2.9692, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0033594624860022394, |
| "grad_norm": 1.520981788635254, |
| "learning_rate": 4.994587532661441e-05, |
| "loss": 3.2009, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.004479283314669653, |
| "grad_norm": 4.404178142547607, |
| "learning_rate": 4.9927211646136616e-05, |
| "loss": 2.8, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.005599104143337066, |
| "grad_norm": 6.26295280456543, |
| "learning_rate": 4.990854796565883e-05, |
| "loss": 3.2157, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.006718924972004479, |
| "grad_norm": 2.1947414875030518, |
| "learning_rate": 4.988988428518104e-05, |
| "loss": 3.358, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.007838745800671893, |
| "grad_norm": 2.184293746948242, |
| "learning_rate": 4.987122060470325e-05, |
| "loss": 3.1155, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.008958566629339306, |
| "grad_norm": 2.162921905517578, |
| "learning_rate": 4.9852556924225456e-05, |
| "loss": 2.113, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.010078387458006719, |
| "grad_norm": 6.238914966583252, |
| "learning_rate": 4.983389324374767e-05, |
| "loss": 2.9041, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.011198208286674132, |
| "grad_norm": 2.3711066246032715, |
| "learning_rate": 4.981522956326988e-05, |
| "loss": 2.6368, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.012318029115341545, |
| "grad_norm": 6.883894443511963, |
| "learning_rate": 4.979656588279209e-05, |
| "loss": 3.1278, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.013437849944008958, |
| "grad_norm": 2.7835214138031006, |
| "learning_rate": 4.9777902202314296e-05, |
| "loss": 3.2703, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.014557670772676373, |
| "grad_norm": 2.707707643508911, |
| "learning_rate": 4.975923852183651e-05, |
| "loss": 3.2012, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.015677491601343786, |
| "grad_norm": 7.556955337524414, |
| "learning_rate": 4.974057484135872e-05, |
| "loss": 3.0857, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0167973124300112, |
| "grad_norm": 9.100250244140625, |
| "learning_rate": 4.972191116088093e-05, |
| "loss": 2.8656, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.01791713325867861, |
| "grad_norm": 6.301916122436523, |
| "learning_rate": 4.9703247480403136e-05, |
| "loss": 2.6866, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.019036954087346025, |
| "grad_norm": 2.9455721378326416, |
| "learning_rate": 4.968458379992535e-05, |
| "loss": 2.4591, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.020156774916013438, |
| "grad_norm": 2.6669416427612305, |
| "learning_rate": 4.966592011944756e-05, |
| "loss": 2.73, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.02127659574468085, |
| "grad_norm": 7.3631086349487305, |
| "learning_rate": 4.964725643896977e-05, |
| "loss": 2.5342, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.022396416573348264, |
| "grad_norm": 2.861095905303955, |
| "learning_rate": 4.9628592758491976e-05, |
| "loss": 3.152, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.023516237402015677, |
| "grad_norm": 6.908902645111084, |
| "learning_rate": 4.960992907801419e-05, |
| "loss": 2.4202, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.02463605823068309, |
| "grad_norm": 3.1885159015655518, |
| "learning_rate": 4.95912653975364e-05, |
| "loss": 2.6223, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.025755879059350503, |
| "grad_norm": 3.509582042694092, |
| "learning_rate": 4.957260171705861e-05, |
| "loss": 2.2328, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.026875699888017916, |
| "grad_norm": 7.881849765777588, |
| "learning_rate": 4.9553938036580816e-05, |
| "loss": 3.1846, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.027995520716685332, |
| "grad_norm": 6.826298236846924, |
| "learning_rate": 4.953527435610303e-05, |
| "loss": 2.5674, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.029115341545352745, |
| "grad_norm": 2.763533353805542, |
| "learning_rate": 4.951661067562523e-05, |
| "loss": 2.9195, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.030235162374020158, |
| "grad_norm": 2.4903926849365234, |
| "learning_rate": 4.9497946995147444e-05, |
| "loss": 2.8041, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.03135498320268757, |
| "grad_norm": 5.462828636169434, |
| "learning_rate": 4.9479283314669656e-05, |
| "loss": 2.7518, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.032474804031354984, |
| "grad_norm": 2.7415122985839844, |
| "learning_rate": 4.946061963419186e-05, |
| "loss": 2.2559, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.0335946248600224, |
| "grad_norm": 3.042797803878784, |
| "learning_rate": 4.944195595371407e-05, |
| "loss": 2.3064, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.03471444568868981, |
| "grad_norm": 7.439578533172607, |
| "learning_rate": 4.9423292273236284e-05, |
| "loss": 2.2869, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.03583426651735722, |
| "grad_norm": 7.290367603302002, |
| "learning_rate": 4.9404628592758496e-05, |
| "loss": 2.4284, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.036954087346024636, |
| "grad_norm": 3.0886971950531006, |
| "learning_rate": 4.93859649122807e-05, |
| "loss": 2.8264, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.03807390817469205, |
| "grad_norm": 2.817957878112793, |
| "learning_rate": 4.936730123180291e-05, |
| "loss": 2.631, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.03919372900335946, |
| "grad_norm": 2.4056355953216553, |
| "learning_rate": 4.9348637551325124e-05, |
| "loss": 2.538, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.040313549832026875, |
| "grad_norm": 5.376706600189209, |
| "learning_rate": 4.9329973870847336e-05, |
| "loss": 2.8889, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.04143337066069429, |
| "grad_norm": 2.869654893875122, |
| "learning_rate": 4.931131019036954e-05, |
| "loss": 2.4983, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.0425531914893617, |
| "grad_norm": 4.797430515289307, |
| "learning_rate": 4.929264650989175e-05, |
| "loss": 3.0132, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.043673012318029114, |
| "grad_norm": 7.970874786376953, |
| "learning_rate": 4.9273982829413964e-05, |
| "loss": 2.8745, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.04479283314669653, |
| "grad_norm": 5.253184795379639, |
| "learning_rate": 4.9255319148936176e-05, |
| "loss": 2.6653, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.04591265397536394, |
| "grad_norm": 3.0611302852630615, |
| "learning_rate": 4.923665546845838e-05, |
| "loss": 2.5853, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.04703247480403135, |
| "grad_norm": 5.2951884269714355, |
| "learning_rate": 4.921799178798059e-05, |
| "loss": 2.5193, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.048152295632698766, |
| "grad_norm": 2.9463164806365967, |
| "learning_rate": 4.9199328107502804e-05, |
| "loss": 2.5797, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.04927211646136618, |
| "grad_norm": 3.1109468936920166, |
| "learning_rate": 4.9180664427025016e-05, |
| "loss": 2.7147, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.05039193729003359, |
| "grad_norm": 4.468992710113525, |
| "learning_rate": 4.916200074654722e-05, |
| "loss": 2.5695, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.051511758118701005, |
| "grad_norm": 8.419249534606934, |
| "learning_rate": 4.914333706606943e-05, |
| "loss": 2.7006, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.05263157894736842, |
| "grad_norm": 6.9784722328186035, |
| "learning_rate": 4.912467338559164e-05, |
| "loss": 2.6604, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.05375139977603583, |
| "grad_norm": 3.6676979064941406, |
| "learning_rate": 4.9106009705113856e-05, |
| "loss": 2.5849, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.054871220604703244, |
| "grad_norm": 2.428481101989746, |
| "learning_rate": 4.908734602463606e-05, |
| "loss": 2.7081, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.055991041433370664, |
| "grad_norm": 4.069552898406982, |
| "learning_rate": 4.9068682344158266e-05, |
| "loss": 2.7358, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05711086226203808, |
| "grad_norm": 4.768444538116455, |
| "learning_rate": 4.905001866368048e-05, |
| "loss": 2.3977, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.05823068309070549, |
| "grad_norm": 7.9206342697143555, |
| "learning_rate": 4.903135498320269e-05, |
| "loss": 2.615, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.0593505039193729, |
| "grad_norm": 4.9245476722717285, |
| "learning_rate": 4.90126913027249e-05, |
| "loss": 2.1996, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.060470324748040316, |
| "grad_norm": 3.498934745788574, |
| "learning_rate": 4.8994027622247106e-05, |
| "loss": 2.4096, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.06159014557670773, |
| "grad_norm": 2.8137447834014893, |
| "learning_rate": 4.897536394176932e-05, |
| "loss": 2.3293, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.06270996640537514, |
| "grad_norm": 2.2534000873565674, |
| "learning_rate": 4.895670026129153e-05, |
| "loss": 2.5449, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.06382978723404255, |
| "grad_norm": 5.295638561248779, |
| "learning_rate": 4.893803658081374e-05, |
| "loss": 2.4056, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.06494960806270997, |
| "grad_norm": 2.56434965133667, |
| "learning_rate": 4.8919372900335946e-05, |
| "loss": 2.4592, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.06606942889137737, |
| "grad_norm": 2.1598501205444336, |
| "learning_rate": 4.890070921985816e-05, |
| "loss": 2.3764, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.0671892497200448, |
| "grad_norm": 2.71020245552063, |
| "learning_rate": 4.888204553938037e-05, |
| "loss": 2.8345, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.0683090705487122, |
| "grad_norm": 6.304147243499756, |
| "learning_rate": 4.886338185890258e-05, |
| "loss": 2.6056, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.06942889137737962, |
| "grad_norm": 6.749722003936768, |
| "learning_rate": 4.8844718178424786e-05, |
| "loss": 2.1934, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.07054871220604703, |
| "grad_norm": 7.15731954574585, |
| "learning_rate": 4.8826054497947e-05, |
| "loss": 2.3531, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.07166853303471445, |
| "grad_norm": 2.1473021507263184, |
| "learning_rate": 4.880739081746921e-05, |
| "loss": 2.6166, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.07278835386338185, |
| "grad_norm": 2.7124907970428467, |
| "learning_rate": 4.878872713699142e-05, |
| "loss": 2.3902, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.07390817469204927, |
| "grad_norm": 11.411026954650879, |
| "learning_rate": 4.8770063456513626e-05, |
| "loss": 2.4434, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.07502799552071669, |
| "grad_norm": 2.7030553817749023, |
| "learning_rate": 4.875139977603584e-05, |
| "loss": 2.3203, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.0761478163493841, |
| "grad_norm": 8.861196517944336, |
| "learning_rate": 4.873273609555804e-05, |
| "loss": 2.5645, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.07726763717805152, |
| "grad_norm": 4.628374099731445, |
| "learning_rate": 4.871407241508026e-05, |
| "loss": 2.8101, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.07838745800671892, |
| "grad_norm": 9.408851623535156, |
| "learning_rate": 4.8695408734602466e-05, |
| "loss": 2.2799, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.07950727883538634, |
| "grad_norm": 9.125829696655273, |
| "learning_rate": 4.867674505412468e-05, |
| "loss": 2.6383, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.08062709966405375, |
| "grad_norm": 3.9296653270721436, |
| "learning_rate": 4.865808137364688e-05, |
| "loss": 2.0646, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.08174692049272117, |
| "grad_norm": 5.768338203430176, |
| "learning_rate": 4.8639417693169094e-05, |
| "loss": 2.3089, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.08286674132138858, |
| "grad_norm": 2.457167148590088, |
| "learning_rate": 4.8620754012691306e-05, |
| "loss": 2.2855, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.083986562150056, |
| "grad_norm": 8.373284339904785, |
| "learning_rate": 4.860209033221351e-05, |
| "loss": 2.2884, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.0851063829787234, |
| "grad_norm": 2.7077553272247314, |
| "learning_rate": 4.858342665173572e-05, |
| "loss": 2.5055, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.08622620380739082, |
| "grad_norm": 7.299142360687256, |
| "learning_rate": 4.8564762971257934e-05, |
| "loss": 2.2387, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.08734602463605823, |
| "grad_norm": 2.6765339374542236, |
| "learning_rate": 4.8546099290780146e-05, |
| "loss": 2.6105, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.08846584546472565, |
| "grad_norm": 3.2736477851867676, |
| "learning_rate": 4.852743561030235e-05, |
| "loss": 2.3101, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.08958566629339305, |
| "grad_norm": 8.751072883605957, |
| "learning_rate": 4.850877192982456e-05, |
| "loss": 2.7699, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.09070548712206047, |
| "grad_norm": 2.8005926609039307, |
| "learning_rate": 4.8490108249346774e-05, |
| "loss": 2.5564, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.09182530795072788, |
| "grad_norm": 5.777060031890869, |
| "learning_rate": 4.8471444568868986e-05, |
| "loss": 2.5826, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.0929451287793953, |
| "grad_norm": 5.9840803146362305, |
| "learning_rate": 4.845278088839119e-05, |
| "loss": 2.7461, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.0940649496080627, |
| "grad_norm": 5.613245010375977, |
| "learning_rate": 4.84341172079134e-05, |
| "loss": 2.2355, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.09518477043673013, |
| "grad_norm": 2.8910045623779297, |
| "learning_rate": 4.8415453527435614e-05, |
| "loss": 2.5613, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.09630459126539753, |
| "grad_norm": 2.2605295181274414, |
| "learning_rate": 4.8396789846957826e-05, |
| "loss": 2.3108, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.09742441209406495, |
| "grad_norm": 2.1678943634033203, |
| "learning_rate": 4.837812616648003e-05, |
| "loss": 2.5849, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.09854423292273236, |
| "grad_norm": 3.4123549461364746, |
| "learning_rate": 4.835946248600224e-05, |
| "loss": 2.5897, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.09966405375139978, |
| "grad_norm": 2.3803961277008057, |
| "learning_rate": 4.834079880552445e-05, |
| "loss": 2.3515, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.10078387458006718, |
| "grad_norm": 2.3979332447052, |
| "learning_rate": 4.8322135125046666e-05, |
| "loss": 2.5587, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.1019036954087346, |
| "grad_norm": 4.127903461456299, |
| "learning_rate": 4.830347144456887e-05, |
| "loss": 1.8338, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.10302351623740201, |
| "grad_norm": 3.460048198699951, |
| "learning_rate": 4.828480776409108e-05, |
| "loss": 3.0448, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.10414333706606943, |
| "grad_norm": 2.7010154724121094, |
| "learning_rate": 4.826614408361329e-05, |
| "loss": 2.5111, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.10526315789473684, |
| "grad_norm": 2.2253668308258057, |
| "learning_rate": 4.8247480403135506e-05, |
| "loss": 2.1311, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.10638297872340426, |
| "grad_norm": 2.3751561641693115, |
| "learning_rate": 4.822881672265771e-05, |
| "loss": 2.3021, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.10750279955207166, |
| "grad_norm": 4.968678951263428, |
| "learning_rate": 4.8210153042179916e-05, |
| "loss": 2.7204, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.10862262038073908, |
| "grad_norm": 2.429736375808716, |
| "learning_rate": 4.819148936170213e-05, |
| "loss": 2.4528, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.10974244120940649, |
| "grad_norm": 8.118196487426758, |
| "learning_rate": 4.817282568122434e-05, |
| "loss": 2.8347, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.11086226203807391, |
| "grad_norm": 9.025050163269043, |
| "learning_rate": 4.815416200074655e-05, |
| "loss": 2.2329, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.11198208286674133, |
| "grad_norm": 2.610757827758789, |
| "learning_rate": 4.8135498320268756e-05, |
| "loss": 2.0086, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.11310190369540873, |
| "grad_norm": 3.6983273029327393, |
| "learning_rate": 4.811683463979097e-05, |
| "loss": 2.6589, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.11422172452407615, |
| "grad_norm": 6.618379592895508, |
| "learning_rate": 4.809817095931318e-05, |
| "loss": 2.2701, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.11534154535274356, |
| "grad_norm": 2.155717372894287, |
| "learning_rate": 4.807950727883539e-05, |
| "loss": 2.3469, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.11646136618141098, |
| "grad_norm": 2.49660325050354, |
| "learning_rate": 4.8060843598357596e-05, |
| "loss": 2.1965, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.11758118701007839, |
| "grad_norm": 9.351076126098633, |
| "learning_rate": 4.804217991787981e-05, |
| "loss": 3.0117, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.1187010078387458, |
| "grad_norm": 4.395270824432373, |
| "learning_rate": 4.802351623740202e-05, |
| "loss": 2.091, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.11982082866741321, |
| "grad_norm": 2.891835927963257, |
| "learning_rate": 4.800485255692423e-05, |
| "loss": 2.4642, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.12094064949608063, |
| "grad_norm": 3.1243512630462646, |
| "learning_rate": 4.7986188876446436e-05, |
| "loss": 2.3218, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.12206047032474804, |
| "grad_norm": 4.109086513519287, |
| "learning_rate": 4.796752519596865e-05, |
| "loss": 2.2233, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.12318029115341546, |
| "grad_norm": 8.871736526489258, |
| "learning_rate": 4.794886151549086e-05, |
| "loss": 2.6833, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.12430011198208286, |
| "grad_norm": 2.5556600093841553, |
| "learning_rate": 4.793019783501307e-05, |
| "loss": 2.3963, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.12541993281075028, |
| "grad_norm": 2.431551694869995, |
| "learning_rate": 4.7911534154535276e-05, |
| "loss": 2.2135, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.1265397536394177, |
| "grad_norm": 6.504064559936523, |
| "learning_rate": 4.789287047405749e-05, |
| "loss": 2.0713, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.1276595744680851, |
| "grad_norm": 8.992396354675293, |
| "learning_rate": 4.787420679357969e-05, |
| "loss": 2.3186, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.12877939529675253, |
| "grad_norm": 7.831729888916016, |
| "learning_rate": 4.785554311310191e-05, |
| "loss": 2.5546, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.12989921612541994, |
| "grad_norm": 2.7570407390594482, |
| "learning_rate": 4.7836879432624116e-05, |
| "loss": 2.5008, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.13101903695408734, |
| "grad_norm": 10.529077529907227, |
| "learning_rate": 4.781821575214633e-05, |
| "loss": 2.3034, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.13213885778275475, |
| "grad_norm": 7.510254383087158, |
| "learning_rate": 4.779955207166853e-05, |
| "loss": 2.5347, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.13325867861142218, |
| "grad_norm": 6.021450519561768, |
| "learning_rate": 4.7780888391190744e-05, |
| "loss": 2.3394, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.1343784994400896, |
| "grad_norm": 2.8167929649353027, |
| "learning_rate": 4.7762224710712956e-05, |
| "loss": 2.4706, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.135498320268757, |
| "grad_norm": 2.655770778656006, |
| "learning_rate": 4.774356103023516e-05, |
| "loss": 2.5604, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.1366181410974244, |
| "grad_norm": 5.053645610809326, |
| "learning_rate": 4.772489734975737e-05, |
| "loss": 2.468, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.13773796192609183, |
| "grad_norm": 7.558941841125488, |
| "learning_rate": 4.7706233669279584e-05, |
| "loss": 2.5013, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.13885778275475924, |
| "grad_norm": 7.665897369384766, |
| "learning_rate": 4.7687569988801796e-05, |
| "loss": 2.1974, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.13997760358342665, |
| "grad_norm": 9.41537094116211, |
| "learning_rate": 4.7668906308324e-05, |
| "loss": 2.2467, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.14109742441209405, |
| "grad_norm": 9.68034839630127, |
| "learning_rate": 4.765024262784621e-05, |
| "loss": 2.5209, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.1422172452407615, |
| "grad_norm": 6.756275177001953, |
| "learning_rate": 4.7631578947368424e-05, |
| "loss": 2.4288, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.1433370660694289, |
| "grad_norm": 7.971835613250732, |
| "learning_rate": 4.7612915266890636e-05, |
| "loss": 2.059, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.1444568868980963, |
| "grad_norm": 7.094338893890381, |
| "learning_rate": 4.759425158641284e-05, |
| "loss": 2.7495, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.1455767077267637, |
| "grad_norm": 6.793420791625977, |
| "learning_rate": 4.757558790593505e-05, |
| "loss": 1.8302, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.14669652855543114, |
| "grad_norm": 6.490263938903809, |
| "learning_rate": 4.7556924225457264e-05, |
| "loss": 2.4643, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.14781634938409854, |
| "grad_norm": 2.3416640758514404, |
| "learning_rate": 4.7538260544979476e-05, |
| "loss": 2.4583, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.14893617021276595, |
| "grad_norm": 6.409727096557617, |
| "learning_rate": 4.751959686450168e-05, |
| "loss": 1.7198, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.15005599104143338, |
| "grad_norm": 3.995352029800415, |
| "learning_rate": 4.750093318402389e-05, |
| "loss": 2.1065, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.1511758118701008, |
| "grad_norm": 4.906558036804199, |
| "learning_rate": 4.74822695035461e-05, |
| "loss": 2.3096, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.1522956326987682, |
| "grad_norm": 6.388749122619629, |
| "learning_rate": 4.7463605823068316e-05, |
| "loss": 1.9702, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.1534154535274356, |
| "grad_norm": 2.246985673904419, |
| "learning_rate": 4.744494214259052e-05, |
| "loss": 2.1969, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.15453527435610304, |
| "grad_norm": 6.625758647918701, |
| "learning_rate": 4.742627846211273e-05, |
| "loss": 2.4131, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.15565509518477044, |
| "grad_norm": 2.558464527130127, |
| "learning_rate": 4.740761478163494e-05, |
| "loss": 2.5493, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.15677491601343785, |
| "grad_norm": 4.546473979949951, |
| "learning_rate": 4.7388951101157156e-05, |
| "loss": 2.8893, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.15789473684210525, |
| "grad_norm": 2.1784298419952393, |
| "learning_rate": 4.737028742067936e-05, |
| "loss": 2.3148, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.1590145576707727, |
| "grad_norm": 2.0193071365356445, |
| "learning_rate": 4.735162374020157e-05, |
| "loss": 2.6046, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.1601343784994401, |
| "grad_norm": 2.74428653717041, |
| "learning_rate": 4.733296005972378e-05, |
| "loss": 2.6196, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.1612541993281075, |
| "grad_norm": 10.364500045776367, |
| "learning_rate": 4.731429637924599e-05, |
| "loss": 2.473, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.1623740201567749, |
| "grad_norm": 7.268424034118652, |
| "learning_rate": 4.72956326987682e-05, |
| "loss": 2.9277, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.16349384098544234, |
| "grad_norm": 7.980413913726807, |
| "learning_rate": 4.7276969018290406e-05, |
| "loss": 2.4812, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.16461366181410975, |
| "grad_norm": 4.977534770965576, |
| "learning_rate": 4.725830533781262e-05, |
| "loss": 2.3111, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.16573348264277715, |
| "grad_norm": 2.0615103244781494, |
| "learning_rate": 4.723964165733483e-05, |
| "loss": 2.4883, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.16685330347144456, |
| "grad_norm": 2.4058101177215576, |
| "learning_rate": 4.722097797685704e-05, |
| "loss": 2.2982, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.167973124300112, |
| "grad_norm": 5.251309871673584, |
| "learning_rate": 4.7202314296379246e-05, |
| "loss": 2.186, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.1690929451287794, |
| "grad_norm": 4.544527053833008, |
| "learning_rate": 4.718365061590146e-05, |
| "loss": 1.8374, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.1702127659574468, |
| "grad_norm": 8.125224113464355, |
| "learning_rate": 4.716498693542367e-05, |
| "loss": 2.3848, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.1713325867861142, |
| "grad_norm": 2.274805784225464, |
| "learning_rate": 4.714632325494588e-05, |
| "loss": 2.1904, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.17245240761478164, |
| "grad_norm": 7.999364376068115, |
| "learning_rate": 4.7127659574468086e-05, |
| "loss": 2.4646, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.17357222844344905, |
| "grad_norm": 4.198975086212158, |
| "learning_rate": 4.71089958939903e-05, |
| "loss": 2.2459, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.17469204927211646, |
| "grad_norm": 8.396190643310547, |
| "learning_rate": 4.70903322135125e-05, |
| "loss": 2.75, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.17581187010078386, |
| "grad_norm": 2.833841562271118, |
| "learning_rate": 4.707166853303472e-05, |
| "loss": 2.5594, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.1769316909294513, |
| "grad_norm": 2.6558115482330322, |
| "learning_rate": 4.7053004852556926e-05, |
| "loss": 2.2972, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.1780515117581187, |
| "grad_norm": 7.599963188171387, |
| "learning_rate": 4.703434117207914e-05, |
| "loss": 2.5468, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.1791713325867861, |
| "grad_norm": 2.6800622940063477, |
| "learning_rate": 4.701567749160134e-05, |
| "loss": 2.4275, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.18029115341545351, |
| "grad_norm": 9.46832275390625, |
| "learning_rate": 4.699701381112356e-05, |
| "loss": 2.2797, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.18141097424412095, |
| "grad_norm": 2.8210015296936035, |
| "learning_rate": 4.6978350130645766e-05, |
| "loss": 2.3262, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.18253079507278835, |
| "grad_norm": 6.384908676147461, |
| "learning_rate": 4.695968645016798e-05, |
| "loss": 1.852, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.18365061590145576, |
| "grad_norm": 11.738371849060059, |
| "learning_rate": 4.694102276969018e-05, |
| "loss": 2.4613, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.18477043673012317, |
| "grad_norm": 2.614558696746826, |
| "learning_rate": 4.6922359089212394e-05, |
| "loss": 2.3428, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.1858902575587906, |
| "grad_norm": 3.371556282043457, |
| "learning_rate": 4.6903695408734606e-05, |
| "loss": 2.8777, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.187010078387458, |
| "grad_norm": 2.5485849380493164, |
| "learning_rate": 4.688503172825681e-05, |
| "loss": 2.1301, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.1881298992161254, |
| "grad_norm": 4.2750935554504395, |
| "learning_rate": 4.686636804777902e-05, |
| "loss": 2.5627, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.18924972004479285, |
| "grad_norm": 2.555360794067383, |
| "learning_rate": 4.6847704367301234e-05, |
| "loss": 2.19, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.19036954087346025, |
| "grad_norm": 6.980922698974609, |
| "learning_rate": 4.6829040686823446e-05, |
| "loss": 2.2301, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.19148936170212766, |
| "grad_norm": 4.802427768707275, |
| "learning_rate": 4.681037700634565e-05, |
| "loss": 2.1102, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.19260918253079506, |
| "grad_norm": 6.685520172119141, |
| "learning_rate": 4.679171332586786e-05, |
| "loss": 2.3725, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.1937290033594625, |
| "grad_norm": 2.2345573902130127, |
| "learning_rate": 4.6773049645390074e-05, |
| "loss": 2.1639, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.1948488241881299, |
| "grad_norm": 2.531062364578247, |
| "learning_rate": 4.6754385964912286e-05, |
| "loss": 1.8379, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.1959686450167973, |
| "grad_norm": 7.543485164642334, |
| "learning_rate": 4.673572228443449e-05, |
| "loss": 2.5206, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.19708846584546472, |
| "grad_norm": 4.684238910675049, |
| "learning_rate": 4.67170586039567e-05, |
| "loss": 2.1743, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.19820828667413215, |
| "grad_norm": 8.91139030456543, |
| "learning_rate": 4.669839492347891e-05, |
| "loss": 2.3441, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.19932810750279956, |
| "grad_norm": 2.0204806327819824, |
| "learning_rate": 4.6679731243001126e-05, |
| "loss": 2.3745, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.20044792833146696, |
| "grad_norm": 10.537651062011719, |
| "learning_rate": 4.666106756252333e-05, |
| "loss": 2.587, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.20156774916013437, |
| "grad_norm": 3.3336009979248047, |
| "learning_rate": 4.664240388204554e-05, |
| "loss": 2.7144, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.2026875699888018, |
| "grad_norm": 3.2457361221313477, |
| "learning_rate": 4.662374020156775e-05, |
| "loss": 2.1114, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.2038073908174692, |
| "grad_norm": 6.266234874725342, |
| "learning_rate": 4.6605076521089966e-05, |
| "loss": 2.3444, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.20492721164613661, |
| "grad_norm": 5.921943187713623, |
| "learning_rate": 4.658641284061217e-05, |
| "loss": 2.2345, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.20604703247480402, |
| "grad_norm": 2.481746196746826, |
| "learning_rate": 4.656774916013438e-05, |
| "loss": 2.3925, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.20716685330347145, |
| "grad_norm": 6.096205711364746, |
| "learning_rate": 4.654908547965659e-05, |
| "loss": 2.4366, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.20828667413213886, |
| "grad_norm": 7.671387672424316, |
| "learning_rate": 4.65304217991788e-05, |
| "loss": 2.5033, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.20940649496080627, |
| "grad_norm": 4.001086711883545, |
| "learning_rate": 4.651175811870101e-05, |
| "loss": 2.0047, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.21052631578947367, |
| "grad_norm": 7.602363586425781, |
| "learning_rate": 4.649309443822322e-05, |
| "loss": 2.3823, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.2116461366181411, |
| "grad_norm": 5.483312129974365, |
| "learning_rate": 4.647443075774543e-05, |
| "loss": 2.4785, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.2127659574468085, |
| "grad_norm": 2.5652925968170166, |
| "learning_rate": 4.645576707726764e-05, |
| "loss": 2.1959, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.21388577827547592, |
| "grad_norm": 8.491823196411133, |
| "learning_rate": 4.643710339678985e-05, |
| "loss": 2.9472, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.21500559910414332, |
| "grad_norm": 5.945290565490723, |
| "learning_rate": 4.6418439716312056e-05, |
| "loss": 2.2415, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.21612541993281076, |
| "grad_norm": 4.045243263244629, |
| "learning_rate": 4.639977603583427e-05, |
| "loss": 2.4932, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.21724524076147816, |
| "grad_norm": 2.715601921081543, |
| "learning_rate": 4.638111235535648e-05, |
| "loss": 2.4262, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.21836506159014557, |
| "grad_norm": 3.0143299102783203, |
| "learning_rate": 4.636244867487869e-05, |
| "loss": 2.6019, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.21948488241881298, |
| "grad_norm": 9.742323875427246, |
| "learning_rate": 4.6343784994400896e-05, |
| "loss": 2.9155, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.2206047032474804, |
| "grad_norm": 5.9390788078308105, |
| "learning_rate": 4.632512131392311e-05, |
| "loss": 2.1963, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.22172452407614782, |
| "grad_norm": 5.941153049468994, |
| "learning_rate": 4.630645763344531e-05, |
| "loss": 2.2618, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.22284434490481522, |
| "grad_norm": 4.004471778869629, |
| "learning_rate": 4.628779395296753e-05, |
| "loss": 2.6587, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.22396416573348266, |
| "grad_norm": 8.82131576538086, |
| "learning_rate": 4.6269130272489736e-05, |
| "loss": 2.1719, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.22508398656215006, |
| "grad_norm": 2.8698363304138184, |
| "learning_rate": 4.625046659201195e-05, |
| "loss": 2.4046, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.22620380739081747, |
| "grad_norm": 6.006710529327393, |
| "learning_rate": 4.623180291153415e-05, |
| "loss": 2.3737, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.22732362821948487, |
| "grad_norm": 2.5947604179382324, |
| "learning_rate": 4.621313923105637e-05, |
| "loss": 2.4821, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.2284434490481523, |
| "grad_norm": 2.4432547092437744, |
| "learning_rate": 4.6194475550578576e-05, |
| "loss": 2.5101, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.22956326987681971, |
| "grad_norm": 12.777518272399902, |
| "learning_rate": 4.617581187010079e-05, |
| "loss": 2.89, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.23068309070548712, |
| "grad_norm": 8.881490707397461, |
| "learning_rate": 4.615714818962299e-05, |
| "loss": 3.0737, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.23180291153415453, |
| "grad_norm": 11.968159675598145, |
| "learning_rate": 4.613848450914521e-05, |
| "loss": 2.4398, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.23292273236282196, |
| "grad_norm": 2.413706064224243, |
| "learning_rate": 4.6119820828667416e-05, |
| "loss": 2.1109, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.23404255319148937, |
| "grad_norm": 8.401453971862793, |
| "learning_rate": 4.610115714818963e-05, |
| "loss": 2.5007, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.23516237402015677, |
| "grad_norm": 2.3912086486816406, |
| "learning_rate": 4.608249346771183e-05, |
| "loss": 2.4239, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.23628219484882418, |
| "grad_norm": 8.813179016113281, |
| "learning_rate": 4.6063829787234044e-05, |
| "loss": 2.4587, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.2374020156774916, |
| "grad_norm": 10.839656829833984, |
| "learning_rate": 4.6045166106756256e-05, |
| "loss": 2.4691, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.23852183650615902, |
| "grad_norm": 4.4540252685546875, |
| "learning_rate": 4.602650242627846e-05, |
| "loss": 2.4054, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.23964165733482642, |
| "grad_norm": 2.7125473022460938, |
| "learning_rate": 4.600783874580067e-05, |
| "loss": 2.7286, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.24076147816349383, |
| "grad_norm": 2.332322359085083, |
| "learning_rate": 4.5989175065322884e-05, |
| "loss": 2.5039, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.24188129899216126, |
| "grad_norm": 2.539842367172241, |
| "learning_rate": 4.5970511384845096e-05, |
| "loss": 2.2403, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.24300111982082867, |
| "grad_norm": 6.839804649353027, |
| "learning_rate": 4.59518477043673e-05, |
| "loss": 2.0901, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.24412094064949608, |
| "grad_norm": 2.5890653133392334, |
| "learning_rate": 4.593318402388951e-05, |
| "loss": 2.2586, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.24524076147816348, |
| "grad_norm": 2.5026495456695557, |
| "learning_rate": 4.591452034341172e-05, |
| "loss": 2.1888, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.24636058230683092, |
| "grad_norm": 3.8693251609802246, |
| "learning_rate": 4.5895856662933936e-05, |
| "loss": 2.6531, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.24748040313549832, |
| "grad_norm": 8.573837280273438, |
| "learning_rate": 4.587719298245614e-05, |
| "loss": 2.5078, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.24860022396416573, |
| "grad_norm": 3.1866371631622314, |
| "learning_rate": 4.585852930197835e-05, |
| "loss": 2.2594, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.24972004479283313, |
| "grad_norm": 7.868608474731445, |
| "learning_rate": 4.583986562150056e-05, |
| "loss": 2.2759, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.25083986562150057, |
| "grad_norm": 3.183617353439331, |
| "learning_rate": 4.5821201941022776e-05, |
| "loss": 2.5888, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.251959686450168, |
| "grad_norm": 2.5060982704162598, |
| "learning_rate": 4.580253826054498e-05, |
| "loss": 2.1064, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.2530795072788354, |
| "grad_norm": 2.9019861221313477, |
| "learning_rate": 4.578387458006719e-05, |
| "loss": 2.1935, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.2541993281075028, |
| "grad_norm": 13.326761245727539, |
| "learning_rate": 4.57652108995894e-05, |
| "loss": 2.5056, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.2553191489361702, |
| "grad_norm": 7.620180130004883, |
| "learning_rate": 4.5746547219111616e-05, |
| "loss": 2.6445, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.2564389697648376, |
| "grad_norm": 6.347967147827148, |
| "learning_rate": 4.572788353863382e-05, |
| "loss": 2.1387, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.25755879059350506, |
| "grad_norm": 7.242101192474365, |
| "learning_rate": 4.570921985815603e-05, |
| "loss": 2.4346, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.25867861142217247, |
| "grad_norm": 7.027688503265381, |
| "learning_rate": 4.569055617767824e-05, |
| "loss": 2.2526, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.2597984322508399, |
| "grad_norm": 6.494021892547607, |
| "learning_rate": 4.567189249720045e-05, |
| "loss": 1.9165, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.2609182530795073, |
| "grad_norm": 8.93453311920166, |
| "learning_rate": 4.565322881672266e-05, |
| "loss": 2.1397, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.2620380739081747, |
| "grad_norm": 2.471494197845459, |
| "learning_rate": 4.563456513624487e-05, |
| "loss": 2.2892, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.2631578947368421, |
| "grad_norm": 10.424552917480469, |
| "learning_rate": 4.561590145576708e-05, |
| "loss": 2.514, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.2642777155655095, |
| "grad_norm": 7.312840938568115, |
| "learning_rate": 4.559723777528929e-05, |
| "loss": 2.2164, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.26539753639417696, |
| "grad_norm": 11.861546516418457, |
| "learning_rate": 4.55785740948115e-05, |
| "loss": 2.2371, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.26651735722284436, |
| "grad_norm": 9.549253463745117, |
| "learning_rate": 4.5559910414333706e-05, |
| "loss": 2.6811, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.26763717805151177, |
| "grad_norm": 2.9422247409820557, |
| "learning_rate": 4.554124673385592e-05, |
| "loss": 2.2888, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.2687569988801792, |
| "grad_norm": 7.779324054718018, |
| "learning_rate": 4.552258305337813e-05, |
| "loss": 2.3416, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.2698768197088466, |
| "grad_norm": 2.1986162662506104, |
| "learning_rate": 4.550391937290034e-05, |
| "loss": 2.4494, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.270996640537514, |
| "grad_norm": 2.420370578765869, |
| "learning_rate": 4.5485255692422546e-05, |
| "loss": 2.6193, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.2721164613661814, |
| "grad_norm": 2.281414747238159, |
| "learning_rate": 4.546659201194476e-05, |
| "loss": 2.7233, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.2732362821948488, |
| "grad_norm": 2.4500784873962402, |
| "learning_rate": 4.544792833146696e-05, |
| "loss": 2.658, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.27435610302351626, |
| "grad_norm": 8.000895500183105, |
| "learning_rate": 4.542926465098918e-05, |
| "loss": 2.7245, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.27547592385218367, |
| "grad_norm": 2.6147563457489014, |
| "learning_rate": 4.5410600970511386e-05, |
| "loss": 2.4904, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.2765957446808511, |
| "grad_norm": 12.0834321975708, |
| "learning_rate": 4.53919372900336e-05, |
| "loss": 2.6572, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.2777155655095185, |
| "grad_norm": 5.857783317565918, |
| "learning_rate": 4.53732736095558e-05, |
| "loss": 1.98, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.2788353863381859, |
| "grad_norm": 5.242463111877441, |
| "learning_rate": 4.535460992907802e-05, |
| "loss": 2.4122, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.2799552071668533, |
| "grad_norm": 9.532788276672363, |
| "learning_rate": 4.5335946248600226e-05, |
| "loss": 2.6636, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.2810750279955207, |
| "grad_norm": 8.554610252380371, |
| "learning_rate": 4.531728256812244e-05, |
| "loss": 2.4047, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.2821948488241881, |
| "grad_norm": 7.8059234619140625, |
| "learning_rate": 4.529861888764464e-05, |
| "loss": 2.4136, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.28331466965285557, |
| "grad_norm": 4.768645286560059, |
| "learning_rate": 4.5279955207166854e-05, |
| "loss": 2.6977, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.284434490481523, |
| "grad_norm": 6.272426128387451, |
| "learning_rate": 4.5261291526689066e-05, |
| "loss": 2.7218, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.2855543113101904, |
| "grad_norm": 2.5695507526397705, |
| "learning_rate": 4.524262784621128e-05, |
| "loss": 2.8431, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.2866741321388578, |
| "grad_norm": 2.400848865509033, |
| "learning_rate": 4.522396416573348e-05, |
| "loss": 2.3621, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.2877939529675252, |
| "grad_norm": 10.295741081237793, |
| "learning_rate": 4.5205300485255694e-05, |
| "loss": 1.9837, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.2889137737961926, |
| "grad_norm": 2.625807762145996, |
| "learning_rate": 4.5186636804777906e-05, |
| "loss": 2.3817, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.29003359462486, |
| "grad_norm": 2.6179468631744385, |
| "learning_rate": 4.516797312430011e-05, |
| "loss": 2.6555, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.2911534154535274, |
| "grad_norm": 2.512031316757202, |
| "learning_rate": 4.514930944382232e-05, |
| "loss": 2.5653, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.29227323628219487, |
| "grad_norm": 2.6077969074249268, |
| "learning_rate": 4.5130645763344534e-05, |
| "loss": 2.222, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.2933930571108623, |
| "grad_norm": 2.072172164916992, |
| "learning_rate": 4.5111982082866746e-05, |
| "loss": 2.1437, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.2945128779395297, |
| "grad_norm": 4.034156799316406, |
| "learning_rate": 4.509331840238895e-05, |
| "loss": 2.4836, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.2956326987681971, |
| "grad_norm": 6.953413963317871, |
| "learning_rate": 4.507465472191116e-05, |
| "loss": 2.1186, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.2967525195968645, |
| "grad_norm": 7.338948726654053, |
| "learning_rate": 4.505599104143337e-05, |
| "loss": 2.1858, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.2978723404255319, |
| "grad_norm": 6.163172245025635, |
| "learning_rate": 4.5037327360955586e-05, |
| "loss": 2.2643, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.2989921612541993, |
| "grad_norm": 7.6946563720703125, |
| "learning_rate": 4.501866368047779e-05, |
| "loss": 2.294, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.30011198208286677, |
| "grad_norm": 12.317503929138184, |
| "learning_rate": 4.5e-05, |
| "loss": 2.5286, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.3012318029115342, |
| "grad_norm": 7.581274509429932, |
| "learning_rate": 4.498133631952221e-05, |
| "loss": 2.146, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.3023516237402016, |
| "grad_norm": 6.5440778732299805, |
| "learning_rate": 4.4962672639044426e-05, |
| "loss": 2.4251, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.303471444568869, |
| "grad_norm": 4.657285213470459, |
| "learning_rate": 4.494400895856663e-05, |
| "loss": 1.8409, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.3045912653975364, |
| "grad_norm": 1.9951245784759521, |
| "learning_rate": 4.492534527808884e-05, |
| "loss": 2.2119, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.3057110862262038, |
| "grad_norm": 6.937575340270996, |
| "learning_rate": 4.490668159761105e-05, |
| "loss": 1.8932, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.3068309070548712, |
| "grad_norm": 12.604211807250977, |
| "learning_rate": 4.488801791713326e-05, |
| "loss": 2.1911, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.3079507278835386, |
| "grad_norm": 2.306835412979126, |
| "learning_rate": 4.486935423665547e-05, |
| "loss": 2.225, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.3090705487122061, |
| "grad_norm": 7.850268840789795, |
| "learning_rate": 4.485069055617768e-05, |
| "loss": 2.2062, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.3101903695408735, |
| "grad_norm": 8.962443351745605, |
| "learning_rate": 4.483202687569989e-05, |
| "loss": 2.0101, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.3113101903695409, |
| "grad_norm": 2.3884646892547607, |
| "learning_rate": 4.48133631952221e-05, |
| "loss": 2.4539, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.3124300111982083, |
| "grad_norm": 4.534022808074951, |
| "learning_rate": 4.479469951474431e-05, |
| "loss": 2.0423, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.3135498320268757, |
| "grad_norm": 2.491356372833252, |
| "learning_rate": 4.477603583426652e-05, |
| "loss": 2.3449, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.3146696528555431, |
| "grad_norm": 5.900778293609619, |
| "learning_rate": 4.475737215378873e-05, |
| "loss": 2.3496, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.3157894736842105, |
| "grad_norm": 3.9138317108154297, |
| "learning_rate": 4.473870847331094e-05, |
| "loss": 2.2142, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.3169092945128779, |
| "grad_norm": 9.516107559204102, |
| "learning_rate": 4.472004479283315e-05, |
| "loss": 2.0819, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.3180291153415454, |
| "grad_norm": 2.504873275756836, |
| "learning_rate": 4.4701381112355356e-05, |
| "loss": 2.0613, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.3191489361702128, |
| "grad_norm": 8.265789031982422, |
| "learning_rate": 4.468271743187757e-05, |
| "loss": 2.2209, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.3202687569988802, |
| "grad_norm": 3.2764816284179688, |
| "learning_rate": 4.466405375139977e-05, |
| "loss": 2.7096, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.3213885778275476, |
| "grad_norm": 10.983661651611328, |
| "learning_rate": 4.464539007092199e-05, |
| "loss": 2.638, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.322508398656215, |
| "grad_norm": 2.8227787017822266, |
| "learning_rate": 4.4626726390444196e-05, |
| "loss": 2.6929, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.3236282194848824, |
| "grad_norm": 2.553760528564453, |
| "learning_rate": 4.460806270996641e-05, |
| "loss": 1.8541, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.3247480403135498, |
| "grad_norm": 9.215750694274902, |
| "learning_rate": 4.458939902948861e-05, |
| "loss": 2.3142, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.3258678611422172, |
| "grad_norm": 3.168344020843506, |
| "learning_rate": 4.457073534901083e-05, |
| "loss": 2.4242, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.3269876819708847, |
| "grad_norm": 3.0249898433685303, |
| "learning_rate": 4.4552071668533036e-05, |
| "loss": 2.0356, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.3281075027995521, |
| "grad_norm": 7.524886608123779, |
| "learning_rate": 4.453340798805525e-05, |
| "loss": 2.7437, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.3292273236282195, |
| "grad_norm": 8.902599334716797, |
| "learning_rate": 4.451474430757745e-05, |
| "loss": 2.0116, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.3303471444568869, |
| "grad_norm": 3.2997946739196777, |
| "learning_rate": 4.4496080627099664e-05, |
| "loss": 2.359, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.3314669652855543, |
| "grad_norm": 3.123281717300415, |
| "learning_rate": 4.4477416946621876e-05, |
| "loss": 2.2666, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.3325867861142217, |
| "grad_norm": 10.098536491394043, |
| "learning_rate": 4.445875326614409e-05, |
| "loss": 2.501, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.3337066069428891, |
| "grad_norm": 11.130685806274414, |
| "learning_rate": 4.444008958566629e-05, |
| "loss": 2.4014, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.3348264277715566, |
| "grad_norm": 8.4888334274292, |
| "learning_rate": 4.4421425905188505e-05, |
| "loss": 2.3178, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.335946248600224, |
| "grad_norm": 8.757832527160645, |
| "learning_rate": 4.4402762224710716e-05, |
| "loss": 2.1205, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.3370660694288914, |
| "grad_norm": 8.70385456085205, |
| "learning_rate": 4.438409854423293e-05, |
| "loss": 2.4269, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.3381858902575588, |
| "grad_norm": 8.281830787658691, |
| "learning_rate": 4.436543486375513e-05, |
| "loss": 2.5491, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.3393057110862262, |
| "grad_norm": 9.058775901794434, |
| "learning_rate": 4.4346771183277345e-05, |
| "loss": 2.3167, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.3404255319148936, |
| "grad_norm": 5.364592552185059, |
| "learning_rate": 4.4328107502799556e-05, |
| "loss": 1.9588, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.341545352743561, |
| "grad_norm": 2.446974277496338, |
| "learning_rate": 4.430944382232177e-05, |
| "loss": 2.3064, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.3426651735722284, |
| "grad_norm": 2.6895692348480225, |
| "learning_rate": 4.429078014184397e-05, |
| "loss": 2.3751, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.3437849944008959, |
| "grad_norm": 7.783231735229492, |
| "learning_rate": 4.427211646136618e-05, |
| "loss": 1.6835, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.3449048152295633, |
| "grad_norm": 3.170950412750244, |
| "learning_rate": 4.4253452780888396e-05, |
| "loss": 2.5539, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.3460246360582307, |
| "grad_norm": 7.711115837097168, |
| "learning_rate": 4.42347891004106e-05, |
| "loss": 2.78, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.3471444568868981, |
| "grad_norm": 8.71380615234375, |
| "learning_rate": 4.421612541993281e-05, |
| "loss": 1.7815, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.3482642777155655, |
| "grad_norm": 2.3626303672790527, |
| "learning_rate": 4.419746173945502e-05, |
| "loss": 2.3035, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.3493840985442329, |
| "grad_norm": 2.5161445140838623, |
| "learning_rate": 4.4178798058977236e-05, |
| "loss": 2.3065, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.3505039193729003, |
| "grad_norm": 2.395263433456421, |
| "learning_rate": 4.416013437849944e-05, |
| "loss": 2.3419, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.3516237402015677, |
| "grad_norm": 6.902035713195801, |
| "learning_rate": 4.414147069802165e-05, |
| "loss": 2.3343, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.3527435610302352, |
| "grad_norm": 5.079914093017578, |
| "learning_rate": 4.412280701754386e-05, |
| "loss": 2.4017, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.3538633818589026, |
| "grad_norm": 3.483292579650879, |
| "learning_rate": 4.410414333706607e-05, |
| "loss": 2.3309, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.35498320268757, |
| "grad_norm": 7.4583940505981445, |
| "learning_rate": 4.408547965658828e-05, |
| "loss": 2.1556, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.3561030235162374, |
| "grad_norm": 16.233184814453125, |
| "learning_rate": 4.406681597611049e-05, |
| "loss": 2.2442, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.3572228443449048, |
| "grad_norm": 9.553163528442383, |
| "learning_rate": 4.40481522956327e-05, |
| "loss": 2.1811, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.3583426651735722, |
| "grad_norm": 5.221775531768799, |
| "learning_rate": 4.402948861515491e-05, |
| "loss": 2.8077, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.3594624860022396, |
| "grad_norm": 2.419001579284668, |
| "learning_rate": 4.401082493467712e-05, |
| "loss": 1.9981, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.36058230683090703, |
| "grad_norm": 2.5910959243774414, |
| "learning_rate": 4.399216125419933e-05, |
| "loss": 1.9731, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.3617021276595745, |
| "grad_norm": 3.1020877361297607, |
| "learning_rate": 4.397349757372154e-05, |
| "loss": 2.0817, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.3628219484882419, |
| "grad_norm": 3.0343470573425293, |
| "learning_rate": 4.395483389324375e-05, |
| "loss": 2.3142, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.3639417693169093, |
| "grad_norm": 12.035741806030273, |
| "learning_rate": 4.393617021276596e-05, |
| "loss": 2.4769, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.3650615901455767, |
| "grad_norm": 3.116953134536743, |
| "learning_rate": 4.391750653228817e-05, |
| "loss": 2.347, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.3661814109742441, |
| "grad_norm": 2.565833330154419, |
| "learning_rate": 4.389884285181038e-05, |
| "loss": 1.9123, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.3673012318029115, |
| "grad_norm": 2.983285427093506, |
| "learning_rate": 4.388017917133259e-05, |
| "loss": 2.4622, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.3684210526315789, |
| "grad_norm": 11.630106925964355, |
| "learning_rate": 4.38615154908548e-05, |
| "loss": 2.4459, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.36954087346024633, |
| "grad_norm": 6.8500285148620605, |
| "learning_rate": 4.3842851810377006e-05, |
| "loss": 2.143, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.3706606942889138, |
| "grad_norm": 2.3746914863586426, |
| "learning_rate": 4.382418812989922e-05, |
| "loss": 2.143, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.3717805151175812, |
| "grad_norm": 2.91323184967041, |
| "learning_rate": 4.380552444942142e-05, |
| "loss": 2.2025, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.3729003359462486, |
| "grad_norm": 2.4903807640075684, |
| "learning_rate": 4.378686076894364e-05, |
| "loss": 2.3756, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.374020156774916, |
| "grad_norm": 4.964207172393799, |
| "learning_rate": 4.3768197088465846e-05, |
| "loss": 1.7364, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.3751399776035834, |
| "grad_norm": 7.413595199584961, |
| "learning_rate": 4.374953340798806e-05, |
| "loss": 2.3942, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.3762597984322508, |
| "grad_norm": 2.7675399780273438, |
| "learning_rate": 4.373086972751026e-05, |
| "loss": 2.5504, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.37737961926091823, |
| "grad_norm": 1.9106221199035645, |
| "learning_rate": 4.3712206047032475e-05, |
| "loss": 2.2394, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.3784994400895857, |
| "grad_norm": 5.255868911743164, |
| "learning_rate": 4.3693542366554686e-05, |
| "loss": 1.7869, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.3796192609182531, |
| "grad_norm": 4.734898567199707, |
| "learning_rate": 4.36748786860769e-05, |
| "loss": 2.1864, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.3807390817469205, |
| "grad_norm": 11.226783752441406, |
| "learning_rate": 4.36562150055991e-05, |
| "loss": 2.4792, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.3818589025755879, |
| "grad_norm": 8.230179786682129, |
| "learning_rate": 4.3637551325121315e-05, |
| "loss": 1.9946, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.3829787234042553, |
| "grad_norm": 2.981816291809082, |
| "learning_rate": 4.3618887644643526e-05, |
| "loss": 1.9636, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.3840985442329227, |
| "grad_norm": 7.890393257141113, |
| "learning_rate": 4.360022396416574e-05, |
| "loss": 2.0774, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.38521836506159013, |
| "grad_norm": 2.7089128494262695, |
| "learning_rate": 4.358156028368794e-05, |
| "loss": 2.1393, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.38633818589025753, |
| "grad_norm": 7.063770771026611, |
| "learning_rate": 4.3562896603210155e-05, |
| "loss": 1.9429, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.387458006718925, |
| "grad_norm": 2.608469247817993, |
| "learning_rate": 4.3544232922732366e-05, |
| "loss": 2.8053, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.3885778275475924, |
| "grad_norm": 2.1650965213775635, |
| "learning_rate": 4.352556924225458e-05, |
| "loss": 2.4611, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.3896976483762598, |
| "grad_norm": 7.017950057983398, |
| "learning_rate": 4.350690556177678e-05, |
| "loss": 2.0722, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.3908174692049272, |
| "grad_norm": 2.769286870956421, |
| "learning_rate": 4.3488241881298995e-05, |
| "loss": 2.4542, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.3919372900335946, |
| "grad_norm": 9.565979957580566, |
| "learning_rate": 4.3469578200821206e-05, |
| "loss": 2.6218, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.393057110862262, |
| "grad_norm": 12.220897674560547, |
| "learning_rate": 4.345091452034342e-05, |
| "loss": 2.248, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.39417693169092943, |
| "grad_norm": 12.827961921691895, |
| "learning_rate": 4.343225083986562e-05, |
| "loss": 1.9856, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.39529675251959684, |
| "grad_norm": 2.4457015991210938, |
| "learning_rate": 4.341358715938783e-05, |
| "loss": 2.6235, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.3964165733482643, |
| "grad_norm": 5.266937255859375, |
| "learning_rate": 4.3394923478910046e-05, |
| "loss": 2.4887, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.3975363941769317, |
| "grad_norm": 8.347966194152832, |
| "learning_rate": 4.337625979843225e-05, |
| "loss": 2.212, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.3986562150055991, |
| "grad_norm": 7.743762969970703, |
| "learning_rate": 4.335759611795446e-05, |
| "loss": 2.3995, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.3997760358342665, |
| "grad_norm": 3.587676763534546, |
| "learning_rate": 4.333893243747667e-05, |
| "loss": 2.355, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.4008958566629339, |
| "grad_norm": 3.1175928115844727, |
| "learning_rate": 4.3320268756998886e-05, |
| "loss": 2.5916, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.40201567749160133, |
| "grad_norm": 8.9489107131958, |
| "learning_rate": 4.330160507652109e-05, |
| "loss": 2.6132, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.40313549832026874, |
| "grad_norm": 12.342984199523926, |
| "learning_rate": 4.32829413960433e-05, |
| "loss": 2.3637, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.40425531914893614, |
| "grad_norm": 2.721482276916504, |
| "learning_rate": 4.326427771556551e-05, |
| "loss": 2.3011, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.4053751399776036, |
| "grad_norm": 2.5782060623168945, |
| "learning_rate": 4.324561403508772e-05, |
| "loss": 2.1841, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.406494960806271, |
| "grad_norm": 2.5713908672332764, |
| "learning_rate": 4.322695035460993e-05, |
| "loss": 2.3976, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.4076147816349384, |
| "grad_norm": 7.063972473144531, |
| "learning_rate": 4.320828667413214e-05, |
| "loss": 2.4057, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.4087346024636058, |
| "grad_norm": 8.767318725585938, |
| "learning_rate": 4.318962299365435e-05, |
| "loss": 2.3432, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.40985442329227323, |
| "grad_norm": 9.010395050048828, |
| "learning_rate": 4.317095931317656e-05, |
| "loss": 2.3386, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.41097424412094063, |
| "grad_norm": 5.226011276245117, |
| "learning_rate": 4.315229563269877e-05, |
| "loss": 1.9751, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.41209406494960804, |
| "grad_norm": 2.9475603103637695, |
| "learning_rate": 4.313363195222098e-05, |
| "loss": 2.0421, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.4132138857782755, |
| "grad_norm": 2.4759316444396973, |
| "learning_rate": 4.311496827174319e-05, |
| "loss": 2.0569, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.4143337066069429, |
| "grad_norm": 2.6276895999908447, |
| "learning_rate": 4.30963045912654e-05, |
| "loss": 1.8686, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.4154535274356103, |
| "grad_norm": 5.415910243988037, |
| "learning_rate": 4.307764091078761e-05, |
| "loss": 2.2398, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.4165733482642777, |
| "grad_norm": 9.693281173706055, |
| "learning_rate": 4.305897723030982e-05, |
| "loss": 2.4323, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.4176931690929451, |
| "grad_norm": 6.599532127380371, |
| "learning_rate": 4.304031354983203e-05, |
| "loss": 2.0143, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.41881298992161253, |
| "grad_norm": 4.097227096557617, |
| "learning_rate": 4.302164986935424e-05, |
| "loss": 2.3824, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.41993281075027994, |
| "grad_norm": 3.5678653717041016, |
| "learning_rate": 4.300298618887645e-05, |
| "loss": 2.0583, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.42105263157894735, |
| "grad_norm": 13.594582557678223, |
| "learning_rate": 4.2984322508398656e-05, |
| "loss": 2.3702, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.4221724524076148, |
| "grad_norm": 10.508759498596191, |
| "learning_rate": 4.296565882792087e-05, |
| "loss": 1.9474, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.4232922732362822, |
| "grad_norm": 2.452303647994995, |
| "learning_rate": 4.294699514744307e-05, |
| "loss": 2.3192, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.4244120940649496, |
| "grad_norm": 7.144927978515625, |
| "learning_rate": 4.292833146696529e-05, |
| "loss": 2.1651, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "grad_norm": 8.945828437805176, |
| "learning_rate": 4.2909667786487496e-05, |
| "loss": 2.3486, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.42665173572228443, |
| "grad_norm": 2.609912633895874, |
| "learning_rate": 4.289100410600971e-05, |
| "loss": 1.8995, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.42777155655095184, |
| "grad_norm": 7.373888969421387, |
| "learning_rate": 4.287234042553191e-05, |
| "loss": 2.0976, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.42889137737961924, |
| "grad_norm": 2.694624662399292, |
| "learning_rate": 4.2853676745054125e-05, |
| "loss": 1.9521, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.43001119820828665, |
| "grad_norm": 2.8247783184051514, |
| "learning_rate": 4.2835013064576336e-05, |
| "loss": 2.7777, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.4311310190369541, |
| "grad_norm": 9.070876121520996, |
| "learning_rate": 4.281634938409855e-05, |
| "loss": 2.4438, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.4322508398656215, |
| "grad_norm": 5.014525890350342, |
| "learning_rate": 4.279768570362075e-05, |
| "loss": 2.1444, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.4333706606942889, |
| "grad_norm": 3.661271333694458, |
| "learning_rate": 4.2779022023142965e-05, |
| "loss": 2.052, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.43449048152295633, |
| "grad_norm": 6.962841033935547, |
| "learning_rate": 4.2760358342665176e-05, |
| "loss": 2.1674, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.43561030235162373, |
| "grad_norm": 2.9479000568389893, |
| "learning_rate": 4.274169466218739e-05, |
| "loss": 2.0745, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.43673012318029114, |
| "grad_norm": 2.4860355854034424, |
| "learning_rate": 4.272303098170959e-05, |
| "loss": 2.2103, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.43784994400895855, |
| "grad_norm": 3.2063636779785156, |
| "learning_rate": 4.2704367301231805e-05, |
| "loss": 2.6216, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.43896976483762595, |
| "grad_norm": 8.925811767578125, |
| "learning_rate": 4.2685703620754016e-05, |
| "loss": 1.8027, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.4400895856662934, |
| "grad_norm": 2.438516616821289, |
| "learning_rate": 4.266703994027623e-05, |
| "loss": 2.2463, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.4412094064949608, |
| "grad_norm": 3.3323545455932617, |
| "learning_rate": 4.264837625979843e-05, |
| "loss": 2.0737, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.4423292273236282, |
| "grad_norm": 8.51876163482666, |
| "learning_rate": 4.2629712579320645e-05, |
| "loss": 2.366, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.44344904815229563, |
| "grad_norm": 15.145380973815918, |
| "learning_rate": 4.2611048898842856e-05, |
| "loss": 2.3532, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.44456886898096304, |
| "grad_norm": 7.819403171539307, |
| "learning_rate": 4.259238521836507e-05, |
| "loss": 2.0457, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.44568868980963045, |
| "grad_norm": 2.681534767150879, |
| "learning_rate": 4.257372153788727e-05, |
| "loss": 2.159, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.44680851063829785, |
| "grad_norm": 2.585684061050415, |
| "learning_rate": 4.255505785740948e-05, |
| "loss": 2.5404, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.4479283314669653, |
| "grad_norm": 6.74754524230957, |
| "learning_rate": 4.2536394176931696e-05, |
| "loss": 2.3685, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.4490481522956327, |
| "grad_norm": 2.9003231525421143, |
| "learning_rate": 4.25177304964539e-05, |
| "loss": 2.1618, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.4501679731243001, |
| "grad_norm": 13.633071899414062, |
| "learning_rate": 4.249906681597611e-05, |
| "loss": 2.375, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.45128779395296753, |
| "grad_norm": 9.91258430480957, |
| "learning_rate": 4.248040313549832e-05, |
| "loss": 2.0909, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.45240761478163494, |
| "grad_norm": 4.096499443054199, |
| "learning_rate": 4.246173945502053e-05, |
| "loss": 2.1711, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.45352743561030234, |
| "grad_norm": 10.29516887664795, |
| "learning_rate": 4.244307577454274e-05, |
| "loss": 2.4019, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.45464725643896975, |
| "grad_norm": 9.679535865783691, |
| "learning_rate": 4.242441209406495e-05, |
| "loss": 2.1732, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.45576707726763716, |
| "grad_norm": 2.7053027153015137, |
| "learning_rate": 4.240574841358716e-05, |
| "loss": 2.0555, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.4568868980963046, |
| "grad_norm": 7.90255069732666, |
| "learning_rate": 4.238708473310937e-05, |
| "loss": 2.3904, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.458006718924972, |
| "grad_norm": 3.999415397644043, |
| "learning_rate": 4.236842105263158e-05, |
| "loss": 2.2267, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.45912653975363943, |
| "grad_norm": 4.677366256713867, |
| "learning_rate": 4.234975737215379e-05, |
| "loss": 1.7037, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.46024636058230683, |
| "grad_norm": 10.746310234069824, |
| "learning_rate": 4.2331093691676e-05, |
| "loss": 2.3778, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.46136618141097424, |
| "grad_norm": 2.9237968921661377, |
| "learning_rate": 4.231243001119821e-05, |
| "loss": 2.2335, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.46248600223964165, |
| "grad_norm": 3.3537890911102295, |
| "learning_rate": 4.229376633072042e-05, |
| "loss": 2.0904, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.46360582306830905, |
| "grad_norm": 4.9723358154296875, |
| "learning_rate": 4.227510265024263e-05, |
| "loss": 2.0794, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.46472564389697646, |
| "grad_norm": 3.5420267581939697, |
| "learning_rate": 4.225643896976484e-05, |
| "loss": 1.95, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.4658454647256439, |
| "grad_norm": 5.858832359313965, |
| "learning_rate": 4.223777528928705e-05, |
| "loss": 2.3874, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.4669652855543113, |
| "grad_norm": 3.2437384128570557, |
| "learning_rate": 4.221911160880926e-05, |
| "loss": 1.9892, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.46808510638297873, |
| "grad_norm": 8.610901832580566, |
| "learning_rate": 4.220044792833147e-05, |
| "loss": 2.8553, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.46920492721164614, |
| "grad_norm": 8.59118938446045, |
| "learning_rate": 4.218178424785368e-05, |
| "loss": 2.4644, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.47032474804031354, |
| "grad_norm": 6.852227210998535, |
| "learning_rate": 4.216312056737589e-05, |
| "loss": 2.1337, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.47144456886898095, |
| "grad_norm": 6.020224571228027, |
| "learning_rate": 4.21444568868981e-05, |
| "loss": 1.9491, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.47256438969764836, |
| "grad_norm": 4.580352783203125, |
| "learning_rate": 4.2125793206420306e-05, |
| "loss": 2.6378, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.47368421052631576, |
| "grad_norm": 10.987154006958008, |
| "learning_rate": 4.210712952594252e-05, |
| "loss": 2.3521, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.4748040313549832, |
| "grad_norm": 2.8252968788146973, |
| "learning_rate": 4.208846584546472e-05, |
| "loss": 2.3216, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.47592385218365063, |
| "grad_norm": 5.822597503662109, |
| "learning_rate": 4.2069802164986935e-05, |
| "loss": 1.9804, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.47704367301231804, |
| "grad_norm": 3.373899221420288, |
| "learning_rate": 4.2051138484509146e-05, |
| "loss": 1.9312, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.47816349384098544, |
| "grad_norm": 8.621574401855469, |
| "learning_rate": 4.203247480403136e-05, |
| "loss": 2.1056, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.47928331466965285, |
| "grad_norm": 3.1795461177825928, |
| "learning_rate": 4.201381112355356e-05, |
| "loss": 1.9162, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.48040313549832026, |
| "grad_norm": 3.5701396465301514, |
| "learning_rate": 4.1995147443075775e-05, |
| "loss": 2.3904, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.48152295632698766, |
| "grad_norm": 2.4584646224975586, |
| "learning_rate": 4.1976483762597986e-05, |
| "loss": 1.9739, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.4826427771556551, |
| "grad_norm": 9.168150901794434, |
| "learning_rate": 4.19578200821202e-05, |
| "loss": 2.242, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.48376259798432253, |
| "grad_norm": 6.235483169555664, |
| "learning_rate": 4.19391564016424e-05, |
| "loss": 2.0577, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.48488241881298993, |
| "grad_norm": 4.428210735321045, |
| "learning_rate": 4.1920492721164615e-05, |
| "loss": 2.3219, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.48600223964165734, |
| "grad_norm": 10.322796821594238, |
| "learning_rate": 4.1901829040686826e-05, |
| "loss": 2.4196, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.48712206047032475, |
| "grad_norm": 11.971220016479492, |
| "learning_rate": 4.188316536020904e-05, |
| "loss": 2.221, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.48824188129899215, |
| "grad_norm": 2.4789071083068848, |
| "learning_rate": 4.186450167973124e-05, |
| "loss": 1.9658, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.48936170212765956, |
| "grad_norm": 3.5437817573547363, |
| "learning_rate": 4.1845837999253455e-05, |
| "loss": 2.3086, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.49048152295632697, |
| "grad_norm": 2.637206554412842, |
| "learning_rate": 4.1827174318775666e-05, |
| "loss": 2.0656, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.4916013437849944, |
| "grad_norm": 2.3072986602783203, |
| "learning_rate": 4.180851063829788e-05, |
| "loss": 1.8875, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.49272116461366183, |
| "grad_norm": 11.45031452178955, |
| "learning_rate": 4.178984695782008e-05, |
| "loss": 1.8249, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.49384098544232924, |
| "grad_norm": 8.976868629455566, |
| "learning_rate": 4.1771183277342295e-05, |
| "loss": 2.4993, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.49496080627099664, |
| "grad_norm": 2.619194507598877, |
| "learning_rate": 4.1752519596864506e-05, |
| "loss": 2.221, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.49608062709966405, |
| "grad_norm": 10.117256164550781, |
| "learning_rate": 4.173385591638672e-05, |
| "loss": 1.8587, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.49720044792833146, |
| "grad_norm": 2.874436140060425, |
| "learning_rate": 4.171519223590892e-05, |
| "loss": 1.5831, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.49832026875699886, |
| "grad_norm": 10.60855484008789, |
| "learning_rate": 4.1696528555431135e-05, |
| "loss": 2.483, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.49944008958566627, |
| "grad_norm": 5.419251441955566, |
| "learning_rate": 4.167786487495334e-05, |
| "loss": 1.905, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.5005599104143337, |
| "grad_norm": 4.474939823150635, |
| "learning_rate": 4.165920119447555e-05, |
| "loss": 2.1842, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.5016797312430011, |
| "grad_norm": 2.611745595932007, |
| "learning_rate": 4.164053751399776e-05, |
| "loss": 2.2377, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.5027995520716685, |
| "grad_norm": 3.1102752685546875, |
| "learning_rate": 4.162187383351997e-05, |
| "loss": 1.7457, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.503919372900336, |
| "grad_norm": 3.362260341644287, |
| "learning_rate": 4.160321015304218e-05, |
| "loss": 2.2162, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.5050391937290034, |
| "grad_norm": 5.862063407897949, |
| "learning_rate": 4.158454647256439e-05, |
| "loss": 1.9277, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.5061590145576708, |
| "grad_norm": 2.268481969833374, |
| "learning_rate": 4.15658827920866e-05, |
| "loss": 2.0642, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.5072788353863382, |
| "grad_norm": 3.2130661010742188, |
| "learning_rate": 4.154721911160881e-05, |
| "loss": 2.2511, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.5083986562150056, |
| "grad_norm": 9.958855628967285, |
| "learning_rate": 4.152855543113102e-05, |
| "loss": 2.8674, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.509518477043673, |
| "grad_norm": 3.7821731567382812, |
| "learning_rate": 4.150989175065323e-05, |
| "loss": 2.2098, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.5106382978723404, |
| "grad_norm": 3.210670232772827, |
| "learning_rate": 4.149122807017544e-05, |
| "loss": 2.1041, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.5117581187010078, |
| "grad_norm": 12.174056053161621, |
| "learning_rate": 4.147256438969765e-05, |
| "loss": 2.1001, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.5128779395296752, |
| "grad_norm": 10.776714324951172, |
| "learning_rate": 4.145390070921986e-05, |
| "loss": 2.2956, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.5139977603583427, |
| "grad_norm": 7.005626201629639, |
| "learning_rate": 4.143523702874207e-05, |
| "loss": 2.2044, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.5151175811870101, |
| "grad_norm": 11.280997276306152, |
| "learning_rate": 4.141657334826428e-05, |
| "loss": 2.3835, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.5162374020156775, |
| "grad_norm": 8.00539779663086, |
| "learning_rate": 4.139790966778649e-05, |
| "loss": 2.15, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.5173572228443449, |
| "grad_norm": 11.379185676574707, |
| "learning_rate": 4.13792459873087e-05, |
| "loss": 2.2607, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.5184770436730123, |
| "grad_norm": 3.4828784465789795, |
| "learning_rate": 4.136058230683091e-05, |
| "loss": 2.6439, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.5195968645016797, |
| "grad_norm": 8.438654899597168, |
| "learning_rate": 4.134191862635312e-05, |
| "loss": 1.7996, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.5207166853303471, |
| "grad_norm": 3.8132407665252686, |
| "learning_rate": 4.132325494587533e-05, |
| "loss": 2.1626, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.5218365061590146, |
| "grad_norm": 11.904292106628418, |
| "learning_rate": 4.130459126539754e-05, |
| "loss": 2.5126, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.522956326987682, |
| "grad_norm": 8.020877838134766, |
| "learning_rate": 4.1285927584919745e-05, |
| "loss": 2.3528, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.5240761478163494, |
| "grad_norm": 2.708252191543579, |
| "learning_rate": 4.126726390444196e-05, |
| "loss": 2.0487, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.5251959686450168, |
| "grad_norm": 3.0927486419677734, |
| "learning_rate": 4.124860022396417e-05, |
| "loss": 1.6742, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.5263157894736842, |
| "grad_norm": 12.992857933044434, |
| "learning_rate": 4.122993654348637e-05, |
| "loss": 1.9681, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.5274356103023516, |
| "grad_norm": 6.5138325691223145, |
| "learning_rate": 4.1211272863008585e-05, |
| "loss": 1.923, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.528555431131019, |
| "grad_norm": 3.025493621826172, |
| "learning_rate": 4.1192609182530796e-05, |
| "loss": 1.6204, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.5296752519596865, |
| "grad_norm": 3.9649546146392822, |
| "learning_rate": 4.117394550205301e-05, |
| "loss": 2.3652, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.5307950727883539, |
| "grad_norm": 3.212306499481201, |
| "learning_rate": 4.115528182157521e-05, |
| "loss": 2.0342, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.5319148936170213, |
| "grad_norm": 9.27729320526123, |
| "learning_rate": 4.1136618141097425e-05, |
| "loss": 1.9462, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.5330347144456887, |
| "grad_norm": 8.476268768310547, |
| "learning_rate": 4.1117954460619636e-05, |
| "loss": 2.3341, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.5341545352743561, |
| "grad_norm": 3.2790377140045166, |
| "learning_rate": 4.109929078014185e-05, |
| "loss": 2.1268, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.5352743561030235, |
| "grad_norm": 2.6565237045288086, |
| "learning_rate": 4.108062709966405e-05, |
| "loss": 1.9036, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.5363941769316909, |
| "grad_norm": 6.67348051071167, |
| "learning_rate": 4.1061963419186265e-05, |
| "loss": 1.9455, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.5375139977603584, |
| "grad_norm": 8.719578742980957, |
| "learning_rate": 4.1043299738708476e-05, |
| "loss": 2.1615, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.5386338185890257, |
| "grad_norm": 5.975245475769043, |
| "learning_rate": 4.102463605823069e-05, |
| "loss": 2.1647, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.5397536394176932, |
| "grad_norm": 2.856062412261963, |
| "learning_rate": 4.100597237775289e-05, |
| "loss": 2.0456, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.5408734602463606, |
| "grad_norm": 2.8386130332946777, |
| "learning_rate": 4.0987308697275105e-05, |
| "loss": 2.1015, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.541993281075028, |
| "grad_norm": 3.556990623474121, |
| "learning_rate": 4.0968645016797316e-05, |
| "loss": 2.2417, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.5431131019036954, |
| "grad_norm": 7.924489498138428, |
| "learning_rate": 4.094998133631953e-05, |
| "loss": 1.9925, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.5442329227323628, |
| "grad_norm": 11.762128829956055, |
| "learning_rate": 4.093131765584173e-05, |
| "loss": 2.5949, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.5453527435610303, |
| "grad_norm": 7.913935661315918, |
| "learning_rate": 4.0912653975363945e-05, |
| "loss": 2.0106, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.5464725643896976, |
| "grad_norm": 8.176780700683594, |
| "learning_rate": 4.0893990294886156e-05, |
| "loss": 2.1813, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.5475923852183651, |
| "grad_norm": 2.9236576557159424, |
| "learning_rate": 4.087532661440837e-05, |
| "loss": 2.1256, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.5487122060470325, |
| "grad_norm": 12.421939849853516, |
| "learning_rate": 4.085666293393057e-05, |
| "loss": 2.2168, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.5498320268756999, |
| "grad_norm": 2.374150514602661, |
| "learning_rate": 4.0837999253452785e-05, |
| "loss": 2.1841, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.5509518477043673, |
| "grad_norm": 5.778265953063965, |
| "learning_rate": 4.081933557297499e-05, |
| "loss": 1.866, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.5520716685330347, |
| "grad_norm": 2.9031143188476562, |
| "learning_rate": 4.08006718924972e-05, |
| "loss": 2.1778, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.5531914893617021, |
| "grad_norm": 2.8752217292785645, |
| "learning_rate": 4.078200821201941e-05, |
| "loss": 2.1301, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.5543113101903695, |
| "grad_norm": 6.815023899078369, |
| "learning_rate": 4.076334453154162e-05, |
| "loss": 2.1037, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.555431131019037, |
| "grad_norm": 3.5605039596557617, |
| "learning_rate": 4.074468085106383e-05, |
| "loss": 2.181, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.5565509518477044, |
| "grad_norm": 3.3536183834075928, |
| "learning_rate": 4.072601717058604e-05, |
| "loss": 2.1846, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.5576707726763718, |
| "grad_norm": 14.845537185668945, |
| "learning_rate": 4.070735349010825e-05, |
| "loss": 2.332, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.5587905935050392, |
| "grad_norm": 6.083976745605469, |
| "learning_rate": 4.068868980963046e-05, |
| "loss": 2.1927, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.5599104143337066, |
| "grad_norm": 3.194537401199341, |
| "learning_rate": 4.067002612915267e-05, |
| "loss": 2.3715, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.561030235162374, |
| "grad_norm": 2.682272434234619, |
| "learning_rate": 4.065136244867488e-05, |
| "loss": 1.95, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.5621500559910414, |
| "grad_norm": 3.429429054260254, |
| "learning_rate": 4.063269876819709e-05, |
| "loss": 2.2317, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.5632698768197089, |
| "grad_norm": 6.22359037399292, |
| "learning_rate": 4.06140350877193e-05, |
| "loss": 2.2734, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.5643896976483762, |
| "grad_norm": 12.685219764709473, |
| "learning_rate": 4.059537140724151e-05, |
| "loss": 2.3362, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.5655095184770437, |
| "grad_norm": 3.122385025024414, |
| "learning_rate": 4.057670772676372e-05, |
| "loss": 2.2147, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.5666293393057111, |
| "grad_norm": 3.515317678451538, |
| "learning_rate": 4.055804404628593e-05, |
| "loss": 2.3661, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.5677491601343785, |
| "grad_norm": 5.837533473968506, |
| "learning_rate": 4.053938036580814e-05, |
| "loss": 2.057, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.568868980963046, |
| "grad_norm": 2.728402614593506, |
| "learning_rate": 4.052071668533035e-05, |
| "loss": 1.9779, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.5699888017917133, |
| "grad_norm": 5.042017459869385, |
| "learning_rate": 4.050205300485256e-05, |
| "loss": 1.8092, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.5711086226203808, |
| "grad_norm": 9.7918701171875, |
| "learning_rate": 4.048338932437477e-05, |
| "loss": 2.4933, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.5722284434490481, |
| "grad_norm": 3.005107879638672, |
| "learning_rate": 4.046472564389698e-05, |
| "loss": 2.2101, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.5733482642777156, |
| "grad_norm": 4.867323875427246, |
| "learning_rate": 4.044606196341919e-05, |
| "loss": 2.0726, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.574468085106383, |
| "grad_norm": 4.434531211853027, |
| "learning_rate": 4.0427398282941395e-05, |
| "loss": 2.5219, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.5755879059350504, |
| "grad_norm": 9.07414722442627, |
| "learning_rate": 4.040873460246361e-05, |
| "loss": 2.2031, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.5767077267637178, |
| "grad_norm": 2.6722495555877686, |
| "learning_rate": 4.039007092198582e-05, |
| "loss": 2.2475, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.5778275475923852, |
| "grad_norm": 6.318906784057617, |
| "learning_rate": 4.037140724150802e-05, |
| "loss": 2.1646, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.5789473684210527, |
| "grad_norm": 2.754269599914551, |
| "learning_rate": 4.0352743561030235e-05, |
| "loss": 2.6936, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.58006718924972, |
| "grad_norm": 8.491806983947754, |
| "learning_rate": 4.0334079880552446e-05, |
| "loss": 2.308, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.5811870100783875, |
| "grad_norm": 4.256706714630127, |
| "learning_rate": 4.031541620007466e-05, |
| "loss": 1.9769, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.5823068309070548, |
| "grad_norm": 8.133840560913086, |
| "learning_rate": 4.029675251959686e-05, |
| "loss": 1.8873, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.5834266517357223, |
| "grad_norm": 3.6523616313934326, |
| "learning_rate": 4.0278088839119075e-05, |
| "loss": 1.9991, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.5845464725643897, |
| "grad_norm": 2.976468324661255, |
| "learning_rate": 4.0259425158641286e-05, |
| "loss": 2.6573, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.5856662933930571, |
| "grad_norm": 8.252400398254395, |
| "learning_rate": 4.02407614781635e-05, |
| "loss": 2.221, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.5867861142217246, |
| "grad_norm": 3.0009639263153076, |
| "learning_rate": 4.02220977976857e-05, |
| "loss": 2.2523, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.5879059350503919, |
| "grad_norm": 2.764678955078125, |
| "learning_rate": 4.0203434117207915e-05, |
| "loss": 2.0534, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.5890257558790594, |
| "grad_norm": 2.818638563156128, |
| "learning_rate": 4.0184770436730126e-05, |
| "loss": 1.9171, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.5901455767077267, |
| "grad_norm": 3.9487977027893066, |
| "learning_rate": 4.016610675625234e-05, |
| "loss": 2.1506, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.5912653975363942, |
| "grad_norm": 4.936847686767578, |
| "learning_rate": 4.014744307577454e-05, |
| "loss": 2.1909, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.5923852183650616, |
| "grad_norm": 14.60064697265625, |
| "learning_rate": 4.0128779395296755e-05, |
| "loss": 2.0533, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.593505039193729, |
| "grad_norm": 9.342129707336426, |
| "learning_rate": 4.0110115714818966e-05, |
| "loss": 2.0769, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.5946248600223965, |
| "grad_norm": 16.89434242248535, |
| "learning_rate": 4.009145203434118e-05, |
| "loss": 2.2267, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.5957446808510638, |
| "grad_norm": 6.977470397949219, |
| "learning_rate": 4.007278835386338e-05, |
| "loss": 2.954, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.5968645016797313, |
| "grad_norm": 2.928067922592163, |
| "learning_rate": 4.0054124673385595e-05, |
| "loss": 1.9651, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.5979843225083986, |
| "grad_norm": 2.7120723724365234, |
| "learning_rate": 4.00354609929078e-05, |
| "loss": 2.5949, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.5991041433370661, |
| "grad_norm": 2.3959896564483643, |
| "learning_rate": 4.001679731243002e-05, |
| "loss": 2.5269, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.6002239641657335, |
| "grad_norm": 4.766486644744873, |
| "learning_rate": 3.999813363195222e-05, |
| "loss": 2.1063, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.6013437849944009, |
| "grad_norm": 2.9101717472076416, |
| "learning_rate": 3.9979469951474435e-05, |
| "loss": 2.4363, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.6024636058230683, |
| "grad_norm": 2.3980298042297363, |
| "learning_rate": 3.996080627099664e-05, |
| "loss": 2.166, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.6035834266517357, |
| "grad_norm": 3.109349012374878, |
| "learning_rate": 3.994214259051885e-05, |
| "loss": 2.3349, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.6047032474804032, |
| "grad_norm": 3.364403486251831, |
| "learning_rate": 3.992347891004106e-05, |
| "loss": 2.036, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.6058230683090705, |
| "grad_norm": 6.84296989440918, |
| "learning_rate": 3.990481522956327e-05, |
| "loss": 2.1545, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.606942889137738, |
| "grad_norm": 3.494910717010498, |
| "learning_rate": 3.988615154908548e-05, |
| "loss": 2.0747, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.6080627099664053, |
| "grad_norm": 11.233692169189453, |
| "learning_rate": 3.986748786860769e-05, |
| "loss": 2.1933, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.6091825307950728, |
| "grad_norm": 2.6794285774230957, |
| "learning_rate": 3.98488241881299e-05, |
| "loss": 1.9889, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.6103023516237402, |
| "grad_norm": 6.740621089935303, |
| "learning_rate": 3.983016050765211e-05, |
| "loss": 2.4853, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.6114221724524076, |
| "grad_norm": 3.250119686126709, |
| "learning_rate": 3.981149682717432e-05, |
| "loss": 2.2843, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.6125419932810751, |
| "grad_norm": 5.2820940017700195, |
| "learning_rate": 3.979283314669653e-05, |
| "loss": 2.0738, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.6136618141097424, |
| "grad_norm": 5.155092716217041, |
| "learning_rate": 3.977416946621874e-05, |
| "loss": 2.0983, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.6147816349384099, |
| "grad_norm": 10.836530685424805, |
| "learning_rate": 3.975550578574095e-05, |
| "loss": 1.8994, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.6159014557670772, |
| "grad_norm": 4.8996968269348145, |
| "learning_rate": 3.973684210526316e-05, |
| "loss": 2.0153, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.6170212765957447, |
| "grad_norm": 12.028742790222168, |
| "learning_rate": 3.971817842478537e-05, |
| "loss": 2.4317, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.6181410974244121, |
| "grad_norm": 10.310006141662598, |
| "learning_rate": 3.969951474430758e-05, |
| "loss": 2.5903, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.6192609182530795, |
| "grad_norm": 3.7316179275512695, |
| "learning_rate": 3.968085106382979e-05, |
| "loss": 2.148, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.620380739081747, |
| "grad_norm": 4.745426177978516, |
| "learning_rate": 3.9662187383352e-05, |
| "loss": 2.3905, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.6215005599104143, |
| "grad_norm": 12.419564247131348, |
| "learning_rate": 3.9643523702874205e-05, |
| "loss": 2.6076, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.6226203807390818, |
| "grad_norm": 3.6548733711242676, |
| "learning_rate": 3.962486002239642e-05, |
| "loss": 2.2913, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.6237402015677491, |
| "grad_norm": 2.8344454765319824, |
| "learning_rate": 3.960619634191863e-05, |
| "loss": 2.3774, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.6248600223964166, |
| "grad_norm": 4.7610321044921875, |
| "learning_rate": 3.958753266144084e-05, |
| "loss": 2.2121, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.6259798432250839, |
| "grad_norm": 3.1101725101470947, |
| "learning_rate": 3.9568868980963045e-05, |
| "loss": 2.3034, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.6270996640537514, |
| "grad_norm": 2.6766905784606934, |
| "learning_rate": 3.955020530048526e-05, |
| "loss": 1.7205, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.6282194848824189, |
| "grad_norm": 3.271083116531372, |
| "learning_rate": 3.953154162000747e-05, |
| "loss": 2.4918, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.6293393057110862, |
| "grad_norm": 7.914976119995117, |
| "learning_rate": 3.951287793952967e-05, |
| "loss": 2.1463, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.6304591265397537, |
| "grad_norm": 3.1537246704101562, |
| "learning_rate": 3.9494214259051885e-05, |
| "loss": 2.0338, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.631578947368421, |
| "grad_norm": 10.18811321258545, |
| "learning_rate": 3.9475550578574096e-05, |
| "loss": 2.1563, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.6326987681970885, |
| "grad_norm": 11.715261459350586, |
| "learning_rate": 3.945688689809631e-05, |
| "loss": 2.3083, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.6338185890257558, |
| "grad_norm": 2.7163820266723633, |
| "learning_rate": 3.943822321761851e-05, |
| "loss": 2.3445, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.6349384098544233, |
| "grad_norm": 6.3636627197265625, |
| "learning_rate": 3.9419559537140725e-05, |
| "loss": 2.2905, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.6360582306830908, |
| "grad_norm": 3.9701311588287354, |
| "learning_rate": 3.9400895856662936e-05, |
| "loss": 1.8656, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.6371780515117581, |
| "grad_norm": 5.779101848602295, |
| "learning_rate": 3.938223217618515e-05, |
| "loss": 2.1488, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.6382978723404256, |
| "grad_norm": 3.038818359375, |
| "learning_rate": 3.936356849570735e-05, |
| "loss": 2.2644, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.6394176931690929, |
| "grad_norm": 2.684335470199585, |
| "learning_rate": 3.9344904815229565e-05, |
| "loss": 2.1819, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.6405375139977604, |
| "grad_norm": 11.84406566619873, |
| "learning_rate": 3.9326241134751776e-05, |
| "loss": 2.2329, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.6416573348264277, |
| "grad_norm": 2.978997230529785, |
| "learning_rate": 3.930757745427399e-05, |
| "loss": 2.2307, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.6427771556550952, |
| "grad_norm": 3.1249163150787354, |
| "learning_rate": 3.928891377379619e-05, |
| "loss": 1.8239, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.6438969764837627, |
| "grad_norm": 8.90749454498291, |
| "learning_rate": 3.9270250093318405e-05, |
| "loss": 2.3477, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.64501679731243, |
| "grad_norm": 8.59403133392334, |
| "learning_rate": 3.925158641284061e-05, |
| "loss": 1.6853, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.6461366181410975, |
| "grad_norm": 3.567573070526123, |
| "learning_rate": 3.923292273236283e-05, |
| "loss": 1.7603, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.6472564389697648, |
| "grad_norm": 2.913238286972046, |
| "learning_rate": 3.921425905188503e-05, |
| "loss": 1.9284, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.6483762597984323, |
| "grad_norm": 5.024287700653076, |
| "learning_rate": 3.9195595371407245e-05, |
| "loss": 1.9221, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.6494960806270996, |
| "grad_norm": 13.80717945098877, |
| "learning_rate": 3.917693169092945e-05, |
| "loss": 2.1171, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.6506159014557671, |
| "grad_norm": 2.929304599761963, |
| "learning_rate": 3.915826801045167e-05, |
| "loss": 1.929, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.6517357222844344, |
| "grad_norm": 2.820366859436035, |
| "learning_rate": 3.913960432997387e-05, |
| "loss": 2.1108, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.6528555431131019, |
| "grad_norm": 3.1681950092315674, |
| "learning_rate": 3.9120940649496085e-05, |
| "loss": 2.4005, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.6539753639417694, |
| "grad_norm": 10.097253799438477, |
| "learning_rate": 3.910227696901829e-05, |
| "loss": 1.8262, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.6550951847704367, |
| "grad_norm": 3.0544557571411133, |
| "learning_rate": 3.90836132885405e-05, |
| "loss": 2.2202, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.6562150055991042, |
| "grad_norm": 7.928321838378906, |
| "learning_rate": 3.906494960806271e-05, |
| "loss": 1.8896, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.6573348264277715, |
| "grad_norm": 12.526985168457031, |
| "learning_rate": 3.904628592758492e-05, |
| "loss": 2.263, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.658454647256439, |
| "grad_norm": 12.4088716506958, |
| "learning_rate": 3.902762224710713e-05, |
| "loss": 2.3226, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.6595744680851063, |
| "grad_norm": 8.499159812927246, |
| "learning_rate": 3.900895856662934e-05, |
| "loss": 1.9171, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.6606942889137738, |
| "grad_norm": 6.148478031158447, |
| "learning_rate": 3.899029488615155e-05, |
| "loss": 1.7816, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.6618141097424413, |
| "grad_norm": 2.6093831062316895, |
| "learning_rate": 3.897163120567376e-05, |
| "loss": 1.874, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.6629339305711086, |
| "grad_norm": 2.9577527046203613, |
| "learning_rate": 3.895296752519597e-05, |
| "loss": 2.4124, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.6640537513997761, |
| "grad_norm": 2.769073724746704, |
| "learning_rate": 3.893430384471818e-05, |
| "loss": 2.3388, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.6651735722284434, |
| "grad_norm": 3.398643970489502, |
| "learning_rate": 3.891564016424039e-05, |
| "loss": 2.2894, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.6662933930571109, |
| "grad_norm": 3.1375699043273926, |
| "learning_rate": 3.88969764837626e-05, |
| "loss": 2.4933, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.6674132138857782, |
| "grad_norm": 8.086012840270996, |
| "learning_rate": 3.887831280328481e-05, |
| "loss": 2.1847, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.6685330347144457, |
| "grad_norm": 7.541558742523193, |
| "learning_rate": 3.8859649122807015e-05, |
| "loss": 2.0018, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.6696528555431132, |
| "grad_norm": 8.947002410888672, |
| "learning_rate": 3.884098544232923e-05, |
| "loss": 2.3184, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.6707726763717805, |
| "grad_norm": 9.351658821105957, |
| "learning_rate": 3.882232176185144e-05, |
| "loss": 2.3077, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.671892497200448, |
| "grad_norm": 6.452417850494385, |
| "learning_rate": 3.880365808137365e-05, |
| "loss": 1.9899, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.6730123180291153, |
| "grad_norm": 7.518797874450684, |
| "learning_rate": 3.8784994400895855e-05, |
| "loss": 2.3504, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.6741321388577828, |
| "grad_norm": 11.191749572753906, |
| "learning_rate": 3.876633072041807e-05, |
| "loss": 2.2785, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.6752519596864501, |
| "grad_norm": 3.318284273147583, |
| "learning_rate": 3.874766703994028e-05, |
| "loss": 2.1379, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.6763717805151176, |
| "grad_norm": 11.762707710266113, |
| "learning_rate": 3.872900335946249e-05, |
| "loss": 2.1742, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.6774916013437849, |
| "grad_norm": 2.576070785522461, |
| "learning_rate": 3.8710339678984695e-05, |
| "loss": 2.033, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.6786114221724524, |
| "grad_norm": 3.21813702583313, |
| "learning_rate": 3.869167599850691e-05, |
| "loss": 2.1262, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.6797312430011199, |
| "grad_norm": 5.945693016052246, |
| "learning_rate": 3.867301231802912e-05, |
| "loss": 2.3859, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.6808510638297872, |
| "grad_norm": 4.385049819946289, |
| "learning_rate": 3.865434863755133e-05, |
| "loss": 1.8462, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.6819708846584547, |
| "grad_norm": 3.1201934814453125, |
| "learning_rate": 3.8635684957073535e-05, |
| "loss": 2.2583, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.683090705487122, |
| "grad_norm": 3.0420446395874023, |
| "learning_rate": 3.8617021276595746e-05, |
| "loss": 2.3901, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.6842105263157895, |
| "grad_norm": 2.6930365562438965, |
| "learning_rate": 3.859835759611796e-05, |
| "loss": 2.2662, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.6853303471444568, |
| "grad_norm": 4.4401984214782715, |
| "learning_rate": 3.857969391564016e-05, |
| "loss": 2.472, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.6864501679731243, |
| "grad_norm": 2.8523037433624268, |
| "learning_rate": 3.8561030235162375e-05, |
| "loss": 1.9654, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.6875699888017918, |
| "grad_norm": 13.0241060256958, |
| "learning_rate": 3.8542366554684587e-05, |
| "loss": 2.3012, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.6886898096304591, |
| "grad_norm": 3.5604312419891357, |
| "learning_rate": 3.85237028742068e-05, |
| "loss": 2.0856, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.6898096304591266, |
| "grad_norm": 2.850850820541382, |
| "learning_rate": 3.8505039193729e-05, |
| "loss": 2.0274, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.6909294512877939, |
| "grad_norm": 8.735082626342773, |
| "learning_rate": 3.8486375513251215e-05, |
| "loss": 1.6773, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.6920492721164614, |
| "grad_norm": 3.8772952556610107, |
| "learning_rate": 3.8467711832773427e-05, |
| "loss": 2.5705, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.6931690929451287, |
| "grad_norm": 8.324105262756348, |
| "learning_rate": 3.844904815229564e-05, |
| "loss": 2.331, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.6942889137737962, |
| "grad_norm": 3.392038583755493, |
| "learning_rate": 3.843038447181784e-05, |
| "loss": 2.4718, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.6954087346024636, |
| "grad_norm": 3.956043004989624, |
| "learning_rate": 3.8411720791340055e-05, |
| "loss": 2.0328, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.696528555431131, |
| "grad_norm": 3.05572247505188, |
| "learning_rate": 3.839305711086226e-05, |
| "loss": 1.9317, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.6976483762597985, |
| "grad_norm": 3.7026829719543457, |
| "learning_rate": 3.837439343038448e-05, |
| "loss": 2.1722, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.6987681970884658, |
| "grad_norm": 7.879319190979004, |
| "learning_rate": 3.835572974990668e-05, |
| "loss": 1.8841, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.6998880179171333, |
| "grad_norm": 11.358241081237793, |
| "learning_rate": 3.8337066069428895e-05, |
| "loss": 2.5414, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.7010078387458006, |
| "grad_norm": 2.9824588298797607, |
| "learning_rate": 3.83184023889511e-05, |
| "loss": 2.1727, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.7021276595744681, |
| "grad_norm": 11.302024841308594, |
| "learning_rate": 3.829973870847332e-05, |
| "loss": 2.2552, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.7032474804031354, |
| "grad_norm": 13.782984733581543, |
| "learning_rate": 3.828107502799552e-05, |
| "loss": 2.3492, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.7043673012318029, |
| "grad_norm": 2.993431329727173, |
| "learning_rate": 3.8262411347517735e-05, |
| "loss": 1.8148, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.7054871220604704, |
| "grad_norm": 7.389340400695801, |
| "learning_rate": 3.824374766703994e-05, |
| "loss": 1.9859, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.7066069428891377, |
| "grad_norm": 9.87246036529541, |
| "learning_rate": 3.822508398656215e-05, |
| "loss": 1.997, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.7077267637178052, |
| "grad_norm": 3.7313337326049805, |
| "learning_rate": 3.820642030608436e-05, |
| "loss": 2.1025, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.7088465845464725, |
| "grad_norm": 12.712873458862305, |
| "learning_rate": 3.818775662560657e-05, |
| "loss": 2.1316, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.70996640537514, |
| "grad_norm": 3.423029661178589, |
| "learning_rate": 3.816909294512878e-05, |
| "loss": 1.8786, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.7110862262038073, |
| "grad_norm": 2.487156391143799, |
| "learning_rate": 3.815042926465099e-05, |
| "loss": 2.2605, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.7122060470324748, |
| "grad_norm": 3.7588279247283936, |
| "learning_rate": 3.81317655841732e-05, |
| "loss": 2.1991, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.7133258678611423, |
| "grad_norm": 3.227130174636841, |
| "learning_rate": 3.811310190369541e-05, |
| "loss": 2.4743, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.7144456886898096, |
| "grad_norm": 7.917088985443115, |
| "learning_rate": 3.809443822321762e-05, |
| "loss": 2.1551, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.7155655095184771, |
| "grad_norm": 2.7183947563171387, |
| "learning_rate": 3.807577454273983e-05, |
| "loss": 2.188, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.7166853303471444, |
| "grad_norm": 3.0164196491241455, |
| "learning_rate": 3.805711086226204e-05, |
| "loss": 2.1092, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.7178051511758119, |
| "grad_norm": 3.0733413696289062, |
| "learning_rate": 3.803844718178425e-05, |
| "loss": 2.0739, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.7189249720044792, |
| "grad_norm": 3.4838147163391113, |
| "learning_rate": 3.801978350130646e-05, |
| "loss": 2.5713, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.7200447928331467, |
| "grad_norm": 7.422530174255371, |
| "learning_rate": 3.8001119820828665e-05, |
| "loss": 2.1976, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.7211646136618141, |
| "grad_norm": 3.010465383529663, |
| "learning_rate": 3.798245614035088e-05, |
| "loss": 2.1626, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.7222844344904815, |
| "grad_norm": 2.360297918319702, |
| "learning_rate": 3.796379245987309e-05, |
| "loss": 1.8999, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.723404255319149, |
| "grad_norm": 4.043231964111328, |
| "learning_rate": 3.79451287793953e-05, |
| "loss": 2.4137, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.7245240761478163, |
| "grad_norm": 13.390426635742188, |
| "learning_rate": 3.7926465098917505e-05, |
| "loss": 2.3208, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.7256438969764838, |
| "grad_norm": 3.9308841228485107, |
| "learning_rate": 3.790780141843972e-05, |
| "loss": 2.2158, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.7267637178051511, |
| "grad_norm": 7.398802280426025, |
| "learning_rate": 3.788913773796193e-05, |
| "loss": 2.3551, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.7278835386338186, |
| "grad_norm": 9.143538475036621, |
| "learning_rate": 3.787047405748414e-05, |
| "loss": 2.2071, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.729003359462486, |
| "grad_norm": 3.0637400150299072, |
| "learning_rate": 3.7851810377006345e-05, |
| "loss": 2.0304, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.7301231802911534, |
| "grad_norm": 3.206883668899536, |
| "learning_rate": 3.7833146696528557e-05, |
| "loss": 2.2099, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.7312430011198209, |
| "grad_norm": 10.21704387664795, |
| "learning_rate": 3.781448301605077e-05, |
| "loss": 2.4963, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.7323628219484882, |
| "grad_norm": 11.84740161895752, |
| "learning_rate": 3.779581933557298e-05, |
| "loss": 2.342, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.7334826427771557, |
| "grad_norm": 9.463152885437012, |
| "learning_rate": 3.7777155655095185e-05, |
| "loss": 2.2016, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.734602463605823, |
| "grad_norm": 3.3957138061523438, |
| "learning_rate": 3.7758491974617397e-05, |
| "loss": 2.285, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.7357222844344905, |
| "grad_norm": 3.3638765811920166, |
| "learning_rate": 3.773982829413961e-05, |
| "loss": 2.2848, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.7368421052631579, |
| "grad_norm": 8.35213565826416, |
| "learning_rate": 3.772116461366181e-05, |
| "loss": 2.1612, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.7379619260918253, |
| "grad_norm": 7.612375259399414, |
| "learning_rate": 3.7702500933184025e-05, |
| "loss": 2.2899, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.7390817469204927, |
| "grad_norm": 7.650971412658691, |
| "learning_rate": 3.7683837252706237e-05, |
| "loss": 2.0612, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.7402015677491601, |
| "grad_norm": 3.540432929992676, |
| "learning_rate": 3.766517357222845e-05, |
| "loss": 2.0103, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.7413213885778276, |
| "grad_norm": 8.57761001586914, |
| "learning_rate": 3.764650989175065e-05, |
| "loss": 2.3906, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.7424412094064949, |
| "grad_norm": 11.053874969482422, |
| "learning_rate": 3.7627846211272865e-05, |
| "loss": 2.2103, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.7435610302351624, |
| "grad_norm": 8.416523933410645, |
| "learning_rate": 3.760918253079507e-05, |
| "loss": 2.3171, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.7446808510638298, |
| "grad_norm": 3.172659397125244, |
| "learning_rate": 3.759051885031729e-05, |
| "loss": 2.0747, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.7458006718924972, |
| "grad_norm": 3.0577268600463867, |
| "learning_rate": 3.757185516983949e-05, |
| "loss": 1.7781, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.7469204927211646, |
| "grad_norm": 2.7276387214660645, |
| "learning_rate": 3.7553191489361705e-05, |
| "loss": 2.3419, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.748040313549832, |
| "grad_norm": 6.0162529945373535, |
| "learning_rate": 3.753452780888391e-05, |
| "loss": 2.047, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.7491601343784995, |
| "grad_norm": 8.213367462158203, |
| "learning_rate": 3.751586412840613e-05, |
| "loss": 2.1848, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.7502799552071668, |
| "grad_norm": 2.8683581352233887, |
| "learning_rate": 3.749720044792833e-05, |
| "loss": 2.3152, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.7513997760358343, |
| "grad_norm": 7.736207485198975, |
| "learning_rate": 3.7478536767450545e-05, |
| "loss": 2.2199, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.7525195968645016, |
| "grad_norm": 13.607484817504883, |
| "learning_rate": 3.745987308697275e-05, |
| "loss": 2.0759, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.7536394176931691, |
| "grad_norm": 12.661739349365234, |
| "learning_rate": 3.744120940649496e-05, |
| "loss": 2.1309, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.7547592385218365, |
| "grad_norm": 7.0321364402771, |
| "learning_rate": 3.742254572601717e-05, |
| "loss": 2.2709, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.7558790593505039, |
| "grad_norm": 7.621607780456543, |
| "learning_rate": 3.7403882045539385e-05, |
| "loss": 1.9236, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.7569988801791714, |
| "grad_norm": 9.952698707580566, |
| "learning_rate": 3.738521836506159e-05, |
| "loss": 2.2815, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.7581187010078387, |
| "grad_norm": 3.3789877891540527, |
| "learning_rate": 3.73665546845838e-05, |
| "loss": 1.7131, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.7592385218365062, |
| "grad_norm": 5.496334075927734, |
| "learning_rate": 3.734789100410601e-05, |
| "loss": 2.07, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.7603583426651735, |
| "grad_norm": 6.971884250640869, |
| "learning_rate": 3.732922732362822e-05, |
| "loss": 2.0964, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.761478163493841, |
| "grad_norm": 8.502189636230469, |
| "learning_rate": 3.731056364315043e-05, |
| "loss": 2.1573, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.7625979843225084, |
| "grad_norm": 12.738436698913574, |
| "learning_rate": 3.729189996267264e-05, |
| "loss": 2.1589, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.7637178051511758, |
| "grad_norm": 4.9455790519714355, |
| "learning_rate": 3.727323628219485e-05, |
| "loss": 2.1109, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.7648376259798432, |
| "grad_norm": 4.243088245391846, |
| "learning_rate": 3.725457260171706e-05, |
| "loss": 1.96, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.7659574468085106, |
| "grad_norm": 3.8965704441070557, |
| "learning_rate": 3.723590892123927e-05, |
| "loss": 2.0415, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.7670772676371781, |
| "grad_norm": 3.0561602115631104, |
| "learning_rate": 3.7217245240761475e-05, |
| "loss": 1.8753, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.7681970884658454, |
| "grad_norm": 3.344120979309082, |
| "learning_rate": 3.719858156028369e-05, |
| "loss": 2.3332, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.7693169092945129, |
| "grad_norm": 15.658031463623047, |
| "learning_rate": 3.71799178798059e-05, |
| "loss": 1.8573, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.7704367301231803, |
| "grad_norm": 13.487674713134766, |
| "learning_rate": 3.716125419932811e-05, |
| "loss": 2.1022, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.7715565509518477, |
| "grad_norm": 4.366361141204834, |
| "learning_rate": 3.7142590518850315e-05, |
| "loss": 2.1056, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.7726763717805151, |
| "grad_norm": 6.940586566925049, |
| "learning_rate": 3.712392683837253e-05, |
| "loss": 1.7214, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.7737961926091825, |
| "grad_norm": 3.119396448135376, |
| "learning_rate": 3.710526315789474e-05, |
| "loss": 2.4966, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.77491601343785, |
| "grad_norm": 2.804882287979126, |
| "learning_rate": 3.708659947741695e-05, |
| "loss": 1.9811, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.7760358342665173, |
| "grad_norm": 6.220757484436035, |
| "learning_rate": 3.7067935796939155e-05, |
| "loss": 2.0228, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.7771556550951848, |
| "grad_norm": 7.664346218109131, |
| "learning_rate": 3.7049272116461367e-05, |
| "loss": 2.375, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.7782754759238522, |
| "grad_norm": 3.2381927967071533, |
| "learning_rate": 3.703060843598358e-05, |
| "loss": 2.1603, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.7793952967525196, |
| "grad_norm": 2.67271089553833, |
| "learning_rate": 3.701194475550579e-05, |
| "loss": 1.975, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.780515117581187, |
| "grad_norm": 8.897006034851074, |
| "learning_rate": 3.6993281075027995e-05, |
| "loss": 2.1017, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.7816349384098544, |
| "grad_norm": 4.098658084869385, |
| "learning_rate": 3.6974617394550207e-05, |
| "loss": 2.0624, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.7827547592385219, |
| "grad_norm": 9.428001403808594, |
| "learning_rate": 3.695595371407242e-05, |
| "loss": 1.9006, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.7838745800671892, |
| "grad_norm": 3.8001720905303955, |
| "learning_rate": 3.693729003359463e-05, |
| "loss": 2.2506, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.7849944008958567, |
| "grad_norm": 14.967480659484863, |
| "learning_rate": 3.6918626353116835e-05, |
| "loss": 2.4808, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.786114221724524, |
| "grad_norm": 5.58108377456665, |
| "learning_rate": 3.6899962672639047e-05, |
| "loss": 2.5445, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.7872340425531915, |
| "grad_norm": 4.169144153594971, |
| "learning_rate": 3.688129899216126e-05, |
| "loss": 2.2701, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.7883538633818589, |
| "grad_norm": 3.629635810852051, |
| "learning_rate": 3.686263531168346e-05, |
| "loss": 2.0769, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.7894736842105263, |
| "grad_norm": 3.2318367958068848, |
| "learning_rate": 3.6843971631205675e-05, |
| "loss": 1.9304, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.7905935050391937, |
| "grad_norm": 3.569641590118408, |
| "learning_rate": 3.682530795072788e-05, |
| "loss": 1.9362, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.7917133258678611, |
| "grad_norm": 3.6073529720306396, |
| "learning_rate": 3.68066442702501e-05, |
| "loss": 2.1551, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.7928331466965286, |
| "grad_norm": 2.949209690093994, |
| "learning_rate": 3.67879805897723e-05, |
| "loss": 1.7599, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.793952967525196, |
| "grad_norm": 7.541772842407227, |
| "learning_rate": 3.6769316909294515e-05, |
| "loss": 2.2318, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.7950727883538634, |
| "grad_norm": 3.108989953994751, |
| "learning_rate": 3.675065322881672e-05, |
| "loss": 2.1161, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.7961926091825308, |
| "grad_norm": 2.859032392501831, |
| "learning_rate": 3.673198954833894e-05, |
| "loss": 2.2659, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.7973124300111982, |
| "grad_norm": 4.491294860839844, |
| "learning_rate": 3.671332586786114e-05, |
| "loss": 1.4341, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.7984322508398656, |
| "grad_norm": 8.079992294311523, |
| "learning_rate": 3.6694662187383355e-05, |
| "loss": 2.1608, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.799552071668533, |
| "grad_norm": 3.3629186153411865, |
| "learning_rate": 3.667599850690556e-05, |
| "loss": 2.0803, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.8006718924972005, |
| "grad_norm": 7.034578800201416, |
| "learning_rate": 3.665733482642777e-05, |
| "loss": 2.2975, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.8017917133258678, |
| "grad_norm": 3.3165249824523926, |
| "learning_rate": 3.663867114594998e-05, |
| "loss": 2.0453, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.8029115341545353, |
| "grad_norm": 11.479082107543945, |
| "learning_rate": 3.6620007465472195e-05, |
| "loss": 1.9926, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.8040313549832027, |
| "grad_norm": 2.8620989322662354, |
| "learning_rate": 3.66013437849944e-05, |
| "loss": 2.0147, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.8051511758118701, |
| "grad_norm": 9.503447532653809, |
| "learning_rate": 3.658268010451661e-05, |
| "loss": 2.1835, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.8062709966405375, |
| "grad_norm": 2.9380719661712646, |
| "learning_rate": 3.656401642403882e-05, |
| "loss": 1.9345, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.8073908174692049, |
| "grad_norm": 9.880309104919434, |
| "learning_rate": 3.6545352743561035e-05, |
| "loss": 2.1885, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.8085106382978723, |
| "grad_norm": 8.49301528930664, |
| "learning_rate": 3.652668906308324e-05, |
| "loss": 1.7974, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.8096304591265397, |
| "grad_norm": 7.494529724121094, |
| "learning_rate": 3.650802538260545e-05, |
| "loss": 2.1994, |
| "step": 7230 |
| }, |
| { |
| "epoch": 0.8107502799552072, |
| "grad_norm": 2.999682664871216, |
| "learning_rate": 3.648936170212766e-05, |
| "loss": 2.2821, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.8118701007838746, |
| "grad_norm": 2.5797007083892822, |
| "learning_rate": 3.647069802164987e-05, |
| "loss": 2.1872, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.812989921612542, |
| "grad_norm": 9.615920066833496, |
| "learning_rate": 3.645203434117208e-05, |
| "loss": 2.3596, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.8141097424412094, |
| "grad_norm": 8.524604797363281, |
| "learning_rate": 3.6433370660694285e-05, |
| "loss": 2.6097, |
| "step": 7270 |
| }, |
| { |
| "epoch": 0.8152295632698768, |
| "grad_norm": 7.994124889373779, |
| "learning_rate": 3.64147069802165e-05, |
| "loss": 1.7589, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.8163493840985442, |
| "grad_norm": 2.902440071105957, |
| "learning_rate": 3.639604329973871e-05, |
| "loss": 2.1298, |
| "step": 7290 |
| }, |
| { |
| "epoch": 0.8174692049272116, |
| "grad_norm": 2.9455184936523438, |
| "learning_rate": 3.637737961926092e-05, |
| "loss": 2.2527, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.8185890257558791, |
| "grad_norm": 7.609272003173828, |
| "learning_rate": 3.6358715938783125e-05, |
| "loss": 2.2256, |
| "step": 7310 |
| }, |
| { |
| "epoch": 0.8197088465845465, |
| "grad_norm": 3.6720242500305176, |
| "learning_rate": 3.634005225830534e-05, |
| "loss": 2.034, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.8208286674132139, |
| "grad_norm": 6.270810604095459, |
| "learning_rate": 3.632138857782755e-05, |
| "loss": 1.9424, |
| "step": 7330 |
| }, |
| { |
| "epoch": 0.8219484882418813, |
| "grad_norm": 9.397404670715332, |
| "learning_rate": 3.630272489734976e-05, |
| "loss": 2.0945, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.8230683090705487, |
| "grad_norm": 3.0468692779541016, |
| "learning_rate": 3.6284061216871965e-05, |
| "loss": 2.382, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.8241881298992161, |
| "grad_norm": 5.612720966339111, |
| "learning_rate": 3.626539753639418e-05, |
| "loss": 1.9622, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.8253079507278835, |
| "grad_norm": 4.7055983543396, |
| "learning_rate": 3.624673385591639e-05, |
| "loss": 2.2322, |
| "step": 7370 |
| }, |
| { |
| "epoch": 0.826427771556551, |
| "grad_norm": 4.574550628662109, |
| "learning_rate": 3.62280701754386e-05, |
| "loss": 2.0364, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.8275475923852184, |
| "grad_norm": 9.704349517822266, |
| "learning_rate": 3.6209406494960805e-05, |
| "loss": 2.6025, |
| "step": 7390 |
| }, |
| { |
| "epoch": 0.8286674132138858, |
| "grad_norm": 3.6313247680664062, |
| "learning_rate": 3.6190742814483017e-05, |
| "loss": 2.0449, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.8297872340425532, |
| "grad_norm": 5.157100200653076, |
| "learning_rate": 3.617207913400523e-05, |
| "loss": 2.0293, |
| "step": 7410 |
| }, |
| { |
| "epoch": 0.8309070548712206, |
| "grad_norm": 11.102890968322754, |
| "learning_rate": 3.615341545352744e-05, |
| "loss": 2.3059, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.832026875699888, |
| "grad_norm": 6.320305347442627, |
| "learning_rate": 3.6134751773049645e-05, |
| "loss": 2.1117, |
| "step": 7430 |
| }, |
| { |
| "epoch": 0.8331466965285554, |
| "grad_norm": 9.381714820861816, |
| "learning_rate": 3.611608809257186e-05, |
| "loss": 2.5279, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.8342665173572228, |
| "grad_norm": 3.505153179168701, |
| "learning_rate": 3.609742441209407e-05, |
| "loss": 2.176, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.8353863381858903, |
| "grad_norm": 6.633389472961426, |
| "learning_rate": 3.607876073161628e-05, |
| "loss": 2.1151, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.8365061590145577, |
| "grad_norm": 2.6333770751953125, |
| "learning_rate": 3.6060097051138485e-05, |
| "loss": 2.4062, |
| "step": 7470 |
| }, |
| { |
| "epoch": 0.8376259798432251, |
| "grad_norm": 3.540119171142578, |
| "learning_rate": 3.60414333706607e-05, |
| "loss": 2.1977, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.8387458006718925, |
| "grad_norm": 2.524616003036499, |
| "learning_rate": 3.602276969018291e-05, |
| "loss": 2.1205, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.8398656215005599, |
| "grad_norm": 6.62229061126709, |
| "learning_rate": 3.600410600970511e-05, |
| "loss": 2.2605, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.8409854423292273, |
| "grad_norm": 3.3816375732421875, |
| "learning_rate": 3.5985442329227325e-05, |
| "loss": 2.0532, |
| "step": 7510 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "grad_norm": 2.873293161392212, |
| "learning_rate": 3.596677864874953e-05, |
| "loss": 2.251, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.8432250839865622, |
| "grad_norm": 8.764281272888184, |
| "learning_rate": 3.594811496827175e-05, |
| "loss": 2.2559, |
| "step": 7530 |
| }, |
| { |
| "epoch": 0.8443449048152296, |
| "grad_norm": 11.748472213745117, |
| "learning_rate": 3.592945128779395e-05, |
| "loss": 1.9883, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.845464725643897, |
| "grad_norm": 8.871268272399902, |
| "learning_rate": 3.5910787607316165e-05, |
| "loss": 2.2952, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.8465845464725644, |
| "grad_norm": 3.4777164459228516, |
| "learning_rate": 3.589212392683837e-05, |
| "loss": 2.1871, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.8477043673012318, |
| "grad_norm": 13.322107315063477, |
| "learning_rate": 3.587346024636059e-05, |
| "loss": 2.2841, |
| "step": 7570 |
| }, |
| { |
| "epoch": 0.8488241881298992, |
| "grad_norm": 3.384903907775879, |
| "learning_rate": 3.585479656588279e-05, |
| "loss": 1.7451, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.8499440089585666, |
| "grad_norm": 3.1881563663482666, |
| "learning_rate": 3.5836132885405005e-05, |
| "loss": 2.3855, |
| "step": 7590 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "grad_norm": 8.006708145141602, |
| "learning_rate": 3.581746920492721e-05, |
| "loss": 2.0241, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.8521836506159015, |
| "grad_norm": 3.468590259552002, |
| "learning_rate": 3.579880552444942e-05, |
| "loss": 2.3348, |
| "step": 7610 |
| }, |
| { |
| "epoch": 0.8533034714445689, |
| "grad_norm": 8.839496612548828, |
| "learning_rate": 3.578014184397163e-05, |
| "loss": 2.0181, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.8544232922732363, |
| "grad_norm": 2.8391735553741455, |
| "learning_rate": 3.5761478163493845e-05, |
| "loss": 2.2046, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.8555431131019037, |
| "grad_norm": 2.341062068939209, |
| "learning_rate": 3.574281448301605e-05, |
| "loss": 2.509, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.8566629339305711, |
| "grad_norm": 4.910477161407471, |
| "learning_rate": 3.572415080253826e-05, |
| "loss": 2.4332, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.8577827547592385, |
| "grad_norm": 9.427245140075684, |
| "learning_rate": 3.570548712206047e-05, |
| "loss": 2.2668, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.858902575587906, |
| "grad_norm": 2.9346938133239746, |
| "learning_rate": 3.5686823441582685e-05, |
| "loss": 1.8698, |
| "step": 7670 |
| }, |
| { |
| "epoch": 0.8600223964165733, |
| "grad_norm": 3.292447328567505, |
| "learning_rate": 3.566815976110489e-05, |
| "loss": 2.1736, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.8611422172452408, |
| "grad_norm": 3.89292573928833, |
| "learning_rate": 3.56494960806271e-05, |
| "loss": 2.4429, |
| "step": 7690 |
| }, |
| { |
| "epoch": 0.8622620380739082, |
| "grad_norm": 8.658332824707031, |
| "learning_rate": 3.563083240014931e-05, |
| "loss": 2.1728, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.8633818589025756, |
| "grad_norm": 8.725335121154785, |
| "learning_rate": 3.5612168719671525e-05, |
| "loss": 2.0126, |
| "step": 7710 |
| }, |
| { |
| "epoch": 0.864501679731243, |
| "grad_norm": 2.872495412826538, |
| "learning_rate": 3.559350503919373e-05, |
| "loss": 2.4384, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.8656215005599104, |
| "grad_norm": 17.420711517333984, |
| "learning_rate": 3.5574841358715935e-05, |
| "loss": 1.8937, |
| "step": 7730 |
| }, |
| { |
| "epoch": 0.8667413213885778, |
| "grad_norm": 7.1470489501953125, |
| "learning_rate": 3.555617767823815e-05, |
| "loss": 1.6198, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.8678611422172452, |
| "grad_norm": 9.926697731018066, |
| "learning_rate": 3.553751399776036e-05, |
| "loss": 1.9396, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.8689809630459127, |
| "grad_norm": 3.2263200283050537, |
| "learning_rate": 3.551885031728257e-05, |
| "loss": 2.3201, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.8701007838745801, |
| "grad_norm": 7.058889865875244, |
| "learning_rate": 3.5500186636804775e-05, |
| "loss": 2.1745, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.8712206047032475, |
| "grad_norm": 12.746253967285156, |
| "learning_rate": 3.548152295632699e-05, |
| "loss": 2.4713, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.8723404255319149, |
| "grad_norm": 10.826375961303711, |
| "learning_rate": 3.54628592758492e-05, |
| "loss": 1.9633, |
| "step": 7790 |
| }, |
| { |
| "epoch": 0.8734602463605823, |
| "grad_norm": 3.0302278995513916, |
| "learning_rate": 3.544419559537141e-05, |
| "loss": 2.3689, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.8745800671892497, |
| "grad_norm": 2.6338868141174316, |
| "learning_rate": 3.5425531914893615e-05, |
| "loss": 1.6076, |
| "step": 7810 |
| }, |
| { |
| "epoch": 0.8756998880179171, |
| "grad_norm": 2.724898099899292, |
| "learning_rate": 3.540686823441583e-05, |
| "loss": 1.6231, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.8768197088465846, |
| "grad_norm": 5.752689361572266, |
| "learning_rate": 3.538820455393804e-05, |
| "loss": 1.8717, |
| "step": 7830 |
| }, |
| { |
| "epoch": 0.8779395296752519, |
| "grad_norm": 3.7347118854522705, |
| "learning_rate": 3.536954087346025e-05, |
| "loss": 2.1681, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.8790593505039194, |
| "grad_norm": 6.816357612609863, |
| "learning_rate": 3.5350877192982455e-05, |
| "loss": 1.6885, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.8801791713325868, |
| "grad_norm": 10.730504989624023, |
| "learning_rate": 3.533221351250467e-05, |
| "loss": 2.3201, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.8812989921612542, |
| "grad_norm": 10.225847244262695, |
| "learning_rate": 3.531354983202688e-05, |
| "loss": 2.307, |
| "step": 7870 |
| }, |
| { |
| "epoch": 0.8824188129899216, |
| "grad_norm": 3.7461752891540527, |
| "learning_rate": 3.529488615154909e-05, |
| "loss": 2.4028, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.883538633818589, |
| "grad_norm": 3.2298667430877686, |
| "learning_rate": 3.5276222471071295e-05, |
| "loss": 2.2726, |
| "step": 7890 |
| }, |
| { |
| "epoch": 0.8846584546472565, |
| "grad_norm": 4.180459976196289, |
| "learning_rate": 3.525755879059351e-05, |
| "loss": 2.3693, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.8857782754759238, |
| "grad_norm": 3.8476152420043945, |
| "learning_rate": 3.523889511011572e-05, |
| "loss": 1.9672, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.8868980963045913, |
| "grad_norm": 2.8095366954803467, |
| "learning_rate": 3.522023142963793e-05, |
| "loss": 1.5762, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.8880179171332587, |
| "grad_norm": 11.424389839172363, |
| "learning_rate": 3.5201567749160135e-05, |
| "loss": 1.9222, |
| "step": 7930 |
| }, |
| { |
| "epoch": 0.8891377379619261, |
| "grad_norm": 10.916204452514648, |
| "learning_rate": 3.518290406868235e-05, |
| "loss": 2.2247, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.8902575587905935, |
| "grad_norm": 8.619460105895996, |
| "learning_rate": 3.516424038820456e-05, |
| "loss": 2.5038, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.8913773796192609, |
| "grad_norm": 3.383333444595337, |
| "learning_rate": 3.514557670772676e-05, |
| "loss": 2.0325, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.8924972004479284, |
| "grad_norm": 2.8794310092926025, |
| "learning_rate": 3.5126913027248975e-05, |
| "loss": 2.2037, |
| "step": 7970 |
| }, |
| { |
| "epoch": 0.8936170212765957, |
| "grad_norm": 3.270104169845581, |
| "learning_rate": 3.510824934677118e-05, |
| "loss": 2.5766, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.8947368421052632, |
| "grad_norm": 5.58250617980957, |
| "learning_rate": 3.50895856662934e-05, |
| "loss": 2.0883, |
| "step": 7990 |
| }, |
| { |
| "epoch": 0.8958566629339306, |
| "grad_norm": 4.2217488288879395, |
| "learning_rate": 3.50709219858156e-05, |
| "loss": 2.1016, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.896976483762598, |
| "grad_norm": 5.107589244842529, |
| "learning_rate": 3.5052258305337815e-05, |
| "loss": 1.9658, |
| "step": 8010 |
| }, |
| { |
| "epoch": 0.8980963045912654, |
| "grad_norm": 3.2384800910949707, |
| "learning_rate": 3.503359462486002e-05, |
| "loss": 1.8828, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.8992161254199328, |
| "grad_norm": 3.6768581867218018, |
| "learning_rate": 3.501493094438223e-05, |
| "loss": 1.6965, |
| "step": 8030 |
| }, |
| { |
| "epoch": 0.9003359462486002, |
| "grad_norm": 3.0174429416656494, |
| "learning_rate": 3.499626726390444e-05, |
| "loss": 2.16, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.9014557670772676, |
| "grad_norm": 13.087141036987305, |
| "learning_rate": 3.4977603583426655e-05, |
| "loss": 1.7648, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.9025755879059351, |
| "grad_norm": 11.361166000366211, |
| "learning_rate": 3.495893990294886e-05, |
| "loss": 1.94, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.9036954087346024, |
| "grad_norm": 3.1637301445007324, |
| "learning_rate": 3.494027622247107e-05, |
| "loss": 2.0952, |
| "step": 8070 |
| }, |
| { |
| "epoch": 0.9048152295632699, |
| "grad_norm": 3.610626459121704, |
| "learning_rate": 3.492161254199328e-05, |
| "loss": 2.4445, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.9059350503919373, |
| "grad_norm": 3.5841760635375977, |
| "learning_rate": 3.4902948861515495e-05, |
| "loss": 2.3581, |
| "step": 8090 |
| }, |
| { |
| "epoch": 0.9070548712206047, |
| "grad_norm": 8.359783172607422, |
| "learning_rate": 3.48842851810377e-05, |
| "loss": 2.0259, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.9081746920492721, |
| "grad_norm": 3.30151629447937, |
| "learning_rate": 3.486562150055991e-05, |
| "loss": 2.3721, |
| "step": 8110 |
| }, |
| { |
| "epoch": 0.9092945128779395, |
| "grad_norm": 3.3997480869293213, |
| "learning_rate": 3.484695782008212e-05, |
| "loss": 2.0008, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.910414333706607, |
| "grad_norm": 4.009676456451416, |
| "learning_rate": 3.4828294139604335e-05, |
| "loss": 1.8577, |
| "step": 8130 |
| }, |
| { |
| "epoch": 0.9115341545352743, |
| "grad_norm": 11.38424015045166, |
| "learning_rate": 3.480963045912654e-05, |
| "loss": 2.1525, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.9126539753639418, |
| "grad_norm": 3.4985761642456055, |
| "learning_rate": 3.479096677864875e-05, |
| "loss": 2.0185, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.9137737961926092, |
| "grad_norm": 2.8695907592773438, |
| "learning_rate": 3.4772303098170963e-05, |
| "loss": 2.2721, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.9148936170212766, |
| "grad_norm": 9.850143432617188, |
| "learning_rate": 3.4753639417693175e-05, |
| "loss": 2.0199, |
| "step": 8170 |
| }, |
| { |
| "epoch": 0.916013437849944, |
| "grad_norm": 6.895715713500977, |
| "learning_rate": 3.473497573721538e-05, |
| "loss": 2.0351, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.9171332586786114, |
| "grad_norm": 3.088392972946167, |
| "learning_rate": 3.4716312056737585e-05, |
| "loss": 2.1426, |
| "step": 8190 |
| }, |
| { |
| "epoch": 0.9182530795072789, |
| "grad_norm": 2.6038413047790527, |
| "learning_rate": 3.4697648376259803e-05, |
| "loss": 1.9374, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.9193729003359462, |
| "grad_norm": 8.73396110534668, |
| "learning_rate": 3.467898469578201e-05, |
| "loss": 2.3542, |
| "step": 8210 |
| }, |
| { |
| "epoch": 0.9204927211646137, |
| "grad_norm": 7.529842853546143, |
| "learning_rate": 3.466032101530422e-05, |
| "loss": 2.5576, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.921612541993281, |
| "grad_norm": 11.439668655395508, |
| "learning_rate": 3.4641657334826425e-05, |
| "loss": 2.3506, |
| "step": 8230 |
| }, |
| { |
| "epoch": 0.9227323628219485, |
| "grad_norm": 11.114765167236328, |
| "learning_rate": 3.462299365434864e-05, |
| "loss": 2.1594, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.9238521836506159, |
| "grad_norm": 3.631915807723999, |
| "learning_rate": 3.460432997387085e-05, |
| "loss": 1.9301, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.9249720044792833, |
| "grad_norm": 4.813271522521973, |
| "learning_rate": 3.458566629339306e-05, |
| "loss": 1.7937, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.9260918253079508, |
| "grad_norm": 9.251919746398926, |
| "learning_rate": 3.4567002612915265e-05, |
| "loss": 2.4907, |
| "step": 8270 |
| }, |
| { |
| "epoch": 0.9272116461366181, |
| "grad_norm": 10.042062759399414, |
| "learning_rate": 3.454833893243748e-05, |
| "loss": 2.3766, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.9283314669652856, |
| "grad_norm": 5.098442554473877, |
| "learning_rate": 3.452967525195969e-05, |
| "loss": 1.9331, |
| "step": 8290 |
| }, |
| { |
| "epoch": 0.9294512877939529, |
| "grad_norm": 3.054330348968506, |
| "learning_rate": 3.45110115714819e-05, |
| "loss": 1.9556, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.9305711086226204, |
| "grad_norm": 5.500843524932861, |
| "learning_rate": 3.4492347891004105e-05, |
| "loss": 1.8301, |
| "step": 8310 |
| }, |
| { |
| "epoch": 0.9316909294512878, |
| "grad_norm": 11.334184646606445, |
| "learning_rate": 3.447368421052632e-05, |
| "loss": 2.3866, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.9328107502799552, |
| "grad_norm": 9.781439781188965, |
| "learning_rate": 3.445502053004853e-05, |
| "loss": 2.3349, |
| "step": 8330 |
| }, |
| { |
| "epoch": 0.9339305711086227, |
| "grad_norm": 5.9633049964904785, |
| "learning_rate": 3.443635684957074e-05, |
| "loss": 1.5215, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.93505039193729, |
| "grad_norm": 9.052412033081055, |
| "learning_rate": 3.4417693169092945e-05, |
| "loss": 2.2278, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.9361702127659575, |
| "grad_norm": 15.582505226135254, |
| "learning_rate": 3.439902948861516e-05, |
| "loss": 2.2908, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.9372900335946248, |
| "grad_norm": 9.649676322937012, |
| "learning_rate": 3.438036580813737e-05, |
| "loss": 2.0235, |
| "step": 8370 |
| }, |
| { |
| "epoch": 0.9384098544232923, |
| "grad_norm": 2.680288314819336, |
| "learning_rate": 3.436170212765958e-05, |
| "loss": 1.9282, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.9395296752519597, |
| "grad_norm": 3.08258318901062, |
| "learning_rate": 3.4343038447181785e-05, |
| "loss": 2.2415, |
| "step": 8390 |
| }, |
| { |
| "epoch": 0.9406494960806271, |
| "grad_norm": 4.9708380699157715, |
| "learning_rate": 3.4324374766704e-05, |
| "loss": 2.3105, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.9417693169092946, |
| "grad_norm": 2.7266993522644043, |
| "learning_rate": 3.430571108622621e-05, |
| "loss": 2.2027, |
| "step": 8410 |
| }, |
| { |
| "epoch": 0.9428891377379619, |
| "grad_norm": 10.68362808227539, |
| "learning_rate": 3.428704740574841e-05, |
| "loss": 2.0632, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.9440089585666294, |
| "grad_norm": 3.0944361686706543, |
| "learning_rate": 3.4268383725270625e-05, |
| "loss": 2.2351, |
| "step": 8430 |
| }, |
| { |
| "epoch": 0.9451287793952967, |
| "grad_norm": 3.2292227745056152, |
| "learning_rate": 3.424972004479283e-05, |
| "loss": 1.9755, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.9462486002239642, |
| "grad_norm": 16.302453994750977, |
| "learning_rate": 3.423105636431504e-05, |
| "loss": 2.0759, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.9473684210526315, |
| "grad_norm": 4.625180244445801, |
| "learning_rate": 3.421239268383725e-05, |
| "loss": 2.5148, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.948488241881299, |
| "grad_norm": 8.57646656036377, |
| "learning_rate": 3.4193729003359465e-05, |
| "loss": 1.9348, |
| "step": 8470 |
| }, |
| { |
| "epoch": 0.9496080627099664, |
| "grad_norm": 3.611316442489624, |
| "learning_rate": 3.417506532288167e-05, |
| "loss": 2.3202, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.9507278835386338, |
| "grad_norm": 12.828388214111328, |
| "learning_rate": 3.415640164240388e-05, |
| "loss": 2.0078, |
| "step": 8490 |
| }, |
| { |
| "epoch": 0.9518477043673013, |
| "grad_norm": 7.542992115020752, |
| "learning_rate": 3.413773796192609e-05, |
| "loss": 1.8984, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.9529675251959686, |
| "grad_norm": 10.747339248657227, |
| "learning_rate": 3.4119074281448305e-05, |
| "loss": 2.2511, |
| "step": 8510 |
| }, |
| { |
| "epoch": 0.9540873460246361, |
| "grad_norm": 6.7283453941345215, |
| "learning_rate": 3.410041060097051e-05, |
| "loss": 1.9405, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.9552071668533034, |
| "grad_norm": 2.935981512069702, |
| "learning_rate": 3.408174692049272e-05, |
| "loss": 2.1033, |
| "step": 8530 |
| }, |
| { |
| "epoch": 0.9563269876819709, |
| "grad_norm": 3.4737389087677, |
| "learning_rate": 3.4063083240014933e-05, |
| "loss": 2.0328, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.9574468085106383, |
| "grad_norm": 8.525548934936523, |
| "learning_rate": 3.4044419559537145e-05, |
| "loss": 1.8537, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.9585666293393057, |
| "grad_norm": 6.467761516571045, |
| "learning_rate": 3.402575587905935e-05, |
| "loss": 2.0973, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.9596864501679732, |
| "grad_norm": 10.39410400390625, |
| "learning_rate": 3.400709219858156e-05, |
| "loss": 2.0941, |
| "step": 8570 |
| }, |
| { |
| "epoch": 0.9608062709966405, |
| "grad_norm": 5.414796829223633, |
| "learning_rate": 3.3988428518103773e-05, |
| "loss": 1.6318, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.961926091825308, |
| "grad_norm": 2.808164119720459, |
| "learning_rate": 3.3969764837625985e-05, |
| "loss": 1.8583, |
| "step": 8590 |
| }, |
| { |
| "epoch": 0.9630459126539753, |
| "grad_norm": 2.665485382080078, |
| "learning_rate": 3.395110115714819e-05, |
| "loss": 2.1733, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.9641657334826428, |
| "grad_norm": 3.183068037033081, |
| "learning_rate": 3.39324374766704e-05, |
| "loss": 1.7059, |
| "step": 8610 |
| }, |
| { |
| "epoch": 0.9652855543113102, |
| "grad_norm": 2.7500557899475098, |
| "learning_rate": 3.3913773796192613e-05, |
| "loss": 2.2814, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.9664053751399776, |
| "grad_norm": 6.916834831237793, |
| "learning_rate": 3.3895110115714825e-05, |
| "loss": 2.0092, |
| "step": 8630 |
| }, |
| { |
| "epoch": 0.9675251959686451, |
| "grad_norm": 11.789180755615234, |
| "learning_rate": 3.387644643523703e-05, |
| "loss": 1.744, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.9686450167973124, |
| "grad_norm": 12.441953659057617, |
| "learning_rate": 3.3857782754759235e-05, |
| "loss": 2.2952, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.9697648376259799, |
| "grad_norm": 4.348373889923096, |
| "learning_rate": 3.3839119074281453e-05, |
| "loss": 1.8666, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.9708846584546472, |
| "grad_norm": 3.6366405487060547, |
| "learning_rate": 3.382045539380366e-05, |
| "loss": 2.3619, |
| "step": 8670 |
| }, |
| { |
| "epoch": 0.9720044792833147, |
| "grad_norm": 5.705763816833496, |
| "learning_rate": 3.380179171332587e-05, |
| "loss": 1.8297, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.973124300111982, |
| "grad_norm": 8.548418998718262, |
| "learning_rate": 3.3783128032848075e-05, |
| "loss": 1.6297, |
| "step": 8690 |
| }, |
| { |
| "epoch": 0.9742441209406495, |
| "grad_norm": 6.786285877227783, |
| "learning_rate": 3.376446435237029e-05, |
| "loss": 1.6739, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.975363941769317, |
| "grad_norm": 2.983182430267334, |
| "learning_rate": 3.37458006718925e-05, |
| "loss": 1.8939, |
| "step": 8710 |
| }, |
| { |
| "epoch": 0.9764837625979843, |
| "grad_norm": 7.78575325012207, |
| "learning_rate": 3.372713699141471e-05, |
| "loss": 2.2775, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.9776035834266518, |
| "grad_norm": 2.9739723205566406, |
| "learning_rate": 3.3708473310936915e-05, |
| "loss": 2.3235, |
| "step": 8730 |
| }, |
| { |
| "epoch": 0.9787234042553191, |
| "grad_norm": 13.118427276611328, |
| "learning_rate": 3.368980963045913e-05, |
| "loss": 2.2344, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.9798432250839866, |
| "grad_norm": 3.2696194648742676, |
| "learning_rate": 3.367114594998134e-05, |
| "loss": 2.3788, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.9809630459126539, |
| "grad_norm": 2.9010257720947266, |
| "learning_rate": 3.365248226950355e-05, |
| "loss": 2.0307, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.9820828667413214, |
| "grad_norm": 3.2224440574645996, |
| "learning_rate": 3.3633818589025755e-05, |
| "loss": 2.0968, |
| "step": 8770 |
| }, |
| { |
| "epoch": 0.9832026875699889, |
| "grad_norm": 9.395108222961426, |
| "learning_rate": 3.361515490854797e-05, |
| "loss": 2.2748, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.9843225083986562, |
| "grad_norm": 3.0687882900238037, |
| "learning_rate": 3.359649122807018e-05, |
| "loss": 1.7044, |
| "step": 8790 |
| }, |
| { |
| "epoch": 0.9854423292273237, |
| "grad_norm": 3.7267823219299316, |
| "learning_rate": 3.357782754759239e-05, |
| "loss": 1.6726, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.986562150055991, |
| "grad_norm": 3.8064417839050293, |
| "learning_rate": 3.3559163867114595e-05, |
| "loss": 1.8408, |
| "step": 8810 |
| }, |
| { |
| "epoch": 0.9876819708846585, |
| "grad_norm": 8.669193267822266, |
| "learning_rate": 3.354050018663681e-05, |
| "loss": 1.8474, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.9888017917133258, |
| "grad_norm": 14.256889343261719, |
| "learning_rate": 3.352183650615902e-05, |
| "loss": 2.0988, |
| "step": 8830 |
| }, |
| { |
| "epoch": 0.9899216125419933, |
| "grad_norm": 10.54806137084961, |
| "learning_rate": 3.350317282568123e-05, |
| "loss": 2.1008, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.9910414333706606, |
| "grad_norm": 3.6541545391082764, |
| "learning_rate": 3.3484509145203435e-05, |
| "loss": 1.7903, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.9921612541993281, |
| "grad_norm": 3.3884453773498535, |
| "learning_rate": 3.346584546472565e-05, |
| "loss": 2.0664, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.9932810750279956, |
| "grad_norm": 3.597472906112671, |
| "learning_rate": 3.344718178424786e-05, |
| "loss": 2.2501, |
| "step": 8870 |
| }, |
| { |
| "epoch": 0.9944008958566629, |
| "grad_norm": 3.326669931411743, |
| "learning_rate": 3.3428518103770063e-05, |
| "loss": 2.2494, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.9955207166853304, |
| "grad_norm": 3.445563316345215, |
| "learning_rate": 3.3409854423292275e-05, |
| "loss": 2.4157, |
| "step": 8890 |
| }, |
| { |
| "epoch": 0.9966405375139977, |
| "grad_norm": 3.6265370845794678, |
| "learning_rate": 3.339119074281448e-05, |
| "loss": 1.9543, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.9977603583426652, |
| "grad_norm": 6.9715471267700195, |
| "learning_rate": 3.337252706233669e-05, |
| "loss": 1.9576, |
| "step": 8910 |
| }, |
| { |
| "epoch": 0.9988801791713325, |
| "grad_norm": 2.990663528442383, |
| "learning_rate": 3.3353863381858903e-05, |
| "loss": 1.7998, |
| "step": 8920 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 16.68695640563965, |
| "learning_rate": 3.3335199701381115e-05, |
| "loss": 2.4764, |
| "step": 8930 |
| }, |
| { |
| "epoch": 1.0011198208286674, |
| "grad_norm": 3.5761334896087646, |
| "learning_rate": 3.331653602090332e-05, |
| "loss": 2.0049, |
| "step": 8940 |
| }, |
| { |
| "epoch": 1.002239641657335, |
| "grad_norm": 7.336765289306641, |
| "learning_rate": 3.329787234042553e-05, |
| "loss": 1.9596, |
| "step": 8950 |
| }, |
| { |
| "epoch": 1.0033594624860023, |
| "grad_norm": 9.628479957580566, |
| "learning_rate": 3.3279208659947743e-05, |
| "loss": 2.1628, |
| "step": 8960 |
| }, |
| { |
| "epoch": 1.0044792833146696, |
| "grad_norm": 6.432254791259766, |
| "learning_rate": 3.3260544979469955e-05, |
| "loss": 1.9689, |
| "step": 8970 |
| }, |
| { |
| "epoch": 1.005599104143337, |
| "grad_norm": 3.5775656700134277, |
| "learning_rate": 3.324188129899216e-05, |
| "loss": 2.0591, |
| "step": 8980 |
| }, |
| { |
| "epoch": 1.0067189249720045, |
| "grad_norm": 7.444267272949219, |
| "learning_rate": 3.322321761851437e-05, |
| "loss": 2.2116, |
| "step": 8990 |
| }, |
| { |
| "epoch": 1.007838745800672, |
| "grad_norm": 9.2912015914917, |
| "learning_rate": 3.3204553938036583e-05, |
| "loss": 1.8121, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.0089585666293392, |
| "grad_norm": 12.298483848571777, |
| "learning_rate": 3.3185890257558795e-05, |
| "loss": 2.2219, |
| "step": 9010 |
| }, |
| { |
| "epoch": 1.0100783874580068, |
| "grad_norm": 11.347268104553223, |
| "learning_rate": 3.3167226577081e-05, |
| "loss": 1.9998, |
| "step": 9020 |
| }, |
| { |
| "epoch": 1.0111982082866742, |
| "grad_norm": 4.02382755279541, |
| "learning_rate": 3.314856289660321e-05, |
| "loss": 2.1333, |
| "step": 9030 |
| }, |
| { |
| "epoch": 1.0123180291153415, |
| "grad_norm": 8.781681060791016, |
| "learning_rate": 3.3129899216125423e-05, |
| "loss": 2.0904, |
| "step": 9040 |
| }, |
| { |
| "epoch": 1.0134378499440089, |
| "grad_norm": 7.836172580718994, |
| "learning_rate": 3.3111235535647635e-05, |
| "loss": 1.8313, |
| "step": 9050 |
| }, |
| { |
| "epoch": 1.0145576707726764, |
| "grad_norm": 7.975405693054199, |
| "learning_rate": 3.309257185516984e-05, |
| "loss": 2.1517, |
| "step": 9060 |
| }, |
| { |
| "epoch": 1.0156774916013438, |
| "grad_norm": 9.539911270141602, |
| "learning_rate": 3.307390817469205e-05, |
| "loss": 2.228, |
| "step": 9070 |
| }, |
| { |
| "epoch": 1.0167973124300111, |
| "grad_norm": 4.019872665405273, |
| "learning_rate": 3.3055244494214263e-05, |
| "loss": 1.97, |
| "step": 9080 |
| }, |
| { |
| "epoch": 1.0179171332586787, |
| "grad_norm": 3.3895974159240723, |
| "learning_rate": 3.3036580813736475e-05, |
| "loss": 1.6972, |
| "step": 9090 |
| }, |
| { |
| "epoch": 1.019036954087346, |
| "grad_norm": 12.689729690551758, |
| "learning_rate": 3.301791713325868e-05, |
| "loss": 1.7648, |
| "step": 9100 |
| }, |
| { |
| "epoch": 1.0201567749160134, |
| "grad_norm": 15.403214454650879, |
| "learning_rate": 3.2999253452780885e-05, |
| "loss": 2.3582, |
| "step": 9110 |
| }, |
| { |
| "epoch": 1.0212765957446808, |
| "grad_norm": 3.3380067348480225, |
| "learning_rate": 3.29805897723031e-05, |
| "loss": 1.9642, |
| "step": 9120 |
| }, |
| { |
| "epoch": 1.0223964165733483, |
| "grad_norm": 3.8533127307891846, |
| "learning_rate": 3.296192609182531e-05, |
| "loss": 1.9215, |
| "step": 9130 |
| }, |
| { |
| "epoch": 1.0235162374020157, |
| "grad_norm": 3.532688617706299, |
| "learning_rate": 3.294326241134752e-05, |
| "loss": 1.6916, |
| "step": 9140 |
| }, |
| { |
| "epoch": 1.024636058230683, |
| "grad_norm": 2.9172744750976562, |
| "learning_rate": 3.2924598730869725e-05, |
| "loss": 2.0278, |
| "step": 9150 |
| }, |
| { |
| "epoch": 1.0257558790593504, |
| "grad_norm": 4.141864776611328, |
| "learning_rate": 3.290593505039194e-05, |
| "loss": 2.57, |
| "step": 9160 |
| }, |
| { |
| "epoch": 1.026875699888018, |
| "grad_norm": 15.59328842163086, |
| "learning_rate": 3.288727136991415e-05, |
| "loss": 2.3305, |
| "step": 9170 |
| }, |
| { |
| "epoch": 1.0279955207166853, |
| "grad_norm": 8.242568016052246, |
| "learning_rate": 3.286860768943636e-05, |
| "loss": 2.2541, |
| "step": 9180 |
| }, |
| { |
| "epoch": 1.0291153415453527, |
| "grad_norm": 7.377376079559326, |
| "learning_rate": 3.2849944008958565e-05, |
| "loss": 2.2119, |
| "step": 9190 |
| }, |
| { |
| "epoch": 1.0302351623740202, |
| "grad_norm": 3.7288410663604736, |
| "learning_rate": 3.283128032848078e-05, |
| "loss": 1.9549, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.0313549832026876, |
| "grad_norm": 7.731942653656006, |
| "learning_rate": 3.281261664800299e-05, |
| "loss": 2.0964, |
| "step": 9210 |
| }, |
| { |
| "epoch": 1.032474804031355, |
| "grad_norm": 8.507601737976074, |
| "learning_rate": 3.27939529675252e-05, |
| "loss": 1.4743, |
| "step": 9220 |
| }, |
| { |
| "epoch": 1.0335946248600223, |
| "grad_norm": 10.14968204498291, |
| "learning_rate": 3.2775289287047405e-05, |
| "loss": 2.1759, |
| "step": 9230 |
| }, |
| { |
| "epoch": 1.0347144456886899, |
| "grad_norm": 4.713762283325195, |
| "learning_rate": 3.275662560656962e-05, |
| "loss": 2.4896, |
| "step": 9240 |
| }, |
| { |
| "epoch": 1.0358342665173572, |
| "grad_norm": 4.729640483856201, |
| "learning_rate": 3.273796192609183e-05, |
| "loss": 2.0363, |
| "step": 9250 |
| }, |
| { |
| "epoch": 1.0369540873460246, |
| "grad_norm": 3.9254088401794434, |
| "learning_rate": 3.271929824561404e-05, |
| "loss": 1.82, |
| "step": 9260 |
| }, |
| { |
| "epoch": 1.0380739081746921, |
| "grad_norm": 6.3994622230529785, |
| "learning_rate": 3.2700634565136245e-05, |
| "loss": 2.0063, |
| "step": 9270 |
| }, |
| { |
| "epoch": 1.0391937290033595, |
| "grad_norm": 4.113112449645996, |
| "learning_rate": 3.268197088465846e-05, |
| "loss": 1.8885, |
| "step": 9280 |
| }, |
| { |
| "epoch": 1.0403135498320268, |
| "grad_norm": 9.683294296264648, |
| "learning_rate": 3.266330720418067e-05, |
| "loss": 2.087, |
| "step": 9290 |
| }, |
| { |
| "epoch": 1.0414333706606942, |
| "grad_norm": 3.7569706439971924, |
| "learning_rate": 3.264464352370288e-05, |
| "loss": 2.0629, |
| "step": 9300 |
| }, |
| { |
| "epoch": 1.0425531914893618, |
| "grad_norm": 6.442532062530518, |
| "learning_rate": 3.2625979843225085e-05, |
| "loss": 2.1135, |
| "step": 9310 |
| }, |
| { |
| "epoch": 1.0436730123180291, |
| "grad_norm": 7.427597522735596, |
| "learning_rate": 3.26073161627473e-05, |
| "loss": 1.4265, |
| "step": 9320 |
| }, |
| { |
| "epoch": 1.0447928331466965, |
| "grad_norm": 14.338908195495605, |
| "learning_rate": 3.25886524822695e-05, |
| "loss": 1.9124, |
| "step": 9330 |
| }, |
| { |
| "epoch": 1.045912653975364, |
| "grad_norm": 9.877331733703613, |
| "learning_rate": 3.256998880179172e-05, |
| "loss": 2.5536, |
| "step": 9340 |
| }, |
| { |
| "epoch": 1.0470324748040314, |
| "grad_norm": 5.434894561767578, |
| "learning_rate": 3.2551325121313925e-05, |
| "loss": 2.0743, |
| "step": 9350 |
| }, |
| { |
| "epoch": 1.0481522956326987, |
| "grad_norm": 5.651406764984131, |
| "learning_rate": 3.253266144083613e-05, |
| "loss": 1.6426, |
| "step": 9360 |
| }, |
| { |
| "epoch": 1.049272116461366, |
| "grad_norm": 5.694229602813721, |
| "learning_rate": 3.251399776035834e-05, |
| "loss": 1.709, |
| "step": 9370 |
| }, |
| { |
| "epoch": 1.0503919372900337, |
| "grad_norm": 8.92438793182373, |
| "learning_rate": 3.2495334079880553e-05, |
| "loss": 1.8702, |
| "step": 9380 |
| }, |
| { |
| "epoch": 1.051511758118701, |
| "grad_norm": 6.862886428833008, |
| "learning_rate": 3.2476670399402765e-05, |
| "loss": 2.3834, |
| "step": 9390 |
| }, |
| { |
| "epoch": 1.0526315789473684, |
| "grad_norm": 7.55111026763916, |
| "learning_rate": 3.245800671892497e-05, |
| "loss": 1.8377, |
| "step": 9400 |
| }, |
| { |
| "epoch": 1.053751399776036, |
| "grad_norm": 4.6407341957092285, |
| "learning_rate": 3.243934303844718e-05, |
| "loss": 2.1082, |
| "step": 9410 |
| }, |
| { |
| "epoch": 1.0548712206047033, |
| "grad_norm": 6.718739032745361, |
| "learning_rate": 3.2420679357969393e-05, |
| "loss": 2.4031, |
| "step": 9420 |
| }, |
| { |
| "epoch": 1.0559910414333706, |
| "grad_norm": 2.9721930027008057, |
| "learning_rate": 3.2402015677491605e-05, |
| "loss": 1.9876, |
| "step": 9430 |
| }, |
| { |
| "epoch": 1.057110862262038, |
| "grad_norm": 2.9498345851898193, |
| "learning_rate": 3.238335199701381e-05, |
| "loss": 1.8549, |
| "step": 9440 |
| }, |
| { |
| "epoch": 1.0582306830907056, |
| "grad_norm": 13.339334487915039, |
| "learning_rate": 3.236468831653602e-05, |
| "loss": 1.8697, |
| "step": 9450 |
| }, |
| { |
| "epoch": 1.059350503919373, |
| "grad_norm": 4.650289058685303, |
| "learning_rate": 3.2346024636058234e-05, |
| "loss": 2.2676, |
| "step": 9460 |
| }, |
| { |
| "epoch": 1.0604703247480403, |
| "grad_norm": 14.234888076782227, |
| "learning_rate": 3.2327360955580445e-05, |
| "loss": 2.3395, |
| "step": 9470 |
| }, |
| { |
| "epoch": 1.0615901455767078, |
| "grad_norm": 3.4030983448028564, |
| "learning_rate": 3.230869727510265e-05, |
| "loss": 2.2116, |
| "step": 9480 |
| }, |
| { |
| "epoch": 1.0627099664053752, |
| "grad_norm": 4.666158199310303, |
| "learning_rate": 3.229003359462486e-05, |
| "loss": 2.3914, |
| "step": 9490 |
| }, |
| { |
| "epoch": 1.0638297872340425, |
| "grad_norm": 9.740036010742188, |
| "learning_rate": 3.2271369914147074e-05, |
| "loss": 1.7909, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.0649496080627099, |
| "grad_norm": 3.0109105110168457, |
| "learning_rate": 3.2252706233669285e-05, |
| "loss": 2.4118, |
| "step": 9510 |
| }, |
| { |
| "epoch": 1.0660694288913775, |
| "grad_norm": 3.5755748748779297, |
| "learning_rate": 3.223404255319149e-05, |
| "loss": 2.0648, |
| "step": 9520 |
| }, |
| { |
| "epoch": 1.0671892497200448, |
| "grad_norm": 3.826801061630249, |
| "learning_rate": 3.22153788727137e-05, |
| "loss": 1.5787, |
| "step": 9530 |
| }, |
| { |
| "epoch": 1.0683090705487122, |
| "grad_norm": 10.07292366027832, |
| "learning_rate": 3.219671519223591e-05, |
| "loss": 2.2986, |
| "step": 9540 |
| }, |
| { |
| "epoch": 1.0694288913773797, |
| "grad_norm": 13.892361640930176, |
| "learning_rate": 3.2178051511758125e-05, |
| "loss": 1.9787, |
| "step": 9550 |
| }, |
| { |
| "epoch": 1.070548712206047, |
| "grad_norm": 36.16096496582031, |
| "learning_rate": 3.215938783128033e-05, |
| "loss": 1.885, |
| "step": 9560 |
| }, |
| { |
| "epoch": 1.0716685330347144, |
| "grad_norm": 2.89412260055542, |
| "learning_rate": 3.214072415080254e-05, |
| "loss": 1.745, |
| "step": 9570 |
| }, |
| { |
| "epoch": 1.0727883538633818, |
| "grad_norm": 11.991975784301758, |
| "learning_rate": 3.212206047032475e-05, |
| "loss": 1.4454, |
| "step": 9580 |
| }, |
| { |
| "epoch": 1.0739081746920494, |
| "grad_norm": 14.4371337890625, |
| "learning_rate": 3.210339678984696e-05, |
| "loss": 1.8037, |
| "step": 9590 |
| }, |
| { |
| "epoch": 1.0750279955207167, |
| "grad_norm": 3.9582924842834473, |
| "learning_rate": 3.208473310936917e-05, |
| "loss": 2.414, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.076147816349384, |
| "grad_norm": 3.2256970405578613, |
| "learning_rate": 3.2066069428891375e-05, |
| "loss": 1.905, |
| "step": 9610 |
| }, |
| { |
| "epoch": 1.0772676371780516, |
| "grad_norm": 10.48331356048584, |
| "learning_rate": 3.204740574841359e-05, |
| "loss": 1.9094, |
| "step": 9620 |
| }, |
| { |
| "epoch": 1.078387458006719, |
| "grad_norm": 5.042234420776367, |
| "learning_rate": 3.20287420679358e-05, |
| "loss": 2.2765, |
| "step": 9630 |
| }, |
| { |
| "epoch": 1.0795072788353863, |
| "grad_norm": 3.3927605152130127, |
| "learning_rate": 3.201007838745801e-05, |
| "loss": 1.9585, |
| "step": 9640 |
| }, |
| { |
| "epoch": 1.0806270996640537, |
| "grad_norm": 3.9636199474334717, |
| "learning_rate": 3.1991414706980215e-05, |
| "loss": 2.0755, |
| "step": 9650 |
| }, |
| { |
| "epoch": 1.0817469204927213, |
| "grad_norm": 3.8078179359436035, |
| "learning_rate": 3.197275102650243e-05, |
| "loss": 1.7727, |
| "step": 9660 |
| }, |
| { |
| "epoch": 1.0828667413213886, |
| "grad_norm": 11.449139595031738, |
| "learning_rate": 3.195408734602464e-05, |
| "loss": 2.0101, |
| "step": 9670 |
| }, |
| { |
| "epoch": 1.083986562150056, |
| "grad_norm": 13.973347663879395, |
| "learning_rate": 3.193542366554685e-05, |
| "loss": 2.2601, |
| "step": 9680 |
| }, |
| { |
| "epoch": 1.0851063829787233, |
| "grad_norm": 11.239791870117188, |
| "learning_rate": 3.1916759985069055e-05, |
| "loss": 2.1264, |
| "step": 9690 |
| }, |
| { |
| "epoch": 1.0862262038073909, |
| "grad_norm": 11.495058059692383, |
| "learning_rate": 3.189809630459127e-05, |
| "loss": 1.9445, |
| "step": 9700 |
| }, |
| { |
| "epoch": 1.0873460246360582, |
| "grad_norm": 4.135149002075195, |
| "learning_rate": 3.187943262411348e-05, |
| "loss": 1.7415, |
| "step": 9710 |
| }, |
| { |
| "epoch": 1.0884658454647256, |
| "grad_norm": 10.35810375213623, |
| "learning_rate": 3.186076894363569e-05, |
| "loss": 2.1196, |
| "step": 9720 |
| }, |
| { |
| "epoch": 1.0895856662933932, |
| "grad_norm": 3.5504679679870605, |
| "learning_rate": 3.1842105263157895e-05, |
| "loss": 2.1255, |
| "step": 9730 |
| }, |
| { |
| "epoch": 1.0907054871220605, |
| "grad_norm": 7.433374404907227, |
| "learning_rate": 3.182344158268011e-05, |
| "loss": 2.0092, |
| "step": 9740 |
| }, |
| { |
| "epoch": 1.0918253079507279, |
| "grad_norm": 7.075191974639893, |
| "learning_rate": 3.180477790220231e-05, |
| "loss": 1.935, |
| "step": 9750 |
| }, |
| { |
| "epoch": 1.0929451287793952, |
| "grad_norm": 5.563907623291016, |
| "learning_rate": 3.178611422172453e-05, |
| "loss": 2.0882, |
| "step": 9760 |
| }, |
| { |
| "epoch": 1.0940649496080628, |
| "grad_norm": 3.0820200443267822, |
| "learning_rate": 3.1767450541246735e-05, |
| "loss": 1.7695, |
| "step": 9770 |
| }, |
| { |
| "epoch": 1.0951847704367301, |
| "grad_norm": 5.051406383514404, |
| "learning_rate": 3.174878686076895e-05, |
| "loss": 2.3345, |
| "step": 9780 |
| }, |
| { |
| "epoch": 1.0963045912653975, |
| "grad_norm": 9.736443519592285, |
| "learning_rate": 3.173012318029115e-05, |
| "loss": 1.9699, |
| "step": 9790 |
| }, |
| { |
| "epoch": 1.097424412094065, |
| "grad_norm": 3.930483818054199, |
| "learning_rate": 3.171145949981337e-05, |
| "loss": 2.1495, |
| "step": 9800 |
| }, |
| { |
| "epoch": 1.0985442329227324, |
| "grad_norm": 9.857285499572754, |
| "learning_rate": 3.1692795819335575e-05, |
| "loss": 2.1356, |
| "step": 9810 |
| }, |
| { |
| "epoch": 1.0996640537513998, |
| "grad_norm": 3.3318631649017334, |
| "learning_rate": 3.167413213885778e-05, |
| "loss": 1.9198, |
| "step": 9820 |
| }, |
| { |
| "epoch": 1.100783874580067, |
| "grad_norm": 3.945836067199707, |
| "learning_rate": 3.165546845837999e-05, |
| "loss": 2.0346, |
| "step": 9830 |
| }, |
| { |
| "epoch": 1.1019036954087347, |
| "grad_norm": 9.270819664001465, |
| "learning_rate": 3.1636804777902204e-05, |
| "loss": 2.1785, |
| "step": 9840 |
| }, |
| { |
| "epoch": 1.103023516237402, |
| "grad_norm": 14.800010681152344, |
| "learning_rate": 3.1618141097424415e-05, |
| "loss": 1.9342, |
| "step": 9850 |
| }, |
| { |
| "epoch": 1.1041433370660694, |
| "grad_norm": 8.554313659667969, |
| "learning_rate": 3.159947741694662e-05, |
| "loss": 2.0811, |
| "step": 9860 |
| }, |
| { |
| "epoch": 1.1052631578947367, |
| "grad_norm": 3.686922073364258, |
| "learning_rate": 3.158081373646883e-05, |
| "loss": 2.1673, |
| "step": 9870 |
| }, |
| { |
| "epoch": 1.1063829787234043, |
| "grad_norm": 2.909205436706543, |
| "learning_rate": 3.1562150055991044e-05, |
| "loss": 2.0985, |
| "step": 9880 |
| }, |
| { |
| "epoch": 1.1075027995520716, |
| "grad_norm": 3.2634377479553223, |
| "learning_rate": 3.1543486375513255e-05, |
| "loss": 1.7348, |
| "step": 9890 |
| }, |
| { |
| "epoch": 1.108622620380739, |
| "grad_norm": 3.7922704219818115, |
| "learning_rate": 3.152482269503546e-05, |
| "loss": 2.0642, |
| "step": 9900 |
| }, |
| { |
| "epoch": 1.1097424412094066, |
| "grad_norm": 3.7772440910339355, |
| "learning_rate": 3.150615901455767e-05, |
| "loss": 1.6834, |
| "step": 9910 |
| }, |
| { |
| "epoch": 1.110862262038074, |
| "grad_norm": 6.348939895629883, |
| "learning_rate": 3.1487495334079884e-05, |
| "loss": 1.8862, |
| "step": 9920 |
| }, |
| { |
| "epoch": 1.1119820828667413, |
| "grad_norm": 4.5603790283203125, |
| "learning_rate": 3.1468831653602095e-05, |
| "loss": 2.0568, |
| "step": 9930 |
| }, |
| { |
| "epoch": 1.1131019036954086, |
| "grad_norm": 11.244080543518066, |
| "learning_rate": 3.14501679731243e-05, |
| "loss": 2.015, |
| "step": 9940 |
| }, |
| { |
| "epoch": 1.1142217245240762, |
| "grad_norm": 8.52851390838623, |
| "learning_rate": 3.143150429264651e-05, |
| "loss": 2.0168, |
| "step": 9950 |
| }, |
| { |
| "epoch": 1.1153415453527435, |
| "grad_norm": 3.2907376289367676, |
| "learning_rate": 3.1412840612168724e-05, |
| "loss": 2.0653, |
| "step": 9960 |
| }, |
| { |
| "epoch": 1.116461366181411, |
| "grad_norm": 3.665787696838379, |
| "learning_rate": 3.1394176931690935e-05, |
| "loss": 2.5296, |
| "step": 9970 |
| }, |
| { |
| "epoch": 1.1175811870100785, |
| "grad_norm": 3.52567982673645, |
| "learning_rate": 3.137551325121314e-05, |
| "loss": 2.0133, |
| "step": 9980 |
| }, |
| { |
| "epoch": 1.1187010078387458, |
| "grad_norm": 3.8598620891571045, |
| "learning_rate": 3.135684957073535e-05, |
| "loss": 2.0675, |
| "step": 9990 |
| }, |
| { |
| "epoch": 1.1198208286674132, |
| "grad_norm": 10.049873352050781, |
| "learning_rate": 3.133818589025756e-05, |
| "loss": 2.1145, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.1209406494960805, |
| "grad_norm": 3.7973287105560303, |
| "learning_rate": 3.1319522209779775e-05, |
| "loss": 2.3125, |
| "step": 10010 |
| }, |
| { |
| "epoch": 1.122060470324748, |
| "grad_norm": 9.45921516418457, |
| "learning_rate": 3.130085852930198e-05, |
| "loss": 2.2593, |
| "step": 10020 |
| }, |
| { |
| "epoch": 1.1231802911534154, |
| "grad_norm": 3.3235390186309814, |
| "learning_rate": 3.128219484882419e-05, |
| "loss": 2.0233, |
| "step": 10030 |
| }, |
| { |
| "epoch": 1.1243001119820828, |
| "grad_norm": 8.556841850280762, |
| "learning_rate": 3.12635311683464e-05, |
| "loss": 2.0527, |
| "step": 10040 |
| }, |
| { |
| "epoch": 1.1254199328107504, |
| "grad_norm": 3.7315330505371094, |
| "learning_rate": 3.124486748786861e-05, |
| "loss": 1.9582, |
| "step": 10050 |
| }, |
| { |
| "epoch": 1.1265397536394177, |
| "grad_norm": 9.111562728881836, |
| "learning_rate": 3.122620380739082e-05, |
| "loss": 1.931, |
| "step": 10060 |
| }, |
| { |
| "epoch": 1.127659574468085, |
| "grad_norm": 13.934300422668457, |
| "learning_rate": 3.1207540126913025e-05, |
| "loss": 2.2748, |
| "step": 10070 |
| }, |
| { |
| "epoch": 1.1287793952967524, |
| "grad_norm": 2.9079439640045166, |
| "learning_rate": 3.118887644643524e-05, |
| "loss": 2.0733, |
| "step": 10080 |
| }, |
| { |
| "epoch": 1.12989921612542, |
| "grad_norm": 14.349089622497559, |
| "learning_rate": 3.117021276595745e-05, |
| "loss": 1.9089, |
| "step": 10090 |
| }, |
| { |
| "epoch": 1.1310190369540873, |
| "grad_norm": 4.357903003692627, |
| "learning_rate": 3.115154908547966e-05, |
| "loss": 2.1913, |
| "step": 10100 |
| }, |
| { |
| "epoch": 1.1321388577827547, |
| "grad_norm": 18.6312255859375, |
| "learning_rate": 3.1132885405001865e-05, |
| "loss": 2.0145, |
| "step": 10110 |
| }, |
| { |
| "epoch": 1.1332586786114223, |
| "grad_norm": 10.29723834991455, |
| "learning_rate": 3.111422172452408e-05, |
| "loss": 2.1836, |
| "step": 10120 |
| }, |
| { |
| "epoch": 1.1343784994400896, |
| "grad_norm": 8.374372482299805, |
| "learning_rate": 3.109555804404629e-05, |
| "loss": 1.8299, |
| "step": 10130 |
| }, |
| { |
| "epoch": 1.135498320268757, |
| "grad_norm": 3.970510244369507, |
| "learning_rate": 3.10768943635685e-05, |
| "loss": 1.2505, |
| "step": 10140 |
| }, |
| { |
| "epoch": 1.1366181410974243, |
| "grad_norm": 2.9629228115081787, |
| "learning_rate": 3.1058230683090705e-05, |
| "loss": 1.8105, |
| "step": 10150 |
| }, |
| { |
| "epoch": 1.137737961926092, |
| "grad_norm": 11.693266868591309, |
| "learning_rate": 3.103956700261292e-05, |
| "loss": 2.1849, |
| "step": 10160 |
| }, |
| { |
| "epoch": 1.1388577827547592, |
| "grad_norm": 5.791164398193359, |
| "learning_rate": 3.102090332213513e-05, |
| "loss": 2.1892, |
| "step": 10170 |
| }, |
| { |
| "epoch": 1.1399776035834266, |
| "grad_norm": 13.902082443237305, |
| "learning_rate": 3.100223964165734e-05, |
| "loss": 2.0516, |
| "step": 10180 |
| }, |
| { |
| "epoch": 1.1410974244120942, |
| "grad_norm": 8.851689338684082, |
| "learning_rate": 3.0983575961179545e-05, |
| "loss": 1.8785, |
| "step": 10190 |
| }, |
| { |
| "epoch": 1.1422172452407615, |
| "grad_norm": 7.506970405578613, |
| "learning_rate": 3.096491228070176e-05, |
| "loss": 1.77, |
| "step": 10200 |
| }, |
| { |
| "epoch": 1.1433370660694289, |
| "grad_norm": 5.175302028656006, |
| "learning_rate": 3.094624860022396e-05, |
| "loss": 1.5816, |
| "step": 10210 |
| }, |
| { |
| "epoch": 1.1444568868980962, |
| "grad_norm": 11.070626258850098, |
| "learning_rate": 3.092758491974618e-05, |
| "loss": 2.2113, |
| "step": 10220 |
| }, |
| { |
| "epoch": 1.1455767077267638, |
| "grad_norm": 14.317008018493652, |
| "learning_rate": 3.0908921239268385e-05, |
| "loss": 2.4373, |
| "step": 10230 |
| }, |
| { |
| "epoch": 1.1466965285554311, |
| "grad_norm": 3.7413330078125, |
| "learning_rate": 3.08902575587906e-05, |
| "loss": 2.0543, |
| "step": 10240 |
| }, |
| { |
| "epoch": 1.1478163493840985, |
| "grad_norm": 8.698836326599121, |
| "learning_rate": 3.08715938783128e-05, |
| "loss": 1.8267, |
| "step": 10250 |
| }, |
| { |
| "epoch": 1.148936170212766, |
| "grad_norm": 9.122303009033203, |
| "learning_rate": 3.085293019783502e-05, |
| "loss": 2.1165, |
| "step": 10260 |
| }, |
| { |
| "epoch": 1.1500559910414334, |
| "grad_norm": 10.478148460388184, |
| "learning_rate": 3.0834266517357225e-05, |
| "loss": 1.7777, |
| "step": 10270 |
| }, |
| { |
| "epoch": 1.1511758118701008, |
| "grad_norm": 7.338038444519043, |
| "learning_rate": 3.081560283687943e-05, |
| "loss": 1.5512, |
| "step": 10280 |
| }, |
| { |
| "epoch": 1.1522956326987681, |
| "grad_norm": 5.104288578033447, |
| "learning_rate": 3.079693915640164e-05, |
| "loss": 1.5521, |
| "step": 10290 |
| }, |
| { |
| "epoch": 1.1534154535274357, |
| "grad_norm": 6.295915126800537, |
| "learning_rate": 3.0778275475923854e-05, |
| "loss": 1.8437, |
| "step": 10300 |
| }, |
| { |
| "epoch": 1.154535274356103, |
| "grad_norm": 3.8285670280456543, |
| "learning_rate": 3.0759611795446065e-05, |
| "loss": 2.3621, |
| "step": 10310 |
| }, |
| { |
| "epoch": 1.1556550951847704, |
| "grad_norm": 9.0399751663208, |
| "learning_rate": 3.074094811496827e-05, |
| "loss": 1.9887, |
| "step": 10320 |
| }, |
| { |
| "epoch": 1.156774916013438, |
| "grad_norm": 14.121618270874023, |
| "learning_rate": 3.072228443449048e-05, |
| "loss": 2.091, |
| "step": 10330 |
| }, |
| { |
| "epoch": 1.1578947368421053, |
| "grad_norm": 3.0184133052825928, |
| "learning_rate": 3.0703620754012694e-05, |
| "loss": 2.018, |
| "step": 10340 |
| }, |
| { |
| "epoch": 1.1590145576707727, |
| "grad_norm": 7.774500370025635, |
| "learning_rate": 3.0684957073534905e-05, |
| "loss": 1.6259, |
| "step": 10350 |
| }, |
| { |
| "epoch": 1.16013437849944, |
| "grad_norm": 3.3404550552368164, |
| "learning_rate": 3.066629339305711e-05, |
| "loss": 1.6987, |
| "step": 10360 |
| }, |
| { |
| "epoch": 1.1612541993281076, |
| "grad_norm": 5.787201881408691, |
| "learning_rate": 3.064762971257932e-05, |
| "loss": 1.7056, |
| "step": 10370 |
| }, |
| { |
| "epoch": 1.162374020156775, |
| "grad_norm": 4.292003631591797, |
| "learning_rate": 3.0628966032101534e-05, |
| "loss": 2.0171, |
| "step": 10380 |
| }, |
| { |
| "epoch": 1.1634938409854423, |
| "grad_norm": 8.185914993286133, |
| "learning_rate": 3.0610302351623745e-05, |
| "loss": 1.75, |
| "step": 10390 |
| }, |
| { |
| "epoch": 1.1646136618141099, |
| "grad_norm": 12.01564884185791, |
| "learning_rate": 3.059163867114595e-05, |
| "loss": 1.8298, |
| "step": 10400 |
| }, |
| { |
| "epoch": 1.1657334826427772, |
| "grad_norm": 4.348435878753662, |
| "learning_rate": 3.057297499066816e-05, |
| "loss": 1.829, |
| "step": 10410 |
| }, |
| { |
| "epoch": 1.1668533034714446, |
| "grad_norm": 12.32309341430664, |
| "learning_rate": 3.055431131019037e-05, |
| "loss": 1.6587, |
| "step": 10420 |
| }, |
| { |
| "epoch": 1.167973124300112, |
| "grad_norm": 8.916961669921875, |
| "learning_rate": 3.0535647629712585e-05, |
| "loss": 1.814, |
| "step": 10430 |
| }, |
| { |
| "epoch": 1.1690929451287795, |
| "grad_norm": 6.272485256195068, |
| "learning_rate": 3.051698394923479e-05, |
| "loss": 1.8092, |
| "step": 10440 |
| }, |
| { |
| "epoch": 1.1702127659574468, |
| "grad_norm": 12.134109497070312, |
| "learning_rate": 3.0498320268757002e-05, |
| "loss": 2.1558, |
| "step": 10450 |
| }, |
| { |
| "epoch": 1.1713325867861142, |
| "grad_norm": 5.216768741607666, |
| "learning_rate": 3.047965658827921e-05, |
| "loss": 1.9535, |
| "step": 10460 |
| }, |
| { |
| "epoch": 1.1724524076147818, |
| "grad_norm": 9.889372825622559, |
| "learning_rate": 3.0460992907801422e-05, |
| "loss": 1.6845, |
| "step": 10470 |
| }, |
| { |
| "epoch": 1.173572228443449, |
| "grad_norm": 9.93710708618164, |
| "learning_rate": 3.044232922732363e-05, |
| "loss": 1.7271, |
| "step": 10480 |
| }, |
| { |
| "epoch": 1.1746920492721165, |
| "grad_norm": 11.036845207214355, |
| "learning_rate": 3.0423665546845842e-05, |
| "loss": 1.8613, |
| "step": 10490 |
| }, |
| { |
| "epoch": 1.1758118701007838, |
| "grad_norm": 4.051137924194336, |
| "learning_rate": 3.040500186636805e-05, |
| "loss": 1.6322, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.1769316909294514, |
| "grad_norm": 12.119963645935059, |
| "learning_rate": 3.0386338185890255e-05, |
| "loss": 1.9307, |
| "step": 10510 |
| }, |
| { |
| "epoch": 1.1780515117581187, |
| "grad_norm": 3.4073593616485596, |
| "learning_rate": 3.036767450541247e-05, |
| "loss": 1.8893, |
| "step": 10520 |
| }, |
| { |
| "epoch": 1.179171332586786, |
| "grad_norm": 4.570372104644775, |
| "learning_rate": 3.0349010824934675e-05, |
| "loss": 2.0297, |
| "step": 10530 |
| }, |
| { |
| "epoch": 1.1802911534154534, |
| "grad_norm": 4.415748119354248, |
| "learning_rate": 3.033034714445689e-05, |
| "loss": 1.818, |
| "step": 10540 |
| }, |
| { |
| "epoch": 1.181410974244121, |
| "grad_norm": 4.645330429077148, |
| "learning_rate": 3.0311683463979095e-05, |
| "loss": 2.1483, |
| "step": 10550 |
| }, |
| { |
| "epoch": 1.1825307950727884, |
| "grad_norm": 5.267803192138672, |
| "learning_rate": 3.0293019783501307e-05, |
| "loss": 2.009, |
| "step": 10560 |
| }, |
| { |
| "epoch": 1.1836506159014557, |
| "grad_norm": 4.232731819152832, |
| "learning_rate": 3.0274356103023515e-05, |
| "loss": 2.0684, |
| "step": 10570 |
| }, |
| { |
| "epoch": 1.184770436730123, |
| "grad_norm": 4.118504047393799, |
| "learning_rate": 3.0255692422545727e-05, |
| "loss": 2.1593, |
| "step": 10580 |
| }, |
| { |
| "epoch": 1.1858902575587906, |
| "grad_norm": 3.479337692260742, |
| "learning_rate": 3.0237028742067935e-05, |
| "loss": 2.428, |
| "step": 10590 |
| }, |
| { |
| "epoch": 1.187010078387458, |
| "grad_norm": 9.307233810424805, |
| "learning_rate": 3.0218365061590147e-05, |
| "loss": 2.113, |
| "step": 10600 |
| }, |
| { |
| "epoch": 1.1881298992161253, |
| "grad_norm": 7.910929203033447, |
| "learning_rate": 3.0199701381112355e-05, |
| "loss": 1.8977, |
| "step": 10610 |
| }, |
| { |
| "epoch": 1.189249720044793, |
| "grad_norm": 14.38522720336914, |
| "learning_rate": 3.0181037700634567e-05, |
| "loss": 2.3208, |
| "step": 10620 |
| }, |
| { |
| "epoch": 1.1903695408734603, |
| "grad_norm": 7.7208147048950195, |
| "learning_rate": 3.0162374020156775e-05, |
| "loss": 1.9594, |
| "step": 10630 |
| }, |
| { |
| "epoch": 1.1914893617021276, |
| "grad_norm": 3.620098114013672, |
| "learning_rate": 3.0143710339678987e-05, |
| "loss": 1.999, |
| "step": 10640 |
| }, |
| { |
| "epoch": 1.192609182530795, |
| "grad_norm": 8.450474739074707, |
| "learning_rate": 3.0125046659201195e-05, |
| "loss": 2.0836, |
| "step": 10650 |
| }, |
| { |
| "epoch": 1.1937290033594625, |
| "grad_norm": 5.103601932525635, |
| "learning_rate": 3.0106382978723407e-05, |
| "loss": 2.0798, |
| "step": 10660 |
| }, |
| { |
| "epoch": 1.1948488241881299, |
| "grad_norm": 3.6850714683532715, |
| "learning_rate": 3.0087719298245615e-05, |
| "loss": 2.0787, |
| "step": 10670 |
| }, |
| { |
| "epoch": 1.1959686450167972, |
| "grad_norm": 10.51152229309082, |
| "learning_rate": 3.0069055617767827e-05, |
| "loss": 2.4453, |
| "step": 10680 |
| }, |
| { |
| "epoch": 1.1970884658454648, |
| "grad_norm": 9.488080978393555, |
| "learning_rate": 3.0050391937290035e-05, |
| "loss": 2.3139, |
| "step": 10690 |
| }, |
| { |
| "epoch": 1.1982082866741322, |
| "grad_norm": 4.7864274978637695, |
| "learning_rate": 3.0031728256812247e-05, |
| "loss": 1.9159, |
| "step": 10700 |
| }, |
| { |
| "epoch": 1.1993281075027995, |
| "grad_norm": 4.440032482147217, |
| "learning_rate": 3.0013064576334455e-05, |
| "loss": 2.0064, |
| "step": 10710 |
| }, |
| { |
| "epoch": 1.2004479283314669, |
| "grad_norm": 5.0448503494262695, |
| "learning_rate": 2.9994400895856667e-05, |
| "loss": 2.42, |
| "step": 10720 |
| }, |
| { |
| "epoch": 1.2015677491601344, |
| "grad_norm": 3.938079357147217, |
| "learning_rate": 2.9975737215378875e-05, |
| "loss": 1.8528, |
| "step": 10730 |
| }, |
| { |
| "epoch": 1.2026875699888018, |
| "grad_norm": 8.685888290405273, |
| "learning_rate": 2.9957073534901087e-05, |
| "loss": 1.8489, |
| "step": 10740 |
| }, |
| { |
| "epoch": 1.2038073908174691, |
| "grad_norm": 4.123725891113281, |
| "learning_rate": 2.9938409854423295e-05, |
| "loss": 2.4495, |
| "step": 10750 |
| }, |
| { |
| "epoch": 1.2049272116461367, |
| "grad_norm": 7.6613593101501465, |
| "learning_rate": 2.99197461739455e-05, |
| "loss": 1.8993, |
| "step": 10760 |
| }, |
| { |
| "epoch": 1.206047032474804, |
| "grad_norm": 4.251744747161865, |
| "learning_rate": 2.9901082493467715e-05, |
| "loss": 2.1757, |
| "step": 10770 |
| }, |
| { |
| "epoch": 1.2071668533034714, |
| "grad_norm": 8.871329307556152, |
| "learning_rate": 2.988241881298992e-05, |
| "loss": 1.6899, |
| "step": 10780 |
| }, |
| { |
| "epoch": 1.2082866741321387, |
| "grad_norm": 3.407541275024414, |
| "learning_rate": 2.9863755132512132e-05, |
| "loss": 2.0731, |
| "step": 10790 |
| }, |
| { |
| "epoch": 1.2094064949608063, |
| "grad_norm": 4.200164794921875, |
| "learning_rate": 2.984509145203434e-05, |
| "loss": 2.1467, |
| "step": 10800 |
| }, |
| { |
| "epoch": 1.2105263157894737, |
| "grad_norm": 5.967247009277344, |
| "learning_rate": 2.9826427771556552e-05, |
| "loss": 2.3341, |
| "step": 10810 |
| }, |
| { |
| "epoch": 1.211646136618141, |
| "grad_norm": 6.902993202209473, |
| "learning_rate": 2.980776409107876e-05, |
| "loss": 2.1097, |
| "step": 10820 |
| }, |
| { |
| "epoch": 1.2127659574468086, |
| "grad_norm": 3.4361205101013184, |
| "learning_rate": 2.9789100410600972e-05, |
| "loss": 2.2153, |
| "step": 10830 |
| }, |
| { |
| "epoch": 1.213885778275476, |
| "grad_norm": 3.588088274002075, |
| "learning_rate": 2.977043673012318e-05, |
| "loss": 2.4579, |
| "step": 10840 |
| }, |
| { |
| "epoch": 1.2150055991041433, |
| "grad_norm": 5.1023173332214355, |
| "learning_rate": 2.9751773049645392e-05, |
| "loss": 1.8074, |
| "step": 10850 |
| }, |
| { |
| "epoch": 1.2161254199328106, |
| "grad_norm": 5.618678092956543, |
| "learning_rate": 2.97331093691676e-05, |
| "loss": 1.7553, |
| "step": 10860 |
| }, |
| { |
| "epoch": 1.2172452407614782, |
| "grad_norm": 3.7290029525756836, |
| "learning_rate": 2.9714445688689812e-05, |
| "loss": 2.2483, |
| "step": 10870 |
| }, |
| { |
| "epoch": 1.2183650615901456, |
| "grad_norm": 9.374983787536621, |
| "learning_rate": 2.969578200821202e-05, |
| "loss": 2.295, |
| "step": 10880 |
| }, |
| { |
| "epoch": 1.219484882418813, |
| "grad_norm": 12.954818725585938, |
| "learning_rate": 2.9677118327734232e-05, |
| "loss": 2.1509, |
| "step": 10890 |
| }, |
| { |
| "epoch": 1.2206047032474805, |
| "grad_norm": 5.120643615722656, |
| "learning_rate": 2.965845464725644e-05, |
| "loss": 1.6961, |
| "step": 10900 |
| }, |
| { |
| "epoch": 1.2217245240761478, |
| "grad_norm": 6.945920944213867, |
| "learning_rate": 2.9639790966778652e-05, |
| "loss": 1.92, |
| "step": 10910 |
| }, |
| { |
| "epoch": 1.2228443449048152, |
| "grad_norm": 4.189951419830322, |
| "learning_rate": 2.962112728630086e-05, |
| "loss": 1.8421, |
| "step": 10920 |
| }, |
| { |
| "epoch": 1.2239641657334825, |
| "grad_norm": 13.6853666305542, |
| "learning_rate": 2.9602463605823072e-05, |
| "loss": 1.6631, |
| "step": 10930 |
| }, |
| { |
| "epoch": 1.2250839865621501, |
| "grad_norm": 13.50125789642334, |
| "learning_rate": 2.958379992534528e-05, |
| "loss": 1.9662, |
| "step": 10940 |
| }, |
| { |
| "epoch": 1.2262038073908175, |
| "grad_norm": 11.182577133178711, |
| "learning_rate": 2.9565136244867492e-05, |
| "loss": 2.474, |
| "step": 10950 |
| }, |
| { |
| "epoch": 1.2273236282194848, |
| "grad_norm": 8.855241775512695, |
| "learning_rate": 2.95464725643897e-05, |
| "loss": 1.9798, |
| "step": 10960 |
| }, |
| { |
| "epoch": 1.2284434490481524, |
| "grad_norm": 2.290292263031006, |
| "learning_rate": 2.9527808883911912e-05, |
| "loss": 1.7049, |
| "step": 10970 |
| }, |
| { |
| "epoch": 1.2295632698768197, |
| "grad_norm": 3.8447682857513428, |
| "learning_rate": 2.950914520343412e-05, |
| "loss": 2.0167, |
| "step": 10980 |
| }, |
| { |
| "epoch": 1.230683090705487, |
| "grad_norm": 3.326638698577881, |
| "learning_rate": 2.9490481522956325e-05, |
| "loss": 1.8316, |
| "step": 10990 |
| }, |
| { |
| "epoch": 1.2318029115341544, |
| "grad_norm": 9.62152099609375, |
| "learning_rate": 2.9471817842478537e-05, |
| "loss": 1.6587, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.232922732362822, |
| "grad_norm": 4.14316463470459, |
| "learning_rate": 2.9453154162000745e-05, |
| "loss": 1.8924, |
| "step": 11010 |
| }, |
| { |
| "epoch": 1.2340425531914894, |
| "grad_norm": 12.48459529876709, |
| "learning_rate": 2.9434490481522957e-05, |
| "loss": 1.8707, |
| "step": 11020 |
| }, |
| { |
| "epoch": 1.2351623740201567, |
| "grad_norm": 8.288812637329102, |
| "learning_rate": 2.9415826801045165e-05, |
| "loss": 1.8107, |
| "step": 11030 |
| }, |
| { |
| "epoch": 1.2362821948488243, |
| "grad_norm": 14.502120018005371, |
| "learning_rate": 2.9397163120567377e-05, |
| "loss": 2.3128, |
| "step": 11040 |
| }, |
| { |
| "epoch": 1.2374020156774916, |
| "grad_norm": 3.264012336730957, |
| "learning_rate": 2.9378499440089585e-05, |
| "loss": 2.1651, |
| "step": 11050 |
| }, |
| { |
| "epoch": 1.238521836506159, |
| "grad_norm": 7.62103271484375, |
| "learning_rate": 2.9359835759611797e-05, |
| "loss": 2.0078, |
| "step": 11060 |
| }, |
| { |
| "epoch": 1.2396416573348263, |
| "grad_norm": 14.445006370544434, |
| "learning_rate": 2.9341172079134005e-05, |
| "loss": 1.9942, |
| "step": 11070 |
| }, |
| { |
| "epoch": 1.240761478163494, |
| "grad_norm": 4.4992899894714355, |
| "learning_rate": 2.9322508398656217e-05, |
| "loss": 2.1584, |
| "step": 11080 |
| }, |
| { |
| "epoch": 1.2418812989921613, |
| "grad_norm": 7.469330310821533, |
| "learning_rate": 2.9303844718178425e-05, |
| "loss": 1.9318, |
| "step": 11090 |
| }, |
| { |
| "epoch": 1.2430011198208286, |
| "grad_norm": 4.640170574188232, |
| "learning_rate": 2.9285181037700637e-05, |
| "loss": 2.0114, |
| "step": 11100 |
| }, |
| { |
| "epoch": 1.2441209406494962, |
| "grad_norm": 5.856334686279297, |
| "learning_rate": 2.9266517357222845e-05, |
| "loss": 1.7062, |
| "step": 11110 |
| }, |
| { |
| "epoch": 1.2452407614781635, |
| "grad_norm": 5.0445404052734375, |
| "learning_rate": 2.9247853676745057e-05, |
| "loss": 1.8474, |
| "step": 11120 |
| }, |
| { |
| "epoch": 1.2463605823068309, |
| "grad_norm": 11.517007827758789, |
| "learning_rate": 2.9229189996267265e-05, |
| "loss": 2.4335, |
| "step": 11130 |
| }, |
| { |
| "epoch": 1.2474804031354982, |
| "grad_norm": 15.464090347290039, |
| "learning_rate": 2.9210526315789477e-05, |
| "loss": 2.2518, |
| "step": 11140 |
| }, |
| { |
| "epoch": 1.2486002239641658, |
| "grad_norm": 4.1234025955200195, |
| "learning_rate": 2.9191862635311685e-05, |
| "loss": 2.2545, |
| "step": 11150 |
| }, |
| { |
| "epoch": 1.2497200447928332, |
| "grad_norm": 12.045602798461914, |
| "learning_rate": 2.9173198954833897e-05, |
| "loss": 1.9101, |
| "step": 11160 |
| }, |
| { |
| "epoch": 1.2508398656215005, |
| "grad_norm": 9.400586128234863, |
| "learning_rate": 2.9154535274356105e-05, |
| "loss": 2.2002, |
| "step": 11170 |
| }, |
| { |
| "epoch": 1.251959686450168, |
| "grad_norm": 4.317978382110596, |
| "learning_rate": 2.9135871593878317e-05, |
| "loss": 2.008, |
| "step": 11180 |
| }, |
| { |
| "epoch": 1.2530795072788354, |
| "grad_norm": 3.613831043243408, |
| "learning_rate": 2.9117207913400525e-05, |
| "loss": 2.2759, |
| "step": 11190 |
| }, |
| { |
| "epoch": 1.2541993281075028, |
| "grad_norm": 9.903818130493164, |
| "learning_rate": 2.9098544232922737e-05, |
| "loss": 1.8002, |
| "step": 11200 |
| }, |
| { |
| "epoch": 1.2553191489361701, |
| "grad_norm": 4.823100566864014, |
| "learning_rate": 2.9079880552444942e-05, |
| "loss": 2.041, |
| "step": 11210 |
| }, |
| { |
| "epoch": 1.2564389697648375, |
| "grad_norm": 8.871933937072754, |
| "learning_rate": 2.906121687196715e-05, |
| "loss": 1.8554, |
| "step": 11220 |
| }, |
| { |
| "epoch": 1.257558790593505, |
| "grad_norm": 3.7882330417633057, |
| "learning_rate": 2.9042553191489362e-05, |
| "loss": 2.0572, |
| "step": 11230 |
| }, |
| { |
| "epoch": 1.2586786114221724, |
| "grad_norm": 3.4256579875946045, |
| "learning_rate": 2.902388951101157e-05, |
| "loss": 2.0062, |
| "step": 11240 |
| }, |
| { |
| "epoch": 1.25979843225084, |
| "grad_norm": 7.357487678527832, |
| "learning_rate": 2.9005225830533782e-05, |
| "loss": 2.0538, |
| "step": 11250 |
| }, |
| { |
| "epoch": 1.2609182530795073, |
| "grad_norm": 8.090987205505371, |
| "learning_rate": 2.898656215005599e-05, |
| "loss": 1.7325, |
| "step": 11260 |
| }, |
| { |
| "epoch": 1.2620380739081747, |
| "grad_norm": 7.4141669273376465, |
| "learning_rate": 2.8967898469578202e-05, |
| "loss": 2.2278, |
| "step": 11270 |
| }, |
| { |
| "epoch": 1.263157894736842, |
| "grad_norm": 9.293551445007324, |
| "learning_rate": 2.894923478910041e-05, |
| "loss": 1.9437, |
| "step": 11280 |
| }, |
| { |
| "epoch": 1.2642777155655094, |
| "grad_norm": 7.823407173156738, |
| "learning_rate": 2.8930571108622622e-05, |
| "loss": 1.7969, |
| "step": 11290 |
| }, |
| { |
| "epoch": 1.265397536394177, |
| "grad_norm": 7.021416664123535, |
| "learning_rate": 2.891190742814483e-05, |
| "loss": 1.7032, |
| "step": 11300 |
| }, |
| { |
| "epoch": 1.2665173572228443, |
| "grad_norm": 5.103081703186035, |
| "learning_rate": 2.8893243747667042e-05, |
| "loss": 1.9367, |
| "step": 11310 |
| }, |
| { |
| "epoch": 1.2676371780515119, |
| "grad_norm": 11.817680358886719, |
| "learning_rate": 2.887458006718925e-05, |
| "loss": 1.9144, |
| "step": 11320 |
| }, |
| { |
| "epoch": 1.2687569988801792, |
| "grad_norm": 4.182484149932861, |
| "learning_rate": 2.8855916386711462e-05, |
| "loss": 2.2476, |
| "step": 11330 |
| }, |
| { |
| "epoch": 1.2698768197088466, |
| "grad_norm": 12.477089881896973, |
| "learning_rate": 2.883725270623367e-05, |
| "loss": 2.1192, |
| "step": 11340 |
| }, |
| { |
| "epoch": 1.270996640537514, |
| "grad_norm": 4.275122165679932, |
| "learning_rate": 2.8818589025755882e-05, |
| "loss": 2.4108, |
| "step": 11350 |
| }, |
| { |
| "epoch": 1.2721164613661813, |
| "grad_norm": 15.332164764404297, |
| "learning_rate": 2.879992534527809e-05, |
| "loss": 2.3805, |
| "step": 11360 |
| }, |
| { |
| "epoch": 1.2732362821948489, |
| "grad_norm": 10.845608711242676, |
| "learning_rate": 2.8781261664800302e-05, |
| "loss": 2.3275, |
| "step": 11370 |
| }, |
| { |
| "epoch": 1.2743561030235162, |
| "grad_norm": 8.792692184448242, |
| "learning_rate": 2.876259798432251e-05, |
| "loss": 1.6722, |
| "step": 11380 |
| }, |
| { |
| "epoch": 1.2754759238521838, |
| "grad_norm": 9.559557914733887, |
| "learning_rate": 2.8743934303844722e-05, |
| "loss": 2.2257, |
| "step": 11390 |
| }, |
| { |
| "epoch": 1.2765957446808511, |
| "grad_norm": 6.456275463104248, |
| "learning_rate": 2.872527062336693e-05, |
| "loss": 1.7045, |
| "step": 11400 |
| }, |
| { |
| "epoch": 1.2777155655095185, |
| "grad_norm": 8.79680347442627, |
| "learning_rate": 2.8706606942889142e-05, |
| "loss": 2.2707, |
| "step": 11410 |
| }, |
| { |
| "epoch": 1.2788353863381858, |
| "grad_norm": 4.077367782592773, |
| "learning_rate": 2.8687943262411347e-05, |
| "loss": 2.0724, |
| "step": 11420 |
| }, |
| { |
| "epoch": 1.2799552071668532, |
| "grad_norm": 15.875419616699219, |
| "learning_rate": 2.8669279581933562e-05, |
| "loss": 2.4802, |
| "step": 11430 |
| }, |
| { |
| "epoch": 1.2810750279955208, |
| "grad_norm": 9.360994338989258, |
| "learning_rate": 2.8650615901455767e-05, |
| "loss": 2.561, |
| "step": 11440 |
| }, |
| { |
| "epoch": 1.282194848824188, |
| "grad_norm": 3.356452226638794, |
| "learning_rate": 2.8631952220977975e-05, |
| "loss": 1.6649, |
| "step": 11450 |
| }, |
| { |
| "epoch": 1.2833146696528557, |
| "grad_norm": 4.318080425262451, |
| "learning_rate": 2.8613288540500187e-05, |
| "loss": 2.1682, |
| "step": 11460 |
| }, |
| { |
| "epoch": 1.284434490481523, |
| "grad_norm": 14.16100025177002, |
| "learning_rate": 2.8594624860022395e-05, |
| "loss": 2.245, |
| "step": 11470 |
| }, |
| { |
| "epoch": 1.2855543113101904, |
| "grad_norm": 4.342535495758057, |
| "learning_rate": 2.8575961179544607e-05, |
| "loss": 1.9388, |
| "step": 11480 |
| }, |
| { |
| "epoch": 1.2866741321388577, |
| "grad_norm": 4.287493705749512, |
| "learning_rate": 2.8557297499066815e-05, |
| "loss": 2.0392, |
| "step": 11490 |
| }, |
| { |
| "epoch": 1.287793952967525, |
| "grad_norm": 4.413599967956543, |
| "learning_rate": 2.8538633818589027e-05, |
| "loss": 1.7355, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.2889137737961927, |
| "grad_norm": 6.577482223510742, |
| "learning_rate": 2.8519970138111235e-05, |
| "loss": 2.0267, |
| "step": 11510 |
| }, |
| { |
| "epoch": 1.29003359462486, |
| "grad_norm": 16.563228607177734, |
| "learning_rate": 2.8501306457633447e-05, |
| "loss": 1.7356, |
| "step": 11520 |
| }, |
| { |
| "epoch": 1.2911534154535274, |
| "grad_norm": 4.8970255851745605, |
| "learning_rate": 2.8482642777155655e-05, |
| "loss": 2.0905, |
| "step": 11530 |
| }, |
| { |
| "epoch": 1.292273236282195, |
| "grad_norm": 3.6036787033081055, |
| "learning_rate": 2.8463979096677867e-05, |
| "loss": 1.8632, |
| "step": 11540 |
| }, |
| { |
| "epoch": 1.2933930571108623, |
| "grad_norm": 3.7850587368011475, |
| "learning_rate": 2.8445315416200075e-05, |
| "loss": 1.8984, |
| "step": 11550 |
| }, |
| { |
| "epoch": 1.2945128779395296, |
| "grad_norm": 3.808590888977051, |
| "learning_rate": 2.8426651735722287e-05, |
| "loss": 2.0321, |
| "step": 11560 |
| }, |
| { |
| "epoch": 1.295632698768197, |
| "grad_norm": 6.799190044403076, |
| "learning_rate": 2.8407988055244495e-05, |
| "loss": 2.3606, |
| "step": 11570 |
| }, |
| { |
| "epoch": 1.2967525195968646, |
| "grad_norm": 13.739738464355469, |
| "learning_rate": 2.8389324374766707e-05, |
| "loss": 2.4276, |
| "step": 11580 |
| }, |
| { |
| "epoch": 1.297872340425532, |
| "grad_norm": 10.142343521118164, |
| "learning_rate": 2.8370660694288915e-05, |
| "loss": 2.1635, |
| "step": 11590 |
| }, |
| { |
| "epoch": 1.2989921612541993, |
| "grad_norm": 7.874263286590576, |
| "learning_rate": 2.8351997013811127e-05, |
| "loss": 1.9213, |
| "step": 11600 |
| }, |
| { |
| "epoch": 1.3001119820828668, |
| "grad_norm": 9.147733688354492, |
| "learning_rate": 2.8333333333333335e-05, |
| "loss": 1.9669, |
| "step": 11610 |
| }, |
| { |
| "epoch": 1.3012318029115342, |
| "grad_norm": 4.748540878295898, |
| "learning_rate": 2.8314669652855547e-05, |
| "loss": 2.1498, |
| "step": 11620 |
| }, |
| { |
| "epoch": 1.3023516237402015, |
| "grad_norm": 3.711635112762451, |
| "learning_rate": 2.8296005972377755e-05, |
| "loss": 2.0037, |
| "step": 11630 |
| }, |
| { |
| "epoch": 1.3034714445688689, |
| "grad_norm": 9.331302642822266, |
| "learning_rate": 2.8277342291899967e-05, |
| "loss": 2.2264, |
| "step": 11640 |
| }, |
| { |
| "epoch": 1.3045912653975364, |
| "grad_norm": 3.66086483001709, |
| "learning_rate": 2.8258678611422172e-05, |
| "loss": 1.9577, |
| "step": 11650 |
| }, |
| { |
| "epoch": 1.3057110862262038, |
| "grad_norm": 3.9760518074035645, |
| "learning_rate": 2.8240014930944387e-05, |
| "loss": 2.4175, |
| "step": 11660 |
| }, |
| { |
| "epoch": 1.3068309070548711, |
| "grad_norm": 11.729776382446289, |
| "learning_rate": 2.8221351250466592e-05, |
| "loss": 1.8006, |
| "step": 11670 |
| }, |
| { |
| "epoch": 1.3079507278835387, |
| "grad_norm": 5.228203296661377, |
| "learning_rate": 2.82026875699888e-05, |
| "loss": 2.1344, |
| "step": 11680 |
| }, |
| { |
| "epoch": 1.309070548712206, |
| "grad_norm": 4.904013633728027, |
| "learning_rate": 2.8184023889511012e-05, |
| "loss": 1.3898, |
| "step": 11690 |
| }, |
| { |
| "epoch": 1.3101903695408734, |
| "grad_norm": 10.309314727783203, |
| "learning_rate": 2.816536020903322e-05, |
| "loss": 1.6145, |
| "step": 11700 |
| }, |
| { |
| "epoch": 1.3113101903695408, |
| "grad_norm": 6.570261478424072, |
| "learning_rate": 2.8146696528555432e-05, |
| "loss": 2.0292, |
| "step": 11710 |
| }, |
| { |
| "epoch": 1.3124300111982083, |
| "grad_norm": 4.200310707092285, |
| "learning_rate": 2.812803284807764e-05, |
| "loss": 2.086, |
| "step": 11720 |
| }, |
| { |
| "epoch": 1.3135498320268757, |
| "grad_norm": 10.7840576171875, |
| "learning_rate": 2.8109369167599852e-05, |
| "loss": 2.0459, |
| "step": 11730 |
| }, |
| { |
| "epoch": 1.314669652855543, |
| "grad_norm": 3.7457571029663086, |
| "learning_rate": 2.809070548712206e-05, |
| "loss": 1.6617, |
| "step": 11740 |
| }, |
| { |
| "epoch": 1.3157894736842106, |
| "grad_norm": 4.625324726104736, |
| "learning_rate": 2.8072041806644272e-05, |
| "loss": 1.5736, |
| "step": 11750 |
| }, |
| { |
| "epoch": 1.316909294512878, |
| "grad_norm": 9.565194129943848, |
| "learning_rate": 2.805337812616648e-05, |
| "loss": 2.4976, |
| "step": 11760 |
| }, |
| { |
| "epoch": 1.3180291153415453, |
| "grad_norm": 10.894997596740723, |
| "learning_rate": 2.8034714445688692e-05, |
| "loss": 1.9974, |
| "step": 11770 |
| }, |
| { |
| "epoch": 1.3191489361702127, |
| "grad_norm": 4.6816725730896, |
| "learning_rate": 2.80160507652109e-05, |
| "loss": 2.1758, |
| "step": 11780 |
| }, |
| { |
| "epoch": 1.3202687569988802, |
| "grad_norm": 3.4345543384552, |
| "learning_rate": 2.7997387084733112e-05, |
| "loss": 2.0768, |
| "step": 11790 |
| }, |
| { |
| "epoch": 1.3213885778275476, |
| "grad_norm": 11.61649227142334, |
| "learning_rate": 2.797872340425532e-05, |
| "loss": 2.135, |
| "step": 11800 |
| }, |
| { |
| "epoch": 1.322508398656215, |
| "grad_norm": 3.6992645263671875, |
| "learning_rate": 2.7960059723777532e-05, |
| "loss": 2.0889, |
| "step": 11810 |
| }, |
| { |
| "epoch": 1.3236282194848825, |
| "grad_norm": 3.465416193008423, |
| "learning_rate": 2.794139604329974e-05, |
| "loss": 2.0867, |
| "step": 11820 |
| }, |
| { |
| "epoch": 1.3247480403135499, |
| "grad_norm": 7.6769795417785645, |
| "learning_rate": 2.7922732362821952e-05, |
| "loss": 1.9802, |
| "step": 11830 |
| }, |
| { |
| "epoch": 1.3258678611422172, |
| "grad_norm": 4.010658264160156, |
| "learning_rate": 2.790406868234416e-05, |
| "loss": 2.2954, |
| "step": 11840 |
| }, |
| { |
| "epoch": 1.3269876819708846, |
| "grad_norm": 5.182217597961426, |
| "learning_rate": 2.7885405001866372e-05, |
| "loss": 2.3104, |
| "step": 11850 |
| }, |
| { |
| "epoch": 1.3281075027995521, |
| "grad_norm": 9.429098129272461, |
| "learning_rate": 2.7866741321388577e-05, |
| "loss": 1.7616, |
| "step": 11860 |
| }, |
| { |
| "epoch": 1.3292273236282195, |
| "grad_norm": 3.6495516300201416, |
| "learning_rate": 2.7848077640910792e-05, |
| "loss": 1.7191, |
| "step": 11870 |
| }, |
| { |
| "epoch": 1.3303471444568868, |
| "grad_norm": 3.693429470062256, |
| "learning_rate": 2.7829413960432997e-05, |
| "loss": 2.2813, |
| "step": 11880 |
| }, |
| { |
| "epoch": 1.3314669652855544, |
| "grad_norm": 3.9754602909088135, |
| "learning_rate": 2.7810750279955212e-05, |
| "loss": 1.9402, |
| "step": 11890 |
| }, |
| { |
| "epoch": 1.3325867861142218, |
| "grad_norm": 8.377337455749512, |
| "learning_rate": 2.7792086599477417e-05, |
| "loss": 2.0752, |
| "step": 11900 |
| }, |
| { |
| "epoch": 1.3337066069428891, |
| "grad_norm": 7.4295220375061035, |
| "learning_rate": 2.7773422918999625e-05, |
| "loss": 1.6284, |
| "step": 11910 |
| }, |
| { |
| "epoch": 1.3348264277715565, |
| "grad_norm": 3.935297966003418, |
| "learning_rate": 2.7754759238521837e-05, |
| "loss": 2.2115, |
| "step": 11920 |
| }, |
| { |
| "epoch": 1.335946248600224, |
| "grad_norm": 9.240707397460938, |
| "learning_rate": 2.7736095558044045e-05, |
| "loss": 1.8889, |
| "step": 11930 |
| }, |
| { |
| "epoch": 1.3370660694288914, |
| "grad_norm": 11.241867065429688, |
| "learning_rate": 2.7717431877566257e-05, |
| "loss": 1.7422, |
| "step": 11940 |
| }, |
| { |
| "epoch": 1.3381858902575587, |
| "grad_norm": 5.747684478759766, |
| "learning_rate": 2.7698768197088465e-05, |
| "loss": 1.8776, |
| "step": 11950 |
| }, |
| { |
| "epoch": 1.3393057110862263, |
| "grad_norm": 8.002111434936523, |
| "learning_rate": 2.7680104516610677e-05, |
| "loss": 2.0645, |
| "step": 11960 |
| }, |
| { |
| "epoch": 1.3404255319148937, |
| "grad_norm": 6.661399841308594, |
| "learning_rate": 2.7661440836132885e-05, |
| "loss": 1.914, |
| "step": 11970 |
| }, |
| { |
| "epoch": 1.341545352743561, |
| "grad_norm": 4.897961616516113, |
| "learning_rate": 2.7642777155655097e-05, |
| "loss": 2.0699, |
| "step": 11980 |
| }, |
| { |
| "epoch": 1.3426651735722284, |
| "grad_norm": 8.352503776550293, |
| "learning_rate": 2.7624113475177305e-05, |
| "loss": 2.3203, |
| "step": 11990 |
| }, |
| { |
| "epoch": 1.343784994400896, |
| "grad_norm": 5.313516616821289, |
| "learning_rate": 2.7605449794699517e-05, |
| "loss": 1.4091, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.3449048152295633, |
| "grad_norm": 8.284523010253906, |
| "learning_rate": 2.7586786114221725e-05, |
| "loss": 2.2698, |
| "step": 12010 |
| }, |
| { |
| "epoch": 1.3460246360582306, |
| "grad_norm": 4.834831237792969, |
| "learning_rate": 2.7568122433743937e-05, |
| "loss": 1.9009, |
| "step": 12020 |
| }, |
| { |
| "epoch": 1.3471444568868982, |
| "grad_norm": 14.059358596801758, |
| "learning_rate": 2.7549458753266145e-05, |
| "loss": 2.1775, |
| "step": 12030 |
| }, |
| { |
| "epoch": 1.3482642777155656, |
| "grad_norm": 8.378131866455078, |
| "learning_rate": 2.7530795072788357e-05, |
| "loss": 2.1616, |
| "step": 12040 |
| }, |
| { |
| "epoch": 1.349384098544233, |
| "grad_norm": 3.4480719566345215, |
| "learning_rate": 2.7512131392310565e-05, |
| "loss": 2.2743, |
| "step": 12050 |
| }, |
| { |
| "epoch": 1.3505039193729003, |
| "grad_norm": 4.051682472229004, |
| "learning_rate": 2.7493467711832777e-05, |
| "loss": 1.7351, |
| "step": 12060 |
| }, |
| { |
| "epoch": 1.3516237402015676, |
| "grad_norm": 15.0495023727417, |
| "learning_rate": 2.7474804031354982e-05, |
| "loss": 2.3378, |
| "step": 12070 |
| }, |
| { |
| "epoch": 1.3527435610302352, |
| "grad_norm": 12.420659065246582, |
| "learning_rate": 2.7456140350877197e-05, |
| "loss": 2.0799, |
| "step": 12080 |
| }, |
| { |
| "epoch": 1.3538633818589025, |
| "grad_norm": 3.576589345932007, |
| "learning_rate": 2.7437476670399402e-05, |
| "loss": 1.7943, |
| "step": 12090 |
| }, |
| { |
| "epoch": 1.35498320268757, |
| "grad_norm": 9.293567657470703, |
| "learning_rate": 2.7418812989921617e-05, |
| "loss": 2.0307, |
| "step": 12100 |
| }, |
| { |
| "epoch": 1.3561030235162375, |
| "grad_norm": 4.058133125305176, |
| "learning_rate": 2.7400149309443822e-05, |
| "loss": 1.6863, |
| "step": 12110 |
| }, |
| { |
| "epoch": 1.3572228443449048, |
| "grad_norm": 3.99945330619812, |
| "learning_rate": 2.7381485628966037e-05, |
| "loss": 2.2194, |
| "step": 12120 |
| }, |
| { |
| "epoch": 1.3583426651735722, |
| "grad_norm": 10.465315818786621, |
| "learning_rate": 2.7362821948488242e-05, |
| "loss": 1.7545, |
| "step": 12130 |
| }, |
| { |
| "epoch": 1.3594624860022395, |
| "grad_norm": 5.384920120239258, |
| "learning_rate": 2.734415826801045e-05, |
| "loss": 1.6758, |
| "step": 12140 |
| }, |
| { |
| "epoch": 1.360582306830907, |
| "grad_norm": 3.6617019176483154, |
| "learning_rate": 2.7325494587532662e-05, |
| "loss": 1.8267, |
| "step": 12150 |
| }, |
| { |
| "epoch": 1.3617021276595744, |
| "grad_norm": 5.872734069824219, |
| "learning_rate": 2.730683090705487e-05, |
| "loss": 1.7598, |
| "step": 12160 |
| }, |
| { |
| "epoch": 1.362821948488242, |
| "grad_norm": 3.7369675636291504, |
| "learning_rate": 2.7288167226577082e-05, |
| "loss": 1.8391, |
| "step": 12170 |
| }, |
| { |
| "epoch": 1.3639417693169094, |
| "grad_norm": 4.193478107452393, |
| "learning_rate": 2.726950354609929e-05, |
| "loss": 1.9595, |
| "step": 12180 |
| }, |
| { |
| "epoch": 1.3650615901455767, |
| "grad_norm": 11.2186918258667, |
| "learning_rate": 2.7250839865621502e-05, |
| "loss": 2.3907, |
| "step": 12190 |
| }, |
| { |
| "epoch": 1.366181410974244, |
| "grad_norm": 10.962636947631836, |
| "learning_rate": 2.723217618514371e-05, |
| "loss": 1.8998, |
| "step": 12200 |
| }, |
| { |
| "epoch": 1.3673012318029114, |
| "grad_norm": 6.624661922454834, |
| "learning_rate": 2.7213512504665922e-05, |
| "loss": 2.1747, |
| "step": 12210 |
| }, |
| { |
| "epoch": 1.368421052631579, |
| "grad_norm": 3.9043078422546387, |
| "learning_rate": 2.719484882418813e-05, |
| "loss": 1.9947, |
| "step": 12220 |
| }, |
| { |
| "epoch": 1.3695408734602463, |
| "grad_norm": 13.550288200378418, |
| "learning_rate": 2.7176185143710342e-05, |
| "loss": 1.7701, |
| "step": 12230 |
| }, |
| { |
| "epoch": 1.370660694288914, |
| "grad_norm": 3.8633484840393066, |
| "learning_rate": 2.715752146323255e-05, |
| "loss": 1.701, |
| "step": 12240 |
| }, |
| { |
| "epoch": 1.3717805151175813, |
| "grad_norm": 15.98534870147705, |
| "learning_rate": 2.7138857782754762e-05, |
| "loss": 2.2531, |
| "step": 12250 |
| }, |
| { |
| "epoch": 1.3729003359462486, |
| "grad_norm": 3.9303388595581055, |
| "learning_rate": 2.712019410227697e-05, |
| "loss": 2.0902, |
| "step": 12260 |
| }, |
| { |
| "epoch": 1.374020156774916, |
| "grad_norm": 5.423203945159912, |
| "learning_rate": 2.7101530421799182e-05, |
| "loss": 2.3482, |
| "step": 12270 |
| }, |
| { |
| "epoch": 1.3751399776035833, |
| "grad_norm": 12.370367050170898, |
| "learning_rate": 2.708286674132139e-05, |
| "loss": 1.9292, |
| "step": 12280 |
| }, |
| { |
| "epoch": 1.3762597984322509, |
| "grad_norm": 5.228443622589111, |
| "learning_rate": 2.7064203060843602e-05, |
| "loss": 2.1979, |
| "step": 12290 |
| }, |
| { |
| "epoch": 1.3773796192609182, |
| "grad_norm": 5.5691423416137695, |
| "learning_rate": 2.7045539380365807e-05, |
| "loss": 2.3052, |
| "step": 12300 |
| }, |
| { |
| "epoch": 1.3784994400895858, |
| "grad_norm": 4.302522659301758, |
| "learning_rate": 2.7026875699888022e-05, |
| "loss": 2.2908, |
| "step": 12310 |
| }, |
| { |
| "epoch": 1.3796192609182532, |
| "grad_norm": 5.334700584411621, |
| "learning_rate": 2.7008212019410227e-05, |
| "loss": 1.704, |
| "step": 12320 |
| }, |
| { |
| "epoch": 1.3807390817469205, |
| "grad_norm": 6.528292655944824, |
| "learning_rate": 2.6989548338932442e-05, |
| "loss": 2.0202, |
| "step": 12330 |
| }, |
| { |
| "epoch": 1.3818589025755879, |
| "grad_norm": 8.879626274108887, |
| "learning_rate": 2.6970884658454647e-05, |
| "loss": 1.9973, |
| "step": 12340 |
| }, |
| { |
| "epoch": 1.3829787234042552, |
| "grad_norm": 12.133624076843262, |
| "learning_rate": 2.6952220977976862e-05, |
| "loss": 2.0525, |
| "step": 12350 |
| }, |
| { |
| "epoch": 1.3840985442329228, |
| "grad_norm": 3.8778038024902344, |
| "learning_rate": 2.6933557297499067e-05, |
| "loss": 2.7127, |
| "step": 12360 |
| }, |
| { |
| "epoch": 1.3852183650615901, |
| "grad_norm": 12.094010353088379, |
| "learning_rate": 2.6914893617021282e-05, |
| "loss": 2.2501, |
| "step": 12370 |
| }, |
| { |
| "epoch": 1.3863381858902575, |
| "grad_norm": 14.439510345458984, |
| "learning_rate": 2.6896229936543487e-05, |
| "loss": 2.4553, |
| "step": 12380 |
| }, |
| { |
| "epoch": 1.387458006718925, |
| "grad_norm": 13.809215545654297, |
| "learning_rate": 2.6877566256065695e-05, |
| "loss": 1.7811, |
| "step": 12390 |
| }, |
| { |
| "epoch": 1.3885778275475924, |
| "grad_norm": 3.6743390560150146, |
| "learning_rate": 2.6858902575587907e-05, |
| "loss": 1.5438, |
| "step": 12400 |
| }, |
| { |
| "epoch": 1.3896976483762598, |
| "grad_norm": 4.393309116363525, |
| "learning_rate": 2.6840238895110115e-05, |
| "loss": 1.4149, |
| "step": 12410 |
| }, |
| { |
| "epoch": 1.390817469204927, |
| "grad_norm": 8.418529510498047, |
| "learning_rate": 2.6821575214632327e-05, |
| "loss": 1.8214, |
| "step": 12420 |
| }, |
| { |
| "epoch": 1.3919372900335947, |
| "grad_norm": 11.139238357543945, |
| "learning_rate": 2.6802911534154535e-05, |
| "loss": 2.2626, |
| "step": 12430 |
| }, |
| { |
| "epoch": 1.393057110862262, |
| "grad_norm": 7.604578495025635, |
| "learning_rate": 2.6784247853676747e-05, |
| "loss": 2.5468, |
| "step": 12440 |
| }, |
| { |
| "epoch": 1.3941769316909294, |
| "grad_norm": 4.145791053771973, |
| "learning_rate": 2.6765584173198955e-05, |
| "loss": 1.6144, |
| "step": 12450 |
| }, |
| { |
| "epoch": 1.395296752519597, |
| "grad_norm": 6.010091781616211, |
| "learning_rate": 2.6746920492721167e-05, |
| "loss": 2.0923, |
| "step": 12460 |
| }, |
| { |
| "epoch": 1.3964165733482643, |
| "grad_norm": 10.133779525756836, |
| "learning_rate": 2.6728256812243375e-05, |
| "loss": 1.9483, |
| "step": 12470 |
| }, |
| { |
| "epoch": 1.3975363941769317, |
| "grad_norm": 3.6997570991516113, |
| "learning_rate": 2.6709593131765587e-05, |
| "loss": 1.701, |
| "step": 12480 |
| }, |
| { |
| "epoch": 1.398656215005599, |
| "grad_norm": 10.962636947631836, |
| "learning_rate": 2.6690929451287795e-05, |
| "loss": 2.384, |
| "step": 12490 |
| }, |
| { |
| "epoch": 1.3997760358342666, |
| "grad_norm": 3.6743199825286865, |
| "learning_rate": 2.6672265770810007e-05, |
| "loss": 1.9178, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.400895856662934, |
| "grad_norm": 12.958653450012207, |
| "learning_rate": 2.6653602090332212e-05, |
| "loss": 2.594, |
| "step": 12510 |
| }, |
| { |
| "epoch": 1.4020156774916013, |
| "grad_norm": 11.249685287475586, |
| "learning_rate": 2.6634938409854427e-05, |
| "loss": 1.6907, |
| "step": 12520 |
| }, |
| { |
| "epoch": 1.4031354983202688, |
| "grad_norm": 4.222360134124756, |
| "learning_rate": 2.6616274729376632e-05, |
| "loss": 2.1065, |
| "step": 12530 |
| }, |
| { |
| "epoch": 1.4042553191489362, |
| "grad_norm": 11.4049654006958, |
| "learning_rate": 2.6597611048898847e-05, |
| "loss": 2.0968, |
| "step": 12540 |
| }, |
| { |
| "epoch": 1.4053751399776035, |
| "grad_norm": 4.947079181671143, |
| "learning_rate": 2.6578947368421052e-05, |
| "loss": 1.942, |
| "step": 12550 |
| }, |
| { |
| "epoch": 1.406494960806271, |
| "grad_norm": 11.592204093933105, |
| "learning_rate": 2.6560283687943267e-05, |
| "loss": 1.9633, |
| "step": 12560 |
| }, |
| { |
| "epoch": 1.4076147816349385, |
| "grad_norm": 5.250865459442139, |
| "learning_rate": 2.6541620007465472e-05, |
| "loss": 1.9282, |
| "step": 12570 |
| }, |
| { |
| "epoch": 1.4087346024636058, |
| "grad_norm": 12.925929069519043, |
| "learning_rate": 2.6522956326987687e-05, |
| "loss": 2.3242, |
| "step": 12580 |
| }, |
| { |
| "epoch": 1.4098544232922732, |
| "grad_norm": 13.145225524902344, |
| "learning_rate": 2.6504292646509892e-05, |
| "loss": 2.0153, |
| "step": 12590 |
| }, |
| { |
| "epoch": 1.4109742441209407, |
| "grad_norm": 5.17841100692749, |
| "learning_rate": 2.6485628966032107e-05, |
| "loss": 1.7325, |
| "step": 12600 |
| }, |
| { |
| "epoch": 1.412094064949608, |
| "grad_norm": 4.1737213134765625, |
| "learning_rate": 2.6466965285554312e-05, |
| "loss": 1.8959, |
| "step": 12610 |
| }, |
| { |
| "epoch": 1.4132138857782754, |
| "grad_norm": 14.844003677368164, |
| "learning_rate": 2.644830160507652e-05, |
| "loss": 2.1662, |
| "step": 12620 |
| }, |
| { |
| "epoch": 1.4143337066069428, |
| "grad_norm": 3.8433620929718018, |
| "learning_rate": 2.6429637924598732e-05, |
| "loss": 1.7181, |
| "step": 12630 |
| }, |
| { |
| "epoch": 1.4154535274356104, |
| "grad_norm": 7.718703746795654, |
| "learning_rate": 2.641097424412094e-05, |
| "loss": 1.9043, |
| "step": 12640 |
| }, |
| { |
| "epoch": 1.4165733482642777, |
| "grad_norm": 9.38231086730957, |
| "learning_rate": 2.6392310563643152e-05, |
| "loss": 1.8313, |
| "step": 12650 |
| }, |
| { |
| "epoch": 1.417693169092945, |
| "grad_norm": 13.019353866577148, |
| "learning_rate": 2.637364688316536e-05, |
| "loss": 2.2505, |
| "step": 12660 |
| }, |
| { |
| "epoch": 1.4188129899216126, |
| "grad_norm": 14.78768253326416, |
| "learning_rate": 2.6354983202687572e-05, |
| "loss": 1.8465, |
| "step": 12670 |
| }, |
| { |
| "epoch": 1.41993281075028, |
| "grad_norm": 12.229498863220215, |
| "learning_rate": 2.633631952220978e-05, |
| "loss": 2.4691, |
| "step": 12680 |
| }, |
| { |
| "epoch": 1.4210526315789473, |
| "grad_norm": 4.5396294593811035, |
| "learning_rate": 2.6317655841731992e-05, |
| "loss": 2.0822, |
| "step": 12690 |
| }, |
| { |
| "epoch": 1.4221724524076147, |
| "grad_norm": 11.260706901550293, |
| "learning_rate": 2.62989921612542e-05, |
| "loss": 1.8299, |
| "step": 12700 |
| }, |
| { |
| "epoch": 1.4232922732362823, |
| "grad_norm": 7.562645435333252, |
| "learning_rate": 2.6280328480776412e-05, |
| "loss": 1.9251, |
| "step": 12710 |
| }, |
| { |
| "epoch": 1.4244120940649496, |
| "grad_norm": 12.989692687988281, |
| "learning_rate": 2.6261664800298617e-05, |
| "loss": 1.8278, |
| "step": 12720 |
| }, |
| { |
| "epoch": 1.425531914893617, |
| "grad_norm": 15.355886459350586, |
| "learning_rate": 2.6243001119820832e-05, |
| "loss": 2.2122, |
| "step": 12730 |
| }, |
| { |
| "epoch": 1.4266517357222845, |
| "grad_norm": 4.491844654083252, |
| "learning_rate": 2.6224337439343037e-05, |
| "loss": 1.8286, |
| "step": 12740 |
| }, |
| { |
| "epoch": 1.427771556550952, |
| "grad_norm": 11.244644165039062, |
| "learning_rate": 2.6205673758865252e-05, |
| "loss": 2.2198, |
| "step": 12750 |
| }, |
| { |
| "epoch": 1.4288913773796192, |
| "grad_norm": 4.543248176574707, |
| "learning_rate": 2.6187010078387457e-05, |
| "loss": 2.1489, |
| "step": 12760 |
| }, |
| { |
| "epoch": 1.4300111982082866, |
| "grad_norm": 5.585264205932617, |
| "learning_rate": 2.6168346397909672e-05, |
| "loss": 1.9485, |
| "step": 12770 |
| }, |
| { |
| "epoch": 1.4311310190369542, |
| "grad_norm": 16.626436233520508, |
| "learning_rate": 2.6149682717431877e-05, |
| "loss": 2.0467, |
| "step": 12780 |
| }, |
| { |
| "epoch": 1.4322508398656215, |
| "grad_norm": 5.619150161743164, |
| "learning_rate": 2.6131019036954092e-05, |
| "loss": 2.0983, |
| "step": 12790 |
| }, |
| { |
| "epoch": 1.4333706606942889, |
| "grad_norm": 3.807325839996338, |
| "learning_rate": 2.6112355356476297e-05, |
| "loss": 1.736, |
| "step": 12800 |
| }, |
| { |
| "epoch": 1.4344904815229564, |
| "grad_norm": 16.317922592163086, |
| "learning_rate": 2.6093691675998512e-05, |
| "loss": 2.26, |
| "step": 12810 |
| }, |
| { |
| "epoch": 1.4356103023516238, |
| "grad_norm": 4.438934326171875, |
| "learning_rate": 2.6075027995520717e-05, |
| "loss": 2.2066, |
| "step": 12820 |
| }, |
| { |
| "epoch": 1.4367301231802911, |
| "grad_norm": 15.27676010131836, |
| "learning_rate": 2.605636431504293e-05, |
| "loss": 2.0826, |
| "step": 12830 |
| }, |
| { |
| "epoch": 1.4378499440089585, |
| "grad_norm": 7.73093843460083, |
| "learning_rate": 2.6037700634565137e-05, |
| "loss": 1.9085, |
| "step": 12840 |
| }, |
| { |
| "epoch": 1.4389697648376258, |
| "grad_norm": 12.442554473876953, |
| "learning_rate": 2.6019036954087345e-05, |
| "loss": 2.1945, |
| "step": 12850 |
| }, |
| { |
| "epoch": 1.4400895856662934, |
| "grad_norm": 21.156641006469727, |
| "learning_rate": 2.6000373273609557e-05, |
| "loss": 2.3811, |
| "step": 12860 |
| }, |
| { |
| "epoch": 1.4412094064949608, |
| "grad_norm": 4.013643741607666, |
| "learning_rate": 2.5981709593131765e-05, |
| "loss": 2.0067, |
| "step": 12870 |
| }, |
| { |
| "epoch": 1.4423292273236283, |
| "grad_norm": 4.505249977111816, |
| "learning_rate": 2.5963045912653977e-05, |
| "loss": 2.1332, |
| "step": 12880 |
| }, |
| { |
| "epoch": 1.4434490481522957, |
| "grad_norm": 4.283412456512451, |
| "learning_rate": 2.5944382232176185e-05, |
| "loss": 2.2716, |
| "step": 12890 |
| }, |
| { |
| "epoch": 1.444568868980963, |
| "grad_norm": 9.626873016357422, |
| "learning_rate": 2.5925718551698397e-05, |
| "loss": 2.2156, |
| "step": 12900 |
| }, |
| { |
| "epoch": 1.4456886898096304, |
| "grad_norm": 10.732905387878418, |
| "learning_rate": 2.5907054871220605e-05, |
| "loss": 2.1807, |
| "step": 12910 |
| }, |
| { |
| "epoch": 1.4468085106382977, |
| "grad_norm": 7.605788707733154, |
| "learning_rate": 2.5888391190742817e-05, |
| "loss": 1.9077, |
| "step": 12920 |
| }, |
| { |
| "epoch": 1.4479283314669653, |
| "grad_norm": 3.839841604232788, |
| "learning_rate": 2.5869727510265025e-05, |
| "loss": 1.8521, |
| "step": 12930 |
| }, |
| { |
| "epoch": 1.4490481522956327, |
| "grad_norm": 3.6968777179718018, |
| "learning_rate": 2.5851063829787237e-05, |
| "loss": 1.7962, |
| "step": 12940 |
| }, |
| { |
| "epoch": 1.4501679731243002, |
| "grad_norm": 8.658880233764648, |
| "learning_rate": 2.5832400149309442e-05, |
| "loss": 1.8974, |
| "step": 12950 |
| }, |
| { |
| "epoch": 1.4512877939529676, |
| "grad_norm": 3.764810085296631, |
| "learning_rate": 2.5813736468831657e-05, |
| "loss": 2.2889, |
| "step": 12960 |
| }, |
| { |
| "epoch": 1.452407614781635, |
| "grad_norm": 7.589803218841553, |
| "learning_rate": 2.5795072788353862e-05, |
| "loss": 1.701, |
| "step": 12970 |
| }, |
| { |
| "epoch": 1.4535274356103023, |
| "grad_norm": 15.206584930419922, |
| "learning_rate": 2.5776409107876077e-05, |
| "loss": 2.196, |
| "step": 12980 |
| }, |
| { |
| "epoch": 1.4546472564389696, |
| "grad_norm": 13.450560569763184, |
| "learning_rate": 2.5757745427398282e-05, |
| "loss": 1.7454, |
| "step": 12990 |
| }, |
| { |
| "epoch": 1.4557670772676372, |
| "grad_norm": 9.42431926727295, |
| "learning_rate": 2.5739081746920497e-05, |
| "loss": 2.2713, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.4568868980963046, |
| "grad_norm": 6.517592906951904, |
| "learning_rate": 2.5720418066442702e-05, |
| "loss": 1.9129, |
| "step": 13010 |
| }, |
| { |
| "epoch": 1.4580067189249721, |
| "grad_norm": 3.8636746406555176, |
| "learning_rate": 2.5701754385964917e-05, |
| "loss": 1.8176, |
| "step": 13020 |
| }, |
| { |
| "epoch": 1.4591265397536395, |
| "grad_norm": 4.5231523513793945, |
| "learning_rate": 2.5683090705487122e-05, |
| "loss": 2.1806, |
| "step": 13030 |
| }, |
| { |
| "epoch": 1.4602463605823068, |
| "grad_norm": 4.154500484466553, |
| "learning_rate": 2.5664427025009334e-05, |
| "loss": 2.2852, |
| "step": 13040 |
| }, |
| { |
| "epoch": 1.4613661814109742, |
| "grad_norm": 4.697875022888184, |
| "learning_rate": 2.5645763344531542e-05, |
| "loss": 2.0672, |
| "step": 13050 |
| }, |
| { |
| "epoch": 1.4624860022396415, |
| "grad_norm": 5.5377702713012695, |
| "learning_rate": 2.5627099664053754e-05, |
| "loss": 1.9248, |
| "step": 13060 |
| }, |
| { |
| "epoch": 1.463605823068309, |
| "grad_norm": 3.9264962673187256, |
| "learning_rate": 2.5608435983575962e-05, |
| "loss": 2.226, |
| "step": 13070 |
| }, |
| { |
| "epoch": 1.4647256438969765, |
| "grad_norm": 4.375810146331787, |
| "learning_rate": 2.558977230309817e-05, |
| "loss": 1.8459, |
| "step": 13080 |
| }, |
| { |
| "epoch": 1.465845464725644, |
| "grad_norm": 9.115229606628418, |
| "learning_rate": 2.5571108622620382e-05, |
| "loss": 2.0442, |
| "step": 13090 |
| }, |
| { |
| "epoch": 1.4669652855543114, |
| "grad_norm": 11.976322174072266, |
| "learning_rate": 2.555244494214259e-05, |
| "loss": 1.6853, |
| "step": 13100 |
| }, |
| { |
| "epoch": 1.4680851063829787, |
| "grad_norm": 4.857337951660156, |
| "learning_rate": 2.5533781261664802e-05, |
| "loss": 2.2852, |
| "step": 13110 |
| }, |
| { |
| "epoch": 1.469204927211646, |
| "grad_norm": 15.375901222229004, |
| "learning_rate": 2.551511758118701e-05, |
| "loss": 2.0554, |
| "step": 13120 |
| }, |
| { |
| "epoch": 1.4703247480403134, |
| "grad_norm": 10.337723731994629, |
| "learning_rate": 2.5496453900709222e-05, |
| "loss": 2.0575, |
| "step": 13130 |
| }, |
| { |
| "epoch": 1.471444568868981, |
| "grad_norm": 9.989140510559082, |
| "learning_rate": 2.547779022023143e-05, |
| "loss": 2.2119, |
| "step": 13140 |
| }, |
| { |
| "epoch": 1.4725643896976484, |
| "grad_norm": 8.458128929138184, |
| "learning_rate": 2.5459126539753642e-05, |
| "loss": 2.3751, |
| "step": 13150 |
| }, |
| { |
| "epoch": 1.4736842105263157, |
| "grad_norm": 10.176783561706543, |
| "learning_rate": 2.5440462859275847e-05, |
| "loss": 1.8748, |
| "step": 13160 |
| }, |
| { |
| "epoch": 1.4748040313549833, |
| "grad_norm": 8.326006889343262, |
| "learning_rate": 2.5421799178798062e-05, |
| "loss": 2.2096, |
| "step": 13170 |
| }, |
| { |
| "epoch": 1.4759238521836506, |
| "grad_norm": 4.975625514984131, |
| "learning_rate": 2.5403135498320267e-05, |
| "loss": 2.3334, |
| "step": 13180 |
| }, |
| { |
| "epoch": 1.477043673012318, |
| "grad_norm": 3.7235939502716064, |
| "learning_rate": 2.5384471817842482e-05, |
| "loss": 1.8743, |
| "step": 13190 |
| }, |
| { |
| "epoch": 1.4781634938409853, |
| "grad_norm": 4.65376615524292, |
| "learning_rate": 2.5365808137364687e-05, |
| "loss": 1.8204, |
| "step": 13200 |
| }, |
| { |
| "epoch": 1.479283314669653, |
| "grad_norm": 5.088535308837891, |
| "learning_rate": 2.5347144456886902e-05, |
| "loss": 1.9513, |
| "step": 13210 |
| }, |
| { |
| "epoch": 1.4804031354983203, |
| "grad_norm": 13.776253700256348, |
| "learning_rate": 2.5328480776409107e-05, |
| "loss": 1.7841, |
| "step": 13220 |
| }, |
| { |
| "epoch": 1.4815229563269876, |
| "grad_norm": 13.486886024475098, |
| "learning_rate": 2.5309817095931322e-05, |
| "loss": 2.1629, |
| "step": 13230 |
| }, |
| { |
| "epoch": 1.4826427771556552, |
| "grad_norm": 3.3172719478607178, |
| "learning_rate": 2.5291153415453527e-05, |
| "loss": 1.7235, |
| "step": 13240 |
| }, |
| { |
| "epoch": 1.4837625979843225, |
| "grad_norm": 3.367083787918091, |
| "learning_rate": 2.5272489734975742e-05, |
| "loss": 2.4903, |
| "step": 13250 |
| }, |
| { |
| "epoch": 1.4848824188129899, |
| "grad_norm": 13.519879341125488, |
| "learning_rate": 2.5253826054497947e-05, |
| "loss": 1.8464, |
| "step": 13260 |
| }, |
| { |
| "epoch": 1.4860022396416572, |
| "grad_norm": 6.741097450256348, |
| "learning_rate": 2.523516237402016e-05, |
| "loss": 1.8458, |
| "step": 13270 |
| }, |
| { |
| "epoch": 1.4871220604703248, |
| "grad_norm": 2.4831910133361816, |
| "learning_rate": 2.5216498693542367e-05, |
| "loss": 1.9829, |
| "step": 13280 |
| }, |
| { |
| "epoch": 1.4882418812989922, |
| "grad_norm": 14.098200798034668, |
| "learning_rate": 2.519783501306458e-05, |
| "loss": 2.2151, |
| "step": 13290 |
| }, |
| { |
| "epoch": 1.4893617021276595, |
| "grad_norm": 9.473960876464844, |
| "learning_rate": 2.5179171332586787e-05, |
| "loss": 2.3018, |
| "step": 13300 |
| }, |
| { |
| "epoch": 1.490481522956327, |
| "grad_norm": 10.849329948425293, |
| "learning_rate": 2.5160507652108995e-05, |
| "loss": 2.3845, |
| "step": 13310 |
| }, |
| { |
| "epoch": 1.4916013437849944, |
| "grad_norm": 5.084878921508789, |
| "learning_rate": 2.5141843971631207e-05, |
| "loss": 1.9048, |
| "step": 13320 |
| }, |
| { |
| "epoch": 1.4927211646136618, |
| "grad_norm": 3.708717107772827, |
| "learning_rate": 2.5123180291153416e-05, |
| "loss": 2.0126, |
| "step": 13330 |
| }, |
| { |
| "epoch": 1.4938409854423291, |
| "grad_norm": 4.616156578063965, |
| "learning_rate": 2.5104516610675627e-05, |
| "loss": 2.2239, |
| "step": 13340 |
| }, |
| { |
| "epoch": 1.4949608062709967, |
| "grad_norm": 4.087489604949951, |
| "learning_rate": 2.5085852930197836e-05, |
| "loss": 2.3669, |
| "step": 13350 |
| }, |
| { |
| "epoch": 1.496080627099664, |
| "grad_norm": 5.101741790771484, |
| "learning_rate": 2.5067189249720047e-05, |
| "loss": 2.1289, |
| "step": 13360 |
| }, |
| { |
| "epoch": 1.4972004479283314, |
| "grad_norm": 4.274835109710693, |
| "learning_rate": 2.5048525569242252e-05, |
| "loss": 2.194, |
| "step": 13370 |
| }, |
| { |
| "epoch": 1.498320268756999, |
| "grad_norm": 8.17119026184082, |
| "learning_rate": 2.5029861888764467e-05, |
| "loss": 1.924, |
| "step": 13380 |
| }, |
| { |
| "epoch": 1.4994400895856663, |
| "grad_norm": 7.936726093292236, |
| "learning_rate": 2.5011198208286672e-05, |
| "loss": 2.0178, |
| "step": 13390 |
| }, |
| { |
| "epoch": 1.5005599104143337, |
| "grad_norm": 4.710428714752197, |
| "learning_rate": 2.4992534527808887e-05, |
| "loss": 2.1567, |
| "step": 13400 |
| }, |
| { |
| "epoch": 1.501679731243001, |
| "grad_norm": 17.4936580657959, |
| "learning_rate": 2.4973870847331096e-05, |
| "loss": 2.1754, |
| "step": 13410 |
| }, |
| { |
| "epoch": 1.5027995520716684, |
| "grad_norm": 3.8005337715148926, |
| "learning_rate": 2.4955207166853304e-05, |
| "loss": 1.3665, |
| "step": 13420 |
| }, |
| { |
| "epoch": 1.503919372900336, |
| "grad_norm": 5.918506145477295, |
| "learning_rate": 2.4936543486375512e-05, |
| "loss": 1.8023, |
| "step": 13430 |
| }, |
| { |
| "epoch": 1.5050391937290035, |
| "grad_norm": 3.53682541847229, |
| "learning_rate": 2.4917879805897724e-05, |
| "loss": 2.1256, |
| "step": 13440 |
| }, |
| { |
| "epoch": 1.5061590145576709, |
| "grad_norm": 7.532880783081055, |
| "learning_rate": 2.4899216125419932e-05, |
| "loss": 1.6287, |
| "step": 13450 |
| }, |
| { |
| "epoch": 1.5072788353863382, |
| "grad_norm": 5.1376800537109375, |
| "learning_rate": 2.4880552444942144e-05, |
| "loss": 2.2407, |
| "step": 13460 |
| }, |
| { |
| "epoch": 1.5083986562150056, |
| "grad_norm": 3.9931938648223877, |
| "learning_rate": 2.4861888764464352e-05, |
| "loss": 2.172, |
| "step": 13470 |
| }, |
| { |
| "epoch": 1.509518477043673, |
| "grad_norm": 6.361204147338867, |
| "learning_rate": 2.4843225083986564e-05, |
| "loss": 1.7437, |
| "step": 13480 |
| }, |
| { |
| "epoch": 1.5106382978723403, |
| "grad_norm": 7.249978542327881, |
| "learning_rate": 2.4824561403508772e-05, |
| "loss": 1.8938, |
| "step": 13490 |
| }, |
| { |
| "epoch": 1.5117581187010078, |
| "grad_norm": 1.6755268573760986, |
| "learning_rate": 2.4805897723030984e-05, |
| "loss": 1.9075, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.5128779395296752, |
| "grad_norm": 3.8444862365722656, |
| "learning_rate": 2.4787234042553192e-05, |
| "loss": 2.2699, |
| "step": 13510 |
| }, |
| { |
| "epoch": 1.5139977603583428, |
| "grad_norm": 5.975175380706787, |
| "learning_rate": 2.4768570362075404e-05, |
| "loss": 1.9212, |
| "step": 13520 |
| }, |
| { |
| "epoch": 1.5151175811870101, |
| "grad_norm": 8.798042297363281, |
| "learning_rate": 2.4749906681597612e-05, |
| "loss": 2.1584, |
| "step": 13530 |
| }, |
| { |
| "epoch": 1.5162374020156775, |
| "grad_norm": 12.09782600402832, |
| "learning_rate": 2.473124300111982e-05, |
| "loss": 2.3702, |
| "step": 13540 |
| }, |
| { |
| "epoch": 1.5173572228443448, |
| "grad_norm": 6.850448131561279, |
| "learning_rate": 2.4712579320642032e-05, |
| "loss": 1.9944, |
| "step": 13550 |
| }, |
| { |
| "epoch": 1.5184770436730122, |
| "grad_norm": 9.481196403503418, |
| "learning_rate": 2.469391564016424e-05, |
| "loss": 1.9876, |
| "step": 13560 |
| }, |
| { |
| "epoch": 1.5195968645016797, |
| "grad_norm": 3.648925304412842, |
| "learning_rate": 2.4675251959686452e-05, |
| "loss": 2.1083, |
| "step": 13570 |
| }, |
| { |
| "epoch": 1.520716685330347, |
| "grad_norm": 4.310310363769531, |
| "learning_rate": 2.465658827920866e-05, |
| "loss": 1.7789, |
| "step": 13580 |
| }, |
| { |
| "epoch": 1.5218365061590147, |
| "grad_norm": 6.024844646453857, |
| "learning_rate": 2.4637924598730872e-05, |
| "loss": 2.0254, |
| "step": 13590 |
| }, |
| { |
| "epoch": 1.522956326987682, |
| "grad_norm": 9.881074905395508, |
| "learning_rate": 2.461926091825308e-05, |
| "loss": 2.0021, |
| "step": 13600 |
| }, |
| { |
| "epoch": 1.5240761478163494, |
| "grad_norm": 17.45454216003418, |
| "learning_rate": 2.4600597237775292e-05, |
| "loss": 1.8438, |
| "step": 13610 |
| }, |
| { |
| "epoch": 1.5251959686450167, |
| "grad_norm": 11.487627029418945, |
| "learning_rate": 2.45819335572975e-05, |
| "loss": 2.1005, |
| "step": 13620 |
| }, |
| { |
| "epoch": 1.526315789473684, |
| "grad_norm": 4.326568126678467, |
| "learning_rate": 2.4563269876819712e-05, |
| "loss": 2.3081, |
| "step": 13630 |
| }, |
| { |
| "epoch": 1.5274356103023516, |
| "grad_norm": 4.558961868286133, |
| "learning_rate": 2.454460619634192e-05, |
| "loss": 1.6342, |
| "step": 13640 |
| }, |
| { |
| "epoch": 1.528555431131019, |
| "grad_norm": 5.035207748413086, |
| "learning_rate": 2.4525942515864132e-05, |
| "loss": 1.785, |
| "step": 13650 |
| }, |
| { |
| "epoch": 1.5296752519596866, |
| "grad_norm": 13.78886604309082, |
| "learning_rate": 2.4507278835386337e-05, |
| "loss": 2.0897, |
| "step": 13660 |
| }, |
| { |
| "epoch": 1.530795072788354, |
| "grad_norm": 9.688088417053223, |
| "learning_rate": 2.448861515490855e-05, |
| "loss": 1.8179, |
| "step": 13670 |
| }, |
| { |
| "epoch": 1.5319148936170213, |
| "grad_norm": 5.47663688659668, |
| "learning_rate": 2.4469951474430757e-05, |
| "loss": 2.2184, |
| "step": 13680 |
| }, |
| { |
| "epoch": 1.5330347144456886, |
| "grad_norm": 16.983068466186523, |
| "learning_rate": 2.445128779395297e-05, |
| "loss": 2.1049, |
| "step": 13690 |
| }, |
| { |
| "epoch": 1.534154535274356, |
| "grad_norm": 4.000819683074951, |
| "learning_rate": 2.4432624113475177e-05, |
| "loss": 1.8282, |
| "step": 13700 |
| }, |
| { |
| "epoch": 1.5352743561030235, |
| "grad_norm": 8.065638542175293, |
| "learning_rate": 2.441396043299739e-05, |
| "loss": 1.8497, |
| "step": 13710 |
| }, |
| { |
| "epoch": 1.536394176931691, |
| "grad_norm": 7.793942451477051, |
| "learning_rate": 2.4395296752519597e-05, |
| "loss": 1.6021, |
| "step": 13720 |
| }, |
| { |
| "epoch": 1.5375139977603585, |
| "grad_norm": 4.013867378234863, |
| "learning_rate": 2.437663307204181e-05, |
| "loss": 1.585, |
| "step": 13730 |
| }, |
| { |
| "epoch": 1.5386338185890258, |
| "grad_norm": 5.563870429992676, |
| "learning_rate": 2.4357969391564017e-05, |
| "loss": 1.7468, |
| "step": 13740 |
| }, |
| { |
| "epoch": 1.5397536394176932, |
| "grad_norm": 3.9095380306243896, |
| "learning_rate": 2.4339305711086226e-05, |
| "loss": 1.8195, |
| "step": 13750 |
| }, |
| { |
| "epoch": 1.5408734602463605, |
| "grad_norm": 3.52274489402771, |
| "learning_rate": 2.4320642030608437e-05, |
| "loss": 2.2316, |
| "step": 13760 |
| }, |
| { |
| "epoch": 1.5419932810750279, |
| "grad_norm": 8.85901927947998, |
| "learning_rate": 2.4301978350130646e-05, |
| "loss": 1.933, |
| "step": 13770 |
| }, |
| { |
| "epoch": 1.5431131019036954, |
| "grad_norm": 6.702213764190674, |
| "learning_rate": 2.4283314669652857e-05, |
| "loss": 1.9826, |
| "step": 13780 |
| }, |
| { |
| "epoch": 1.5442329227323628, |
| "grad_norm": 16.482467651367188, |
| "learning_rate": 2.4264650989175066e-05, |
| "loss": 1.6521, |
| "step": 13790 |
| }, |
| { |
| "epoch": 1.5453527435610304, |
| "grad_norm": 9.306038856506348, |
| "learning_rate": 2.4245987308697277e-05, |
| "loss": 1.6685, |
| "step": 13800 |
| }, |
| { |
| "epoch": 1.5464725643896977, |
| "grad_norm": 15.01627254486084, |
| "learning_rate": 2.4227323628219486e-05, |
| "loss": 1.9639, |
| "step": 13810 |
| }, |
| { |
| "epoch": 1.547592385218365, |
| "grad_norm": 6.116465091705322, |
| "learning_rate": 2.4208659947741697e-05, |
| "loss": 1.8788, |
| "step": 13820 |
| }, |
| { |
| "epoch": 1.5487122060470324, |
| "grad_norm": 8.37788200378418, |
| "learning_rate": 2.4189996267263906e-05, |
| "loss": 2.2028, |
| "step": 13830 |
| }, |
| { |
| "epoch": 1.5498320268756998, |
| "grad_norm": 11.314355850219727, |
| "learning_rate": 2.4171332586786117e-05, |
| "loss": 2.0903, |
| "step": 13840 |
| }, |
| { |
| "epoch": 1.5509518477043673, |
| "grad_norm": 4.621204853057861, |
| "learning_rate": 2.4152668906308326e-05, |
| "loss": 1.9054, |
| "step": 13850 |
| }, |
| { |
| "epoch": 1.5520716685330347, |
| "grad_norm": 5.006560325622559, |
| "learning_rate": 2.4134005225830537e-05, |
| "loss": 1.8487, |
| "step": 13860 |
| }, |
| { |
| "epoch": 1.5531914893617023, |
| "grad_norm": 6.626319885253906, |
| "learning_rate": 2.4115341545352746e-05, |
| "loss": 1.9409, |
| "step": 13870 |
| }, |
| { |
| "epoch": 1.5543113101903696, |
| "grad_norm": 2.9766428470611572, |
| "learning_rate": 2.4096677864874957e-05, |
| "loss": 1.5523, |
| "step": 13880 |
| }, |
| { |
| "epoch": 1.555431131019037, |
| "grad_norm": 15.116016387939453, |
| "learning_rate": 2.4078014184397162e-05, |
| "loss": 2.1559, |
| "step": 13890 |
| }, |
| { |
| "epoch": 1.5565509518477043, |
| "grad_norm": 4.559194564819336, |
| "learning_rate": 2.4059350503919374e-05, |
| "loss": 1.7988, |
| "step": 13900 |
| }, |
| { |
| "epoch": 1.5576707726763717, |
| "grad_norm": 8.903999328613281, |
| "learning_rate": 2.4040686823441582e-05, |
| "loss": 2.2537, |
| "step": 13910 |
| }, |
| { |
| "epoch": 1.5587905935050392, |
| "grad_norm": 17.303340911865234, |
| "learning_rate": 2.4022023142963794e-05, |
| "loss": 1.9898, |
| "step": 13920 |
| }, |
| { |
| "epoch": 1.5599104143337066, |
| "grad_norm": 5.961864948272705, |
| "learning_rate": 2.4003359462486002e-05, |
| "loss": 2.0574, |
| "step": 13930 |
| }, |
| { |
| "epoch": 1.5610302351623742, |
| "grad_norm": 14.988414764404297, |
| "learning_rate": 2.3984695782008214e-05, |
| "loss": 2.1711, |
| "step": 13940 |
| }, |
| { |
| "epoch": 1.5621500559910415, |
| "grad_norm": 4.600130081176758, |
| "learning_rate": 2.3966032101530422e-05, |
| "loss": 2.644, |
| "step": 13950 |
| }, |
| { |
| "epoch": 1.5632698768197089, |
| "grad_norm": 4.028290271759033, |
| "learning_rate": 2.394736842105263e-05, |
| "loss": 2.007, |
| "step": 13960 |
| }, |
| { |
| "epoch": 1.5643896976483762, |
| "grad_norm": 16.717845916748047, |
| "learning_rate": 2.3928704740574842e-05, |
| "loss": 2.1179, |
| "step": 13970 |
| }, |
| { |
| "epoch": 1.5655095184770436, |
| "grad_norm": 13.442608833312988, |
| "learning_rate": 2.391004106009705e-05, |
| "loss": 2.061, |
| "step": 13980 |
| }, |
| { |
| "epoch": 1.5666293393057111, |
| "grad_norm": 4.753323078155518, |
| "learning_rate": 2.3891377379619262e-05, |
| "loss": 2.1708, |
| "step": 13990 |
| }, |
| { |
| "epoch": 1.5677491601343785, |
| "grad_norm": 4.6569600105285645, |
| "learning_rate": 2.387271369914147e-05, |
| "loss": 1.9684, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.568868980963046, |
| "grad_norm": 3.8971803188323975, |
| "learning_rate": 2.3854050018663682e-05, |
| "loss": 2.4244, |
| "step": 14010 |
| }, |
| { |
| "epoch": 1.5699888017917134, |
| "grad_norm": 4.851494789123535, |
| "learning_rate": 2.383538633818589e-05, |
| "loss": 2.0636, |
| "step": 14020 |
| }, |
| { |
| "epoch": 1.5711086226203808, |
| "grad_norm": 3.54333758354187, |
| "learning_rate": 2.3816722657708102e-05, |
| "loss": 1.9378, |
| "step": 14030 |
| }, |
| { |
| "epoch": 1.572228443449048, |
| "grad_norm": 6.895486831665039, |
| "learning_rate": 2.379805897723031e-05, |
| "loss": 1.7275, |
| "step": 14040 |
| }, |
| { |
| "epoch": 1.5733482642777155, |
| "grad_norm": 4.1524739265441895, |
| "learning_rate": 2.3779395296752522e-05, |
| "loss": 2.2593, |
| "step": 14050 |
| }, |
| { |
| "epoch": 1.574468085106383, |
| "grad_norm": 4.263177394866943, |
| "learning_rate": 2.376073161627473e-05, |
| "loss": 1.8631, |
| "step": 14060 |
| }, |
| { |
| "epoch": 1.5755879059350504, |
| "grad_norm": 14.865904808044434, |
| "learning_rate": 2.3742067935796942e-05, |
| "loss": 1.6639, |
| "step": 14070 |
| }, |
| { |
| "epoch": 1.576707726763718, |
| "grad_norm": 4.426602840423584, |
| "learning_rate": 2.372340425531915e-05, |
| "loss": 1.987, |
| "step": 14080 |
| }, |
| { |
| "epoch": 1.5778275475923853, |
| "grad_norm": 4.001722812652588, |
| "learning_rate": 2.3704740574841362e-05, |
| "loss": 2.0681, |
| "step": 14090 |
| }, |
| { |
| "epoch": 1.5789473684210527, |
| "grad_norm": 3.0097544193267822, |
| "learning_rate": 2.368607689436357e-05, |
| "loss": 2.3647, |
| "step": 14100 |
| }, |
| { |
| "epoch": 1.58006718924972, |
| "grad_norm": 3.5997202396392822, |
| "learning_rate": 2.3667413213885782e-05, |
| "loss": 2.1803, |
| "step": 14110 |
| }, |
| { |
| "epoch": 1.5811870100783874, |
| "grad_norm": 4.4173784255981445, |
| "learning_rate": 2.3648749533407987e-05, |
| "loss": 1.9483, |
| "step": 14120 |
| }, |
| { |
| "epoch": 1.5823068309070547, |
| "grad_norm": 10.679607391357422, |
| "learning_rate": 2.36300858529302e-05, |
| "loss": 2.2298, |
| "step": 14130 |
| }, |
| { |
| "epoch": 1.5834266517357223, |
| "grad_norm": 11.639561653137207, |
| "learning_rate": 2.3611422172452407e-05, |
| "loss": 1.6313, |
| "step": 14140 |
| }, |
| { |
| "epoch": 1.5845464725643899, |
| "grad_norm": 7.233905792236328, |
| "learning_rate": 2.359275849197462e-05, |
| "loss": 2.2515, |
| "step": 14150 |
| }, |
| { |
| "epoch": 1.5856662933930572, |
| "grad_norm": 9.425190925598145, |
| "learning_rate": 2.3574094811496827e-05, |
| "loss": 1.7644, |
| "step": 14160 |
| }, |
| { |
| "epoch": 1.5867861142217246, |
| "grad_norm": 13.162445068359375, |
| "learning_rate": 2.355543113101904e-05, |
| "loss": 1.937, |
| "step": 14170 |
| }, |
| { |
| "epoch": 1.587905935050392, |
| "grad_norm": 5.631926536560059, |
| "learning_rate": 2.3536767450541247e-05, |
| "loss": 1.7986, |
| "step": 14180 |
| }, |
| { |
| "epoch": 1.5890257558790593, |
| "grad_norm": 14.373632431030273, |
| "learning_rate": 2.3518103770063456e-05, |
| "loss": 1.9261, |
| "step": 14190 |
| }, |
| { |
| "epoch": 1.5901455767077266, |
| "grad_norm": 10.449909210205078, |
| "learning_rate": 2.3499440089585667e-05, |
| "loss": 2.1721, |
| "step": 14200 |
| }, |
| { |
| "epoch": 1.5912653975363942, |
| "grad_norm": 3.89623761177063, |
| "learning_rate": 2.3480776409107876e-05, |
| "loss": 2.0745, |
| "step": 14210 |
| }, |
| { |
| "epoch": 1.5923852183650617, |
| "grad_norm": 4.33968448638916, |
| "learning_rate": 2.3462112728630087e-05, |
| "loss": 1.9008, |
| "step": 14220 |
| }, |
| { |
| "epoch": 1.593505039193729, |
| "grad_norm": 9.331836700439453, |
| "learning_rate": 2.3443449048152296e-05, |
| "loss": 2.0979, |
| "step": 14230 |
| }, |
| { |
| "epoch": 1.5946248600223965, |
| "grad_norm": 10.781360626220703, |
| "learning_rate": 2.3424785367674507e-05, |
| "loss": 2.2364, |
| "step": 14240 |
| }, |
| { |
| "epoch": 1.5957446808510638, |
| "grad_norm": 5.107909679412842, |
| "learning_rate": 2.3406121687196716e-05, |
| "loss": 2.0156, |
| "step": 14250 |
| }, |
| { |
| "epoch": 1.5968645016797312, |
| "grad_norm": 3.5812559127807617, |
| "learning_rate": 2.3387458006718927e-05, |
| "loss": 2.2581, |
| "step": 14260 |
| }, |
| { |
| "epoch": 1.5979843225083985, |
| "grad_norm": 13.384634017944336, |
| "learning_rate": 2.3368794326241136e-05, |
| "loss": 2.1169, |
| "step": 14270 |
| }, |
| { |
| "epoch": 1.599104143337066, |
| "grad_norm": 5.330173969268799, |
| "learning_rate": 2.3350130645763347e-05, |
| "loss": 1.7373, |
| "step": 14280 |
| }, |
| { |
| "epoch": 1.6002239641657336, |
| "grad_norm": 7.670846462249756, |
| "learning_rate": 2.3331466965285556e-05, |
| "loss": 1.9934, |
| "step": 14290 |
| }, |
| { |
| "epoch": 1.601343784994401, |
| "grad_norm": 4.610497951507568, |
| "learning_rate": 2.3312803284807767e-05, |
| "loss": 1.5182, |
| "step": 14300 |
| }, |
| { |
| "epoch": 1.6024636058230683, |
| "grad_norm": 7.437980651855469, |
| "learning_rate": 2.3294139604329976e-05, |
| "loss": 1.7472, |
| "step": 14310 |
| }, |
| { |
| "epoch": 1.6035834266517357, |
| "grad_norm": 6.804001808166504, |
| "learning_rate": 2.3275475923852187e-05, |
| "loss": 1.5694, |
| "step": 14320 |
| }, |
| { |
| "epoch": 1.604703247480403, |
| "grad_norm": 11.887704849243164, |
| "learning_rate": 2.3256812243374396e-05, |
| "loss": 1.7139, |
| "step": 14330 |
| }, |
| { |
| "epoch": 1.6058230683090704, |
| "grad_norm": 3.467298746109009, |
| "learning_rate": 2.3238148562896604e-05, |
| "loss": 2.1763, |
| "step": 14340 |
| }, |
| { |
| "epoch": 1.606942889137738, |
| "grad_norm": 8.796690940856934, |
| "learning_rate": 2.3219484882418816e-05, |
| "loss": 1.9841, |
| "step": 14350 |
| }, |
| { |
| "epoch": 1.6080627099664053, |
| "grad_norm": 4.704202651977539, |
| "learning_rate": 2.3200821201941024e-05, |
| "loss": 2.0089, |
| "step": 14360 |
| }, |
| { |
| "epoch": 1.609182530795073, |
| "grad_norm": 7.0840163230896, |
| "learning_rate": 2.3182157521463232e-05, |
| "loss": 1.9829, |
| "step": 14370 |
| }, |
| { |
| "epoch": 1.6103023516237402, |
| "grad_norm": 4.396951198577881, |
| "learning_rate": 2.3163493840985444e-05, |
| "loss": 1.8063, |
| "step": 14380 |
| }, |
| { |
| "epoch": 1.6114221724524076, |
| "grad_norm": 15.083405494689941, |
| "learning_rate": 2.3144830160507652e-05, |
| "loss": 1.6894, |
| "step": 14390 |
| }, |
| { |
| "epoch": 1.612541993281075, |
| "grad_norm": 4.376276969909668, |
| "learning_rate": 2.312616648002986e-05, |
| "loss": 1.8177, |
| "step": 14400 |
| }, |
| { |
| "epoch": 1.6136618141097423, |
| "grad_norm": 5.626823902130127, |
| "learning_rate": 2.3107502799552072e-05, |
| "loss": 2.002, |
| "step": 14410 |
| }, |
| { |
| "epoch": 1.6147816349384099, |
| "grad_norm": 6.14142370223999, |
| "learning_rate": 2.308883911907428e-05, |
| "loss": 1.798, |
| "step": 14420 |
| }, |
| { |
| "epoch": 1.6159014557670772, |
| "grad_norm": 4.125568866729736, |
| "learning_rate": 2.3070175438596492e-05, |
| "loss": 1.9081, |
| "step": 14430 |
| }, |
| { |
| "epoch": 1.6170212765957448, |
| "grad_norm": 7.834494590759277, |
| "learning_rate": 2.30515117581187e-05, |
| "loss": 2.014, |
| "step": 14440 |
| }, |
| { |
| "epoch": 1.6181410974244121, |
| "grad_norm": 14.719797134399414, |
| "learning_rate": 2.3032848077640912e-05, |
| "loss": 2.1659, |
| "step": 14450 |
| }, |
| { |
| "epoch": 1.6192609182530795, |
| "grad_norm": 10.502878189086914, |
| "learning_rate": 2.301418439716312e-05, |
| "loss": 2.0006, |
| "step": 14460 |
| }, |
| { |
| "epoch": 1.6203807390817468, |
| "grad_norm": 4.505667686462402, |
| "learning_rate": 2.2995520716685332e-05, |
| "loss": 2.0919, |
| "step": 14470 |
| }, |
| { |
| "epoch": 1.6215005599104142, |
| "grad_norm": 8.213534355163574, |
| "learning_rate": 2.297685703620754e-05, |
| "loss": 1.8744, |
| "step": 14480 |
| }, |
| { |
| "epoch": 1.6226203807390818, |
| "grad_norm": 3.4308199882507324, |
| "learning_rate": 2.2958193355729752e-05, |
| "loss": 2.0321, |
| "step": 14490 |
| }, |
| { |
| "epoch": 1.6237402015677491, |
| "grad_norm": 8.835275650024414, |
| "learning_rate": 2.293952967525196e-05, |
| "loss": 2.0342, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.6248600223964167, |
| "grad_norm": 13.789974212646484, |
| "learning_rate": 2.2920865994774172e-05, |
| "loss": 2.1533, |
| "step": 14510 |
| }, |
| { |
| "epoch": 1.625979843225084, |
| "grad_norm": 4.554170608520508, |
| "learning_rate": 2.290220231429638e-05, |
| "loss": 1.8698, |
| "step": 14520 |
| }, |
| { |
| "epoch": 1.6270996640537514, |
| "grad_norm": 4.612897872924805, |
| "learning_rate": 2.2883538633818592e-05, |
| "loss": 1.8866, |
| "step": 14530 |
| }, |
| { |
| "epoch": 1.6282194848824187, |
| "grad_norm": 3.9619531631469727, |
| "learning_rate": 2.28648749533408e-05, |
| "loss": 1.795, |
| "step": 14540 |
| }, |
| { |
| "epoch": 1.629339305711086, |
| "grad_norm": 13.929571151733398, |
| "learning_rate": 2.2846211272863012e-05, |
| "loss": 1.7292, |
| "step": 14550 |
| }, |
| { |
| "epoch": 1.6304591265397537, |
| "grad_norm": 3.8777716159820557, |
| "learning_rate": 2.282754759238522e-05, |
| "loss": 2.1625, |
| "step": 14560 |
| }, |
| { |
| "epoch": 1.631578947368421, |
| "grad_norm": 10.221095085144043, |
| "learning_rate": 2.280888391190743e-05, |
| "loss": 1.9096, |
| "step": 14570 |
| }, |
| { |
| "epoch": 1.6326987681970886, |
| "grad_norm": 11.300918579101562, |
| "learning_rate": 2.279022023142964e-05, |
| "loss": 2.0733, |
| "step": 14580 |
| }, |
| { |
| "epoch": 1.633818589025756, |
| "grad_norm": 3.6058292388916016, |
| "learning_rate": 2.277155655095185e-05, |
| "loss": 1.7012, |
| "step": 14590 |
| }, |
| { |
| "epoch": 1.6349384098544233, |
| "grad_norm": 8.574664115905762, |
| "learning_rate": 2.2752892870474057e-05, |
| "loss": 1.7154, |
| "step": 14600 |
| }, |
| { |
| "epoch": 1.6360582306830906, |
| "grad_norm": 7.9404096603393555, |
| "learning_rate": 2.2734229189996266e-05, |
| "loss": 2.0302, |
| "step": 14610 |
| }, |
| { |
| "epoch": 1.637178051511758, |
| "grad_norm": 4.232813358306885, |
| "learning_rate": 2.2715565509518477e-05, |
| "loss": 2.2532, |
| "step": 14620 |
| }, |
| { |
| "epoch": 1.6382978723404256, |
| "grad_norm": 4.7303643226623535, |
| "learning_rate": 2.2696901829040686e-05, |
| "loss": 1.6629, |
| "step": 14630 |
| }, |
| { |
| "epoch": 1.639417693169093, |
| "grad_norm": 10.848748207092285, |
| "learning_rate": 2.2678238148562897e-05, |
| "loss": 2.2253, |
| "step": 14640 |
| }, |
| { |
| "epoch": 1.6405375139977605, |
| "grad_norm": 13.379515647888184, |
| "learning_rate": 2.2659574468085106e-05, |
| "loss": 1.9358, |
| "step": 14650 |
| }, |
| { |
| "epoch": 1.6416573348264278, |
| "grad_norm": 3.6262221336364746, |
| "learning_rate": 2.2640910787607317e-05, |
| "loss": 1.698, |
| "step": 14660 |
| }, |
| { |
| "epoch": 1.6427771556550952, |
| "grad_norm": 4.654207229614258, |
| "learning_rate": 2.2622247107129526e-05, |
| "loss": 1.6241, |
| "step": 14670 |
| }, |
| { |
| "epoch": 1.6438969764837625, |
| "grad_norm": 18.657798767089844, |
| "learning_rate": 2.2603583426651737e-05, |
| "loss": 2.1006, |
| "step": 14680 |
| }, |
| { |
| "epoch": 1.64501679731243, |
| "grad_norm": 11.848627090454102, |
| "learning_rate": 2.2584919746173946e-05, |
| "loss": 2.1113, |
| "step": 14690 |
| }, |
| { |
| "epoch": 1.6461366181410975, |
| "grad_norm": 9.06197738647461, |
| "learning_rate": 2.2566256065696157e-05, |
| "loss": 1.6563, |
| "step": 14700 |
| }, |
| { |
| "epoch": 1.6472564389697648, |
| "grad_norm": 13.184223175048828, |
| "learning_rate": 2.2547592385218366e-05, |
| "loss": 2.3761, |
| "step": 14710 |
| }, |
| { |
| "epoch": 1.6483762597984324, |
| "grad_norm": 11.905593872070312, |
| "learning_rate": 2.2528928704740577e-05, |
| "loss": 2.0665, |
| "step": 14720 |
| }, |
| { |
| "epoch": 1.6494960806270997, |
| "grad_norm": 3.726668357849121, |
| "learning_rate": 2.2510265024262786e-05, |
| "loss": 1.7046, |
| "step": 14730 |
| }, |
| { |
| "epoch": 1.650615901455767, |
| "grad_norm": 8.543424606323242, |
| "learning_rate": 2.2491601343784997e-05, |
| "loss": 1.8803, |
| "step": 14740 |
| }, |
| { |
| "epoch": 1.6517357222844344, |
| "grad_norm": 7.099303245544434, |
| "learning_rate": 2.2472937663307206e-05, |
| "loss": 1.9246, |
| "step": 14750 |
| }, |
| { |
| "epoch": 1.6528555431131018, |
| "grad_norm": 11.42622184753418, |
| "learning_rate": 2.2454273982829417e-05, |
| "loss": 1.7691, |
| "step": 14760 |
| }, |
| { |
| "epoch": 1.6539753639417694, |
| "grad_norm": 11.167017936706543, |
| "learning_rate": 2.2435610302351626e-05, |
| "loss": 2.0076, |
| "step": 14770 |
| }, |
| { |
| "epoch": 1.6550951847704367, |
| "grad_norm": 3.956162214279175, |
| "learning_rate": 2.2416946621873834e-05, |
| "loss": 2.3021, |
| "step": 14780 |
| }, |
| { |
| "epoch": 1.6562150055991043, |
| "grad_norm": 7.405086994171143, |
| "learning_rate": 2.2398282941396046e-05, |
| "loss": 2.2505, |
| "step": 14790 |
| }, |
| { |
| "epoch": 1.6573348264277716, |
| "grad_norm": 6.949024200439453, |
| "learning_rate": 2.2379619260918254e-05, |
| "loss": 1.6625, |
| "step": 14800 |
| }, |
| { |
| "epoch": 1.658454647256439, |
| "grad_norm": 5.116047382354736, |
| "learning_rate": 2.2360955580440466e-05, |
| "loss": 2.0589, |
| "step": 14810 |
| }, |
| { |
| "epoch": 1.6595744680851063, |
| "grad_norm": 4.537695407867432, |
| "learning_rate": 2.2342291899962674e-05, |
| "loss": 1.7413, |
| "step": 14820 |
| }, |
| { |
| "epoch": 1.6606942889137737, |
| "grad_norm": 4.4663004875183105, |
| "learning_rate": 2.2323628219484882e-05, |
| "loss": 1.8535, |
| "step": 14830 |
| }, |
| { |
| "epoch": 1.6618141097424413, |
| "grad_norm": 4.609202861785889, |
| "learning_rate": 2.230496453900709e-05, |
| "loss": 2.2902, |
| "step": 14840 |
| }, |
| { |
| "epoch": 1.6629339305711086, |
| "grad_norm": 4.477583885192871, |
| "learning_rate": 2.2286300858529302e-05, |
| "loss": 2.2012, |
| "step": 14850 |
| }, |
| { |
| "epoch": 1.6640537513997762, |
| "grad_norm": 19.314029693603516, |
| "learning_rate": 2.226763717805151e-05, |
| "loss": 2.3641, |
| "step": 14860 |
| }, |
| { |
| "epoch": 1.6651735722284435, |
| "grad_norm": 7.962518692016602, |
| "learning_rate": 2.2248973497573722e-05, |
| "loss": 2.1222, |
| "step": 14870 |
| }, |
| { |
| "epoch": 1.6662933930571109, |
| "grad_norm": 4.907433986663818, |
| "learning_rate": 2.223030981709593e-05, |
| "loss": 1.8145, |
| "step": 14880 |
| }, |
| { |
| "epoch": 1.6674132138857782, |
| "grad_norm": 8.89202880859375, |
| "learning_rate": 2.2211646136618142e-05, |
| "loss": 2.2891, |
| "step": 14890 |
| }, |
| { |
| "epoch": 1.6685330347144456, |
| "grad_norm": 9.831536293029785, |
| "learning_rate": 2.219298245614035e-05, |
| "loss": 2.0534, |
| "step": 14900 |
| }, |
| { |
| "epoch": 1.6696528555431132, |
| "grad_norm": 3.9551281929016113, |
| "learning_rate": 2.2174318775662562e-05, |
| "loss": 1.6471, |
| "step": 14910 |
| }, |
| { |
| "epoch": 1.6707726763717805, |
| "grad_norm": 4.45933723449707, |
| "learning_rate": 2.215565509518477e-05, |
| "loss": 2.2105, |
| "step": 14920 |
| }, |
| { |
| "epoch": 1.671892497200448, |
| "grad_norm": 4.2659783363342285, |
| "learning_rate": 2.2136991414706982e-05, |
| "loss": 1.9115, |
| "step": 14930 |
| }, |
| { |
| "epoch": 1.6730123180291154, |
| "grad_norm": 5.429946422576904, |
| "learning_rate": 2.211832773422919e-05, |
| "loss": 1.9213, |
| "step": 14940 |
| }, |
| { |
| "epoch": 1.6741321388577828, |
| "grad_norm": 12.490592956542969, |
| "learning_rate": 2.2099664053751402e-05, |
| "loss": 2.1818, |
| "step": 14950 |
| }, |
| { |
| "epoch": 1.6752519596864501, |
| "grad_norm": 5.016933917999268, |
| "learning_rate": 2.208100037327361e-05, |
| "loss": 2.3414, |
| "step": 14960 |
| }, |
| { |
| "epoch": 1.6763717805151175, |
| "grad_norm": 11.83879566192627, |
| "learning_rate": 2.2062336692795822e-05, |
| "loss": 1.5455, |
| "step": 14970 |
| }, |
| { |
| "epoch": 1.6774916013437848, |
| "grad_norm": 5.847216606140137, |
| "learning_rate": 2.204367301231803e-05, |
| "loss": 1.9266, |
| "step": 14980 |
| }, |
| { |
| "epoch": 1.6786114221724524, |
| "grad_norm": 5.979493141174316, |
| "learning_rate": 2.202500933184024e-05, |
| "loss": 1.5915, |
| "step": 14990 |
| }, |
| { |
| "epoch": 1.67973124300112, |
| "grad_norm": 10.372356414794922, |
| "learning_rate": 2.200634565136245e-05, |
| "loss": 1.5493, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.6808510638297873, |
| "grad_norm": 5.963084697723389, |
| "learning_rate": 2.198768197088466e-05, |
| "loss": 1.9619, |
| "step": 15010 |
| }, |
| { |
| "epoch": 1.6819708846584547, |
| "grad_norm": 10.619939804077148, |
| "learning_rate": 2.196901829040687e-05, |
| "loss": 2.351, |
| "step": 15020 |
| }, |
| { |
| "epoch": 1.683090705487122, |
| "grad_norm": 4.406311511993408, |
| "learning_rate": 2.195035460992908e-05, |
| "loss": 1.7913, |
| "step": 15030 |
| }, |
| { |
| "epoch": 1.6842105263157894, |
| "grad_norm": 4.74340295791626, |
| "learning_rate": 2.193169092945129e-05, |
| "loss": 1.6961, |
| "step": 15040 |
| }, |
| { |
| "epoch": 1.6853303471444567, |
| "grad_norm": 10.785073280334473, |
| "learning_rate": 2.1913027248973496e-05, |
| "loss": 1.6512, |
| "step": 15050 |
| }, |
| { |
| "epoch": 1.6864501679731243, |
| "grad_norm": 7.105363368988037, |
| "learning_rate": 2.1894363568495707e-05, |
| "loss": 1.8099, |
| "step": 15060 |
| }, |
| { |
| "epoch": 1.6875699888017919, |
| "grad_norm": 4.944157123565674, |
| "learning_rate": 2.1875699888017916e-05, |
| "loss": 1.9102, |
| "step": 15070 |
| }, |
| { |
| "epoch": 1.6886898096304592, |
| "grad_norm": 4.357661724090576, |
| "learning_rate": 2.1857036207540127e-05, |
| "loss": 2.1368, |
| "step": 15080 |
| }, |
| { |
| "epoch": 1.6898096304591266, |
| "grad_norm": 4.5606207847595215, |
| "learning_rate": 2.1838372527062336e-05, |
| "loss": 1.9584, |
| "step": 15090 |
| }, |
| { |
| "epoch": 1.690929451287794, |
| "grad_norm": 9.327258110046387, |
| "learning_rate": 2.1819708846584547e-05, |
| "loss": 1.9882, |
| "step": 15100 |
| }, |
| { |
| "epoch": 1.6920492721164613, |
| "grad_norm": 4.126927375793457, |
| "learning_rate": 2.1801045166106756e-05, |
| "loss": 2.4206, |
| "step": 15110 |
| }, |
| { |
| "epoch": 1.6931690929451286, |
| "grad_norm": 16.299972534179688, |
| "learning_rate": 2.1782381485628967e-05, |
| "loss": 2.2734, |
| "step": 15120 |
| }, |
| { |
| "epoch": 1.6942889137737962, |
| "grad_norm": 7.43623685836792, |
| "learning_rate": 2.1763717805151176e-05, |
| "loss": 1.7911, |
| "step": 15130 |
| }, |
| { |
| "epoch": 1.6954087346024636, |
| "grad_norm": 4.106508255004883, |
| "learning_rate": 2.1745054124673387e-05, |
| "loss": 2.1128, |
| "step": 15140 |
| }, |
| { |
| "epoch": 1.6965285554311311, |
| "grad_norm": 13.86871337890625, |
| "learning_rate": 2.1726390444195596e-05, |
| "loss": 1.7943, |
| "step": 15150 |
| }, |
| { |
| "epoch": 1.6976483762597985, |
| "grad_norm": 6.735291957855225, |
| "learning_rate": 2.1707726763717807e-05, |
| "loss": 1.5294, |
| "step": 15160 |
| }, |
| { |
| "epoch": 1.6987681970884658, |
| "grad_norm": 5.739629745483398, |
| "learning_rate": 2.1689063083240016e-05, |
| "loss": 2.0369, |
| "step": 15170 |
| }, |
| { |
| "epoch": 1.6998880179171332, |
| "grad_norm": 5.946849822998047, |
| "learning_rate": 2.1670399402762227e-05, |
| "loss": 2.0375, |
| "step": 15180 |
| }, |
| { |
| "epoch": 1.7010078387458005, |
| "grad_norm": 4.547854423522949, |
| "learning_rate": 2.1651735722284436e-05, |
| "loss": 2.0474, |
| "step": 15190 |
| }, |
| { |
| "epoch": 1.702127659574468, |
| "grad_norm": 6.0930070877075195, |
| "learning_rate": 2.1633072041806644e-05, |
| "loss": 1.7421, |
| "step": 15200 |
| }, |
| { |
| "epoch": 1.7032474804031354, |
| "grad_norm": 4.635743141174316, |
| "learning_rate": 2.1614408361328856e-05, |
| "loss": 2.3968, |
| "step": 15210 |
| }, |
| { |
| "epoch": 1.704367301231803, |
| "grad_norm": 2.2271034717559814, |
| "learning_rate": 2.1595744680851064e-05, |
| "loss": 1.8041, |
| "step": 15220 |
| }, |
| { |
| "epoch": 1.7054871220604704, |
| "grad_norm": 6.688762664794922, |
| "learning_rate": 2.1577081000373276e-05, |
| "loss": 1.8699, |
| "step": 15230 |
| }, |
| { |
| "epoch": 1.7066069428891377, |
| "grad_norm": 4.520251274108887, |
| "learning_rate": 2.1558417319895484e-05, |
| "loss": 1.7934, |
| "step": 15240 |
| }, |
| { |
| "epoch": 1.707726763717805, |
| "grad_norm": 5.595422744750977, |
| "learning_rate": 2.1539753639417696e-05, |
| "loss": 1.8382, |
| "step": 15250 |
| }, |
| { |
| "epoch": 1.7088465845464724, |
| "grad_norm": 10.029720306396484, |
| "learning_rate": 2.1521089958939904e-05, |
| "loss": 1.9193, |
| "step": 15260 |
| }, |
| { |
| "epoch": 1.70996640537514, |
| "grad_norm": 5.297349452972412, |
| "learning_rate": 2.1502426278462116e-05, |
| "loss": 2.0587, |
| "step": 15270 |
| }, |
| { |
| "epoch": 1.7110862262038073, |
| "grad_norm": 16.516834259033203, |
| "learning_rate": 2.1483762597984324e-05, |
| "loss": 2.0523, |
| "step": 15280 |
| }, |
| { |
| "epoch": 1.712206047032475, |
| "grad_norm": 3.686732292175293, |
| "learning_rate": 2.1465098917506532e-05, |
| "loss": 1.8393, |
| "step": 15290 |
| }, |
| { |
| "epoch": 1.7133258678611423, |
| "grad_norm": 8.316386222839355, |
| "learning_rate": 2.144643523702874e-05, |
| "loss": 2.1096, |
| "step": 15300 |
| }, |
| { |
| "epoch": 1.7144456886898096, |
| "grad_norm": 14.509235382080078, |
| "learning_rate": 2.1427771556550952e-05, |
| "loss": 2.2452, |
| "step": 15310 |
| }, |
| { |
| "epoch": 1.715565509518477, |
| "grad_norm": 12.271526336669922, |
| "learning_rate": 2.140910787607316e-05, |
| "loss": 1.8003, |
| "step": 15320 |
| }, |
| { |
| "epoch": 1.7166853303471443, |
| "grad_norm": 16.485271453857422, |
| "learning_rate": 2.1390444195595372e-05, |
| "loss": 1.8001, |
| "step": 15330 |
| }, |
| { |
| "epoch": 1.717805151175812, |
| "grad_norm": 4.867336273193359, |
| "learning_rate": 2.137178051511758e-05, |
| "loss": 1.8425, |
| "step": 15340 |
| }, |
| { |
| "epoch": 1.7189249720044792, |
| "grad_norm": 3.5718979835510254, |
| "learning_rate": 2.1353116834639792e-05, |
| "loss": 1.6484, |
| "step": 15350 |
| }, |
| { |
| "epoch": 1.7200447928331468, |
| "grad_norm": 26.389127731323242, |
| "learning_rate": 2.1334453154162e-05, |
| "loss": 1.7185, |
| "step": 15360 |
| }, |
| { |
| "epoch": 1.7211646136618142, |
| "grad_norm": 4.237075328826904, |
| "learning_rate": 2.1315789473684212e-05, |
| "loss": 1.8523, |
| "step": 15370 |
| }, |
| { |
| "epoch": 1.7222844344904815, |
| "grad_norm": 11.237632751464844, |
| "learning_rate": 2.129712579320642e-05, |
| "loss": 2.3631, |
| "step": 15380 |
| }, |
| { |
| "epoch": 1.7234042553191489, |
| "grad_norm": 4.580799579620361, |
| "learning_rate": 2.1278462112728632e-05, |
| "loss": 2.2933, |
| "step": 15390 |
| }, |
| { |
| "epoch": 1.7245240761478162, |
| "grad_norm": 5.851457118988037, |
| "learning_rate": 2.125979843225084e-05, |
| "loss": 1.9909, |
| "step": 15400 |
| }, |
| { |
| "epoch": 1.7256438969764838, |
| "grad_norm": 4.036518573760986, |
| "learning_rate": 2.1241134751773052e-05, |
| "loss": 2.1115, |
| "step": 15410 |
| }, |
| { |
| "epoch": 1.7267637178051511, |
| "grad_norm": 10.545909881591797, |
| "learning_rate": 2.122247107129526e-05, |
| "loss": 1.7327, |
| "step": 15420 |
| }, |
| { |
| "epoch": 1.7278835386338187, |
| "grad_norm": 1.6649363040924072, |
| "learning_rate": 2.120380739081747e-05, |
| "loss": 1.7729, |
| "step": 15430 |
| }, |
| { |
| "epoch": 1.729003359462486, |
| "grad_norm": 10.285140991210938, |
| "learning_rate": 2.118514371033968e-05, |
| "loss": 1.9684, |
| "step": 15440 |
| }, |
| { |
| "epoch": 1.7301231802911534, |
| "grad_norm": 10.789081573486328, |
| "learning_rate": 2.116648002986189e-05, |
| "loss": 1.9455, |
| "step": 15450 |
| }, |
| { |
| "epoch": 1.7312430011198208, |
| "grad_norm": 12.79870891571045, |
| "learning_rate": 2.11478163493841e-05, |
| "loss": 2.0917, |
| "step": 15460 |
| }, |
| { |
| "epoch": 1.7323628219484881, |
| "grad_norm": 7.7222065925598145, |
| "learning_rate": 2.112915266890631e-05, |
| "loss": 2.064, |
| "step": 15470 |
| }, |
| { |
| "epoch": 1.7334826427771557, |
| "grad_norm": 4.771847724914551, |
| "learning_rate": 2.111048898842852e-05, |
| "loss": 1.9891, |
| "step": 15480 |
| }, |
| { |
| "epoch": 1.734602463605823, |
| "grad_norm": 3.90159273147583, |
| "learning_rate": 2.109182530795073e-05, |
| "loss": 2.0179, |
| "step": 15490 |
| }, |
| { |
| "epoch": 1.7357222844344906, |
| "grad_norm": 7.232120037078857, |
| "learning_rate": 2.107316162747294e-05, |
| "loss": 2.3234, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.736842105263158, |
| "grad_norm": 5.076690196990967, |
| "learning_rate": 2.105449794699515e-05, |
| "loss": 2.2486, |
| "step": 15510 |
| }, |
| { |
| "epoch": 1.7379619260918253, |
| "grad_norm": 12.581092834472656, |
| "learning_rate": 2.1035834266517357e-05, |
| "loss": 2.1017, |
| "step": 15520 |
| }, |
| { |
| "epoch": 1.7390817469204927, |
| "grad_norm": 7.462939262390137, |
| "learning_rate": 2.1017170586039566e-05, |
| "loss": 1.8854, |
| "step": 15530 |
| }, |
| { |
| "epoch": 1.74020156774916, |
| "grad_norm": 5.599474906921387, |
| "learning_rate": 2.0998506905561777e-05, |
| "loss": 1.987, |
| "step": 15540 |
| }, |
| { |
| "epoch": 1.7413213885778276, |
| "grad_norm": 2.1986734867095947, |
| "learning_rate": 2.0979843225083986e-05, |
| "loss": 1.3435, |
| "step": 15550 |
| }, |
| { |
| "epoch": 1.742441209406495, |
| "grad_norm": 10.124311447143555, |
| "learning_rate": 2.0961179544606197e-05, |
| "loss": 2.2367, |
| "step": 15560 |
| }, |
| { |
| "epoch": 1.7435610302351625, |
| "grad_norm": 11.939183235168457, |
| "learning_rate": 2.0942515864128406e-05, |
| "loss": 2.5315, |
| "step": 15570 |
| }, |
| { |
| "epoch": 1.7446808510638299, |
| "grad_norm": 10.486783027648926, |
| "learning_rate": 2.0923852183650617e-05, |
| "loss": 2.1949, |
| "step": 15580 |
| }, |
| { |
| "epoch": 1.7458006718924972, |
| "grad_norm": 10.252884864807129, |
| "learning_rate": 2.0905188503172826e-05, |
| "loss": 1.7758, |
| "step": 15590 |
| }, |
| { |
| "epoch": 1.7469204927211646, |
| "grad_norm": 3.4020817279815674, |
| "learning_rate": 2.0886524822695037e-05, |
| "loss": 1.7837, |
| "step": 15600 |
| }, |
| { |
| "epoch": 1.748040313549832, |
| "grad_norm": 7.330861568450928, |
| "learning_rate": 2.0867861142217246e-05, |
| "loss": 1.4382, |
| "step": 15610 |
| }, |
| { |
| "epoch": 1.7491601343784995, |
| "grad_norm": 8.330341339111328, |
| "learning_rate": 2.0849197461739457e-05, |
| "loss": 2.185, |
| "step": 15620 |
| }, |
| { |
| "epoch": 1.7502799552071668, |
| "grad_norm": 4.57420015335083, |
| "learning_rate": 2.0830533781261666e-05, |
| "loss": 2.1382, |
| "step": 15630 |
| }, |
| { |
| "epoch": 1.7513997760358344, |
| "grad_norm": 14.915903091430664, |
| "learning_rate": 2.0811870100783874e-05, |
| "loss": 1.8498, |
| "step": 15640 |
| }, |
| { |
| "epoch": 1.7525195968645018, |
| "grad_norm": 4.439641952514648, |
| "learning_rate": 2.0793206420306086e-05, |
| "loss": 1.9074, |
| "step": 15650 |
| }, |
| { |
| "epoch": 1.7536394176931691, |
| "grad_norm": 4.21160364151001, |
| "learning_rate": 2.0774542739828294e-05, |
| "loss": 2.0153, |
| "step": 15660 |
| }, |
| { |
| "epoch": 1.7547592385218365, |
| "grad_norm": 3.9740211963653564, |
| "learning_rate": 2.0755879059350506e-05, |
| "loss": 2.1055, |
| "step": 15670 |
| }, |
| { |
| "epoch": 1.7558790593505038, |
| "grad_norm": 8.004166603088379, |
| "learning_rate": 2.0737215378872714e-05, |
| "loss": 1.9654, |
| "step": 15680 |
| }, |
| { |
| "epoch": 1.7569988801791714, |
| "grad_norm": 12.65368938446045, |
| "learning_rate": 2.0718551698394926e-05, |
| "loss": 2.3388, |
| "step": 15690 |
| }, |
| { |
| "epoch": 1.7581187010078387, |
| "grad_norm": 7.4648566246032715, |
| "learning_rate": 2.0699888017917134e-05, |
| "loss": 1.9942, |
| "step": 15700 |
| }, |
| { |
| "epoch": 1.7592385218365063, |
| "grad_norm": 3.306600570678711, |
| "learning_rate": 2.0681224337439346e-05, |
| "loss": 1.7987, |
| "step": 15710 |
| }, |
| { |
| "epoch": 1.7603583426651737, |
| "grad_norm": 4.179432392120361, |
| "learning_rate": 2.0662560656961554e-05, |
| "loss": 2.2372, |
| "step": 15720 |
| }, |
| { |
| "epoch": 1.761478163493841, |
| "grad_norm": 13.356534004211426, |
| "learning_rate": 2.0643896976483766e-05, |
| "loss": 1.5609, |
| "step": 15730 |
| }, |
| { |
| "epoch": 1.7625979843225084, |
| "grad_norm": 9.077022552490234, |
| "learning_rate": 2.0625233296005974e-05, |
| "loss": 1.9327, |
| "step": 15740 |
| }, |
| { |
| "epoch": 1.7637178051511757, |
| "grad_norm": 3.596141815185547, |
| "learning_rate": 2.0606569615528182e-05, |
| "loss": 1.971, |
| "step": 15750 |
| }, |
| { |
| "epoch": 1.764837625979843, |
| "grad_norm": 3.860454559326172, |
| "learning_rate": 2.058790593505039e-05, |
| "loss": 2.3288, |
| "step": 15760 |
| }, |
| { |
| "epoch": 1.7659574468085106, |
| "grad_norm": 12.444572448730469, |
| "learning_rate": 2.0569242254572602e-05, |
| "loss": 1.8277, |
| "step": 15770 |
| }, |
| { |
| "epoch": 1.7670772676371782, |
| "grad_norm": 10.383987426757812, |
| "learning_rate": 2.055057857409481e-05, |
| "loss": 2.2227, |
| "step": 15780 |
| }, |
| { |
| "epoch": 1.7681970884658456, |
| "grad_norm": 9.582054138183594, |
| "learning_rate": 2.0531914893617022e-05, |
| "loss": 1.7383, |
| "step": 15790 |
| }, |
| { |
| "epoch": 1.769316909294513, |
| "grad_norm": 12.529754638671875, |
| "learning_rate": 2.051325121313923e-05, |
| "loss": 1.9381, |
| "step": 15800 |
| }, |
| { |
| "epoch": 1.7704367301231803, |
| "grad_norm": 8.915084838867188, |
| "learning_rate": 2.0494587532661442e-05, |
| "loss": 1.668, |
| "step": 15810 |
| }, |
| { |
| "epoch": 1.7715565509518476, |
| "grad_norm": 13.440780639648438, |
| "learning_rate": 2.047592385218365e-05, |
| "loss": 2.1798, |
| "step": 15820 |
| }, |
| { |
| "epoch": 1.772676371780515, |
| "grad_norm": 7.045945167541504, |
| "learning_rate": 2.0457260171705862e-05, |
| "loss": 1.9319, |
| "step": 15830 |
| }, |
| { |
| "epoch": 1.7737961926091825, |
| "grad_norm": 2.6684410572052, |
| "learning_rate": 2.043859649122807e-05, |
| "loss": 1.7845, |
| "step": 15840 |
| }, |
| { |
| "epoch": 1.77491601343785, |
| "grad_norm": 11.95478343963623, |
| "learning_rate": 2.041993281075028e-05, |
| "loss": 2.3651, |
| "step": 15850 |
| }, |
| { |
| "epoch": 1.7760358342665175, |
| "grad_norm": 4.880320072174072, |
| "learning_rate": 2.040126913027249e-05, |
| "loss": 1.9852, |
| "step": 15860 |
| }, |
| { |
| "epoch": 1.7771556550951848, |
| "grad_norm": 12.231099128723145, |
| "learning_rate": 2.03826054497947e-05, |
| "loss": 2.0981, |
| "step": 15870 |
| }, |
| { |
| "epoch": 1.7782754759238522, |
| "grad_norm": 7.029375076293945, |
| "learning_rate": 2.036394176931691e-05, |
| "loss": 2.1145, |
| "step": 15880 |
| }, |
| { |
| "epoch": 1.7793952967525195, |
| "grad_norm": 6.398838043212891, |
| "learning_rate": 2.034527808883912e-05, |
| "loss": 2.0601, |
| "step": 15890 |
| }, |
| { |
| "epoch": 1.7805151175811869, |
| "grad_norm": 4.79886531829834, |
| "learning_rate": 2.032661440836133e-05, |
| "loss": 1.7714, |
| "step": 15900 |
| }, |
| { |
| "epoch": 1.7816349384098544, |
| "grad_norm": 5.065080642700195, |
| "learning_rate": 2.030795072788354e-05, |
| "loss": 1.5375, |
| "step": 15910 |
| }, |
| { |
| "epoch": 1.782754759238522, |
| "grad_norm": 4.638917446136475, |
| "learning_rate": 2.028928704740575e-05, |
| "loss": 2.0297, |
| "step": 15920 |
| }, |
| { |
| "epoch": 1.7838745800671894, |
| "grad_norm": 8.476948738098145, |
| "learning_rate": 2.027062336692796e-05, |
| "loss": 2.2116, |
| "step": 15930 |
| }, |
| { |
| "epoch": 1.7849944008958567, |
| "grad_norm": 14.950053215026855, |
| "learning_rate": 2.025195968645017e-05, |
| "loss": 2.2304, |
| "step": 15940 |
| }, |
| { |
| "epoch": 1.786114221724524, |
| "grad_norm": 4.521371841430664, |
| "learning_rate": 2.023329600597238e-05, |
| "loss": 2.205, |
| "step": 15950 |
| }, |
| { |
| "epoch": 1.7872340425531914, |
| "grad_norm": 4.019099712371826, |
| "learning_rate": 2.021463232549459e-05, |
| "loss": 1.9879, |
| "step": 15960 |
| }, |
| { |
| "epoch": 1.7883538633818588, |
| "grad_norm": 10.061615943908691, |
| "learning_rate": 2.01959686450168e-05, |
| "loss": 1.6731, |
| "step": 15970 |
| }, |
| { |
| "epoch": 1.7894736842105263, |
| "grad_norm": 8.204621315002441, |
| "learning_rate": 2.017730496453901e-05, |
| "loss": 1.9748, |
| "step": 15980 |
| }, |
| { |
| "epoch": 1.7905935050391937, |
| "grad_norm": 5.247344493865967, |
| "learning_rate": 2.0158641284061216e-05, |
| "loss": 1.6542, |
| "step": 15990 |
| }, |
| { |
| "epoch": 1.7917133258678613, |
| "grad_norm": 4.7308735847473145, |
| "learning_rate": 2.0139977603583427e-05, |
| "loss": 1.4543, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.7928331466965286, |
| "grad_norm": 3.514563798904419, |
| "learning_rate": 2.0121313923105636e-05, |
| "loss": 2.0833, |
| "step": 16010 |
| }, |
| { |
| "epoch": 1.793952967525196, |
| "grad_norm": 4.816470623016357, |
| "learning_rate": 2.0102650242627847e-05, |
| "loss": 2.283, |
| "step": 16020 |
| }, |
| { |
| "epoch": 1.7950727883538633, |
| "grad_norm": 11.659377098083496, |
| "learning_rate": 2.0083986562150056e-05, |
| "loss": 1.8348, |
| "step": 16030 |
| }, |
| { |
| "epoch": 1.7961926091825307, |
| "grad_norm": 5.277092933654785, |
| "learning_rate": 2.0065322881672267e-05, |
| "loss": 1.8455, |
| "step": 16040 |
| }, |
| { |
| "epoch": 1.7973124300111982, |
| "grad_norm": 10.653385162353516, |
| "learning_rate": 2.0046659201194476e-05, |
| "loss": 1.8004, |
| "step": 16050 |
| }, |
| { |
| "epoch": 1.7984322508398656, |
| "grad_norm": 5.165909767150879, |
| "learning_rate": 2.0027995520716687e-05, |
| "loss": 1.9219, |
| "step": 16060 |
| }, |
| { |
| "epoch": 1.7995520716685331, |
| "grad_norm": 12.408156394958496, |
| "learning_rate": 2.0009331840238896e-05, |
| "loss": 2.0149, |
| "step": 16070 |
| }, |
| { |
| "epoch": 1.8006718924972005, |
| "grad_norm": 3.793848752975464, |
| "learning_rate": 1.9990668159761104e-05, |
| "loss": 2.1401, |
| "step": 16080 |
| }, |
| { |
| "epoch": 1.8017917133258678, |
| "grad_norm": 4.723913192749023, |
| "learning_rate": 1.9972004479283316e-05, |
| "loss": 1.5437, |
| "step": 16090 |
| }, |
| { |
| "epoch": 1.8029115341545352, |
| "grad_norm": 5.787063121795654, |
| "learning_rate": 1.9953340798805524e-05, |
| "loss": 1.93, |
| "step": 16100 |
| }, |
| { |
| "epoch": 1.8040313549832026, |
| "grad_norm": 6.674378395080566, |
| "learning_rate": 1.9934677118327736e-05, |
| "loss": 2.3135, |
| "step": 16110 |
| }, |
| { |
| "epoch": 1.8051511758118701, |
| "grad_norm": 14.730244636535645, |
| "learning_rate": 1.9916013437849944e-05, |
| "loss": 2.1795, |
| "step": 16120 |
| }, |
| { |
| "epoch": 1.8062709966405375, |
| "grad_norm": 4.993513584136963, |
| "learning_rate": 1.9897349757372156e-05, |
| "loss": 1.7507, |
| "step": 16130 |
| }, |
| { |
| "epoch": 1.807390817469205, |
| "grad_norm": 14.580843925476074, |
| "learning_rate": 1.9878686076894364e-05, |
| "loss": 1.7252, |
| "step": 16140 |
| }, |
| { |
| "epoch": 1.8085106382978724, |
| "grad_norm": 14.65086841583252, |
| "learning_rate": 1.9860022396416576e-05, |
| "loss": 1.6178, |
| "step": 16150 |
| }, |
| { |
| "epoch": 1.8096304591265397, |
| "grad_norm": 15.6979398727417, |
| "learning_rate": 1.9841358715938784e-05, |
| "loss": 2.3194, |
| "step": 16160 |
| }, |
| { |
| "epoch": 1.810750279955207, |
| "grad_norm": 5.819782733917236, |
| "learning_rate": 1.9822695035460996e-05, |
| "loss": 1.572, |
| "step": 16170 |
| }, |
| { |
| "epoch": 1.8118701007838744, |
| "grad_norm": 4.418210983276367, |
| "learning_rate": 1.9804031354983204e-05, |
| "loss": 2.247, |
| "step": 16180 |
| }, |
| { |
| "epoch": 1.812989921612542, |
| "grad_norm": 5.038919925689697, |
| "learning_rate": 1.9785367674505416e-05, |
| "loss": 1.8143, |
| "step": 16190 |
| }, |
| { |
| "epoch": 1.8141097424412094, |
| "grad_norm": 5.138890743255615, |
| "learning_rate": 1.9766703994027624e-05, |
| "loss": 1.8542, |
| "step": 16200 |
| }, |
| { |
| "epoch": 1.815229563269877, |
| "grad_norm": 12.318073272705078, |
| "learning_rate": 1.9748040313549836e-05, |
| "loss": 2.1731, |
| "step": 16210 |
| }, |
| { |
| "epoch": 1.8163493840985443, |
| "grad_norm": 4.275629043579102, |
| "learning_rate": 1.972937663307204e-05, |
| "loss": 2.4228, |
| "step": 16220 |
| }, |
| { |
| "epoch": 1.8174692049272116, |
| "grad_norm": 4.5237016677856445, |
| "learning_rate": 1.9710712952594252e-05, |
| "loss": 1.9218, |
| "step": 16230 |
| }, |
| { |
| "epoch": 1.818589025755879, |
| "grad_norm": 7.575822353363037, |
| "learning_rate": 1.969204927211646e-05, |
| "loss": 1.7327, |
| "step": 16240 |
| }, |
| { |
| "epoch": 1.8197088465845463, |
| "grad_norm": 12.654701232910156, |
| "learning_rate": 1.9673385591638672e-05, |
| "loss": 1.993, |
| "step": 16250 |
| }, |
| { |
| "epoch": 1.820828667413214, |
| "grad_norm": 8.574930191040039, |
| "learning_rate": 1.965472191116088e-05, |
| "loss": 1.8782, |
| "step": 16260 |
| }, |
| { |
| "epoch": 1.8219484882418813, |
| "grad_norm": 4.255867958068848, |
| "learning_rate": 1.9636058230683092e-05, |
| "loss": 1.8501, |
| "step": 16270 |
| }, |
| { |
| "epoch": 1.8230683090705488, |
| "grad_norm": 6.834265232086182, |
| "learning_rate": 1.96173945502053e-05, |
| "loss": 2.3436, |
| "step": 16280 |
| }, |
| { |
| "epoch": 1.8241881298992162, |
| "grad_norm": 3.866483688354492, |
| "learning_rate": 1.959873086972751e-05, |
| "loss": 2.3195, |
| "step": 16290 |
| }, |
| { |
| "epoch": 1.8253079507278835, |
| "grad_norm": 10.645752906799316, |
| "learning_rate": 1.958006718924972e-05, |
| "loss": 1.7716, |
| "step": 16300 |
| }, |
| { |
| "epoch": 1.826427771556551, |
| "grad_norm": 15.50953197479248, |
| "learning_rate": 1.956140350877193e-05, |
| "loss": 2.5357, |
| "step": 16310 |
| }, |
| { |
| "epoch": 1.8275475923852182, |
| "grad_norm": 8.97745418548584, |
| "learning_rate": 1.954273982829414e-05, |
| "loss": 1.8631, |
| "step": 16320 |
| }, |
| { |
| "epoch": 1.8286674132138858, |
| "grad_norm": 10.974065780639648, |
| "learning_rate": 1.952407614781635e-05, |
| "loss": 1.6453, |
| "step": 16330 |
| }, |
| { |
| "epoch": 1.8297872340425532, |
| "grad_norm": 14.380806922912598, |
| "learning_rate": 1.950541246733856e-05, |
| "loss": 2.1817, |
| "step": 16340 |
| }, |
| { |
| "epoch": 1.8309070548712207, |
| "grad_norm": 3.8893136978149414, |
| "learning_rate": 1.948674878686077e-05, |
| "loss": 2.1023, |
| "step": 16350 |
| }, |
| { |
| "epoch": 1.832026875699888, |
| "grad_norm": 3.2880914211273193, |
| "learning_rate": 1.946808510638298e-05, |
| "loss": 1.466, |
| "step": 16360 |
| }, |
| { |
| "epoch": 1.8331466965285554, |
| "grad_norm": 9.581578254699707, |
| "learning_rate": 1.944942142590519e-05, |
| "loss": 1.8245, |
| "step": 16370 |
| }, |
| { |
| "epoch": 1.8342665173572228, |
| "grad_norm": 15.423023223876953, |
| "learning_rate": 1.94307577454274e-05, |
| "loss": 2.1899, |
| "step": 16380 |
| }, |
| { |
| "epoch": 1.8353863381858901, |
| "grad_norm": 5.308213233947754, |
| "learning_rate": 1.941209406494961e-05, |
| "loss": 1.6693, |
| "step": 16390 |
| }, |
| { |
| "epoch": 1.8365061590145577, |
| "grad_norm": 13.718766212463379, |
| "learning_rate": 1.939343038447182e-05, |
| "loss": 1.6922, |
| "step": 16400 |
| }, |
| { |
| "epoch": 1.837625979843225, |
| "grad_norm": 5.901851177215576, |
| "learning_rate": 1.937476670399403e-05, |
| "loss": 1.9234, |
| "step": 16410 |
| }, |
| { |
| "epoch": 1.8387458006718926, |
| "grad_norm": 4.218606948852539, |
| "learning_rate": 1.935610302351624e-05, |
| "loss": 1.7346, |
| "step": 16420 |
| }, |
| { |
| "epoch": 1.83986562150056, |
| "grad_norm": 3.545685291290283, |
| "learning_rate": 1.933743934303845e-05, |
| "loss": 1.5436, |
| "step": 16430 |
| }, |
| { |
| "epoch": 1.8409854423292273, |
| "grad_norm": 3.544178009033203, |
| "learning_rate": 1.931877566256066e-05, |
| "loss": 2.0027, |
| "step": 16440 |
| }, |
| { |
| "epoch": 1.8421052631578947, |
| "grad_norm": 16.046741485595703, |
| "learning_rate": 1.9300111982082866e-05, |
| "loss": 1.8394, |
| "step": 16450 |
| }, |
| { |
| "epoch": 1.843225083986562, |
| "grad_norm": 3.808443546295166, |
| "learning_rate": 1.9281448301605077e-05, |
| "loss": 1.5258, |
| "step": 16460 |
| }, |
| { |
| "epoch": 1.8443449048152296, |
| "grad_norm": 16.202293395996094, |
| "learning_rate": 1.9262784621127286e-05, |
| "loss": 1.8592, |
| "step": 16470 |
| }, |
| { |
| "epoch": 1.845464725643897, |
| "grad_norm": 12.9262056350708, |
| "learning_rate": 1.9244120940649498e-05, |
| "loss": 2.35, |
| "step": 16480 |
| }, |
| { |
| "epoch": 1.8465845464725645, |
| "grad_norm": 9.115686416625977, |
| "learning_rate": 1.9225457260171706e-05, |
| "loss": 2.0023, |
| "step": 16490 |
| }, |
| { |
| "epoch": 1.8477043673012319, |
| "grad_norm": 4.362748622894287, |
| "learning_rate": 1.9206793579693914e-05, |
| "loss": 2.2523, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.8488241881298992, |
| "grad_norm": 6.081763744354248, |
| "learning_rate": 1.9188129899216126e-05, |
| "loss": 1.9744, |
| "step": 16510 |
| }, |
| { |
| "epoch": 1.8499440089585666, |
| "grad_norm": 13.35545539855957, |
| "learning_rate": 1.9169466218738334e-05, |
| "loss": 2.0795, |
| "step": 16520 |
| }, |
| { |
| "epoch": 1.851063829787234, |
| "grad_norm": 4.248141765594482, |
| "learning_rate": 1.9150802538260546e-05, |
| "loss": 2.4488, |
| "step": 16530 |
| }, |
| { |
| "epoch": 1.8521836506159015, |
| "grad_norm": 10.578146934509277, |
| "learning_rate": 1.9132138857782754e-05, |
| "loss": 1.8938, |
| "step": 16540 |
| }, |
| { |
| "epoch": 1.8533034714445689, |
| "grad_norm": 8.211806297302246, |
| "learning_rate": 1.9113475177304966e-05, |
| "loss": 1.939, |
| "step": 16550 |
| }, |
| { |
| "epoch": 1.8544232922732364, |
| "grad_norm": 11.0032320022583, |
| "learning_rate": 1.9094811496827174e-05, |
| "loss": 2.0445, |
| "step": 16560 |
| }, |
| { |
| "epoch": 1.8555431131019038, |
| "grad_norm": 15.884440422058105, |
| "learning_rate": 1.9076147816349386e-05, |
| "loss": 2.1312, |
| "step": 16570 |
| }, |
| { |
| "epoch": 1.8566629339305711, |
| "grad_norm": 17.182661056518555, |
| "learning_rate": 1.9057484135871594e-05, |
| "loss": 1.8968, |
| "step": 16580 |
| }, |
| { |
| "epoch": 1.8577827547592385, |
| "grad_norm": 12.066494941711426, |
| "learning_rate": 1.9038820455393806e-05, |
| "loss": 2.1683, |
| "step": 16590 |
| }, |
| { |
| "epoch": 1.8589025755879058, |
| "grad_norm": 5.713685989379883, |
| "learning_rate": 1.9020156774916014e-05, |
| "loss": 1.8316, |
| "step": 16600 |
| }, |
| { |
| "epoch": 1.8600223964165732, |
| "grad_norm": 3.7835745811462402, |
| "learning_rate": 1.9001493094438226e-05, |
| "loss": 1.7352, |
| "step": 16610 |
| }, |
| { |
| "epoch": 1.8611422172452408, |
| "grad_norm": 5.586095809936523, |
| "learning_rate": 1.8982829413960434e-05, |
| "loss": 1.9586, |
| "step": 16620 |
| }, |
| { |
| "epoch": 1.8622620380739083, |
| "grad_norm": 10.472651481628418, |
| "learning_rate": 1.8964165733482646e-05, |
| "loss": 1.8188, |
| "step": 16630 |
| }, |
| { |
| "epoch": 1.8633818589025757, |
| "grad_norm": 8.586959838867188, |
| "learning_rate": 1.8945502053004854e-05, |
| "loss": 1.7482, |
| "step": 16640 |
| }, |
| { |
| "epoch": 1.864501679731243, |
| "grad_norm": 14.469319343566895, |
| "learning_rate": 1.8926838372527066e-05, |
| "loss": 1.9509, |
| "step": 16650 |
| }, |
| { |
| "epoch": 1.8656215005599104, |
| "grad_norm": 12.987029075622559, |
| "learning_rate": 1.8908174692049274e-05, |
| "loss": 1.505, |
| "step": 16660 |
| }, |
| { |
| "epoch": 1.8667413213885777, |
| "grad_norm": 4.787947654724121, |
| "learning_rate": 1.8889511011571483e-05, |
| "loss": 1.7582, |
| "step": 16670 |
| }, |
| { |
| "epoch": 1.867861142217245, |
| "grad_norm": 4.350035667419434, |
| "learning_rate": 1.8870847331093694e-05, |
| "loss": 2.4025, |
| "step": 16680 |
| }, |
| { |
| "epoch": 1.8689809630459127, |
| "grad_norm": 4.656111717224121, |
| "learning_rate": 1.8852183650615903e-05, |
| "loss": 1.558, |
| "step": 16690 |
| }, |
| { |
| "epoch": 1.8701007838745802, |
| "grad_norm": 5.183754920959473, |
| "learning_rate": 1.883351997013811e-05, |
| "loss": 1.6411, |
| "step": 16700 |
| }, |
| { |
| "epoch": 1.8712206047032476, |
| "grad_norm": 13.324991226196289, |
| "learning_rate": 1.8814856289660323e-05, |
| "loss": 2.254, |
| "step": 16710 |
| }, |
| { |
| "epoch": 1.872340425531915, |
| "grad_norm": 15.952241897583008, |
| "learning_rate": 1.879619260918253e-05, |
| "loss": 1.7153, |
| "step": 16720 |
| }, |
| { |
| "epoch": 1.8734602463605823, |
| "grad_norm": 8.312430381774902, |
| "learning_rate": 1.877752892870474e-05, |
| "loss": 2.2136, |
| "step": 16730 |
| }, |
| { |
| "epoch": 1.8745800671892496, |
| "grad_norm": 7.2421393394470215, |
| "learning_rate": 1.875886524822695e-05, |
| "loss": 1.8395, |
| "step": 16740 |
| }, |
| { |
| "epoch": 1.875699888017917, |
| "grad_norm": 9.180643081665039, |
| "learning_rate": 1.874020156774916e-05, |
| "loss": 2.1119, |
| "step": 16750 |
| }, |
| { |
| "epoch": 1.8768197088465846, |
| "grad_norm": 15.752584457397461, |
| "learning_rate": 1.872153788727137e-05, |
| "loss": 2.3431, |
| "step": 16760 |
| }, |
| { |
| "epoch": 1.877939529675252, |
| "grad_norm": 15.961100578308105, |
| "learning_rate": 1.870287420679358e-05, |
| "loss": 2.2094, |
| "step": 16770 |
| }, |
| { |
| "epoch": 1.8790593505039195, |
| "grad_norm": 4.183115482330322, |
| "learning_rate": 1.868421052631579e-05, |
| "loss": 1.9611, |
| "step": 16780 |
| }, |
| { |
| "epoch": 1.8801791713325868, |
| "grad_norm": 15.471096992492676, |
| "learning_rate": 1.8665546845838e-05, |
| "loss": 2.2645, |
| "step": 16790 |
| }, |
| { |
| "epoch": 1.8812989921612542, |
| "grad_norm": 15.710405349731445, |
| "learning_rate": 1.864688316536021e-05, |
| "loss": 2.1026, |
| "step": 16800 |
| }, |
| { |
| "epoch": 1.8824188129899215, |
| "grad_norm": 7.765809535980225, |
| "learning_rate": 1.862821948488242e-05, |
| "loss": 1.8401, |
| "step": 16810 |
| }, |
| { |
| "epoch": 1.8835386338185889, |
| "grad_norm": 6.538113117218018, |
| "learning_rate": 1.860955580440463e-05, |
| "loss": 1.7096, |
| "step": 16820 |
| }, |
| { |
| "epoch": 1.8846584546472565, |
| "grad_norm": 16.50730323791504, |
| "learning_rate": 1.859089212392684e-05, |
| "loss": 2.0152, |
| "step": 16830 |
| }, |
| { |
| "epoch": 1.8857782754759238, |
| "grad_norm": 3.642190933227539, |
| "learning_rate": 1.857222844344905e-05, |
| "loss": 2.0935, |
| "step": 16840 |
| }, |
| { |
| "epoch": 1.8868980963045914, |
| "grad_norm": 5.2225518226623535, |
| "learning_rate": 1.855356476297126e-05, |
| "loss": 1.7445, |
| "step": 16850 |
| }, |
| { |
| "epoch": 1.8880179171332587, |
| "grad_norm": 3.3426289558410645, |
| "learning_rate": 1.853490108249347e-05, |
| "loss": 2.3258, |
| "step": 16860 |
| }, |
| { |
| "epoch": 1.889137737961926, |
| "grad_norm": 8.263337135314941, |
| "learning_rate": 1.851623740201568e-05, |
| "loss": 1.6674, |
| "step": 16870 |
| }, |
| { |
| "epoch": 1.8902575587905934, |
| "grad_norm": 4.517258167266846, |
| "learning_rate": 1.8497573721537888e-05, |
| "loss": 2.0328, |
| "step": 16880 |
| }, |
| { |
| "epoch": 1.8913773796192608, |
| "grad_norm": 5.429361820220947, |
| "learning_rate": 1.84789100410601e-05, |
| "loss": 2.3338, |
| "step": 16890 |
| }, |
| { |
| "epoch": 1.8924972004479284, |
| "grad_norm": 11.747203826904297, |
| "learning_rate": 1.8460246360582308e-05, |
| "loss": 1.9876, |
| "step": 16900 |
| }, |
| { |
| "epoch": 1.8936170212765957, |
| "grad_norm": 14.812180519104004, |
| "learning_rate": 1.844158268010452e-05, |
| "loss": 1.8193, |
| "step": 16910 |
| }, |
| { |
| "epoch": 1.8947368421052633, |
| "grad_norm": 4.837928771972656, |
| "learning_rate": 1.8422918999626728e-05, |
| "loss": 1.8644, |
| "step": 16920 |
| }, |
| { |
| "epoch": 1.8958566629339306, |
| "grad_norm": 9.403674125671387, |
| "learning_rate": 1.8404255319148936e-05, |
| "loss": 1.7544, |
| "step": 16930 |
| }, |
| { |
| "epoch": 1.896976483762598, |
| "grad_norm": 5.102954387664795, |
| "learning_rate": 1.8385591638671144e-05, |
| "loss": 1.8039, |
| "step": 16940 |
| }, |
| { |
| "epoch": 1.8980963045912653, |
| "grad_norm": 13.829090118408203, |
| "learning_rate": 1.8366927958193356e-05, |
| "loss": 2.3956, |
| "step": 16950 |
| }, |
| { |
| "epoch": 1.8992161254199327, |
| "grad_norm": 14.055281639099121, |
| "learning_rate": 1.8348264277715564e-05, |
| "loss": 1.952, |
| "step": 16960 |
| }, |
| { |
| "epoch": 1.9003359462486002, |
| "grad_norm": 4.856631278991699, |
| "learning_rate": 1.8329600597237776e-05, |
| "loss": 1.8025, |
| "step": 16970 |
| }, |
| { |
| "epoch": 1.9014557670772676, |
| "grad_norm": 5.615917205810547, |
| "learning_rate": 1.8310936916759984e-05, |
| "loss": 2.4083, |
| "step": 16980 |
| }, |
| { |
| "epoch": 1.9025755879059352, |
| "grad_norm": 4.638927459716797, |
| "learning_rate": 1.8292273236282196e-05, |
| "loss": 1.7921, |
| "step": 16990 |
| }, |
| { |
| "epoch": 1.9036954087346025, |
| "grad_norm": 3.5314502716064453, |
| "learning_rate": 1.8273609555804404e-05, |
| "loss": 1.8788, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.9048152295632699, |
| "grad_norm": 6.3414506912231445, |
| "learning_rate": 1.8254945875326616e-05, |
| "loss": 1.8657, |
| "step": 17010 |
| }, |
| { |
| "epoch": 1.9059350503919372, |
| "grad_norm": 8.888124465942383, |
| "learning_rate": 1.8236282194848824e-05, |
| "loss": 2.4341, |
| "step": 17020 |
| }, |
| { |
| "epoch": 1.9070548712206046, |
| "grad_norm": 5.071857929229736, |
| "learning_rate": 1.8217618514371036e-05, |
| "loss": 2.0112, |
| "step": 17030 |
| }, |
| { |
| "epoch": 1.9081746920492721, |
| "grad_norm": 3.5548458099365234, |
| "learning_rate": 1.8198954833893244e-05, |
| "loss": 2.0355, |
| "step": 17040 |
| }, |
| { |
| "epoch": 1.9092945128779395, |
| "grad_norm": 13.80466079711914, |
| "learning_rate": 1.8180291153415456e-05, |
| "loss": 2.532, |
| "step": 17050 |
| }, |
| { |
| "epoch": 1.910414333706607, |
| "grad_norm": 4.249703407287598, |
| "learning_rate": 1.8161627472937664e-05, |
| "loss": 1.9727, |
| "step": 17060 |
| }, |
| { |
| "epoch": 1.9115341545352744, |
| "grad_norm": 4.494642734527588, |
| "learning_rate": 1.8142963792459876e-05, |
| "loss": 2.0125, |
| "step": 17070 |
| }, |
| { |
| "epoch": 1.9126539753639418, |
| "grad_norm": 5.063194274902344, |
| "learning_rate": 1.8124300111982084e-05, |
| "loss": 1.647, |
| "step": 17080 |
| }, |
| { |
| "epoch": 1.9137737961926091, |
| "grad_norm": 9.803994178771973, |
| "learning_rate": 1.8105636431504293e-05, |
| "loss": 2.1401, |
| "step": 17090 |
| }, |
| { |
| "epoch": 1.9148936170212765, |
| "grad_norm": 7.283653736114502, |
| "learning_rate": 1.8086972751026504e-05, |
| "loss": 1.9052, |
| "step": 17100 |
| }, |
| { |
| "epoch": 1.916013437849944, |
| "grad_norm": 11.359768867492676, |
| "learning_rate": 1.8068309070548713e-05, |
| "loss": 2.0145, |
| "step": 17110 |
| }, |
| { |
| "epoch": 1.9171332586786114, |
| "grad_norm": 10.177249908447266, |
| "learning_rate": 1.8049645390070924e-05, |
| "loss": 1.7666, |
| "step": 17120 |
| }, |
| { |
| "epoch": 1.918253079507279, |
| "grad_norm": 5.568352699279785, |
| "learning_rate": 1.8030981709593133e-05, |
| "loss": 1.7245, |
| "step": 17130 |
| }, |
| { |
| "epoch": 1.9193729003359463, |
| "grad_norm": 19.79357147216797, |
| "learning_rate": 1.8012318029115344e-05, |
| "loss": 2.4217, |
| "step": 17140 |
| }, |
| { |
| "epoch": 1.9204927211646137, |
| "grad_norm": 10.292594909667969, |
| "learning_rate": 1.799365434863755e-05, |
| "loss": 1.4904, |
| "step": 17150 |
| }, |
| { |
| "epoch": 1.921612541993281, |
| "grad_norm": 3.906355381011963, |
| "learning_rate": 1.797499066815976e-05, |
| "loss": 2.4061, |
| "step": 17160 |
| }, |
| { |
| "epoch": 1.9227323628219484, |
| "grad_norm": 10.06027889251709, |
| "learning_rate": 1.795632698768197e-05, |
| "loss": 1.826, |
| "step": 17170 |
| }, |
| { |
| "epoch": 1.923852183650616, |
| "grad_norm": 3.928687572479248, |
| "learning_rate": 1.793766330720418e-05, |
| "loss": 1.9778, |
| "step": 17180 |
| }, |
| { |
| "epoch": 1.9249720044792833, |
| "grad_norm": 11.147214889526367, |
| "learning_rate": 1.791899962672639e-05, |
| "loss": 1.6609, |
| "step": 17190 |
| }, |
| { |
| "epoch": 1.9260918253079509, |
| "grad_norm": 5.292778968811035, |
| "learning_rate": 1.79003359462486e-05, |
| "loss": 1.581, |
| "step": 17200 |
| }, |
| { |
| "epoch": 1.9272116461366182, |
| "grad_norm": 5.773550987243652, |
| "learning_rate": 1.788167226577081e-05, |
| "loss": 1.5817, |
| "step": 17210 |
| }, |
| { |
| "epoch": 1.9283314669652856, |
| "grad_norm": 14.817527770996094, |
| "learning_rate": 1.786300858529302e-05, |
| "loss": 1.9826, |
| "step": 17220 |
| }, |
| { |
| "epoch": 1.929451287793953, |
| "grad_norm": 6.223337173461914, |
| "learning_rate": 1.784434490481523e-05, |
| "loss": 2.1578, |
| "step": 17230 |
| }, |
| { |
| "epoch": 1.9305711086226203, |
| "grad_norm": 4.402294158935547, |
| "learning_rate": 1.782568122433744e-05, |
| "loss": 2.2318, |
| "step": 17240 |
| }, |
| { |
| "epoch": 1.9316909294512878, |
| "grad_norm": 7.321905136108398, |
| "learning_rate": 1.780701754385965e-05, |
| "loss": 1.6848, |
| "step": 17250 |
| }, |
| { |
| "epoch": 1.9328107502799552, |
| "grad_norm": 14.152067184448242, |
| "learning_rate": 1.778835386338186e-05, |
| "loss": 2.5409, |
| "step": 17260 |
| }, |
| { |
| "epoch": 1.9339305711086228, |
| "grad_norm": 12.283940315246582, |
| "learning_rate": 1.776969018290407e-05, |
| "loss": 1.9965, |
| "step": 17270 |
| }, |
| { |
| "epoch": 1.9350503919372901, |
| "grad_norm": 8.56460189819336, |
| "learning_rate": 1.775102650242628e-05, |
| "loss": 1.6021, |
| "step": 17280 |
| }, |
| { |
| "epoch": 1.9361702127659575, |
| "grad_norm": 4.710309982299805, |
| "learning_rate": 1.773236282194849e-05, |
| "loss": 2.2814, |
| "step": 17290 |
| }, |
| { |
| "epoch": 1.9372900335946248, |
| "grad_norm": 12.966391563415527, |
| "learning_rate": 1.77136991414707e-05, |
| "loss": 2.172, |
| "step": 17300 |
| }, |
| { |
| "epoch": 1.9384098544232922, |
| "grad_norm": 16.88652229309082, |
| "learning_rate": 1.769503546099291e-05, |
| "loss": 2.029, |
| "step": 17310 |
| }, |
| { |
| "epoch": 1.9395296752519597, |
| "grad_norm": 8.579212188720703, |
| "learning_rate": 1.7676371780515118e-05, |
| "loss": 1.47, |
| "step": 17320 |
| }, |
| { |
| "epoch": 1.940649496080627, |
| "grad_norm": 14.837044715881348, |
| "learning_rate": 1.765770810003733e-05, |
| "loss": 1.9022, |
| "step": 17330 |
| }, |
| { |
| "epoch": 1.9417693169092947, |
| "grad_norm": 14.231435775756836, |
| "learning_rate": 1.7639044419559538e-05, |
| "loss": 2.4319, |
| "step": 17340 |
| }, |
| { |
| "epoch": 1.942889137737962, |
| "grad_norm": 6.342057704925537, |
| "learning_rate": 1.762038073908175e-05, |
| "loss": 1.8036, |
| "step": 17350 |
| }, |
| { |
| "epoch": 1.9440089585666294, |
| "grad_norm": 4.24338436126709, |
| "learning_rate": 1.7601717058603958e-05, |
| "loss": 1.9694, |
| "step": 17360 |
| }, |
| { |
| "epoch": 1.9451287793952967, |
| "grad_norm": 5.161984920501709, |
| "learning_rate": 1.758305337812617e-05, |
| "loss": 1.7682, |
| "step": 17370 |
| }, |
| { |
| "epoch": 1.946248600223964, |
| "grad_norm": 7.184517860412598, |
| "learning_rate": 1.7564389697648374e-05, |
| "loss": 1.5724, |
| "step": 17380 |
| }, |
| { |
| "epoch": 1.9473684210526314, |
| "grad_norm": 7.037195682525635, |
| "learning_rate": 1.7545726017170586e-05, |
| "loss": 1.6202, |
| "step": 17390 |
| }, |
| { |
| "epoch": 1.948488241881299, |
| "grad_norm": 7.23237419128418, |
| "learning_rate": 1.7527062336692794e-05, |
| "loss": 1.7879, |
| "step": 17400 |
| }, |
| { |
| "epoch": 1.9496080627099666, |
| "grad_norm": 4.513615131378174, |
| "learning_rate": 1.7508398656215006e-05, |
| "loss": 2.1071, |
| "step": 17410 |
| }, |
| { |
| "epoch": 1.950727883538634, |
| "grad_norm": 14.149372100830078, |
| "learning_rate": 1.7489734975737214e-05, |
| "loss": 1.8393, |
| "step": 17420 |
| }, |
| { |
| "epoch": 1.9518477043673013, |
| "grad_norm": 3.9815926551818848, |
| "learning_rate": 1.7471071295259426e-05, |
| "loss": 1.9888, |
| "step": 17430 |
| }, |
| { |
| "epoch": 1.9529675251959686, |
| "grad_norm": 15.270926475524902, |
| "learning_rate": 1.7452407614781634e-05, |
| "loss": 2.2696, |
| "step": 17440 |
| }, |
| { |
| "epoch": 1.954087346024636, |
| "grad_norm": 7.519197940826416, |
| "learning_rate": 1.7433743934303846e-05, |
| "loss": 2.0431, |
| "step": 17450 |
| }, |
| { |
| "epoch": 1.9552071668533033, |
| "grad_norm": 4.564593315124512, |
| "learning_rate": 1.7415080253826054e-05, |
| "loss": 1.7656, |
| "step": 17460 |
| }, |
| { |
| "epoch": 1.9563269876819709, |
| "grad_norm": 9.020241737365723, |
| "learning_rate": 1.7396416573348266e-05, |
| "loss": 1.9072, |
| "step": 17470 |
| }, |
| { |
| "epoch": 1.9574468085106385, |
| "grad_norm": 10.36052131652832, |
| "learning_rate": 1.7377752892870474e-05, |
| "loss": 1.7954, |
| "step": 17480 |
| }, |
| { |
| "epoch": 1.9585666293393058, |
| "grad_norm": 5.499046802520752, |
| "learning_rate": 1.7359089212392686e-05, |
| "loss": 2.2032, |
| "step": 17490 |
| }, |
| { |
| "epoch": 1.9596864501679732, |
| "grad_norm": 10.584261894226074, |
| "learning_rate": 1.7340425531914894e-05, |
| "loss": 2.2121, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.9608062709966405, |
| "grad_norm": 4.816810131072998, |
| "learning_rate": 1.7321761851437106e-05, |
| "loss": 1.9277, |
| "step": 17510 |
| }, |
| { |
| "epoch": 1.9619260918253079, |
| "grad_norm": 5.484105110168457, |
| "learning_rate": 1.7303098170959314e-05, |
| "loss": 1.7758, |
| "step": 17520 |
| }, |
| { |
| "epoch": 1.9630459126539752, |
| "grad_norm": 12.183406829833984, |
| "learning_rate": 1.7284434490481523e-05, |
| "loss": 2.0495, |
| "step": 17530 |
| }, |
| { |
| "epoch": 1.9641657334826428, |
| "grad_norm": 5.112043380737305, |
| "learning_rate": 1.7265770810003734e-05, |
| "loss": 1.9576, |
| "step": 17540 |
| }, |
| { |
| "epoch": 1.9652855543113104, |
| "grad_norm": 4.796443939208984, |
| "learning_rate": 1.7247107129525943e-05, |
| "loss": 2.0039, |
| "step": 17550 |
| }, |
| { |
| "epoch": 1.9664053751399777, |
| "grad_norm": 4.247778415679932, |
| "learning_rate": 1.7228443449048154e-05, |
| "loss": 1.7593, |
| "step": 17560 |
| }, |
| { |
| "epoch": 1.967525195968645, |
| "grad_norm": 4.498353481292725, |
| "learning_rate": 1.7209779768570363e-05, |
| "loss": 1.904, |
| "step": 17570 |
| }, |
| { |
| "epoch": 1.9686450167973124, |
| "grad_norm": 12.006962776184082, |
| "learning_rate": 1.7191116088092574e-05, |
| "loss": 1.6231, |
| "step": 17580 |
| }, |
| { |
| "epoch": 1.9697648376259798, |
| "grad_norm": 4.842081069946289, |
| "learning_rate": 1.7172452407614783e-05, |
| "loss": 2.0985, |
| "step": 17590 |
| }, |
| { |
| "epoch": 1.970884658454647, |
| "grad_norm": 15.521842956542969, |
| "learning_rate": 1.7153788727136994e-05, |
| "loss": 2.1864, |
| "step": 17600 |
| }, |
| { |
| "epoch": 1.9720044792833147, |
| "grad_norm": 9.48452091217041, |
| "learning_rate": 1.7135125046659203e-05, |
| "loss": 1.8124, |
| "step": 17610 |
| }, |
| { |
| "epoch": 1.973124300111982, |
| "grad_norm": 4.017796993255615, |
| "learning_rate": 1.711646136618141e-05, |
| "loss": 2.0427, |
| "step": 17620 |
| }, |
| { |
| "epoch": 1.9742441209406496, |
| "grad_norm": 3.9394009113311768, |
| "learning_rate": 1.709779768570362e-05, |
| "loss": 2.0485, |
| "step": 17630 |
| }, |
| { |
| "epoch": 1.975363941769317, |
| "grad_norm": 14.145578384399414, |
| "learning_rate": 1.707913400522583e-05, |
| "loss": 1.5402, |
| "step": 17640 |
| }, |
| { |
| "epoch": 1.9764837625979843, |
| "grad_norm": 4.282801628112793, |
| "learning_rate": 1.706047032474804e-05, |
| "loss": 1.8805, |
| "step": 17650 |
| }, |
| { |
| "epoch": 1.9776035834266517, |
| "grad_norm": 4.898009300231934, |
| "learning_rate": 1.704180664427025e-05, |
| "loss": 2.1731, |
| "step": 17660 |
| }, |
| { |
| "epoch": 1.978723404255319, |
| "grad_norm": 4.910828590393066, |
| "learning_rate": 1.702314296379246e-05, |
| "loss": 2.0025, |
| "step": 17670 |
| }, |
| { |
| "epoch": 1.9798432250839866, |
| "grad_norm": 15.03659725189209, |
| "learning_rate": 1.700447928331467e-05, |
| "loss": 2.3384, |
| "step": 17680 |
| }, |
| { |
| "epoch": 1.980963045912654, |
| "grad_norm": 10.689837455749512, |
| "learning_rate": 1.698581560283688e-05, |
| "loss": 1.7127, |
| "step": 17690 |
| }, |
| { |
| "epoch": 1.9820828667413215, |
| "grad_norm": 10.339581489562988, |
| "learning_rate": 1.696715192235909e-05, |
| "loss": 2.0183, |
| "step": 17700 |
| }, |
| { |
| "epoch": 1.9832026875699889, |
| "grad_norm": 7.037674903869629, |
| "learning_rate": 1.69484882418813e-05, |
| "loss": 1.9685, |
| "step": 17710 |
| }, |
| { |
| "epoch": 1.9843225083986562, |
| "grad_norm": 14.190945625305176, |
| "learning_rate": 1.692982456140351e-05, |
| "loss": 1.9507, |
| "step": 17720 |
| }, |
| { |
| "epoch": 1.9854423292273236, |
| "grad_norm": 4.3056416511535645, |
| "learning_rate": 1.691116088092572e-05, |
| "loss": 1.9554, |
| "step": 17730 |
| }, |
| { |
| "epoch": 1.986562150055991, |
| "grad_norm": 14.68007755279541, |
| "learning_rate": 1.6892497200447928e-05, |
| "loss": 2.2227, |
| "step": 17740 |
| }, |
| { |
| "epoch": 1.9876819708846585, |
| "grad_norm": 4.058879852294922, |
| "learning_rate": 1.687383351997014e-05, |
| "loss": 1.9667, |
| "step": 17750 |
| }, |
| { |
| "epoch": 1.9888017917133258, |
| "grad_norm": 8.660399436950684, |
| "learning_rate": 1.6855169839492348e-05, |
| "loss": 2.0649, |
| "step": 17760 |
| }, |
| { |
| "epoch": 1.9899216125419934, |
| "grad_norm": 11.349140167236328, |
| "learning_rate": 1.683650615901456e-05, |
| "loss": 1.438, |
| "step": 17770 |
| }, |
| { |
| "epoch": 1.9910414333706608, |
| "grad_norm": 4.842729568481445, |
| "learning_rate": 1.6817842478536768e-05, |
| "loss": 1.9431, |
| "step": 17780 |
| }, |
| { |
| "epoch": 1.992161254199328, |
| "grad_norm": 4.284554958343506, |
| "learning_rate": 1.679917879805898e-05, |
| "loss": 1.8363, |
| "step": 17790 |
| }, |
| { |
| "epoch": 1.9932810750279955, |
| "grad_norm": 6.62599515914917, |
| "learning_rate": 1.6780515117581188e-05, |
| "loss": 1.6479, |
| "step": 17800 |
| }, |
| { |
| "epoch": 1.9944008958566628, |
| "grad_norm": 12.138463973999023, |
| "learning_rate": 1.67618514371034e-05, |
| "loss": 1.9355, |
| "step": 17810 |
| }, |
| { |
| "epoch": 1.9955207166853304, |
| "grad_norm": 9.465065002441406, |
| "learning_rate": 1.6743187756625608e-05, |
| "loss": 2.1428, |
| "step": 17820 |
| }, |
| { |
| "epoch": 1.9966405375139977, |
| "grad_norm": 12.322503089904785, |
| "learning_rate": 1.672452407614782e-05, |
| "loss": 2.1444, |
| "step": 17830 |
| }, |
| { |
| "epoch": 1.9977603583426653, |
| "grad_norm": 9.275611877441406, |
| "learning_rate": 1.6705860395670028e-05, |
| "loss": 1.6226, |
| "step": 17840 |
| }, |
| { |
| "epoch": 1.9988801791713326, |
| "grad_norm": 4.5713982582092285, |
| "learning_rate": 1.6687196715192236e-05, |
| "loss": 1.9358, |
| "step": 17850 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 4.424788951873779, |
| "learning_rate": 1.6668533034714444e-05, |
| "loss": 1.604, |
| "step": 17860 |
| }, |
| { |
| "epoch": 2.0011198208286674, |
| "grad_norm": 15.465615272521973, |
| "learning_rate": 1.6649869354236656e-05, |
| "loss": 1.7584, |
| "step": 17870 |
| }, |
| { |
| "epoch": 2.0022396416573347, |
| "grad_norm": 5.3801116943359375, |
| "learning_rate": 1.6631205673758864e-05, |
| "loss": 2.4112, |
| "step": 17880 |
| }, |
| { |
| "epoch": 2.003359462486002, |
| "grad_norm": 9.771553993225098, |
| "learning_rate": 1.6612541993281076e-05, |
| "loss": 2.3079, |
| "step": 17890 |
| }, |
| { |
| "epoch": 2.00447928331467, |
| "grad_norm": 6.370817184448242, |
| "learning_rate": 1.6593878312803284e-05, |
| "loss": 1.4866, |
| "step": 17900 |
| }, |
| { |
| "epoch": 2.005599104143337, |
| "grad_norm": 5.146578311920166, |
| "learning_rate": 1.6575214632325496e-05, |
| "loss": 1.9364, |
| "step": 17910 |
| }, |
| { |
| "epoch": 2.0067189249720045, |
| "grad_norm": 6.970976829528809, |
| "learning_rate": 1.6556550951847704e-05, |
| "loss": 1.6884, |
| "step": 17920 |
| }, |
| { |
| "epoch": 2.007838745800672, |
| "grad_norm": 9.017516136169434, |
| "learning_rate": 1.6537887271369916e-05, |
| "loss": 1.4814, |
| "step": 17930 |
| }, |
| { |
| "epoch": 2.0089585666293392, |
| "grad_norm": 5.169244289398193, |
| "learning_rate": 1.6519223590892124e-05, |
| "loss": 1.9879, |
| "step": 17940 |
| }, |
| { |
| "epoch": 2.0100783874580066, |
| "grad_norm": 3.8840739727020264, |
| "learning_rate": 1.6500559910414336e-05, |
| "loss": 1.4718, |
| "step": 17950 |
| }, |
| { |
| "epoch": 2.011198208286674, |
| "grad_norm": 9.54129409790039, |
| "learning_rate": 1.6481896229936544e-05, |
| "loss": 1.709, |
| "step": 17960 |
| }, |
| { |
| "epoch": 2.0123180291153417, |
| "grad_norm": 4.5542192459106445, |
| "learning_rate": 1.6463232549458753e-05, |
| "loss": 2.0193, |
| "step": 17970 |
| }, |
| { |
| "epoch": 2.013437849944009, |
| "grad_norm": 4.2427144050598145, |
| "learning_rate": 1.6444568868980964e-05, |
| "loss": 2.2357, |
| "step": 17980 |
| }, |
| { |
| "epoch": 2.0145576707726764, |
| "grad_norm": 8.179814338684082, |
| "learning_rate": 1.6425905188503173e-05, |
| "loss": 1.8587, |
| "step": 17990 |
| }, |
| { |
| "epoch": 2.015677491601344, |
| "grad_norm": 14.023432731628418, |
| "learning_rate": 1.6407241508025384e-05, |
| "loss": 1.9654, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.016797312430011, |
| "grad_norm": 4.784511566162109, |
| "learning_rate": 1.6388577827547593e-05, |
| "loss": 1.9038, |
| "step": 18010 |
| }, |
| { |
| "epoch": 2.0179171332586785, |
| "grad_norm": 9.668645858764648, |
| "learning_rate": 1.6369914147069804e-05, |
| "loss": 2.2853, |
| "step": 18020 |
| }, |
| { |
| "epoch": 2.019036954087346, |
| "grad_norm": 5.6623005867004395, |
| "learning_rate": 1.6351250466592013e-05, |
| "loss": 2.1568, |
| "step": 18030 |
| }, |
| { |
| "epoch": 2.0201567749160136, |
| "grad_norm": 12.06014347076416, |
| "learning_rate": 1.6332586786114224e-05, |
| "loss": 1.9534, |
| "step": 18040 |
| }, |
| { |
| "epoch": 2.021276595744681, |
| "grad_norm": 12.4910249710083, |
| "learning_rate": 1.6313923105636433e-05, |
| "loss": 1.6823, |
| "step": 18050 |
| }, |
| { |
| "epoch": 2.0223964165733483, |
| "grad_norm": 12.419768333435059, |
| "learning_rate": 1.6295259425158644e-05, |
| "loss": 2.3436, |
| "step": 18060 |
| }, |
| { |
| "epoch": 2.0235162374020157, |
| "grad_norm": 4.12880802154541, |
| "learning_rate": 1.6276595744680853e-05, |
| "loss": 2.1389, |
| "step": 18070 |
| }, |
| { |
| "epoch": 2.024636058230683, |
| "grad_norm": 6.19962739944458, |
| "learning_rate": 1.625793206420306e-05, |
| "loss": 2.0253, |
| "step": 18080 |
| }, |
| { |
| "epoch": 2.0257558790593504, |
| "grad_norm": 4.155970573425293, |
| "learning_rate": 1.623926838372527e-05, |
| "loss": 1.5464, |
| "step": 18090 |
| }, |
| { |
| "epoch": 2.0268756998880177, |
| "grad_norm": 2.5858302116394043, |
| "learning_rate": 1.622060470324748e-05, |
| "loss": 1.8217, |
| "step": 18100 |
| }, |
| { |
| "epoch": 2.0279955207166855, |
| "grad_norm": 3.9286646842956543, |
| "learning_rate": 1.620194102276969e-05, |
| "loss": 1.3943, |
| "step": 18110 |
| }, |
| { |
| "epoch": 2.029115341545353, |
| "grad_norm": 12.073657035827637, |
| "learning_rate": 1.61832773422919e-05, |
| "loss": 2.0707, |
| "step": 18120 |
| }, |
| { |
| "epoch": 2.0302351623740202, |
| "grad_norm": 6.261038780212402, |
| "learning_rate": 1.616461366181411e-05, |
| "loss": 1.9036, |
| "step": 18130 |
| }, |
| { |
| "epoch": 2.0313549832026876, |
| "grad_norm": 3.7651288509368896, |
| "learning_rate": 1.614594998133632e-05, |
| "loss": 1.3601, |
| "step": 18140 |
| }, |
| { |
| "epoch": 2.032474804031355, |
| "grad_norm": 5.616112232208252, |
| "learning_rate": 1.612728630085853e-05, |
| "loss": 1.9249, |
| "step": 18150 |
| }, |
| { |
| "epoch": 2.0335946248600223, |
| "grad_norm": 14.19587230682373, |
| "learning_rate": 1.610862262038074e-05, |
| "loss": 1.9037, |
| "step": 18160 |
| }, |
| { |
| "epoch": 2.0347144456886896, |
| "grad_norm": 14.49325942993164, |
| "learning_rate": 1.608995893990295e-05, |
| "loss": 1.9606, |
| "step": 18170 |
| }, |
| { |
| "epoch": 2.0358342665173574, |
| "grad_norm": 5.4950270652771, |
| "learning_rate": 1.6071295259425158e-05, |
| "loss": 2.1976, |
| "step": 18180 |
| }, |
| { |
| "epoch": 2.036954087346025, |
| "grad_norm": 4.107669830322266, |
| "learning_rate": 1.605263157894737e-05, |
| "loss": 1.6963, |
| "step": 18190 |
| }, |
| { |
| "epoch": 2.038073908174692, |
| "grad_norm": 5.567134857177734, |
| "learning_rate": 1.6033967898469578e-05, |
| "loss": 1.639, |
| "step": 18200 |
| }, |
| { |
| "epoch": 2.0391937290033595, |
| "grad_norm": 18.579816818237305, |
| "learning_rate": 1.601530421799179e-05, |
| "loss": 1.9576, |
| "step": 18210 |
| }, |
| { |
| "epoch": 2.040313549832027, |
| "grad_norm": 11.057695388793945, |
| "learning_rate": 1.5996640537513998e-05, |
| "loss": 1.8883, |
| "step": 18220 |
| }, |
| { |
| "epoch": 2.041433370660694, |
| "grad_norm": 6.482846260070801, |
| "learning_rate": 1.597797685703621e-05, |
| "loss": 1.812, |
| "step": 18230 |
| }, |
| { |
| "epoch": 2.0425531914893615, |
| "grad_norm": 12.868412017822266, |
| "learning_rate": 1.5959313176558418e-05, |
| "loss": 2.1451, |
| "step": 18240 |
| }, |
| { |
| "epoch": 2.0436730123180293, |
| "grad_norm": 4.0791401863098145, |
| "learning_rate": 1.594064949608063e-05, |
| "loss": 1.974, |
| "step": 18250 |
| }, |
| { |
| "epoch": 2.0447928331466967, |
| "grad_norm": 6.537319660186768, |
| "learning_rate": 1.5921985815602838e-05, |
| "loss": 1.8334, |
| "step": 18260 |
| }, |
| { |
| "epoch": 2.045912653975364, |
| "grad_norm": 8.384710311889648, |
| "learning_rate": 1.590332213512505e-05, |
| "loss": 2.1852, |
| "step": 18270 |
| }, |
| { |
| "epoch": 2.0470324748040314, |
| "grad_norm": 11.995549201965332, |
| "learning_rate": 1.5884658454647258e-05, |
| "loss": 1.9999, |
| "step": 18280 |
| }, |
| { |
| "epoch": 2.0481522956326987, |
| "grad_norm": 11.57607650756836, |
| "learning_rate": 1.586599477416947e-05, |
| "loss": 2.0097, |
| "step": 18290 |
| }, |
| { |
| "epoch": 2.049272116461366, |
| "grad_norm": 21.388427734375, |
| "learning_rate": 1.5847331093691678e-05, |
| "loss": 2.0354, |
| "step": 18300 |
| }, |
| { |
| "epoch": 2.0503919372900334, |
| "grad_norm": 4.375351428985596, |
| "learning_rate": 1.582866741321389e-05, |
| "loss": 1.8486, |
| "step": 18310 |
| }, |
| { |
| "epoch": 2.051511758118701, |
| "grad_norm": 7.059999942779541, |
| "learning_rate": 1.5810003732736094e-05, |
| "loss": 1.7516, |
| "step": 18320 |
| }, |
| { |
| "epoch": 2.0526315789473686, |
| "grad_norm": 4.776148796081543, |
| "learning_rate": 1.5791340052258306e-05, |
| "loss": 1.8479, |
| "step": 18330 |
| }, |
| { |
| "epoch": 2.053751399776036, |
| "grad_norm": 13.596695899963379, |
| "learning_rate": 1.5772676371780514e-05, |
| "loss": 1.9489, |
| "step": 18340 |
| }, |
| { |
| "epoch": 2.0548712206047033, |
| "grad_norm": 15.971503257751465, |
| "learning_rate": 1.5754012691302726e-05, |
| "loss": 2.1302, |
| "step": 18350 |
| }, |
| { |
| "epoch": 2.0559910414333706, |
| "grad_norm": 5.559121131896973, |
| "learning_rate": 1.5735349010824934e-05, |
| "loss": 1.8906, |
| "step": 18360 |
| }, |
| { |
| "epoch": 2.057110862262038, |
| "grad_norm": 11.740134239196777, |
| "learning_rate": 1.5716685330347146e-05, |
| "loss": 1.831, |
| "step": 18370 |
| }, |
| { |
| "epoch": 2.0582306830907053, |
| "grad_norm": 5.161749362945557, |
| "learning_rate": 1.5698021649869354e-05, |
| "loss": 2.0162, |
| "step": 18380 |
| }, |
| { |
| "epoch": 2.0593505039193727, |
| "grad_norm": 13.416109085083008, |
| "learning_rate": 1.5679357969391563e-05, |
| "loss": 2.0551, |
| "step": 18390 |
| }, |
| { |
| "epoch": 2.0604703247480405, |
| "grad_norm": 5.6357269287109375, |
| "learning_rate": 1.5660694288913774e-05, |
| "loss": 1.6993, |
| "step": 18400 |
| }, |
| { |
| "epoch": 2.061590145576708, |
| "grad_norm": 10.636037826538086, |
| "learning_rate": 1.5642030608435983e-05, |
| "loss": 1.8842, |
| "step": 18410 |
| }, |
| { |
| "epoch": 2.062709966405375, |
| "grad_norm": 14.341257095336914, |
| "learning_rate": 1.5623366927958194e-05, |
| "loss": 1.7778, |
| "step": 18420 |
| }, |
| { |
| "epoch": 2.0638297872340425, |
| "grad_norm": 7.4988322257995605, |
| "learning_rate": 1.5604703247480403e-05, |
| "loss": 1.7679, |
| "step": 18430 |
| }, |
| { |
| "epoch": 2.06494960806271, |
| "grad_norm": 12.500404357910156, |
| "learning_rate": 1.5586039567002614e-05, |
| "loss": 2.2651, |
| "step": 18440 |
| }, |
| { |
| "epoch": 2.0660694288913772, |
| "grad_norm": 5.027773380279541, |
| "learning_rate": 1.5567375886524823e-05, |
| "loss": 2.0421, |
| "step": 18450 |
| }, |
| { |
| "epoch": 2.0671892497200446, |
| "grad_norm": 10.962523460388184, |
| "learning_rate": 1.5548712206047034e-05, |
| "loss": 2.0656, |
| "step": 18460 |
| }, |
| { |
| "epoch": 2.0683090705487124, |
| "grad_norm": 2.7904582023620605, |
| "learning_rate": 1.5530048525569243e-05, |
| "loss": 1.9249, |
| "step": 18470 |
| }, |
| { |
| "epoch": 2.0694288913773797, |
| "grad_norm": 7.586933135986328, |
| "learning_rate": 1.5511384845091454e-05, |
| "loss": 2.0239, |
| "step": 18480 |
| }, |
| { |
| "epoch": 2.070548712206047, |
| "grad_norm": 4.160824775695801, |
| "learning_rate": 1.5492721164613663e-05, |
| "loss": 1.9334, |
| "step": 18490 |
| }, |
| { |
| "epoch": 2.0716685330347144, |
| "grad_norm": 7.0400471687316895, |
| "learning_rate": 1.5474057484135874e-05, |
| "loss": 1.8884, |
| "step": 18500 |
| }, |
| { |
| "epoch": 2.072788353863382, |
| "grad_norm": 7.399810314178467, |
| "learning_rate": 1.5455393803658083e-05, |
| "loss": 1.6476, |
| "step": 18510 |
| }, |
| { |
| "epoch": 2.073908174692049, |
| "grad_norm": 6.349668979644775, |
| "learning_rate": 1.5436730123180294e-05, |
| "loss": 1.8177, |
| "step": 18520 |
| }, |
| { |
| "epoch": 2.0750279955207165, |
| "grad_norm": 4.084234714508057, |
| "learning_rate": 1.5418066442702503e-05, |
| "loss": 1.4257, |
| "step": 18530 |
| }, |
| { |
| "epoch": 2.0761478163493843, |
| "grad_norm": 14.362452507019043, |
| "learning_rate": 1.5399402762224714e-05, |
| "loss": 2.0043, |
| "step": 18540 |
| }, |
| { |
| "epoch": 2.0772676371780516, |
| "grad_norm": 8.189460754394531, |
| "learning_rate": 1.538073908174692e-05, |
| "loss": 1.5747, |
| "step": 18550 |
| }, |
| { |
| "epoch": 2.078387458006719, |
| "grad_norm": 9.600176811218262, |
| "learning_rate": 1.536207540126913e-05, |
| "loss": 1.9165, |
| "step": 18560 |
| }, |
| { |
| "epoch": 2.0795072788353863, |
| "grad_norm": 8.519039154052734, |
| "learning_rate": 1.534341172079134e-05, |
| "loss": 1.8993, |
| "step": 18570 |
| }, |
| { |
| "epoch": 2.0806270996640537, |
| "grad_norm": 14.394335746765137, |
| "learning_rate": 1.532474804031355e-05, |
| "loss": 2.0525, |
| "step": 18580 |
| }, |
| { |
| "epoch": 2.081746920492721, |
| "grad_norm": 4.982779502868652, |
| "learning_rate": 1.530608435983576e-05, |
| "loss": 2.0925, |
| "step": 18590 |
| }, |
| { |
| "epoch": 2.0828667413213884, |
| "grad_norm": 15.897424697875977, |
| "learning_rate": 1.528742067935797e-05, |
| "loss": 1.7299, |
| "step": 18600 |
| }, |
| { |
| "epoch": 2.083986562150056, |
| "grad_norm": 12.037178993225098, |
| "learning_rate": 1.526875699888018e-05, |
| "loss": 1.7714, |
| "step": 18610 |
| }, |
| { |
| "epoch": 2.0851063829787235, |
| "grad_norm": 4.796445846557617, |
| "learning_rate": 1.525009331840239e-05, |
| "loss": 2.0635, |
| "step": 18620 |
| }, |
| { |
| "epoch": 2.086226203807391, |
| "grad_norm": 5.05470085144043, |
| "learning_rate": 1.52314296379246e-05, |
| "loss": 2.302, |
| "step": 18630 |
| }, |
| { |
| "epoch": 2.0873460246360582, |
| "grad_norm": 6.144739627838135, |
| "learning_rate": 1.521276595744681e-05, |
| "loss": 1.8878, |
| "step": 18640 |
| }, |
| { |
| "epoch": 2.0884658454647256, |
| "grad_norm": 5.468743801116943, |
| "learning_rate": 1.519410227696902e-05, |
| "loss": 2.1304, |
| "step": 18650 |
| }, |
| { |
| "epoch": 2.089585666293393, |
| "grad_norm": 6.490882873535156, |
| "learning_rate": 1.517543859649123e-05, |
| "loss": 1.913, |
| "step": 18660 |
| }, |
| { |
| "epoch": 2.0907054871220603, |
| "grad_norm": 5.277439117431641, |
| "learning_rate": 1.515677491601344e-05, |
| "loss": 1.9014, |
| "step": 18670 |
| }, |
| { |
| "epoch": 2.091825307950728, |
| "grad_norm": 5.904261112213135, |
| "learning_rate": 1.513811123553565e-05, |
| "loss": 1.963, |
| "step": 18680 |
| }, |
| { |
| "epoch": 2.0929451287793954, |
| "grad_norm": 10.203513145446777, |
| "learning_rate": 1.5119447555057858e-05, |
| "loss": 1.9816, |
| "step": 18690 |
| }, |
| { |
| "epoch": 2.0940649496080628, |
| "grad_norm": 13.511499404907227, |
| "learning_rate": 1.5100783874580068e-05, |
| "loss": 1.9453, |
| "step": 18700 |
| }, |
| { |
| "epoch": 2.09518477043673, |
| "grad_norm": 4.540700435638428, |
| "learning_rate": 1.5082120194102278e-05, |
| "loss": 1.7912, |
| "step": 18710 |
| }, |
| { |
| "epoch": 2.0963045912653975, |
| "grad_norm": 4.4151201248168945, |
| "learning_rate": 1.5063456513624488e-05, |
| "loss": 2.1123, |
| "step": 18720 |
| }, |
| { |
| "epoch": 2.097424412094065, |
| "grad_norm": 10.17835807800293, |
| "learning_rate": 1.5044792833146698e-05, |
| "loss": 2.0146, |
| "step": 18730 |
| }, |
| { |
| "epoch": 2.098544232922732, |
| "grad_norm": 5.04526424407959, |
| "learning_rate": 1.5026129152668908e-05, |
| "loss": 1.6177, |
| "step": 18740 |
| }, |
| { |
| "epoch": 2.0996640537514, |
| "grad_norm": 10.27774429321289, |
| "learning_rate": 1.5007465472191118e-05, |
| "loss": 2.0393, |
| "step": 18750 |
| }, |
| { |
| "epoch": 2.1007838745800673, |
| "grad_norm": 5.038769721984863, |
| "learning_rate": 1.4988801791713328e-05, |
| "loss": 1.741, |
| "step": 18760 |
| }, |
| { |
| "epoch": 2.1019036954087347, |
| "grad_norm": 9.592277526855469, |
| "learning_rate": 1.4970138111235538e-05, |
| "loss": 1.7934, |
| "step": 18770 |
| }, |
| { |
| "epoch": 2.103023516237402, |
| "grad_norm": 9.235641479492188, |
| "learning_rate": 1.4951474430757744e-05, |
| "loss": 1.7898, |
| "step": 18780 |
| }, |
| { |
| "epoch": 2.1041433370660694, |
| "grad_norm": 4.7292327880859375, |
| "learning_rate": 1.4932810750279954e-05, |
| "loss": 1.5251, |
| "step": 18790 |
| }, |
| { |
| "epoch": 2.1052631578947367, |
| "grad_norm": 9.268404006958008, |
| "learning_rate": 1.4914147069802164e-05, |
| "loss": 1.7425, |
| "step": 18800 |
| }, |
| { |
| "epoch": 2.106382978723404, |
| "grad_norm": 4.396312236785889, |
| "learning_rate": 1.4895483389324374e-05, |
| "loss": 1.9284, |
| "step": 18810 |
| }, |
| { |
| "epoch": 2.107502799552072, |
| "grad_norm": 5.53659725189209, |
| "learning_rate": 1.4876819708846584e-05, |
| "loss": 1.5367, |
| "step": 18820 |
| }, |
| { |
| "epoch": 2.108622620380739, |
| "grad_norm": 6.703355312347412, |
| "learning_rate": 1.4858156028368794e-05, |
| "loss": 1.808, |
| "step": 18830 |
| }, |
| { |
| "epoch": 2.1097424412094066, |
| "grad_norm": 13.882152557373047, |
| "learning_rate": 1.4839492347891004e-05, |
| "loss": 2.1221, |
| "step": 18840 |
| }, |
| { |
| "epoch": 2.110862262038074, |
| "grad_norm": 4.497895240783691, |
| "learning_rate": 1.4820828667413214e-05, |
| "loss": 1.5897, |
| "step": 18850 |
| }, |
| { |
| "epoch": 2.1119820828667413, |
| "grad_norm": 9.912936210632324, |
| "learning_rate": 1.4802164986935424e-05, |
| "loss": 1.9576, |
| "step": 18860 |
| }, |
| { |
| "epoch": 2.1131019036954086, |
| "grad_norm": 14.399587631225586, |
| "learning_rate": 1.4783501306457634e-05, |
| "loss": 1.9239, |
| "step": 18870 |
| }, |
| { |
| "epoch": 2.114221724524076, |
| "grad_norm": 17.03645133972168, |
| "learning_rate": 1.4764837625979844e-05, |
| "loss": 1.9142, |
| "step": 18880 |
| }, |
| { |
| "epoch": 2.1153415453527438, |
| "grad_norm": 12.911978721618652, |
| "learning_rate": 1.4746173945502054e-05, |
| "loss": 2.1077, |
| "step": 18890 |
| }, |
| { |
| "epoch": 2.116461366181411, |
| "grad_norm": 6.310555458068848, |
| "learning_rate": 1.4727510265024263e-05, |
| "loss": 1.6921, |
| "step": 18900 |
| }, |
| { |
| "epoch": 2.1175811870100785, |
| "grad_norm": 5.524637699127197, |
| "learning_rate": 1.4708846584546473e-05, |
| "loss": 1.7968, |
| "step": 18910 |
| }, |
| { |
| "epoch": 2.118701007838746, |
| "grad_norm": 12.02706527709961, |
| "learning_rate": 1.4690182904068683e-05, |
| "loss": 1.982, |
| "step": 18920 |
| }, |
| { |
| "epoch": 2.119820828667413, |
| "grad_norm": 16.045839309692383, |
| "learning_rate": 1.4671519223590893e-05, |
| "loss": 1.5663, |
| "step": 18930 |
| }, |
| { |
| "epoch": 2.1209406494960805, |
| "grad_norm": 5.100281715393066, |
| "learning_rate": 1.4652855543113103e-05, |
| "loss": 1.9206, |
| "step": 18940 |
| }, |
| { |
| "epoch": 2.122060470324748, |
| "grad_norm": 8.30729866027832, |
| "learning_rate": 1.4634191862635313e-05, |
| "loss": 1.9549, |
| "step": 18950 |
| }, |
| { |
| "epoch": 2.1231802911534157, |
| "grad_norm": 9.724970817565918, |
| "learning_rate": 1.4615528182157523e-05, |
| "loss": 1.8996, |
| "step": 18960 |
| }, |
| { |
| "epoch": 2.124300111982083, |
| "grad_norm": 9.640581130981445, |
| "learning_rate": 1.4596864501679733e-05, |
| "loss": 1.6712, |
| "step": 18970 |
| }, |
| { |
| "epoch": 2.1254199328107504, |
| "grad_norm": 7.71252965927124, |
| "learning_rate": 1.4578200821201943e-05, |
| "loss": 1.692, |
| "step": 18980 |
| }, |
| { |
| "epoch": 2.1265397536394177, |
| "grad_norm": 6.05610466003418, |
| "learning_rate": 1.4559537140724153e-05, |
| "loss": 1.7808, |
| "step": 18990 |
| }, |
| { |
| "epoch": 2.127659574468085, |
| "grad_norm": 12.274239540100098, |
| "learning_rate": 1.4540873460246363e-05, |
| "loss": 1.8396, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.1287793952967524, |
| "grad_norm": 5.31697416305542, |
| "learning_rate": 1.4522209779768573e-05, |
| "loss": 1.7804, |
| "step": 19010 |
| }, |
| { |
| "epoch": 2.1298992161254198, |
| "grad_norm": 19.778165817260742, |
| "learning_rate": 1.450354609929078e-05, |
| "loss": 1.6239, |
| "step": 19020 |
| }, |
| { |
| "epoch": 2.131019036954087, |
| "grad_norm": 4.198515892028809, |
| "learning_rate": 1.448488241881299e-05, |
| "loss": 1.7749, |
| "step": 19030 |
| }, |
| { |
| "epoch": 2.132138857782755, |
| "grad_norm": 5.769347667694092, |
| "learning_rate": 1.44662187383352e-05, |
| "loss": 1.7057, |
| "step": 19040 |
| }, |
| { |
| "epoch": 2.1332586786114223, |
| "grad_norm": 4.867179870605469, |
| "learning_rate": 1.444755505785741e-05, |
| "loss": 1.5719, |
| "step": 19050 |
| }, |
| { |
| "epoch": 2.1343784994400896, |
| "grad_norm": 4.64288854598999, |
| "learning_rate": 1.442889137737962e-05, |
| "loss": 1.8134, |
| "step": 19060 |
| }, |
| { |
| "epoch": 2.135498320268757, |
| "grad_norm": 5.441596031188965, |
| "learning_rate": 1.441022769690183e-05, |
| "loss": 2.0065, |
| "step": 19070 |
| }, |
| { |
| "epoch": 2.1366181410974243, |
| "grad_norm": 15.793349266052246, |
| "learning_rate": 1.439156401642404e-05, |
| "loss": 1.5416, |
| "step": 19080 |
| }, |
| { |
| "epoch": 2.1377379619260917, |
| "grad_norm": 9.388581275939941, |
| "learning_rate": 1.437290033594625e-05, |
| "loss": 1.9277, |
| "step": 19090 |
| }, |
| { |
| "epoch": 2.1388577827547595, |
| "grad_norm": 3.7332279682159424, |
| "learning_rate": 1.435423665546846e-05, |
| "loss": 1.7587, |
| "step": 19100 |
| }, |
| { |
| "epoch": 2.139977603583427, |
| "grad_norm": 4.11780309677124, |
| "learning_rate": 1.433557297499067e-05, |
| "loss": 2.2728, |
| "step": 19110 |
| }, |
| { |
| "epoch": 2.141097424412094, |
| "grad_norm": 4.731024742126465, |
| "learning_rate": 1.4316909294512878e-05, |
| "loss": 1.7996, |
| "step": 19120 |
| }, |
| { |
| "epoch": 2.1422172452407615, |
| "grad_norm": 20.14070701599121, |
| "learning_rate": 1.4298245614035088e-05, |
| "loss": 1.6665, |
| "step": 19130 |
| }, |
| { |
| "epoch": 2.143337066069429, |
| "grad_norm": 5.043517589569092, |
| "learning_rate": 1.4279581933557298e-05, |
| "loss": 1.8023, |
| "step": 19140 |
| }, |
| { |
| "epoch": 2.144456886898096, |
| "grad_norm": 15.140097618103027, |
| "learning_rate": 1.4260918253079508e-05, |
| "loss": 1.9381, |
| "step": 19150 |
| }, |
| { |
| "epoch": 2.1455767077267636, |
| "grad_norm": 5.910915374755859, |
| "learning_rate": 1.4242254572601718e-05, |
| "loss": 1.5582, |
| "step": 19160 |
| }, |
| { |
| "epoch": 2.146696528555431, |
| "grad_norm": 4.935706615447998, |
| "learning_rate": 1.4223590892123928e-05, |
| "loss": 1.6278, |
| "step": 19170 |
| }, |
| { |
| "epoch": 2.1478163493840987, |
| "grad_norm": 7.664555549621582, |
| "learning_rate": 1.4204927211646138e-05, |
| "loss": 2.1267, |
| "step": 19180 |
| }, |
| { |
| "epoch": 2.148936170212766, |
| "grad_norm": 5.317629337310791, |
| "learning_rate": 1.4186263531168348e-05, |
| "loss": 1.6991, |
| "step": 19190 |
| }, |
| { |
| "epoch": 2.1500559910414334, |
| "grad_norm": 8.390786170959473, |
| "learning_rate": 1.4167599850690558e-05, |
| "loss": 1.666, |
| "step": 19200 |
| }, |
| { |
| "epoch": 2.1511758118701008, |
| "grad_norm": 4.987608432769775, |
| "learning_rate": 1.4148936170212768e-05, |
| "loss": 1.9875, |
| "step": 19210 |
| }, |
| { |
| "epoch": 2.152295632698768, |
| "grad_norm": 13.22926139831543, |
| "learning_rate": 1.4130272489734978e-05, |
| "loss": 2.5654, |
| "step": 19220 |
| }, |
| { |
| "epoch": 2.1534154535274355, |
| "grad_norm": 5.794547080993652, |
| "learning_rate": 1.4111608809257188e-05, |
| "loss": 1.5586, |
| "step": 19230 |
| }, |
| { |
| "epoch": 2.1545352743561033, |
| "grad_norm": 18.272071838378906, |
| "learning_rate": 1.4092945128779398e-05, |
| "loss": 1.9604, |
| "step": 19240 |
| }, |
| { |
| "epoch": 2.1556550951847706, |
| "grad_norm": 8.715819358825684, |
| "learning_rate": 1.4074281448301604e-05, |
| "loss": 2.167, |
| "step": 19250 |
| }, |
| { |
| "epoch": 2.156774916013438, |
| "grad_norm": 5.621458530426025, |
| "learning_rate": 1.4055617767823814e-05, |
| "loss": 1.7484, |
| "step": 19260 |
| }, |
| { |
| "epoch": 2.1578947368421053, |
| "grad_norm": 5.077019214630127, |
| "learning_rate": 1.4036954087346024e-05, |
| "loss": 1.7654, |
| "step": 19270 |
| }, |
| { |
| "epoch": 2.1590145576707727, |
| "grad_norm": 4.050748825073242, |
| "learning_rate": 1.4018290406868234e-05, |
| "loss": 1.594, |
| "step": 19280 |
| }, |
| { |
| "epoch": 2.16013437849944, |
| "grad_norm": 8.234112739562988, |
| "learning_rate": 1.3999626726390444e-05, |
| "loss": 2.0443, |
| "step": 19290 |
| }, |
| { |
| "epoch": 2.1612541993281074, |
| "grad_norm": 9.419720649719238, |
| "learning_rate": 1.3980963045912654e-05, |
| "loss": 2.0494, |
| "step": 19300 |
| }, |
| { |
| "epoch": 2.1623740201567747, |
| "grad_norm": 5.222434997558594, |
| "learning_rate": 1.3962299365434864e-05, |
| "loss": 2.1207, |
| "step": 19310 |
| }, |
| { |
| "epoch": 2.1634938409854425, |
| "grad_norm": 4.949707508087158, |
| "learning_rate": 1.3943635684957074e-05, |
| "loss": 1.9951, |
| "step": 19320 |
| }, |
| { |
| "epoch": 2.16461366181411, |
| "grad_norm": 5.496902942657471, |
| "learning_rate": 1.3924972004479284e-05, |
| "loss": 2.0267, |
| "step": 19330 |
| }, |
| { |
| "epoch": 2.165733482642777, |
| "grad_norm": 21.034757614135742, |
| "learning_rate": 1.3906308324001493e-05, |
| "loss": 1.8192, |
| "step": 19340 |
| }, |
| { |
| "epoch": 2.1668533034714446, |
| "grad_norm": 16.238187789916992, |
| "learning_rate": 1.3887644643523703e-05, |
| "loss": 1.9281, |
| "step": 19350 |
| }, |
| { |
| "epoch": 2.167973124300112, |
| "grad_norm": 5.808258056640625, |
| "learning_rate": 1.3868980963045913e-05, |
| "loss": 1.7075, |
| "step": 19360 |
| }, |
| { |
| "epoch": 2.1690929451287793, |
| "grad_norm": 4.748766899108887, |
| "learning_rate": 1.3850317282568123e-05, |
| "loss": 2.0612, |
| "step": 19370 |
| }, |
| { |
| "epoch": 2.1702127659574466, |
| "grad_norm": 6.410683631896973, |
| "learning_rate": 1.3831653602090333e-05, |
| "loss": 1.8451, |
| "step": 19380 |
| }, |
| { |
| "epoch": 2.1713325867861144, |
| "grad_norm": 9.00479507446289, |
| "learning_rate": 1.3812989921612543e-05, |
| "loss": 1.8231, |
| "step": 19390 |
| }, |
| { |
| "epoch": 2.1724524076147818, |
| "grad_norm": 10.912854194641113, |
| "learning_rate": 1.3794326241134753e-05, |
| "loss": 1.75, |
| "step": 19400 |
| }, |
| { |
| "epoch": 2.173572228443449, |
| "grad_norm": 4.568667888641357, |
| "learning_rate": 1.3775662560656963e-05, |
| "loss": 1.8237, |
| "step": 19410 |
| }, |
| { |
| "epoch": 2.1746920492721165, |
| "grad_norm": 6.0548930168151855, |
| "learning_rate": 1.3756998880179173e-05, |
| "loss": 2.0656, |
| "step": 19420 |
| }, |
| { |
| "epoch": 2.175811870100784, |
| "grad_norm": 6.4514031410217285, |
| "learning_rate": 1.3738335199701383e-05, |
| "loss": 1.8537, |
| "step": 19430 |
| }, |
| { |
| "epoch": 2.176931690929451, |
| "grad_norm": 7.510464668273926, |
| "learning_rate": 1.3719671519223593e-05, |
| "loss": 2.1201, |
| "step": 19440 |
| }, |
| { |
| "epoch": 2.1780515117581185, |
| "grad_norm": 6.162042617797852, |
| "learning_rate": 1.3701007838745803e-05, |
| "loss": 1.9171, |
| "step": 19450 |
| }, |
| { |
| "epoch": 2.1791713325867863, |
| "grad_norm": 4.513441562652588, |
| "learning_rate": 1.3682344158268013e-05, |
| "loss": 1.6582, |
| "step": 19460 |
| }, |
| { |
| "epoch": 2.1802911534154537, |
| "grad_norm": 5.428256988525391, |
| "learning_rate": 1.3663680477790223e-05, |
| "loss": 1.807, |
| "step": 19470 |
| }, |
| { |
| "epoch": 2.181410974244121, |
| "grad_norm": 4.469424247741699, |
| "learning_rate": 1.364501679731243e-05, |
| "loss": 1.7969, |
| "step": 19480 |
| }, |
| { |
| "epoch": 2.1825307950727884, |
| "grad_norm": 18.87086296081543, |
| "learning_rate": 1.362635311683464e-05, |
| "loss": 2.0421, |
| "step": 19490 |
| }, |
| { |
| "epoch": 2.1836506159014557, |
| "grad_norm": 14.870500564575195, |
| "learning_rate": 1.360768943635685e-05, |
| "loss": 2.0896, |
| "step": 19500 |
| }, |
| { |
| "epoch": 2.184770436730123, |
| "grad_norm": 4.639315605163574, |
| "learning_rate": 1.358902575587906e-05, |
| "loss": 1.6858, |
| "step": 19510 |
| }, |
| { |
| "epoch": 2.1858902575587904, |
| "grad_norm": 5.512360572814941, |
| "learning_rate": 1.357036207540127e-05, |
| "loss": 1.7873, |
| "step": 19520 |
| }, |
| { |
| "epoch": 2.187010078387458, |
| "grad_norm": 4.680398464202881, |
| "learning_rate": 1.355169839492348e-05, |
| "loss": 1.7172, |
| "step": 19530 |
| }, |
| { |
| "epoch": 2.1881298992161256, |
| "grad_norm": 6.576661586761475, |
| "learning_rate": 1.353303471444569e-05, |
| "loss": 1.4995, |
| "step": 19540 |
| }, |
| { |
| "epoch": 2.189249720044793, |
| "grad_norm": 5.627395153045654, |
| "learning_rate": 1.3514371033967898e-05, |
| "loss": 2.0323, |
| "step": 19550 |
| }, |
| { |
| "epoch": 2.1903695408734603, |
| "grad_norm": 9.551543235778809, |
| "learning_rate": 1.3495707353490108e-05, |
| "loss": 1.9394, |
| "step": 19560 |
| }, |
| { |
| "epoch": 2.1914893617021276, |
| "grad_norm": 3.9927797317504883, |
| "learning_rate": 1.3477043673012318e-05, |
| "loss": 1.7925, |
| "step": 19570 |
| }, |
| { |
| "epoch": 2.192609182530795, |
| "grad_norm": 5.432565212249756, |
| "learning_rate": 1.3458379992534528e-05, |
| "loss": 1.9283, |
| "step": 19580 |
| }, |
| { |
| "epoch": 2.1937290033594623, |
| "grad_norm": 16.03640365600586, |
| "learning_rate": 1.3439716312056738e-05, |
| "loss": 1.554, |
| "step": 19590 |
| }, |
| { |
| "epoch": 2.19484882418813, |
| "grad_norm": 9.58271598815918, |
| "learning_rate": 1.3421052631578948e-05, |
| "loss": 2.2815, |
| "step": 19600 |
| }, |
| { |
| "epoch": 2.1959686450167974, |
| "grad_norm": 5.797165393829346, |
| "learning_rate": 1.3402388951101158e-05, |
| "loss": 2.107, |
| "step": 19610 |
| }, |
| { |
| "epoch": 2.197088465845465, |
| "grad_norm": 11.725902557373047, |
| "learning_rate": 1.3383725270623368e-05, |
| "loss": 1.8659, |
| "step": 19620 |
| }, |
| { |
| "epoch": 2.198208286674132, |
| "grad_norm": 16.76238441467285, |
| "learning_rate": 1.3365061590145578e-05, |
| "loss": 1.9411, |
| "step": 19630 |
| }, |
| { |
| "epoch": 2.1993281075027995, |
| "grad_norm": 4.064399242401123, |
| "learning_rate": 1.3346397909667788e-05, |
| "loss": 2.0996, |
| "step": 19640 |
| }, |
| { |
| "epoch": 2.200447928331467, |
| "grad_norm": 12.260157585144043, |
| "learning_rate": 1.3327734229189998e-05, |
| "loss": 1.8016, |
| "step": 19650 |
| }, |
| { |
| "epoch": 2.201567749160134, |
| "grad_norm": 4.968259811401367, |
| "learning_rate": 1.3309070548712208e-05, |
| "loss": 2.5253, |
| "step": 19660 |
| }, |
| { |
| "epoch": 2.202687569988802, |
| "grad_norm": 15.491079330444336, |
| "learning_rate": 1.3290406868234418e-05, |
| "loss": 1.9006, |
| "step": 19670 |
| }, |
| { |
| "epoch": 2.2038073908174693, |
| "grad_norm": 16.073698043823242, |
| "learning_rate": 1.3271743187756628e-05, |
| "loss": 2.1384, |
| "step": 19680 |
| }, |
| { |
| "epoch": 2.2049272116461367, |
| "grad_norm": 4.668467998504639, |
| "learning_rate": 1.3253079507278838e-05, |
| "loss": 1.7092, |
| "step": 19690 |
| }, |
| { |
| "epoch": 2.206047032474804, |
| "grad_norm": 16.72428321838379, |
| "learning_rate": 1.3234415826801048e-05, |
| "loss": 2.2848, |
| "step": 19700 |
| }, |
| { |
| "epoch": 2.2071668533034714, |
| "grad_norm": 16.04388999938965, |
| "learning_rate": 1.3215752146323254e-05, |
| "loss": 1.7384, |
| "step": 19710 |
| }, |
| { |
| "epoch": 2.2082866741321387, |
| "grad_norm": 7.498695373535156, |
| "learning_rate": 1.3197088465845464e-05, |
| "loss": 1.7484, |
| "step": 19720 |
| }, |
| { |
| "epoch": 2.209406494960806, |
| "grad_norm": 4.43148136138916, |
| "learning_rate": 1.3178424785367674e-05, |
| "loss": 1.845, |
| "step": 19730 |
| }, |
| { |
| "epoch": 2.2105263157894735, |
| "grad_norm": 3.5520262718200684, |
| "learning_rate": 1.3159761104889884e-05, |
| "loss": 2.0745, |
| "step": 19740 |
| }, |
| { |
| "epoch": 2.2116461366181412, |
| "grad_norm": 8.417689323425293, |
| "learning_rate": 1.3141097424412094e-05, |
| "loss": 2.0317, |
| "step": 19750 |
| }, |
| { |
| "epoch": 2.2127659574468086, |
| "grad_norm": 6.288638114929199, |
| "learning_rate": 1.3122433743934304e-05, |
| "loss": 1.9475, |
| "step": 19760 |
| }, |
| { |
| "epoch": 2.213885778275476, |
| "grad_norm": 9.5358304977417, |
| "learning_rate": 1.3103770063456513e-05, |
| "loss": 1.5572, |
| "step": 19770 |
| }, |
| { |
| "epoch": 2.2150055991041433, |
| "grad_norm": 6.784647464752197, |
| "learning_rate": 1.3085106382978723e-05, |
| "loss": 1.6474, |
| "step": 19780 |
| }, |
| { |
| "epoch": 2.2161254199328106, |
| "grad_norm": 6.584368705749512, |
| "learning_rate": 1.3066442702500933e-05, |
| "loss": 1.625, |
| "step": 19790 |
| }, |
| { |
| "epoch": 2.217245240761478, |
| "grad_norm": 10.06530475616455, |
| "learning_rate": 1.3047779022023143e-05, |
| "loss": 2.4165, |
| "step": 19800 |
| }, |
| { |
| "epoch": 2.218365061590146, |
| "grad_norm": 8.728497505187988, |
| "learning_rate": 1.3029115341545353e-05, |
| "loss": 2.192, |
| "step": 19810 |
| }, |
| { |
| "epoch": 2.219484882418813, |
| "grad_norm": 4.747127532958984, |
| "learning_rate": 1.3010451661067563e-05, |
| "loss": 1.8832, |
| "step": 19820 |
| }, |
| { |
| "epoch": 2.2206047032474805, |
| "grad_norm": 4.508890628814697, |
| "learning_rate": 1.2991787980589773e-05, |
| "loss": 1.9551, |
| "step": 19830 |
| }, |
| { |
| "epoch": 2.221724524076148, |
| "grad_norm": 9.029202461242676, |
| "learning_rate": 1.2973124300111983e-05, |
| "loss": 1.9141, |
| "step": 19840 |
| }, |
| { |
| "epoch": 2.222844344904815, |
| "grad_norm": 4.136125087738037, |
| "learning_rate": 1.2954460619634193e-05, |
| "loss": 1.9544, |
| "step": 19850 |
| }, |
| { |
| "epoch": 2.2239641657334825, |
| "grad_norm": 4.724370002746582, |
| "learning_rate": 1.2935796939156403e-05, |
| "loss": 1.5162, |
| "step": 19860 |
| }, |
| { |
| "epoch": 2.22508398656215, |
| "grad_norm": 5.846231937408447, |
| "learning_rate": 1.2917133258678613e-05, |
| "loss": 2.1023, |
| "step": 19870 |
| }, |
| { |
| "epoch": 2.2262038073908172, |
| "grad_norm": 5.567933082580566, |
| "learning_rate": 1.2898469578200823e-05, |
| "loss": 1.5832, |
| "step": 19880 |
| }, |
| { |
| "epoch": 2.227323628219485, |
| "grad_norm": 13.980506896972656, |
| "learning_rate": 1.2879805897723033e-05, |
| "loss": 1.5861, |
| "step": 19890 |
| }, |
| { |
| "epoch": 2.2284434490481524, |
| "grad_norm": 14.191877365112305, |
| "learning_rate": 1.2861142217245243e-05, |
| "loss": 1.6714, |
| "step": 19900 |
| }, |
| { |
| "epoch": 2.2295632698768197, |
| "grad_norm": 10.855998992919922, |
| "learning_rate": 1.2842478536767453e-05, |
| "loss": 1.9644, |
| "step": 19910 |
| }, |
| { |
| "epoch": 2.230683090705487, |
| "grad_norm": 5.852384090423584, |
| "learning_rate": 1.2823814856289663e-05, |
| "loss": 1.8953, |
| "step": 19920 |
| }, |
| { |
| "epoch": 2.2318029115341544, |
| "grad_norm": 5.915239334106445, |
| "learning_rate": 1.2805151175811871e-05, |
| "loss": 1.5698, |
| "step": 19930 |
| }, |
| { |
| "epoch": 2.232922732362822, |
| "grad_norm": 5.294043064117432, |
| "learning_rate": 1.2786487495334081e-05, |
| "loss": 2.0655, |
| "step": 19940 |
| }, |
| { |
| "epoch": 2.2340425531914896, |
| "grad_norm": 8.937568664550781, |
| "learning_rate": 1.276782381485629e-05, |
| "loss": 1.6406, |
| "step": 19950 |
| }, |
| { |
| "epoch": 2.235162374020157, |
| "grad_norm": 3.592744827270508, |
| "learning_rate": 1.27491601343785e-05, |
| "loss": 1.5429, |
| "step": 19960 |
| }, |
| { |
| "epoch": 2.2362821948488243, |
| "grad_norm": 5.134018898010254, |
| "learning_rate": 1.273049645390071e-05, |
| "loss": 2.1943, |
| "step": 19970 |
| }, |
| { |
| "epoch": 2.2374020156774916, |
| "grad_norm": 4.749664306640625, |
| "learning_rate": 1.271183277342292e-05, |
| "loss": 1.8952, |
| "step": 19980 |
| }, |
| { |
| "epoch": 2.238521836506159, |
| "grad_norm": 14.125395774841309, |
| "learning_rate": 1.2693169092945128e-05, |
| "loss": 1.9117, |
| "step": 19990 |
| }, |
| { |
| "epoch": 2.2396416573348263, |
| "grad_norm": 5.6524858474731445, |
| "learning_rate": 1.2674505412467338e-05, |
| "loss": 1.8199, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.2407614781634937, |
| "grad_norm": 9.836930274963379, |
| "learning_rate": 1.2655841731989548e-05, |
| "loss": 1.6642, |
| "step": 20010 |
| }, |
| { |
| "epoch": 2.241881298992161, |
| "grad_norm": 19.449764251708984, |
| "learning_rate": 1.2637178051511758e-05, |
| "loss": 1.9992, |
| "step": 20020 |
| }, |
| { |
| "epoch": 2.243001119820829, |
| "grad_norm": 6.832662105560303, |
| "learning_rate": 1.2618514371033968e-05, |
| "loss": 1.5874, |
| "step": 20030 |
| }, |
| { |
| "epoch": 2.244120940649496, |
| "grad_norm": 17.8643856048584, |
| "learning_rate": 1.2599850690556178e-05, |
| "loss": 2.1804, |
| "step": 20040 |
| }, |
| { |
| "epoch": 2.2452407614781635, |
| "grad_norm": 5.4305620193481445, |
| "learning_rate": 1.2581187010078388e-05, |
| "loss": 1.7595, |
| "step": 20050 |
| }, |
| { |
| "epoch": 2.246360582306831, |
| "grad_norm": 5.813434600830078, |
| "learning_rate": 1.2562523329600598e-05, |
| "loss": 1.8339, |
| "step": 20060 |
| }, |
| { |
| "epoch": 2.2474804031354982, |
| "grad_norm": 22.452621459960938, |
| "learning_rate": 1.2543859649122808e-05, |
| "loss": 2.0927, |
| "step": 20070 |
| }, |
| { |
| "epoch": 2.2486002239641656, |
| "grad_norm": 5.384066104888916, |
| "learning_rate": 1.2525195968645018e-05, |
| "loss": 1.8251, |
| "step": 20080 |
| }, |
| { |
| "epoch": 2.249720044792833, |
| "grad_norm": 16.19381332397461, |
| "learning_rate": 1.2506532288167228e-05, |
| "loss": 1.9519, |
| "step": 20090 |
| }, |
| { |
| "epoch": 2.2508398656215007, |
| "grad_norm": 5.359135627746582, |
| "learning_rate": 1.2487868607689438e-05, |
| "loss": 1.7556, |
| "step": 20100 |
| }, |
| { |
| "epoch": 2.251959686450168, |
| "grad_norm": 8.93488597869873, |
| "learning_rate": 1.2469204927211648e-05, |
| "loss": 2.0605, |
| "step": 20110 |
| }, |
| { |
| "epoch": 2.2530795072788354, |
| "grad_norm": 7.26114559173584, |
| "learning_rate": 1.2450541246733856e-05, |
| "loss": 1.9192, |
| "step": 20120 |
| }, |
| { |
| "epoch": 2.254199328107503, |
| "grad_norm": 10.906415939331055, |
| "learning_rate": 1.2431877566256066e-05, |
| "loss": 1.653, |
| "step": 20130 |
| }, |
| { |
| "epoch": 2.25531914893617, |
| "grad_norm": 5.915148735046387, |
| "learning_rate": 1.2413213885778276e-05, |
| "loss": 2.0476, |
| "step": 20140 |
| }, |
| { |
| "epoch": 2.2564389697648375, |
| "grad_norm": 10.197397232055664, |
| "learning_rate": 1.2394550205300486e-05, |
| "loss": 1.9027, |
| "step": 20150 |
| }, |
| { |
| "epoch": 2.257558790593505, |
| "grad_norm": 14.30677318572998, |
| "learning_rate": 1.2375886524822696e-05, |
| "loss": 1.4843, |
| "step": 20160 |
| }, |
| { |
| "epoch": 2.2586786114221726, |
| "grad_norm": 4.197308540344238, |
| "learning_rate": 1.2357222844344905e-05, |
| "loss": 1.9333, |
| "step": 20170 |
| }, |
| { |
| "epoch": 2.25979843225084, |
| "grad_norm": 6.416319847106934, |
| "learning_rate": 1.2338559163867115e-05, |
| "loss": 2.047, |
| "step": 20180 |
| }, |
| { |
| "epoch": 2.2609182530795073, |
| "grad_norm": 3.727569818496704, |
| "learning_rate": 1.2319895483389325e-05, |
| "loss": 2.0547, |
| "step": 20190 |
| }, |
| { |
| "epoch": 2.2620380739081747, |
| "grad_norm": 4.975082874298096, |
| "learning_rate": 1.2301231802911535e-05, |
| "loss": 1.9228, |
| "step": 20200 |
| }, |
| { |
| "epoch": 2.263157894736842, |
| "grad_norm": 10.25108528137207, |
| "learning_rate": 1.2282568122433745e-05, |
| "loss": 1.7067, |
| "step": 20210 |
| }, |
| { |
| "epoch": 2.2642777155655094, |
| "grad_norm": 6.914962291717529, |
| "learning_rate": 1.2263904441955955e-05, |
| "loss": 1.9841, |
| "step": 20220 |
| }, |
| { |
| "epoch": 2.265397536394177, |
| "grad_norm": 5.214871883392334, |
| "learning_rate": 1.2245240761478165e-05, |
| "loss": 1.6495, |
| "step": 20230 |
| }, |
| { |
| "epoch": 2.2665173572228445, |
| "grad_norm": 11.081496238708496, |
| "learning_rate": 1.2226577081000373e-05, |
| "loss": 1.9833, |
| "step": 20240 |
| }, |
| { |
| "epoch": 2.267637178051512, |
| "grad_norm": 7.861496448516846, |
| "learning_rate": 1.2207913400522583e-05, |
| "loss": 2.2553, |
| "step": 20250 |
| }, |
| { |
| "epoch": 2.2687569988801792, |
| "grad_norm": 4.18981409072876, |
| "learning_rate": 1.2189249720044793e-05, |
| "loss": 1.9001, |
| "step": 20260 |
| }, |
| { |
| "epoch": 2.2698768197088466, |
| "grad_norm": 15.897494316101074, |
| "learning_rate": 1.2170586039567003e-05, |
| "loss": 1.943, |
| "step": 20270 |
| }, |
| { |
| "epoch": 2.270996640537514, |
| "grad_norm": 14.775899887084961, |
| "learning_rate": 1.2151922359089213e-05, |
| "loss": 2.1118, |
| "step": 20280 |
| }, |
| { |
| "epoch": 2.2721164613661813, |
| "grad_norm": 10.079516410827637, |
| "learning_rate": 1.2133258678611423e-05, |
| "loss": 1.9167, |
| "step": 20290 |
| }, |
| { |
| "epoch": 2.2732362821948486, |
| "grad_norm": 4.894615173339844, |
| "learning_rate": 1.2114594998133633e-05, |
| "loss": 2.1881, |
| "step": 20300 |
| }, |
| { |
| "epoch": 2.2743561030235164, |
| "grad_norm": 16.707927703857422, |
| "learning_rate": 1.2095931317655843e-05, |
| "loss": 2.2987, |
| "step": 20310 |
| }, |
| { |
| "epoch": 2.275475923852184, |
| "grad_norm": 7.284656524658203, |
| "learning_rate": 1.2077267637178053e-05, |
| "loss": 1.7505, |
| "step": 20320 |
| }, |
| { |
| "epoch": 2.276595744680851, |
| "grad_norm": 5.649649143218994, |
| "learning_rate": 1.2058603956700263e-05, |
| "loss": 2.2449, |
| "step": 20330 |
| }, |
| { |
| "epoch": 2.2777155655095185, |
| "grad_norm": 5.512756824493408, |
| "learning_rate": 1.2039940276222473e-05, |
| "loss": 1.8765, |
| "step": 20340 |
| }, |
| { |
| "epoch": 2.278835386338186, |
| "grad_norm": 15.581269264221191, |
| "learning_rate": 1.2021276595744681e-05, |
| "loss": 2.1552, |
| "step": 20350 |
| }, |
| { |
| "epoch": 2.279955207166853, |
| "grad_norm": 13.498668670654297, |
| "learning_rate": 1.2002612915266891e-05, |
| "loss": 2.213, |
| "step": 20360 |
| }, |
| { |
| "epoch": 2.2810750279955205, |
| "grad_norm": 9.995055198669434, |
| "learning_rate": 1.1983949234789101e-05, |
| "loss": 2.079, |
| "step": 20370 |
| }, |
| { |
| "epoch": 2.2821948488241883, |
| "grad_norm": 3.790062189102173, |
| "learning_rate": 1.1965285554311311e-05, |
| "loss": 1.6197, |
| "step": 20380 |
| }, |
| { |
| "epoch": 2.2833146696528557, |
| "grad_norm": 4.261139392852783, |
| "learning_rate": 1.194662187383352e-05, |
| "loss": 2.0012, |
| "step": 20390 |
| }, |
| { |
| "epoch": 2.284434490481523, |
| "grad_norm": 3.16943621635437, |
| "learning_rate": 1.192795819335573e-05, |
| "loss": 1.9648, |
| "step": 20400 |
| }, |
| { |
| "epoch": 2.2855543113101904, |
| "grad_norm": 5.687836647033691, |
| "learning_rate": 1.190929451287794e-05, |
| "loss": 1.6828, |
| "step": 20410 |
| }, |
| { |
| "epoch": 2.2866741321388577, |
| "grad_norm": 5.451729774475098, |
| "learning_rate": 1.189063083240015e-05, |
| "loss": 2.2748, |
| "step": 20420 |
| }, |
| { |
| "epoch": 2.287793952967525, |
| "grad_norm": 10.777783393859863, |
| "learning_rate": 1.187196715192236e-05, |
| "loss": 2.0554, |
| "step": 20430 |
| }, |
| { |
| "epoch": 2.2889137737961924, |
| "grad_norm": 5.5777459144592285, |
| "learning_rate": 1.185330347144457e-05, |
| "loss": 1.9674, |
| "step": 20440 |
| }, |
| { |
| "epoch": 2.29003359462486, |
| "grad_norm": 11.55379581451416, |
| "learning_rate": 1.183463979096678e-05, |
| "loss": 1.8068, |
| "step": 20450 |
| }, |
| { |
| "epoch": 2.2911534154535276, |
| "grad_norm": 4.206369876861572, |
| "learning_rate": 1.181597611048899e-05, |
| "loss": 2.1127, |
| "step": 20460 |
| }, |
| { |
| "epoch": 2.292273236282195, |
| "grad_norm": 6.190250873565674, |
| "learning_rate": 1.1797312430011198e-05, |
| "loss": 2.1131, |
| "step": 20470 |
| }, |
| { |
| "epoch": 2.2933930571108623, |
| "grad_norm": 4.842706203460693, |
| "learning_rate": 1.1778648749533408e-05, |
| "loss": 1.7894, |
| "step": 20480 |
| }, |
| { |
| "epoch": 2.2945128779395296, |
| "grad_norm": 9.449223518371582, |
| "learning_rate": 1.1759985069055618e-05, |
| "loss": 1.7278, |
| "step": 20490 |
| }, |
| { |
| "epoch": 2.295632698768197, |
| "grad_norm": 11.598458290100098, |
| "learning_rate": 1.1741321388577828e-05, |
| "loss": 1.9876, |
| "step": 20500 |
| }, |
| { |
| "epoch": 2.2967525195968643, |
| "grad_norm": 4.6162238121032715, |
| "learning_rate": 1.1722657708100038e-05, |
| "loss": 1.4452, |
| "step": 20510 |
| }, |
| { |
| "epoch": 2.297872340425532, |
| "grad_norm": 17.614953994750977, |
| "learning_rate": 1.1703994027622248e-05, |
| "loss": 2.0197, |
| "step": 20520 |
| }, |
| { |
| "epoch": 2.2989921612541995, |
| "grad_norm": 13.69670295715332, |
| "learning_rate": 1.1685330347144458e-05, |
| "loss": 1.661, |
| "step": 20530 |
| }, |
| { |
| "epoch": 2.300111982082867, |
| "grad_norm": 3.7186923027038574, |
| "learning_rate": 1.1666666666666668e-05, |
| "loss": 2.0852, |
| "step": 20540 |
| }, |
| { |
| "epoch": 2.301231802911534, |
| "grad_norm": 7.386007308959961, |
| "learning_rate": 1.1648002986188878e-05, |
| "loss": 1.8018, |
| "step": 20550 |
| }, |
| { |
| "epoch": 2.3023516237402015, |
| "grad_norm": 4.978532791137695, |
| "learning_rate": 1.1629339305711088e-05, |
| "loss": 1.8555, |
| "step": 20560 |
| }, |
| { |
| "epoch": 2.303471444568869, |
| "grad_norm": 11.135411262512207, |
| "learning_rate": 1.1610675625233298e-05, |
| "loss": 2.0469, |
| "step": 20570 |
| }, |
| { |
| "epoch": 2.3045912653975362, |
| "grad_norm": 4.908180236816406, |
| "learning_rate": 1.1592011944755506e-05, |
| "loss": 1.5782, |
| "step": 20580 |
| }, |
| { |
| "epoch": 2.3057110862262036, |
| "grad_norm": 9.538016319274902, |
| "learning_rate": 1.1573348264277716e-05, |
| "loss": 2.1164, |
| "step": 20590 |
| }, |
| { |
| "epoch": 2.3068309070548714, |
| "grad_norm": 10.626128196716309, |
| "learning_rate": 1.1554684583799926e-05, |
| "loss": 2.0624, |
| "step": 20600 |
| }, |
| { |
| "epoch": 2.3079507278835387, |
| "grad_norm": 6.66682243347168, |
| "learning_rate": 1.1536020903322135e-05, |
| "loss": 1.7405, |
| "step": 20610 |
| }, |
| { |
| "epoch": 2.309070548712206, |
| "grad_norm": 5.158255100250244, |
| "learning_rate": 1.1517357222844345e-05, |
| "loss": 1.9986, |
| "step": 20620 |
| }, |
| { |
| "epoch": 2.3101903695408734, |
| "grad_norm": 12.696172714233398, |
| "learning_rate": 1.1498693542366555e-05, |
| "loss": 1.9899, |
| "step": 20630 |
| }, |
| { |
| "epoch": 2.3113101903695408, |
| "grad_norm": 10.744148254394531, |
| "learning_rate": 1.1480029861888765e-05, |
| "loss": 2.0481, |
| "step": 20640 |
| }, |
| { |
| "epoch": 2.312430011198208, |
| "grad_norm": 10.337085723876953, |
| "learning_rate": 1.1461366181410975e-05, |
| "loss": 2.08, |
| "step": 20650 |
| }, |
| { |
| "epoch": 2.313549832026876, |
| "grad_norm": 9.263678550720215, |
| "learning_rate": 1.1442702500933185e-05, |
| "loss": 1.7892, |
| "step": 20660 |
| }, |
| { |
| "epoch": 2.3146696528555433, |
| "grad_norm": 12.324502944946289, |
| "learning_rate": 1.1424038820455395e-05, |
| "loss": 1.9377, |
| "step": 20670 |
| }, |
| { |
| "epoch": 2.3157894736842106, |
| "grad_norm": 6.701484203338623, |
| "learning_rate": 1.1405375139977605e-05, |
| "loss": 1.8667, |
| "step": 20680 |
| }, |
| { |
| "epoch": 2.316909294512878, |
| "grad_norm": 7.743568420410156, |
| "learning_rate": 1.1386711459499815e-05, |
| "loss": 1.5235, |
| "step": 20690 |
| }, |
| { |
| "epoch": 2.3180291153415453, |
| "grad_norm": 4.805120468139648, |
| "learning_rate": 1.1368047779022023e-05, |
| "loss": 1.8663, |
| "step": 20700 |
| }, |
| { |
| "epoch": 2.3191489361702127, |
| "grad_norm": 6.219134330749512, |
| "learning_rate": 1.1349384098544233e-05, |
| "loss": 1.5116, |
| "step": 20710 |
| }, |
| { |
| "epoch": 2.32026875699888, |
| "grad_norm": 12.677251815795898, |
| "learning_rate": 1.1330720418066443e-05, |
| "loss": 1.8188, |
| "step": 20720 |
| }, |
| { |
| "epoch": 2.3213885778275474, |
| "grad_norm": 17.74958038330078, |
| "learning_rate": 1.1312056737588653e-05, |
| "loss": 2.178, |
| "step": 20730 |
| }, |
| { |
| "epoch": 2.322508398656215, |
| "grad_norm": 11.15487289428711, |
| "learning_rate": 1.1293393057110863e-05, |
| "loss": 2.0488, |
| "step": 20740 |
| }, |
| { |
| "epoch": 2.3236282194848825, |
| "grad_norm": 15.36052417755127, |
| "learning_rate": 1.1274729376633073e-05, |
| "loss": 1.7865, |
| "step": 20750 |
| }, |
| { |
| "epoch": 2.32474804031355, |
| "grad_norm": 14.987112998962402, |
| "learning_rate": 1.1256065696155283e-05, |
| "loss": 1.6118, |
| "step": 20760 |
| }, |
| { |
| "epoch": 2.325867861142217, |
| "grad_norm": 4.916079998016357, |
| "learning_rate": 1.1237402015677493e-05, |
| "loss": 2.0262, |
| "step": 20770 |
| }, |
| { |
| "epoch": 2.3269876819708846, |
| "grad_norm": 20.49549102783203, |
| "learning_rate": 1.1218738335199703e-05, |
| "loss": 2.1032, |
| "step": 20780 |
| }, |
| { |
| "epoch": 2.328107502799552, |
| "grad_norm": 16.82073402404785, |
| "learning_rate": 1.1200074654721911e-05, |
| "loss": 2.0666, |
| "step": 20790 |
| }, |
| { |
| "epoch": 2.3292273236282197, |
| "grad_norm": 4.178780555725098, |
| "learning_rate": 1.1181410974244121e-05, |
| "loss": 1.8655, |
| "step": 20800 |
| }, |
| { |
| "epoch": 2.330347144456887, |
| "grad_norm": 16.733497619628906, |
| "learning_rate": 1.1162747293766331e-05, |
| "loss": 1.7731, |
| "step": 20810 |
| }, |
| { |
| "epoch": 2.3314669652855544, |
| "grad_norm": 5.161161422729492, |
| "learning_rate": 1.114408361328854e-05, |
| "loss": 1.6687, |
| "step": 20820 |
| }, |
| { |
| "epoch": 2.3325867861142218, |
| "grad_norm": 4.8293304443359375, |
| "learning_rate": 1.112541993281075e-05, |
| "loss": 2.1226, |
| "step": 20830 |
| }, |
| { |
| "epoch": 2.333706606942889, |
| "grad_norm": 9.590071678161621, |
| "learning_rate": 1.110675625233296e-05, |
| "loss": 1.528, |
| "step": 20840 |
| }, |
| { |
| "epoch": 2.3348264277715565, |
| "grad_norm": 6.294408321380615, |
| "learning_rate": 1.108809257185517e-05, |
| "loss": 1.73, |
| "step": 20850 |
| }, |
| { |
| "epoch": 2.335946248600224, |
| "grad_norm": 10.485013008117676, |
| "learning_rate": 1.106942889137738e-05, |
| "loss": 2.159, |
| "step": 20860 |
| }, |
| { |
| "epoch": 2.337066069428891, |
| "grad_norm": 4.454178333282471, |
| "learning_rate": 1.105076521089959e-05, |
| "loss": 1.6548, |
| "step": 20870 |
| }, |
| { |
| "epoch": 2.338185890257559, |
| "grad_norm": 5.956608772277832, |
| "learning_rate": 1.10321015304218e-05, |
| "loss": 1.5492, |
| "step": 20880 |
| }, |
| { |
| "epoch": 2.3393057110862263, |
| "grad_norm": 7.09451150894165, |
| "learning_rate": 1.101343784994401e-05, |
| "loss": 1.6076, |
| "step": 20890 |
| }, |
| { |
| "epoch": 2.3404255319148937, |
| "grad_norm": 13.640632629394531, |
| "learning_rate": 1.099477416946622e-05, |
| "loss": 2.6322, |
| "step": 20900 |
| }, |
| { |
| "epoch": 2.341545352743561, |
| "grad_norm": 13.958121299743652, |
| "learning_rate": 1.097611048898843e-05, |
| "loss": 2.269, |
| "step": 20910 |
| }, |
| { |
| "epoch": 2.3426651735722284, |
| "grad_norm": 4.459420680999756, |
| "learning_rate": 1.095744680851064e-05, |
| "loss": 2.1053, |
| "step": 20920 |
| }, |
| { |
| "epoch": 2.3437849944008957, |
| "grad_norm": 6.627596855163574, |
| "learning_rate": 1.093878312803285e-05, |
| "loss": 1.8687, |
| "step": 20930 |
| }, |
| { |
| "epoch": 2.3449048152295635, |
| "grad_norm": 11.237924575805664, |
| "learning_rate": 1.0920119447555058e-05, |
| "loss": 1.8006, |
| "step": 20940 |
| }, |
| { |
| "epoch": 2.346024636058231, |
| "grad_norm": 6.611232757568359, |
| "learning_rate": 1.0901455767077268e-05, |
| "loss": 1.7491, |
| "step": 20950 |
| }, |
| { |
| "epoch": 2.347144456886898, |
| "grad_norm": 4.241340160369873, |
| "learning_rate": 1.0882792086599478e-05, |
| "loss": 1.9278, |
| "step": 20960 |
| }, |
| { |
| "epoch": 2.3482642777155656, |
| "grad_norm": 5.34893274307251, |
| "learning_rate": 1.0864128406121688e-05, |
| "loss": 1.9011, |
| "step": 20970 |
| }, |
| { |
| "epoch": 2.349384098544233, |
| "grad_norm": 15.109663009643555, |
| "learning_rate": 1.0845464725643898e-05, |
| "loss": 1.8797, |
| "step": 20980 |
| }, |
| { |
| "epoch": 2.3505039193729003, |
| "grad_norm": 6.220200061798096, |
| "learning_rate": 1.0826801045166108e-05, |
| "loss": 2.1459, |
| "step": 20990 |
| }, |
| { |
| "epoch": 2.3516237402015676, |
| "grad_norm": 4.657541751861572, |
| "learning_rate": 1.0808137364688318e-05, |
| "loss": 1.7789, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.352743561030235, |
| "grad_norm": 3.8326404094696045, |
| "learning_rate": 1.0789473684210526e-05, |
| "loss": 2.0583, |
| "step": 21010 |
| }, |
| { |
| "epoch": 2.3538633818589028, |
| "grad_norm": 6.379500865936279, |
| "learning_rate": 1.0770810003732736e-05, |
| "loss": 1.9574, |
| "step": 21020 |
| }, |
| { |
| "epoch": 2.35498320268757, |
| "grad_norm": 8.482017517089844, |
| "learning_rate": 1.0752146323254946e-05, |
| "loss": 1.7585, |
| "step": 21030 |
| }, |
| { |
| "epoch": 2.3561030235162375, |
| "grad_norm": 10.17912769317627, |
| "learning_rate": 1.0733482642777156e-05, |
| "loss": 2.1129, |
| "step": 21040 |
| }, |
| { |
| "epoch": 2.357222844344905, |
| "grad_norm": 5.909788131713867, |
| "learning_rate": 1.0714818962299365e-05, |
| "loss": 1.8167, |
| "step": 21050 |
| }, |
| { |
| "epoch": 2.358342665173572, |
| "grad_norm": 4.550489902496338, |
| "learning_rate": 1.0696155281821575e-05, |
| "loss": 1.4302, |
| "step": 21060 |
| }, |
| { |
| "epoch": 2.3594624860022395, |
| "grad_norm": 9.305154800415039, |
| "learning_rate": 1.0677491601343785e-05, |
| "loss": 1.5549, |
| "step": 21070 |
| }, |
| { |
| "epoch": 2.360582306830907, |
| "grad_norm": 6.617506504058838, |
| "learning_rate": 1.0658827920865995e-05, |
| "loss": 1.8111, |
| "step": 21080 |
| }, |
| { |
| "epoch": 2.3617021276595747, |
| "grad_norm": 4.230848789215088, |
| "learning_rate": 1.0640164240388205e-05, |
| "loss": 1.604, |
| "step": 21090 |
| }, |
| { |
| "epoch": 2.362821948488242, |
| "grad_norm": 5.817328929901123, |
| "learning_rate": 1.0621500559910415e-05, |
| "loss": 2.2684, |
| "step": 21100 |
| }, |
| { |
| "epoch": 2.3639417693169094, |
| "grad_norm": 11.449498176574707, |
| "learning_rate": 1.0602836879432625e-05, |
| "loss": 1.7141, |
| "step": 21110 |
| }, |
| { |
| "epoch": 2.3650615901455767, |
| "grad_norm": 11.167801856994629, |
| "learning_rate": 1.0584173198954835e-05, |
| "loss": 2.0209, |
| "step": 21120 |
| }, |
| { |
| "epoch": 2.366181410974244, |
| "grad_norm": 10.6549711227417, |
| "learning_rate": 1.0565509518477045e-05, |
| "loss": 1.5182, |
| "step": 21130 |
| }, |
| { |
| "epoch": 2.3673012318029114, |
| "grad_norm": 7.9820122718811035, |
| "learning_rate": 1.0546845837999255e-05, |
| "loss": 1.5622, |
| "step": 21140 |
| }, |
| { |
| "epoch": 2.3684210526315788, |
| "grad_norm": 3.4148659706115723, |
| "learning_rate": 1.0528182157521465e-05, |
| "loss": 2.0534, |
| "step": 21150 |
| }, |
| { |
| "epoch": 2.369540873460246, |
| "grad_norm": 11.469594955444336, |
| "learning_rate": 1.0509518477043675e-05, |
| "loss": 2.0005, |
| "step": 21160 |
| }, |
| { |
| "epoch": 2.370660694288914, |
| "grad_norm": 6.6395697593688965, |
| "learning_rate": 1.0490854796565883e-05, |
| "loss": 2.0595, |
| "step": 21170 |
| }, |
| { |
| "epoch": 2.3717805151175813, |
| "grad_norm": 6.698371410369873, |
| "learning_rate": 1.0472191116088093e-05, |
| "loss": 2.0216, |
| "step": 21180 |
| }, |
| { |
| "epoch": 2.3729003359462486, |
| "grad_norm": 12.63893985748291, |
| "learning_rate": 1.0453527435610303e-05, |
| "loss": 2.1768, |
| "step": 21190 |
| }, |
| { |
| "epoch": 2.374020156774916, |
| "grad_norm": 18.96299171447754, |
| "learning_rate": 1.0434863755132513e-05, |
| "loss": 1.8743, |
| "step": 21200 |
| }, |
| { |
| "epoch": 2.3751399776035833, |
| "grad_norm": 5.689150810241699, |
| "learning_rate": 1.0416200074654723e-05, |
| "loss": 1.7398, |
| "step": 21210 |
| }, |
| { |
| "epoch": 2.3762597984322507, |
| "grad_norm": 7.150450229644775, |
| "learning_rate": 1.0397536394176933e-05, |
| "loss": 1.4524, |
| "step": 21220 |
| }, |
| { |
| "epoch": 2.3773796192609185, |
| "grad_norm": 11.126862525939941, |
| "learning_rate": 1.0378872713699141e-05, |
| "loss": 2.1519, |
| "step": 21230 |
| }, |
| { |
| "epoch": 2.378499440089586, |
| "grad_norm": 5.954022407531738, |
| "learning_rate": 1.0360209033221351e-05, |
| "loss": 2.1018, |
| "step": 21240 |
| }, |
| { |
| "epoch": 2.379619260918253, |
| "grad_norm": 13.803711891174316, |
| "learning_rate": 1.0341545352743561e-05, |
| "loss": 1.5974, |
| "step": 21250 |
| }, |
| { |
| "epoch": 2.3807390817469205, |
| "grad_norm": 8.766247749328613, |
| "learning_rate": 1.0322881672265771e-05, |
| "loss": 2.0202, |
| "step": 21260 |
| }, |
| { |
| "epoch": 2.381858902575588, |
| "grad_norm": 10.347888946533203, |
| "learning_rate": 1.0304217991787981e-05, |
| "loss": 1.5816, |
| "step": 21270 |
| }, |
| { |
| "epoch": 2.382978723404255, |
| "grad_norm": 14.29037094116211, |
| "learning_rate": 1.0285554311310191e-05, |
| "loss": 1.8299, |
| "step": 21280 |
| }, |
| { |
| "epoch": 2.3840985442329226, |
| "grad_norm": 11.084178924560547, |
| "learning_rate": 1.02668906308324e-05, |
| "loss": 2.0029, |
| "step": 21290 |
| }, |
| { |
| "epoch": 2.38521836506159, |
| "grad_norm": 4.837276935577393, |
| "learning_rate": 1.024822695035461e-05, |
| "loss": 1.931, |
| "step": 21300 |
| }, |
| { |
| "epoch": 2.3863381858902577, |
| "grad_norm": 13.397496223449707, |
| "learning_rate": 1.022956326987682e-05, |
| "loss": 1.6302, |
| "step": 21310 |
| }, |
| { |
| "epoch": 2.387458006718925, |
| "grad_norm": 14.555484771728516, |
| "learning_rate": 1.021089958939903e-05, |
| "loss": 1.988, |
| "step": 21320 |
| }, |
| { |
| "epoch": 2.3885778275475924, |
| "grad_norm": 15.178104400634766, |
| "learning_rate": 1.019223590892124e-05, |
| "loss": 2.0671, |
| "step": 21330 |
| }, |
| { |
| "epoch": 2.3896976483762598, |
| "grad_norm": 19.128128051757812, |
| "learning_rate": 1.017357222844345e-05, |
| "loss": 1.8196, |
| "step": 21340 |
| }, |
| { |
| "epoch": 2.390817469204927, |
| "grad_norm": 5.763383865356445, |
| "learning_rate": 1.015490854796566e-05, |
| "loss": 1.6087, |
| "step": 21350 |
| }, |
| { |
| "epoch": 2.3919372900335945, |
| "grad_norm": 8.197660446166992, |
| "learning_rate": 1.013624486748787e-05, |
| "loss": 1.7122, |
| "step": 21360 |
| }, |
| { |
| "epoch": 2.3930571108622622, |
| "grad_norm": 14.964632987976074, |
| "learning_rate": 1.011758118701008e-05, |
| "loss": 1.8517, |
| "step": 21370 |
| }, |
| { |
| "epoch": 2.3941769316909296, |
| "grad_norm": 4.898643970489502, |
| "learning_rate": 1.009891750653229e-05, |
| "loss": 1.7062, |
| "step": 21380 |
| }, |
| { |
| "epoch": 2.395296752519597, |
| "grad_norm": 6.609580039978027, |
| "learning_rate": 1.00802538260545e-05, |
| "loss": 1.8959, |
| "step": 21390 |
| }, |
| { |
| "epoch": 2.3964165733482643, |
| "grad_norm": 5.1216912269592285, |
| "learning_rate": 1.0061590145576708e-05, |
| "loss": 2.305, |
| "step": 21400 |
| }, |
| { |
| "epoch": 2.3975363941769317, |
| "grad_norm": 12.660892486572266, |
| "learning_rate": 1.0042926465098918e-05, |
| "loss": 1.8053, |
| "step": 21410 |
| }, |
| { |
| "epoch": 2.398656215005599, |
| "grad_norm": 14.176844596862793, |
| "learning_rate": 1.0024262784621128e-05, |
| "loss": 1.9157, |
| "step": 21420 |
| }, |
| { |
| "epoch": 2.3997760358342664, |
| "grad_norm": 3.574338674545288, |
| "learning_rate": 1.0005599104143338e-05, |
| "loss": 2.1178, |
| "step": 21430 |
| }, |
| { |
| "epoch": 2.4008958566629337, |
| "grad_norm": 7.545993804931641, |
| "learning_rate": 9.986935423665546e-06, |
| "loss": 1.9478, |
| "step": 21440 |
| }, |
| { |
| "epoch": 2.4020156774916015, |
| "grad_norm": 5.757676601409912, |
| "learning_rate": 9.968271743187756e-06, |
| "loss": 1.6439, |
| "step": 21450 |
| }, |
| { |
| "epoch": 2.403135498320269, |
| "grad_norm": 8.382559776306152, |
| "learning_rate": 9.949608062709966e-06, |
| "loss": 2.1037, |
| "step": 21460 |
| }, |
| { |
| "epoch": 2.404255319148936, |
| "grad_norm": 20.870803833007812, |
| "learning_rate": 9.930944382232176e-06, |
| "loss": 2.1286, |
| "step": 21470 |
| }, |
| { |
| "epoch": 2.4053751399776035, |
| "grad_norm": 10.343074798583984, |
| "learning_rate": 9.912280701754386e-06, |
| "loss": 1.7321, |
| "step": 21480 |
| }, |
| { |
| "epoch": 2.406494960806271, |
| "grad_norm": 11.975387573242188, |
| "learning_rate": 9.893617021276596e-06, |
| "loss": 1.8359, |
| "step": 21490 |
| }, |
| { |
| "epoch": 2.4076147816349383, |
| "grad_norm": 9.322503089904785, |
| "learning_rate": 9.874953340798806e-06, |
| "loss": 1.7441, |
| "step": 21500 |
| }, |
| { |
| "epoch": 2.408734602463606, |
| "grad_norm": 5.631967544555664, |
| "learning_rate": 9.856289660321016e-06, |
| "loss": 1.8915, |
| "step": 21510 |
| }, |
| { |
| "epoch": 2.4098544232922734, |
| "grad_norm": 11.873062133789062, |
| "learning_rate": 9.837625979843225e-06, |
| "loss": 1.924, |
| "step": 21520 |
| }, |
| { |
| "epoch": 2.4109742441209407, |
| "grad_norm": 5.398472785949707, |
| "learning_rate": 9.818962299365435e-06, |
| "loss": 1.8141, |
| "step": 21530 |
| }, |
| { |
| "epoch": 2.412094064949608, |
| "grad_norm": 5.431132793426514, |
| "learning_rate": 9.800298618887645e-06, |
| "loss": 2.0006, |
| "step": 21540 |
| }, |
| { |
| "epoch": 2.4132138857782754, |
| "grad_norm": 6.424263000488281, |
| "learning_rate": 9.781634938409855e-06, |
| "loss": 2.2687, |
| "step": 21550 |
| }, |
| { |
| "epoch": 2.414333706606943, |
| "grad_norm": 15.934911727905273, |
| "learning_rate": 9.762971257932065e-06, |
| "loss": 1.5339, |
| "step": 21560 |
| }, |
| { |
| "epoch": 2.41545352743561, |
| "grad_norm": 8.640837669372559, |
| "learning_rate": 9.744307577454275e-06, |
| "loss": 1.9506, |
| "step": 21570 |
| }, |
| { |
| "epoch": 2.4165733482642775, |
| "grad_norm": 10.464945793151855, |
| "learning_rate": 9.725643896976485e-06, |
| "loss": 2.1268, |
| "step": 21580 |
| }, |
| { |
| "epoch": 2.4176931690929453, |
| "grad_norm": 10.642047882080078, |
| "learning_rate": 9.706980216498695e-06, |
| "loss": 1.7478, |
| "step": 21590 |
| }, |
| { |
| "epoch": 2.4188129899216126, |
| "grad_norm": 10.365204811096191, |
| "learning_rate": 9.688316536020905e-06, |
| "loss": 2.1777, |
| "step": 21600 |
| }, |
| { |
| "epoch": 2.41993281075028, |
| "grad_norm": 13.834842681884766, |
| "learning_rate": 9.669652855543115e-06, |
| "loss": 2.0466, |
| "step": 21610 |
| }, |
| { |
| "epoch": 2.4210526315789473, |
| "grad_norm": 11.793619155883789, |
| "learning_rate": 9.650989175065325e-06, |
| "loss": 1.707, |
| "step": 21620 |
| }, |
| { |
| "epoch": 2.4221724524076147, |
| "grad_norm": 12.311315536499023, |
| "learning_rate": 9.632325494587533e-06, |
| "loss": 1.8893, |
| "step": 21630 |
| }, |
| { |
| "epoch": 2.423292273236282, |
| "grad_norm": 11.055502891540527, |
| "learning_rate": 9.613661814109743e-06, |
| "loss": 1.9288, |
| "step": 21640 |
| }, |
| { |
| "epoch": 2.42441209406495, |
| "grad_norm": 4.9164934158325195, |
| "learning_rate": 9.594998133631953e-06, |
| "loss": 2.414, |
| "step": 21650 |
| }, |
| { |
| "epoch": 2.425531914893617, |
| "grad_norm": 15.42969036102295, |
| "learning_rate": 9.576334453154161e-06, |
| "loss": 1.9903, |
| "step": 21660 |
| }, |
| { |
| "epoch": 2.4266517357222845, |
| "grad_norm": 10.449507713317871, |
| "learning_rate": 9.557670772676371e-06, |
| "loss": 1.6622, |
| "step": 21670 |
| }, |
| { |
| "epoch": 2.427771556550952, |
| "grad_norm": 14.963556289672852, |
| "learning_rate": 9.539007092198581e-06, |
| "loss": 1.6082, |
| "step": 21680 |
| }, |
| { |
| "epoch": 2.4288913773796192, |
| "grad_norm": 13.123733520507812, |
| "learning_rate": 9.520343411720791e-06, |
| "loss": 1.9141, |
| "step": 21690 |
| }, |
| { |
| "epoch": 2.4300111982082866, |
| "grad_norm": 6.848084449768066, |
| "learning_rate": 9.501679731243001e-06, |
| "loss": 1.83, |
| "step": 21700 |
| }, |
| { |
| "epoch": 2.431131019036954, |
| "grad_norm": 5.545853137969971, |
| "learning_rate": 9.483016050765211e-06, |
| "loss": 2.2289, |
| "step": 21710 |
| }, |
| { |
| "epoch": 2.4322508398656213, |
| "grad_norm": 11.716573715209961, |
| "learning_rate": 9.464352370287421e-06, |
| "loss": 1.954, |
| "step": 21720 |
| }, |
| { |
| "epoch": 2.433370660694289, |
| "grad_norm": 7.332011699676514, |
| "learning_rate": 9.445688689809631e-06, |
| "loss": 1.6223, |
| "step": 21730 |
| }, |
| { |
| "epoch": 2.4344904815229564, |
| "grad_norm": 8.037787437438965, |
| "learning_rate": 9.427025009331841e-06, |
| "loss": 1.8996, |
| "step": 21740 |
| }, |
| { |
| "epoch": 2.435610302351624, |
| "grad_norm": 5.458945274353027, |
| "learning_rate": 9.40836132885405e-06, |
| "loss": 1.9408, |
| "step": 21750 |
| }, |
| { |
| "epoch": 2.436730123180291, |
| "grad_norm": 16.545921325683594, |
| "learning_rate": 9.38969764837626e-06, |
| "loss": 1.7479, |
| "step": 21760 |
| }, |
| { |
| "epoch": 2.4378499440089585, |
| "grad_norm": 12.776642799377441, |
| "learning_rate": 9.37103396789847e-06, |
| "loss": 1.8733, |
| "step": 21770 |
| }, |
| { |
| "epoch": 2.438969764837626, |
| "grad_norm": 3.9929423332214355, |
| "learning_rate": 9.35237028742068e-06, |
| "loss": 1.4235, |
| "step": 21780 |
| }, |
| { |
| "epoch": 2.4400895856662936, |
| "grad_norm": 12.087785720825195, |
| "learning_rate": 9.33370660694289e-06, |
| "loss": 1.8805, |
| "step": 21790 |
| }, |
| { |
| "epoch": 2.441209406494961, |
| "grad_norm": 4.026576519012451, |
| "learning_rate": 9.3150429264651e-06, |
| "loss": 2.21, |
| "step": 21800 |
| }, |
| { |
| "epoch": 2.4423292273236283, |
| "grad_norm": 5.352035999298096, |
| "learning_rate": 9.29637924598731e-06, |
| "loss": 1.8218, |
| "step": 21810 |
| }, |
| { |
| "epoch": 2.4434490481522957, |
| "grad_norm": 9.776129722595215, |
| "learning_rate": 9.27771556550952e-06, |
| "loss": 2.0464, |
| "step": 21820 |
| }, |
| { |
| "epoch": 2.444568868980963, |
| "grad_norm": 14.493003845214844, |
| "learning_rate": 9.25905188503173e-06, |
| "loss": 1.6871, |
| "step": 21830 |
| }, |
| { |
| "epoch": 2.4456886898096304, |
| "grad_norm": 17.30157470703125, |
| "learning_rate": 9.24038820455394e-06, |
| "loss": 2.5444, |
| "step": 21840 |
| }, |
| { |
| "epoch": 2.4468085106382977, |
| "grad_norm": 3.3516924381256104, |
| "learning_rate": 9.221724524076148e-06, |
| "loss": 1.6969, |
| "step": 21850 |
| }, |
| { |
| "epoch": 2.447928331466965, |
| "grad_norm": 12.714744567871094, |
| "learning_rate": 9.203060843598358e-06, |
| "loss": 2.1882, |
| "step": 21860 |
| }, |
| { |
| "epoch": 2.449048152295633, |
| "grad_norm": 5.864091396331787, |
| "learning_rate": 9.184397163120568e-06, |
| "loss": 1.9062, |
| "step": 21870 |
| }, |
| { |
| "epoch": 2.4501679731243002, |
| "grad_norm": 14.587536811828613, |
| "learning_rate": 9.165733482642776e-06, |
| "loss": 1.6267, |
| "step": 21880 |
| }, |
| { |
| "epoch": 2.4512877939529676, |
| "grad_norm": 9.073355674743652, |
| "learning_rate": 9.147069802164986e-06, |
| "loss": 1.619, |
| "step": 21890 |
| }, |
| { |
| "epoch": 2.452407614781635, |
| "grad_norm": 13.970871925354004, |
| "learning_rate": 9.128406121687196e-06, |
| "loss": 1.7811, |
| "step": 21900 |
| }, |
| { |
| "epoch": 2.4535274356103023, |
| "grad_norm": 13.218069076538086, |
| "learning_rate": 9.109742441209406e-06, |
| "loss": 2.1777, |
| "step": 21910 |
| }, |
| { |
| "epoch": 2.4546472564389696, |
| "grad_norm": 7.0698981285095215, |
| "learning_rate": 9.091078760731616e-06, |
| "loss": 1.672, |
| "step": 21920 |
| }, |
| { |
| "epoch": 2.455767077267637, |
| "grad_norm": 6.958558082580566, |
| "learning_rate": 9.072415080253826e-06, |
| "loss": 1.8189, |
| "step": 21930 |
| }, |
| { |
| "epoch": 2.456886898096305, |
| "grad_norm": 11.154871940612793, |
| "learning_rate": 9.053751399776036e-06, |
| "loss": 1.9092, |
| "step": 21940 |
| }, |
| { |
| "epoch": 2.458006718924972, |
| "grad_norm": 8.34592056274414, |
| "learning_rate": 9.035087719298246e-06, |
| "loss": 2.0183, |
| "step": 21950 |
| }, |
| { |
| "epoch": 2.4591265397536395, |
| "grad_norm": 17.590225219726562, |
| "learning_rate": 9.016424038820456e-06, |
| "loss": 2.0918, |
| "step": 21960 |
| }, |
| { |
| "epoch": 2.460246360582307, |
| "grad_norm": 16.22553825378418, |
| "learning_rate": 8.997760358342666e-06, |
| "loss": 1.9656, |
| "step": 21970 |
| }, |
| { |
| "epoch": 2.461366181410974, |
| "grad_norm": 18.10919952392578, |
| "learning_rate": 8.979096677864876e-06, |
| "loss": 1.6443, |
| "step": 21980 |
| }, |
| { |
| "epoch": 2.4624860022396415, |
| "grad_norm": 5.232290267944336, |
| "learning_rate": 8.960432997387085e-06, |
| "loss": 1.6849, |
| "step": 21990 |
| }, |
| { |
| "epoch": 2.463605823068309, |
| "grad_norm": 9.676012992858887, |
| "learning_rate": 8.941769316909295e-06, |
| "loss": 1.4555, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.4647256438969762, |
| "grad_norm": 6.006662845611572, |
| "learning_rate": 8.923105636431505e-06, |
| "loss": 1.5401, |
| "step": 22010 |
| }, |
| { |
| "epoch": 2.465845464725644, |
| "grad_norm": 17.258005142211914, |
| "learning_rate": 8.904441955953715e-06, |
| "loss": 2.4357, |
| "step": 22020 |
| }, |
| { |
| "epoch": 2.4669652855543114, |
| "grad_norm": 6.448751926422119, |
| "learning_rate": 8.885778275475925e-06, |
| "loss": 2.1087, |
| "step": 22030 |
| }, |
| { |
| "epoch": 2.4680851063829787, |
| "grad_norm": 5.431372165679932, |
| "learning_rate": 8.867114594998135e-06, |
| "loss": 1.9074, |
| "step": 22040 |
| }, |
| { |
| "epoch": 2.469204927211646, |
| "grad_norm": 12.926970481872559, |
| "learning_rate": 8.848450914520345e-06, |
| "loss": 1.9923, |
| "step": 22050 |
| }, |
| { |
| "epoch": 2.4703247480403134, |
| "grad_norm": 5.959377288818359, |
| "learning_rate": 8.829787234042553e-06, |
| "loss": 1.672, |
| "step": 22060 |
| }, |
| { |
| "epoch": 2.471444568868981, |
| "grad_norm": 16.465486526489258, |
| "learning_rate": 8.811123553564763e-06, |
| "loss": 1.8695, |
| "step": 22070 |
| }, |
| { |
| "epoch": 2.4725643896976486, |
| "grad_norm": 5.742716312408447, |
| "learning_rate": 8.792459873086973e-06, |
| "loss": 1.8781, |
| "step": 22080 |
| }, |
| { |
| "epoch": 2.473684210526316, |
| "grad_norm": 10.750136375427246, |
| "learning_rate": 8.773796192609183e-06, |
| "loss": 1.9953, |
| "step": 22090 |
| }, |
| { |
| "epoch": 2.4748040313549833, |
| "grad_norm": 5.631348609924316, |
| "learning_rate": 8.755132512131391e-06, |
| "loss": 1.6181, |
| "step": 22100 |
| }, |
| { |
| "epoch": 2.4759238521836506, |
| "grad_norm": 9.884969711303711, |
| "learning_rate": 8.736468831653601e-06, |
| "loss": 1.5844, |
| "step": 22110 |
| }, |
| { |
| "epoch": 2.477043673012318, |
| "grad_norm": 4.6717376708984375, |
| "learning_rate": 8.717805151175811e-06, |
| "loss": 1.8376, |
| "step": 22120 |
| }, |
| { |
| "epoch": 2.4781634938409853, |
| "grad_norm": 4.163270473480225, |
| "learning_rate": 8.699141470698021e-06, |
| "loss": 2.1804, |
| "step": 22130 |
| }, |
| { |
| "epoch": 2.4792833146696527, |
| "grad_norm": 7.534999847412109, |
| "learning_rate": 8.680477790220231e-06, |
| "loss": 1.8526, |
| "step": 22140 |
| }, |
| { |
| "epoch": 2.48040313549832, |
| "grad_norm": 9.90027904510498, |
| "learning_rate": 8.661814109742441e-06, |
| "loss": 1.8137, |
| "step": 22150 |
| }, |
| { |
| "epoch": 2.481522956326988, |
| "grad_norm": 19.237462997436523, |
| "learning_rate": 8.643150429264651e-06, |
| "loss": 2.6685, |
| "step": 22160 |
| }, |
| { |
| "epoch": 2.482642777155655, |
| "grad_norm": 9.682941436767578, |
| "learning_rate": 8.624486748786861e-06, |
| "loss": 1.826, |
| "step": 22170 |
| }, |
| { |
| "epoch": 2.4837625979843225, |
| "grad_norm": 3.538151264190674, |
| "learning_rate": 8.605823068309071e-06, |
| "loss": 1.801, |
| "step": 22180 |
| }, |
| { |
| "epoch": 2.48488241881299, |
| "grad_norm": 5.478328704833984, |
| "learning_rate": 8.587159387831281e-06, |
| "loss": 1.8762, |
| "step": 22190 |
| }, |
| { |
| "epoch": 2.4860022396416572, |
| "grad_norm": 7.476826190948486, |
| "learning_rate": 8.568495707353491e-06, |
| "loss": 2.1028, |
| "step": 22200 |
| }, |
| { |
| "epoch": 2.4871220604703246, |
| "grad_norm": 5.20843505859375, |
| "learning_rate": 8.549832026875701e-06, |
| "loss": 1.6034, |
| "step": 22210 |
| }, |
| { |
| "epoch": 2.4882418812989924, |
| "grad_norm": 5.28059720993042, |
| "learning_rate": 8.53116834639791e-06, |
| "loss": 1.7654, |
| "step": 22220 |
| }, |
| { |
| "epoch": 2.4893617021276597, |
| "grad_norm": 6.444010257720947, |
| "learning_rate": 8.51250466592012e-06, |
| "loss": 1.8655, |
| "step": 22230 |
| }, |
| { |
| "epoch": 2.490481522956327, |
| "grad_norm": 6.940224647521973, |
| "learning_rate": 8.49384098544233e-06, |
| "loss": 1.7356, |
| "step": 22240 |
| }, |
| { |
| "epoch": 2.4916013437849944, |
| "grad_norm": 23.465511322021484, |
| "learning_rate": 8.47517730496454e-06, |
| "loss": 1.846, |
| "step": 22250 |
| }, |
| { |
| "epoch": 2.4927211646136618, |
| "grad_norm": 8.295751571655273, |
| "learning_rate": 8.45651362448675e-06, |
| "loss": 1.9118, |
| "step": 22260 |
| }, |
| { |
| "epoch": 2.493840985442329, |
| "grad_norm": 12.125283241271973, |
| "learning_rate": 8.43784994400896e-06, |
| "loss": 2.0026, |
| "step": 22270 |
| }, |
| { |
| "epoch": 2.4949608062709965, |
| "grad_norm": 5.106156349182129, |
| "learning_rate": 8.419186263531168e-06, |
| "loss": 1.8797, |
| "step": 22280 |
| }, |
| { |
| "epoch": 2.496080627099664, |
| "grad_norm": 5.151834011077881, |
| "learning_rate": 8.400522583053378e-06, |
| "loss": 2.063, |
| "step": 22290 |
| }, |
| { |
| "epoch": 2.4972004479283316, |
| "grad_norm": 13.763957023620605, |
| "learning_rate": 8.381858902575588e-06, |
| "loss": 1.6182, |
| "step": 22300 |
| }, |
| { |
| "epoch": 2.498320268756999, |
| "grad_norm": 3.533198356628418, |
| "learning_rate": 8.363195222097798e-06, |
| "loss": 1.6865, |
| "step": 22310 |
| }, |
| { |
| "epoch": 2.4994400895856663, |
| "grad_norm": 5.562738418579102, |
| "learning_rate": 8.344531541620008e-06, |
| "loss": 1.6556, |
| "step": 22320 |
| }, |
| { |
| "epoch": 2.5005599104143337, |
| "grad_norm": 4.3271613121032715, |
| "learning_rate": 8.325867861142216e-06, |
| "loss": 2.088, |
| "step": 22330 |
| }, |
| { |
| "epoch": 2.501679731243001, |
| "grad_norm": 12.457752227783203, |
| "learning_rate": 8.307204180664426e-06, |
| "loss": 2.2114, |
| "step": 22340 |
| }, |
| { |
| "epoch": 2.5027995520716684, |
| "grad_norm": 5.431798934936523, |
| "learning_rate": 8.288540500186636e-06, |
| "loss": 1.5092, |
| "step": 22350 |
| }, |
| { |
| "epoch": 2.503919372900336, |
| "grad_norm": 13.543461799621582, |
| "learning_rate": 8.269876819708846e-06, |
| "loss": 1.9257, |
| "step": 22360 |
| }, |
| { |
| "epoch": 2.5050391937290035, |
| "grad_norm": 8.819217681884766, |
| "learning_rate": 8.251213139231056e-06, |
| "loss": 1.817, |
| "step": 22370 |
| }, |
| { |
| "epoch": 2.506159014557671, |
| "grad_norm": 7.270272254943848, |
| "learning_rate": 8.232549458753266e-06, |
| "loss": 1.7905, |
| "step": 22380 |
| }, |
| { |
| "epoch": 2.5072788353863382, |
| "grad_norm": 7.694066524505615, |
| "learning_rate": 8.213885778275476e-06, |
| "loss": 2.2374, |
| "step": 22390 |
| }, |
| { |
| "epoch": 2.5083986562150056, |
| "grad_norm": 7.0074286460876465, |
| "learning_rate": 8.195222097797686e-06, |
| "loss": 1.8355, |
| "step": 22400 |
| }, |
| { |
| "epoch": 2.509518477043673, |
| "grad_norm": 9.260007858276367, |
| "learning_rate": 8.176558417319896e-06, |
| "loss": 1.9881, |
| "step": 22410 |
| }, |
| { |
| "epoch": 2.5106382978723403, |
| "grad_norm": 11.184020042419434, |
| "learning_rate": 8.157894736842106e-06, |
| "loss": 2.0734, |
| "step": 22420 |
| }, |
| { |
| "epoch": 2.5117581187010076, |
| "grad_norm": 4.612696647644043, |
| "learning_rate": 8.139231056364316e-06, |
| "loss": 2.1045, |
| "step": 22430 |
| }, |
| { |
| "epoch": 2.512877939529675, |
| "grad_norm": 4.042170524597168, |
| "learning_rate": 8.120567375886526e-06, |
| "loss": 2.1674, |
| "step": 22440 |
| }, |
| { |
| "epoch": 2.5139977603583428, |
| "grad_norm": 6.870834827423096, |
| "learning_rate": 8.101903695408735e-06, |
| "loss": 1.5094, |
| "step": 22450 |
| }, |
| { |
| "epoch": 2.51511758118701, |
| "grad_norm": 11.569465637207031, |
| "learning_rate": 8.083240014930945e-06, |
| "loss": 1.7942, |
| "step": 22460 |
| }, |
| { |
| "epoch": 2.5162374020156775, |
| "grad_norm": 9.036044120788574, |
| "learning_rate": 8.064576334453155e-06, |
| "loss": 1.8934, |
| "step": 22470 |
| }, |
| { |
| "epoch": 2.517357222844345, |
| "grad_norm": 16.669771194458008, |
| "learning_rate": 8.045912653975365e-06, |
| "loss": 2.087, |
| "step": 22480 |
| }, |
| { |
| "epoch": 2.518477043673012, |
| "grad_norm": 11.910612106323242, |
| "learning_rate": 8.027248973497575e-06, |
| "loss": 2.067, |
| "step": 22490 |
| }, |
| { |
| "epoch": 2.51959686450168, |
| "grad_norm": 8.747892379760742, |
| "learning_rate": 8.008585293019783e-06, |
| "loss": 2.064, |
| "step": 22500 |
| }, |
| { |
| "epoch": 2.5207166853303473, |
| "grad_norm": 8.751014709472656, |
| "learning_rate": 7.989921612541993e-06, |
| "loss": 1.9492, |
| "step": 22510 |
| }, |
| { |
| "epoch": 2.5218365061590147, |
| "grad_norm": 16.253923416137695, |
| "learning_rate": 7.971257932064203e-06, |
| "loss": 2.2059, |
| "step": 22520 |
| }, |
| { |
| "epoch": 2.522956326987682, |
| "grad_norm": 5.416139125823975, |
| "learning_rate": 7.952594251586413e-06, |
| "loss": 2.3393, |
| "step": 22530 |
| }, |
| { |
| "epoch": 2.5240761478163494, |
| "grad_norm": 5.805497169494629, |
| "learning_rate": 7.933930571108623e-06, |
| "loss": 2.3075, |
| "step": 22540 |
| }, |
| { |
| "epoch": 2.5251959686450167, |
| "grad_norm": 14.180325508117676, |
| "learning_rate": 7.915266890630833e-06, |
| "loss": 1.3238, |
| "step": 22550 |
| }, |
| { |
| "epoch": 2.526315789473684, |
| "grad_norm": 18.959636688232422, |
| "learning_rate": 7.896603210153043e-06, |
| "loss": 2.4995, |
| "step": 22560 |
| }, |
| { |
| "epoch": 2.5274356103023514, |
| "grad_norm": 15.236656188964844, |
| "learning_rate": 7.877939529675251e-06, |
| "loss": 1.979, |
| "step": 22570 |
| }, |
| { |
| "epoch": 2.5285554311310188, |
| "grad_norm": 11.582307815551758, |
| "learning_rate": 7.859275849197461e-06, |
| "loss": 1.8858, |
| "step": 22580 |
| }, |
| { |
| "epoch": 2.5296752519596866, |
| "grad_norm": 4.920597553253174, |
| "learning_rate": 7.840612168719671e-06, |
| "loss": 1.9993, |
| "step": 22590 |
| }, |
| { |
| "epoch": 2.530795072788354, |
| "grad_norm": 16.651355743408203, |
| "learning_rate": 7.821948488241881e-06, |
| "loss": 1.8391, |
| "step": 22600 |
| }, |
| { |
| "epoch": 2.5319148936170213, |
| "grad_norm": 4.262025356292725, |
| "learning_rate": 7.803284807764091e-06, |
| "loss": 1.8714, |
| "step": 22610 |
| }, |
| { |
| "epoch": 2.5330347144456886, |
| "grad_norm": 16.481779098510742, |
| "learning_rate": 7.784621127286301e-06, |
| "loss": 1.8893, |
| "step": 22620 |
| }, |
| { |
| "epoch": 2.534154535274356, |
| "grad_norm": 6.779279708862305, |
| "learning_rate": 7.765957446808511e-06, |
| "loss": 1.7062, |
| "step": 22630 |
| }, |
| { |
| "epoch": 2.5352743561030238, |
| "grad_norm": 11.93194580078125, |
| "learning_rate": 7.747293766330721e-06, |
| "loss": 1.9667, |
| "step": 22640 |
| }, |
| { |
| "epoch": 2.536394176931691, |
| "grad_norm": 8.4479341506958, |
| "learning_rate": 7.728630085852931e-06, |
| "loss": 1.6635, |
| "step": 22650 |
| }, |
| { |
| "epoch": 2.5375139977603585, |
| "grad_norm": 9.052682876586914, |
| "learning_rate": 7.709966405375141e-06, |
| "loss": 1.7176, |
| "step": 22660 |
| }, |
| { |
| "epoch": 2.538633818589026, |
| "grad_norm": 17.69319725036621, |
| "learning_rate": 7.691302724897351e-06, |
| "loss": 2.0191, |
| "step": 22670 |
| }, |
| { |
| "epoch": 2.539753639417693, |
| "grad_norm": 8.785430908203125, |
| "learning_rate": 7.67263904441956e-06, |
| "loss": 1.7666, |
| "step": 22680 |
| }, |
| { |
| "epoch": 2.5408734602463605, |
| "grad_norm": 8.51176929473877, |
| "learning_rate": 7.65397536394177e-06, |
| "loss": 1.8087, |
| "step": 22690 |
| }, |
| { |
| "epoch": 2.541993281075028, |
| "grad_norm": 5.72242546081543, |
| "learning_rate": 7.63531168346398e-06, |
| "loss": 2.0391, |
| "step": 22700 |
| }, |
| { |
| "epoch": 2.543113101903695, |
| "grad_norm": 4.739030838012695, |
| "learning_rate": 7.616648002986189e-06, |
| "loss": 2.0501, |
| "step": 22710 |
| }, |
| { |
| "epoch": 2.5442329227323626, |
| "grad_norm": 7.8822736740112305, |
| "learning_rate": 7.597984322508399e-06, |
| "loss": 1.7549, |
| "step": 22720 |
| }, |
| { |
| "epoch": 2.5453527435610304, |
| "grad_norm": 14.290916442871094, |
| "learning_rate": 7.579320642030609e-06, |
| "loss": 1.606, |
| "step": 22730 |
| }, |
| { |
| "epoch": 2.5464725643896977, |
| "grad_norm": 13.635068893432617, |
| "learning_rate": 7.560656961552819e-06, |
| "loss": 1.795, |
| "step": 22740 |
| }, |
| { |
| "epoch": 2.547592385218365, |
| "grad_norm": 5.094437599182129, |
| "learning_rate": 7.541993281075028e-06, |
| "loss": 1.9598, |
| "step": 22750 |
| }, |
| { |
| "epoch": 2.5487122060470324, |
| "grad_norm": 5.03619384765625, |
| "learning_rate": 7.523329600597238e-06, |
| "loss": 2.3806, |
| "step": 22760 |
| }, |
| { |
| "epoch": 2.5498320268756998, |
| "grad_norm": 9.20356273651123, |
| "learning_rate": 7.504665920119448e-06, |
| "loss": 1.8016, |
| "step": 22770 |
| }, |
| { |
| "epoch": 2.5509518477043676, |
| "grad_norm": 7.040286540985107, |
| "learning_rate": 7.486002239641658e-06, |
| "loss": 1.924, |
| "step": 22780 |
| }, |
| { |
| "epoch": 2.552071668533035, |
| "grad_norm": 6.913671970367432, |
| "learning_rate": 7.467338559163868e-06, |
| "loss": 1.9914, |
| "step": 22790 |
| }, |
| { |
| "epoch": 2.5531914893617023, |
| "grad_norm": 6.331127643585205, |
| "learning_rate": 7.4486748786860764e-06, |
| "loss": 1.7911, |
| "step": 22800 |
| }, |
| { |
| "epoch": 2.5543113101903696, |
| "grad_norm": 6.453745365142822, |
| "learning_rate": 7.4300111982082864e-06, |
| "loss": 1.985, |
| "step": 22810 |
| }, |
| { |
| "epoch": 2.555431131019037, |
| "grad_norm": 15.195472717285156, |
| "learning_rate": 7.4113475177304964e-06, |
| "loss": 2.2203, |
| "step": 22820 |
| }, |
| { |
| "epoch": 2.5565509518477043, |
| "grad_norm": 13.667654037475586, |
| "learning_rate": 7.3926838372527064e-06, |
| "loss": 1.6591, |
| "step": 22830 |
| }, |
| { |
| "epoch": 2.5576707726763717, |
| "grad_norm": 4.465586185455322, |
| "learning_rate": 7.3740201567749165e-06, |
| "loss": 2.0768, |
| "step": 22840 |
| }, |
| { |
| "epoch": 2.558790593505039, |
| "grad_norm": 15.293925285339355, |
| "learning_rate": 7.3553564762971265e-06, |
| "loss": 1.9697, |
| "step": 22850 |
| }, |
| { |
| "epoch": 2.5599104143337064, |
| "grad_norm": 4.840792655944824, |
| "learning_rate": 7.336692795819336e-06, |
| "loss": 1.5074, |
| "step": 22860 |
| }, |
| { |
| "epoch": 2.561030235162374, |
| "grad_norm": 8.356353759765625, |
| "learning_rate": 7.318029115341546e-06, |
| "loss": 1.9765, |
| "step": 22870 |
| }, |
| { |
| "epoch": 2.5621500559910415, |
| "grad_norm": 6.289432525634766, |
| "learning_rate": 7.299365434863756e-06, |
| "loss": 1.7277, |
| "step": 22880 |
| }, |
| { |
| "epoch": 2.563269876819709, |
| "grad_norm": 14.32654094696045, |
| "learning_rate": 7.280701754385966e-06, |
| "loss": 2.1023, |
| "step": 22890 |
| }, |
| { |
| "epoch": 2.564389697648376, |
| "grad_norm": 7.886980056762695, |
| "learning_rate": 7.262038073908176e-06, |
| "loss": 1.8874, |
| "step": 22900 |
| }, |
| { |
| "epoch": 2.5655095184770436, |
| "grad_norm": 11.411437034606934, |
| "learning_rate": 7.243374393430386e-06, |
| "loss": 2.1147, |
| "step": 22910 |
| }, |
| { |
| "epoch": 2.5666293393057114, |
| "grad_norm": 7.815008640289307, |
| "learning_rate": 7.224710712952594e-06, |
| "loss": 2.302, |
| "step": 22920 |
| }, |
| { |
| "epoch": 2.5677491601343787, |
| "grad_norm": 13.516090393066406, |
| "learning_rate": 7.206047032474804e-06, |
| "loss": 1.6051, |
| "step": 22930 |
| }, |
| { |
| "epoch": 2.568868980963046, |
| "grad_norm": 5.94198751449585, |
| "learning_rate": 7.187383351997014e-06, |
| "loss": 1.8294, |
| "step": 22940 |
| }, |
| { |
| "epoch": 2.5699888017917134, |
| "grad_norm": 10.252525329589844, |
| "learning_rate": 7.168719671519224e-06, |
| "loss": 1.4627, |
| "step": 22950 |
| }, |
| { |
| "epoch": 2.5711086226203808, |
| "grad_norm": 8.05044174194336, |
| "learning_rate": 7.150055991041434e-06, |
| "loss": 1.7721, |
| "step": 22960 |
| }, |
| { |
| "epoch": 2.572228443449048, |
| "grad_norm": 5.872049331665039, |
| "learning_rate": 7.131392310563643e-06, |
| "loss": 2.0512, |
| "step": 22970 |
| }, |
| { |
| "epoch": 2.5733482642777155, |
| "grad_norm": 12.562164306640625, |
| "learning_rate": 7.112728630085853e-06, |
| "loss": 2.3197, |
| "step": 22980 |
| }, |
| { |
| "epoch": 2.574468085106383, |
| "grad_norm": 7.482940196990967, |
| "learning_rate": 7.094064949608063e-06, |
| "loss": 2.022, |
| "step": 22990 |
| }, |
| { |
| "epoch": 2.57558790593505, |
| "grad_norm": 11.764483451843262, |
| "learning_rate": 7.075401269130273e-06, |
| "loss": 1.9567, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.576707726763718, |
| "grad_norm": 10.071126937866211, |
| "learning_rate": 7.056737588652483e-06, |
| "loss": 1.8271, |
| "step": 23010 |
| }, |
| { |
| "epoch": 2.5778275475923853, |
| "grad_norm": 10.216212272644043, |
| "learning_rate": 7.038073908174693e-06, |
| "loss": 1.789, |
| "step": 23020 |
| }, |
| { |
| "epoch": 2.5789473684210527, |
| "grad_norm": 8.066825866699219, |
| "learning_rate": 7.0194102276969015e-06, |
| "loss": 1.6432, |
| "step": 23030 |
| }, |
| { |
| "epoch": 2.58006718924972, |
| "grad_norm": 16.877470016479492, |
| "learning_rate": 7.0007465472191115e-06, |
| "loss": 1.7376, |
| "step": 23040 |
| }, |
| { |
| "epoch": 2.5811870100783874, |
| "grad_norm": 5.387278079986572, |
| "learning_rate": 6.9820828667413215e-06, |
| "loss": 1.5875, |
| "step": 23050 |
| }, |
| { |
| "epoch": 2.5823068309070547, |
| "grad_norm": 15.250850677490234, |
| "learning_rate": 6.9634191862635315e-06, |
| "loss": 1.6844, |
| "step": 23060 |
| }, |
| { |
| "epoch": 2.5834266517357225, |
| "grad_norm": 7.900568962097168, |
| "learning_rate": 6.9447555057857415e-06, |
| "loss": 1.8616, |
| "step": 23070 |
| }, |
| { |
| "epoch": 2.58454647256439, |
| "grad_norm": 6.394863128662109, |
| "learning_rate": 6.926091825307951e-06, |
| "loss": 1.4066, |
| "step": 23080 |
| }, |
| { |
| "epoch": 2.585666293393057, |
| "grad_norm": 5.246687412261963, |
| "learning_rate": 6.907428144830161e-06, |
| "loss": 2.0123, |
| "step": 23090 |
| }, |
| { |
| "epoch": 2.5867861142217246, |
| "grad_norm": 13.717708587646484, |
| "learning_rate": 6.888764464352371e-06, |
| "loss": 1.8026, |
| "step": 23100 |
| }, |
| { |
| "epoch": 2.587905935050392, |
| "grad_norm": 5.991678237915039, |
| "learning_rate": 6.870100783874581e-06, |
| "loss": 1.9593, |
| "step": 23110 |
| }, |
| { |
| "epoch": 2.5890257558790593, |
| "grad_norm": 6.467216491699219, |
| "learning_rate": 6.851437103396791e-06, |
| "loss": 1.8865, |
| "step": 23120 |
| }, |
| { |
| "epoch": 2.5901455767077266, |
| "grad_norm": 11.342000961303711, |
| "learning_rate": 6.832773422919001e-06, |
| "loss": 2.0429, |
| "step": 23130 |
| }, |
| { |
| "epoch": 2.591265397536394, |
| "grad_norm": 6.355552673339844, |
| "learning_rate": 6.814109742441211e-06, |
| "loss": 1.5285, |
| "step": 23140 |
| }, |
| { |
| "epoch": 2.5923852183650617, |
| "grad_norm": 4.930696964263916, |
| "learning_rate": 6.795446061963419e-06, |
| "loss": 2.0815, |
| "step": 23150 |
| }, |
| { |
| "epoch": 2.593505039193729, |
| "grad_norm": 6.06488037109375, |
| "learning_rate": 6.776782381485629e-06, |
| "loss": 1.6929, |
| "step": 23160 |
| }, |
| { |
| "epoch": 2.5946248600223965, |
| "grad_norm": 17.244834899902344, |
| "learning_rate": 6.758118701007839e-06, |
| "loss": 2.1941, |
| "step": 23170 |
| }, |
| { |
| "epoch": 2.595744680851064, |
| "grad_norm": 6.777196407318115, |
| "learning_rate": 6.739455020530049e-06, |
| "loss": 1.6116, |
| "step": 23180 |
| }, |
| { |
| "epoch": 2.596864501679731, |
| "grad_norm": 10.06576156616211, |
| "learning_rate": 6.720791340052258e-06, |
| "loss": 1.6001, |
| "step": 23190 |
| }, |
| { |
| "epoch": 2.5979843225083985, |
| "grad_norm": 17.110258102416992, |
| "learning_rate": 6.702127659574468e-06, |
| "loss": 1.9066, |
| "step": 23200 |
| }, |
| { |
| "epoch": 2.5991041433370663, |
| "grad_norm": 10.628520011901855, |
| "learning_rate": 6.683463979096678e-06, |
| "loss": 1.9434, |
| "step": 23210 |
| }, |
| { |
| "epoch": 2.6002239641657336, |
| "grad_norm": 6.576961040496826, |
| "learning_rate": 6.664800298618888e-06, |
| "loss": 1.2507, |
| "step": 23220 |
| }, |
| { |
| "epoch": 2.601343784994401, |
| "grad_norm": 5.920810699462891, |
| "learning_rate": 6.646136618141098e-06, |
| "loss": 1.9261, |
| "step": 23230 |
| }, |
| { |
| "epoch": 2.6024636058230683, |
| "grad_norm": 4.780271530151367, |
| "learning_rate": 6.627472937663308e-06, |
| "loss": 1.9078, |
| "step": 23240 |
| }, |
| { |
| "epoch": 2.6035834266517357, |
| "grad_norm": 3.30251145362854, |
| "learning_rate": 6.608809257185518e-06, |
| "loss": 2.2392, |
| "step": 23250 |
| }, |
| { |
| "epoch": 2.604703247480403, |
| "grad_norm": 6.361575126647949, |
| "learning_rate": 6.590145576707728e-06, |
| "loss": 1.5298, |
| "step": 23260 |
| }, |
| { |
| "epoch": 2.6058230683090704, |
| "grad_norm": 5.133968830108643, |
| "learning_rate": 6.5714818962299365e-06, |
| "loss": 2.0936, |
| "step": 23270 |
| }, |
| { |
| "epoch": 2.6069428891377378, |
| "grad_norm": 11.35536003112793, |
| "learning_rate": 6.5528182157521465e-06, |
| "loss": 2.1213, |
| "step": 23280 |
| }, |
| { |
| "epoch": 2.608062709966405, |
| "grad_norm": 14.275880813598633, |
| "learning_rate": 6.534154535274356e-06, |
| "loss": 1.5543, |
| "step": 23290 |
| }, |
| { |
| "epoch": 2.609182530795073, |
| "grad_norm": 11.91288948059082, |
| "learning_rate": 6.515490854796566e-06, |
| "loss": 1.8959, |
| "step": 23300 |
| }, |
| { |
| "epoch": 2.6103023516237402, |
| "grad_norm": 7.54849910736084, |
| "learning_rate": 6.496827174318776e-06, |
| "loss": 1.9089, |
| "step": 23310 |
| }, |
| { |
| "epoch": 2.6114221724524076, |
| "grad_norm": 4.366549968719482, |
| "learning_rate": 6.478163493840986e-06, |
| "loss": 1.6129, |
| "step": 23320 |
| }, |
| { |
| "epoch": 2.612541993281075, |
| "grad_norm": 4.882798671722412, |
| "learning_rate": 6.459499813363196e-06, |
| "loss": 1.857, |
| "step": 23330 |
| }, |
| { |
| "epoch": 2.6136618141097423, |
| "grad_norm": 18.530513763427734, |
| "learning_rate": 6.440836132885406e-06, |
| "loss": 2.0854, |
| "step": 23340 |
| }, |
| { |
| "epoch": 2.61478163493841, |
| "grad_norm": 7.46307897567749, |
| "learning_rate": 6.422172452407616e-06, |
| "loss": 1.5743, |
| "step": 23350 |
| }, |
| { |
| "epoch": 2.6159014557670774, |
| "grad_norm": 16.533849716186523, |
| "learning_rate": 6.403508771929826e-06, |
| "loss": 1.9256, |
| "step": 23360 |
| }, |
| { |
| "epoch": 2.617021276595745, |
| "grad_norm": 10.886700630187988, |
| "learning_rate": 6.384845091452035e-06, |
| "loss": 1.8017, |
| "step": 23370 |
| }, |
| { |
| "epoch": 2.618141097424412, |
| "grad_norm": 12.989828109741211, |
| "learning_rate": 6.366181410974244e-06, |
| "loss": 1.6722, |
| "step": 23380 |
| }, |
| { |
| "epoch": 2.6192609182530795, |
| "grad_norm": 6.532835960388184, |
| "learning_rate": 6.347517730496454e-06, |
| "loss": 1.4352, |
| "step": 23390 |
| }, |
| { |
| "epoch": 2.620380739081747, |
| "grad_norm": 4.241251468658447, |
| "learning_rate": 6.328854050018663e-06, |
| "loss": 1.3917, |
| "step": 23400 |
| }, |
| { |
| "epoch": 2.621500559910414, |
| "grad_norm": 14.074196815490723, |
| "learning_rate": 6.310190369540873e-06, |
| "loss": 1.6827, |
| "step": 23410 |
| }, |
| { |
| "epoch": 2.6226203807390815, |
| "grad_norm": 12.457422256469727, |
| "learning_rate": 6.291526689063083e-06, |
| "loss": 1.9047, |
| "step": 23420 |
| }, |
| { |
| "epoch": 2.623740201567749, |
| "grad_norm": 5.373779296875, |
| "learning_rate": 6.272863008585293e-06, |
| "loss": 1.9629, |
| "step": 23430 |
| }, |
| { |
| "epoch": 2.6248600223964167, |
| "grad_norm": 9.897968292236328, |
| "learning_rate": 6.254199328107503e-06, |
| "loss": 2.0395, |
| "step": 23440 |
| }, |
| { |
| "epoch": 2.625979843225084, |
| "grad_norm": 8.989608764648438, |
| "learning_rate": 6.235535647629713e-06, |
| "loss": 2.2198, |
| "step": 23450 |
| }, |
| { |
| "epoch": 2.6270996640537514, |
| "grad_norm": 4.570735931396484, |
| "learning_rate": 6.216871967151923e-06, |
| "loss": 1.5039, |
| "step": 23460 |
| }, |
| { |
| "epoch": 2.6282194848824187, |
| "grad_norm": 5.221905708312988, |
| "learning_rate": 6.198208286674132e-06, |
| "loss": 1.9416, |
| "step": 23470 |
| }, |
| { |
| "epoch": 2.629339305711086, |
| "grad_norm": 12.316040992736816, |
| "learning_rate": 6.179544606196342e-06, |
| "loss": 1.5188, |
| "step": 23480 |
| }, |
| { |
| "epoch": 2.630459126539754, |
| "grad_norm": 7.457785606384277, |
| "learning_rate": 6.160880925718552e-06, |
| "loss": 1.8277, |
| "step": 23490 |
| }, |
| { |
| "epoch": 2.6315789473684212, |
| "grad_norm": 2.610050916671753, |
| "learning_rate": 6.1422172452407615e-06, |
| "loss": 1.8489, |
| "step": 23500 |
| }, |
| { |
| "epoch": 2.6326987681970886, |
| "grad_norm": 18.59623908996582, |
| "learning_rate": 6.1235535647629715e-06, |
| "loss": 2.0608, |
| "step": 23510 |
| }, |
| { |
| "epoch": 2.633818589025756, |
| "grad_norm": 12.009276390075684, |
| "learning_rate": 6.1048898842851815e-06, |
| "loss": 1.4504, |
| "step": 23520 |
| }, |
| { |
| "epoch": 2.6349384098544233, |
| "grad_norm": 5.837096214294434, |
| "learning_rate": 6.086226203807391e-06, |
| "loss": 1.832, |
| "step": 23530 |
| }, |
| { |
| "epoch": 2.6360582306830906, |
| "grad_norm": 7.26812744140625, |
| "learning_rate": 6.067562523329601e-06, |
| "loss": 1.9916, |
| "step": 23540 |
| }, |
| { |
| "epoch": 2.637178051511758, |
| "grad_norm": 12.086437225341797, |
| "learning_rate": 6.048898842851811e-06, |
| "loss": 2.0041, |
| "step": 23550 |
| }, |
| { |
| "epoch": 2.6382978723404253, |
| "grad_norm": 13.357325553894043, |
| "learning_rate": 6.030235162374021e-06, |
| "loss": 1.9994, |
| "step": 23560 |
| }, |
| { |
| "epoch": 2.6394176931690927, |
| "grad_norm": 5.371031284332275, |
| "learning_rate": 6.011571481896231e-06, |
| "loss": 2.0408, |
| "step": 23570 |
| }, |
| { |
| "epoch": 2.6405375139977605, |
| "grad_norm": 5.823469638824463, |
| "learning_rate": 5.992907801418441e-06, |
| "loss": 1.8143, |
| "step": 23580 |
| }, |
| { |
| "epoch": 2.641657334826428, |
| "grad_norm": 14.469114303588867, |
| "learning_rate": 5.97424412094065e-06, |
| "loss": 1.7717, |
| "step": 23590 |
| }, |
| { |
| "epoch": 2.642777155655095, |
| "grad_norm": 5.268034934997559, |
| "learning_rate": 5.955580440462859e-06, |
| "loss": 1.4735, |
| "step": 23600 |
| }, |
| { |
| "epoch": 2.6438969764837625, |
| "grad_norm": 9.155729293823242, |
| "learning_rate": 5.936916759985069e-06, |
| "loss": 1.6729, |
| "step": 23610 |
| }, |
| { |
| "epoch": 2.64501679731243, |
| "grad_norm": 6.211864471435547, |
| "learning_rate": 5.918253079507279e-06, |
| "loss": 1.8493, |
| "step": 23620 |
| }, |
| { |
| "epoch": 2.6461366181410977, |
| "grad_norm": 6.068511962890625, |
| "learning_rate": 5.899589399029489e-06, |
| "loss": 1.5881, |
| "step": 23630 |
| }, |
| { |
| "epoch": 2.647256438969765, |
| "grad_norm": 9.574570655822754, |
| "learning_rate": 5.880925718551699e-06, |
| "loss": 2.2456, |
| "step": 23640 |
| }, |
| { |
| "epoch": 2.6483762597984324, |
| "grad_norm": 17.160005569458008, |
| "learning_rate": 5.862262038073908e-06, |
| "loss": 2.0387, |
| "step": 23650 |
| }, |
| { |
| "epoch": 2.6494960806270997, |
| "grad_norm": 9.527430534362793, |
| "learning_rate": 5.843598357596118e-06, |
| "loss": 1.9547, |
| "step": 23660 |
| }, |
| { |
| "epoch": 2.650615901455767, |
| "grad_norm": 4.618621349334717, |
| "learning_rate": 5.824934677118328e-06, |
| "loss": 2.0931, |
| "step": 23670 |
| }, |
| { |
| "epoch": 2.6517357222844344, |
| "grad_norm": 8.613204002380371, |
| "learning_rate": 5.806270996640538e-06, |
| "loss": 1.9036, |
| "step": 23680 |
| }, |
| { |
| "epoch": 2.652855543113102, |
| "grad_norm": 6.867215156555176, |
| "learning_rate": 5.787607316162748e-06, |
| "loss": 1.6778, |
| "step": 23690 |
| }, |
| { |
| "epoch": 2.653975363941769, |
| "grad_norm": 7.841908931732178, |
| "learning_rate": 5.768943635684957e-06, |
| "loss": 1.8852, |
| "step": 23700 |
| }, |
| { |
| "epoch": 2.6550951847704365, |
| "grad_norm": 16.442686080932617, |
| "learning_rate": 5.7502799552071665e-06, |
| "loss": 2.0865, |
| "step": 23710 |
| }, |
| { |
| "epoch": 2.6562150055991043, |
| "grad_norm": 6.369174957275391, |
| "learning_rate": 5.7316162747293765e-06, |
| "loss": 2.0048, |
| "step": 23720 |
| }, |
| { |
| "epoch": 2.6573348264277716, |
| "grad_norm": 9.311386108398438, |
| "learning_rate": 5.7129525942515865e-06, |
| "loss": 2.0094, |
| "step": 23730 |
| }, |
| { |
| "epoch": 2.658454647256439, |
| "grad_norm": 20.149980545043945, |
| "learning_rate": 5.6942889137737965e-06, |
| "loss": 2.0632, |
| "step": 23740 |
| }, |
| { |
| "epoch": 2.6595744680851063, |
| "grad_norm": 6.561110496520996, |
| "learning_rate": 5.6756252332960065e-06, |
| "loss": 1.6642, |
| "step": 23750 |
| }, |
| { |
| "epoch": 2.6606942889137737, |
| "grad_norm": 12.71684455871582, |
| "learning_rate": 5.656961552818216e-06, |
| "loss": 1.8664, |
| "step": 23760 |
| }, |
| { |
| "epoch": 2.6618141097424415, |
| "grad_norm": 11.952552795410156, |
| "learning_rate": 5.638297872340426e-06, |
| "loss": 2.1605, |
| "step": 23770 |
| }, |
| { |
| "epoch": 2.662933930571109, |
| "grad_norm": 6.831346035003662, |
| "learning_rate": 5.619634191862636e-06, |
| "loss": 2.0142, |
| "step": 23780 |
| }, |
| { |
| "epoch": 2.664053751399776, |
| "grad_norm": 6.235137939453125, |
| "learning_rate": 5.600970511384846e-06, |
| "loss": 1.8274, |
| "step": 23790 |
| }, |
| { |
| "epoch": 2.6651735722284435, |
| "grad_norm": 6.040173530578613, |
| "learning_rate": 5.582306830907056e-06, |
| "loss": 1.8131, |
| "step": 23800 |
| }, |
| { |
| "epoch": 2.666293393057111, |
| "grad_norm": 5.556991100311279, |
| "learning_rate": 5.563643150429265e-06, |
| "loss": 1.8855, |
| "step": 23810 |
| }, |
| { |
| "epoch": 2.6674132138857782, |
| "grad_norm": 12.537089347839355, |
| "learning_rate": 5.544979469951474e-06, |
| "loss": 2.0187, |
| "step": 23820 |
| }, |
| { |
| "epoch": 2.6685330347144456, |
| "grad_norm": 6.158973693847656, |
| "learning_rate": 5.526315789473684e-06, |
| "loss": 1.9405, |
| "step": 23830 |
| }, |
| { |
| "epoch": 2.669652855543113, |
| "grad_norm": 7.678884029388428, |
| "learning_rate": 5.507652108995894e-06, |
| "loss": 1.6122, |
| "step": 23840 |
| }, |
| { |
| "epoch": 2.6707726763717803, |
| "grad_norm": 7.427279472351074, |
| "learning_rate": 5.488988428518104e-06, |
| "loss": 2.3574, |
| "step": 23850 |
| }, |
| { |
| "epoch": 2.671892497200448, |
| "grad_norm": 7.1111369132995605, |
| "learning_rate": 5.470324748040314e-06, |
| "loss": 2.1574, |
| "step": 23860 |
| }, |
| { |
| "epoch": 2.6730123180291154, |
| "grad_norm": 6.275892734527588, |
| "learning_rate": 5.451661067562524e-06, |
| "loss": 2.0668, |
| "step": 23870 |
| }, |
| { |
| "epoch": 2.674132138857783, |
| "grad_norm": 6.145971775054932, |
| "learning_rate": 5.432997387084733e-06, |
| "loss": 1.7284, |
| "step": 23880 |
| }, |
| { |
| "epoch": 2.67525195968645, |
| "grad_norm": 15.736746788024902, |
| "learning_rate": 5.414333706606943e-06, |
| "loss": 1.9066, |
| "step": 23890 |
| }, |
| { |
| "epoch": 2.6763717805151175, |
| "grad_norm": 15.421698570251465, |
| "learning_rate": 5.395670026129153e-06, |
| "loss": 1.8727, |
| "step": 23900 |
| }, |
| { |
| "epoch": 2.677491601343785, |
| "grad_norm": 8.590100288391113, |
| "learning_rate": 5.377006345651362e-06, |
| "loss": 2.0051, |
| "step": 23910 |
| }, |
| { |
| "epoch": 2.6786114221724526, |
| "grad_norm": 5.847489356994629, |
| "learning_rate": 5.358342665173572e-06, |
| "loss": 1.8494, |
| "step": 23920 |
| }, |
| { |
| "epoch": 2.67973124300112, |
| "grad_norm": 7.782309055328369, |
| "learning_rate": 5.339678984695782e-06, |
| "loss": 1.5807, |
| "step": 23930 |
| }, |
| { |
| "epoch": 2.6808510638297873, |
| "grad_norm": 12.278881072998047, |
| "learning_rate": 5.3210153042179915e-06, |
| "loss": 1.7016, |
| "step": 23940 |
| }, |
| { |
| "epoch": 2.6819708846584547, |
| "grad_norm": 8.289608001708984, |
| "learning_rate": 5.3023516237402016e-06, |
| "loss": 1.9277, |
| "step": 23950 |
| }, |
| { |
| "epoch": 2.683090705487122, |
| "grad_norm": 10.736886024475098, |
| "learning_rate": 5.2836879432624116e-06, |
| "loss": 1.6973, |
| "step": 23960 |
| }, |
| { |
| "epoch": 2.6842105263157894, |
| "grad_norm": 10.661380767822266, |
| "learning_rate": 5.2650242627846216e-06, |
| "loss": 1.8526, |
| "step": 23970 |
| }, |
| { |
| "epoch": 2.6853303471444567, |
| "grad_norm": 13.474076271057129, |
| "learning_rate": 5.2463605823068316e-06, |
| "loss": 1.901, |
| "step": 23980 |
| }, |
| { |
| "epoch": 2.686450167973124, |
| "grad_norm": 18.428157806396484, |
| "learning_rate": 5.227696901829041e-06, |
| "loss": 1.6495, |
| "step": 23990 |
| }, |
| { |
| "epoch": 2.687569988801792, |
| "grad_norm": 9.425440788269043, |
| "learning_rate": 5.209033221351251e-06, |
| "loss": 1.563, |
| "step": 24000 |
| }, |
| { |
| "epoch": 2.6886898096304592, |
| "grad_norm": 6.908605575561523, |
| "learning_rate": 5.190369540873461e-06, |
| "loss": 1.6201, |
| "step": 24010 |
| }, |
| { |
| "epoch": 2.6898096304591266, |
| "grad_norm": 14.704828262329102, |
| "learning_rate": 5.17170586039567e-06, |
| "loss": 1.9403, |
| "step": 24020 |
| }, |
| { |
| "epoch": 2.690929451287794, |
| "grad_norm": 6.244283676147461, |
| "learning_rate": 5.15304217991788e-06, |
| "loss": 1.8803, |
| "step": 24030 |
| }, |
| { |
| "epoch": 2.6920492721164613, |
| "grad_norm": 5.735403537750244, |
| "learning_rate": 5.13437849944009e-06, |
| "loss": 1.7747, |
| "step": 24040 |
| }, |
| { |
| "epoch": 2.6931690929451286, |
| "grad_norm": 10.276829719543457, |
| "learning_rate": 5.115714818962299e-06, |
| "loss": 1.4198, |
| "step": 24050 |
| }, |
| { |
| "epoch": 2.6942889137737964, |
| "grad_norm": 12.467009544372559, |
| "learning_rate": 5.097051138484509e-06, |
| "loss": 1.7507, |
| "step": 24060 |
| }, |
| { |
| "epoch": 2.6954087346024638, |
| "grad_norm": 5.5821685791015625, |
| "learning_rate": 5.078387458006719e-06, |
| "loss": 1.9415, |
| "step": 24070 |
| }, |
| { |
| "epoch": 2.696528555431131, |
| "grad_norm": 5.4152092933654785, |
| "learning_rate": 5.059723777528929e-06, |
| "loss": 1.8896, |
| "step": 24080 |
| }, |
| { |
| "epoch": 2.6976483762597985, |
| "grad_norm": 4.517449855804443, |
| "learning_rate": 5.041060097051139e-06, |
| "loss": 1.4663, |
| "step": 24090 |
| }, |
| { |
| "epoch": 2.698768197088466, |
| "grad_norm": 4.125208377838135, |
| "learning_rate": 5.022396416573349e-06, |
| "loss": 1.7891, |
| "step": 24100 |
| }, |
| { |
| "epoch": 2.699888017917133, |
| "grad_norm": 9.702006340026855, |
| "learning_rate": 5.003732736095558e-06, |
| "loss": 1.8673, |
| "step": 24110 |
| }, |
| { |
| "epoch": 2.7010078387458005, |
| "grad_norm": 6.5520524978637695, |
| "learning_rate": 4.985069055617768e-06, |
| "loss": 1.216, |
| "step": 24120 |
| }, |
| { |
| "epoch": 2.702127659574468, |
| "grad_norm": 6.257602214813232, |
| "learning_rate": 4.966405375139977e-06, |
| "loss": 1.8753, |
| "step": 24130 |
| }, |
| { |
| "epoch": 2.7032474804031352, |
| "grad_norm": 14.424186706542969, |
| "learning_rate": 4.947741694662187e-06, |
| "loss": 1.7425, |
| "step": 24140 |
| }, |
| { |
| "epoch": 2.704367301231803, |
| "grad_norm": 8.422646522521973, |
| "learning_rate": 4.929078014184397e-06, |
| "loss": 1.8206, |
| "step": 24150 |
| }, |
| { |
| "epoch": 2.7054871220604704, |
| "grad_norm": 12.657279968261719, |
| "learning_rate": 4.910414333706607e-06, |
| "loss": 1.8706, |
| "step": 24160 |
| }, |
| { |
| "epoch": 2.7066069428891377, |
| "grad_norm": 11.817652702331543, |
| "learning_rate": 4.8917506532288166e-06, |
| "loss": 1.5599, |
| "step": 24170 |
| }, |
| { |
| "epoch": 2.707726763717805, |
| "grad_norm": 10.890949249267578, |
| "learning_rate": 4.873086972751027e-06, |
| "loss": 1.8615, |
| "step": 24180 |
| }, |
| { |
| "epoch": 2.7088465845464724, |
| "grad_norm": 23.385997772216797, |
| "learning_rate": 4.854423292273237e-06, |
| "loss": 2.2412, |
| "step": 24190 |
| }, |
| { |
| "epoch": 2.70996640537514, |
| "grad_norm": 5.702988147735596, |
| "learning_rate": 4.835759611795447e-06, |
| "loss": 2.1498, |
| "step": 24200 |
| }, |
| { |
| "epoch": 2.7110862262038076, |
| "grad_norm": 14.075940132141113, |
| "learning_rate": 4.817095931317657e-06, |
| "loss": 2.0775, |
| "step": 24210 |
| }, |
| { |
| "epoch": 2.712206047032475, |
| "grad_norm": 7.462947845458984, |
| "learning_rate": 4.798432250839866e-06, |
| "loss": 1.7159, |
| "step": 24220 |
| }, |
| { |
| "epoch": 2.7133258678611423, |
| "grad_norm": 7.062658786773682, |
| "learning_rate": 4.779768570362076e-06, |
| "loss": 2.0002, |
| "step": 24230 |
| }, |
| { |
| "epoch": 2.7144456886898096, |
| "grad_norm": 4.55973482131958, |
| "learning_rate": 4.761104889884285e-06, |
| "loss": 1.6569, |
| "step": 24240 |
| }, |
| { |
| "epoch": 2.715565509518477, |
| "grad_norm": 5.43080997467041, |
| "learning_rate": 4.742441209406495e-06, |
| "loss": 1.8489, |
| "step": 24250 |
| }, |
| { |
| "epoch": 2.7166853303471443, |
| "grad_norm": 2.3447320461273193, |
| "learning_rate": 4.723777528928705e-06, |
| "loss": 1.8083, |
| "step": 24260 |
| }, |
| { |
| "epoch": 2.7178051511758117, |
| "grad_norm": 5.452301979064941, |
| "learning_rate": 4.705113848450915e-06, |
| "loss": 1.2504, |
| "step": 24270 |
| }, |
| { |
| "epoch": 2.718924972004479, |
| "grad_norm": 5.787363052368164, |
| "learning_rate": 4.686450167973125e-06, |
| "loss": 1.8861, |
| "step": 24280 |
| }, |
| { |
| "epoch": 2.720044792833147, |
| "grad_norm": 10.703194618225098, |
| "learning_rate": 4.667786487495334e-06, |
| "loss": 1.8052, |
| "step": 24290 |
| }, |
| { |
| "epoch": 2.721164613661814, |
| "grad_norm": 4.210628032684326, |
| "learning_rate": 4.649122807017544e-06, |
| "loss": 1.3951, |
| "step": 24300 |
| }, |
| { |
| "epoch": 2.7222844344904815, |
| "grad_norm": 10.037901878356934, |
| "learning_rate": 4.630459126539754e-06, |
| "loss": 1.4718, |
| "step": 24310 |
| }, |
| { |
| "epoch": 2.723404255319149, |
| "grad_norm": 8.047080993652344, |
| "learning_rate": 4.611795446061964e-06, |
| "loss": 1.9238, |
| "step": 24320 |
| }, |
| { |
| "epoch": 2.724524076147816, |
| "grad_norm": 7.7797980308532715, |
| "learning_rate": 4.593131765584173e-06, |
| "loss": 1.3415, |
| "step": 24330 |
| }, |
| { |
| "epoch": 2.725643896976484, |
| "grad_norm": 10.15149211883545, |
| "learning_rate": 4.574468085106383e-06, |
| "loss": 1.6493, |
| "step": 24340 |
| }, |
| { |
| "epoch": 2.7267637178051514, |
| "grad_norm": 9.764996528625488, |
| "learning_rate": 4.555804404628592e-06, |
| "loss": 1.9219, |
| "step": 24350 |
| }, |
| { |
| "epoch": 2.7278835386338187, |
| "grad_norm": 5.193393230438232, |
| "learning_rate": 4.5371407241508024e-06, |
| "loss": 1.9921, |
| "step": 24360 |
| }, |
| { |
| "epoch": 2.729003359462486, |
| "grad_norm": 6.264823913574219, |
| "learning_rate": 4.5184770436730124e-06, |
| "loss": 1.8523, |
| "step": 24370 |
| }, |
| { |
| "epoch": 2.7301231802911534, |
| "grad_norm": 7.00139045715332, |
| "learning_rate": 4.4998133631952224e-06, |
| "loss": 1.6758, |
| "step": 24380 |
| }, |
| { |
| "epoch": 2.7312430011198208, |
| "grad_norm": 12.917388916015625, |
| "learning_rate": 4.4811496827174324e-06, |
| "loss": 1.4985, |
| "step": 24390 |
| }, |
| { |
| "epoch": 2.732362821948488, |
| "grad_norm": 8.675110816955566, |
| "learning_rate": 4.462486002239642e-06, |
| "loss": 1.711, |
| "step": 24400 |
| }, |
| { |
| "epoch": 2.7334826427771555, |
| "grad_norm": 19.992246627807617, |
| "learning_rate": 4.443822321761852e-06, |
| "loss": 1.9475, |
| "step": 24410 |
| }, |
| { |
| "epoch": 2.734602463605823, |
| "grad_norm": 11.201859474182129, |
| "learning_rate": 4.425158641284062e-06, |
| "loss": 1.5635, |
| "step": 24420 |
| }, |
| { |
| "epoch": 2.7357222844344906, |
| "grad_norm": 16.30078887939453, |
| "learning_rate": 4.406494960806272e-06, |
| "loss": 1.5639, |
| "step": 24430 |
| }, |
| { |
| "epoch": 2.736842105263158, |
| "grad_norm": 6.638637542724609, |
| "learning_rate": 4.387831280328481e-06, |
| "loss": 1.5362, |
| "step": 24440 |
| }, |
| { |
| "epoch": 2.7379619260918253, |
| "grad_norm": 4.872713088989258, |
| "learning_rate": 4.369167599850691e-06, |
| "loss": 1.8645, |
| "step": 24450 |
| }, |
| { |
| "epoch": 2.7390817469204927, |
| "grad_norm": 7.324185848236084, |
| "learning_rate": 4.3505039193729e-06, |
| "loss": 1.8918, |
| "step": 24460 |
| }, |
| { |
| "epoch": 2.74020156774916, |
| "grad_norm": 4.039488792419434, |
| "learning_rate": 4.33184023889511e-06, |
| "loss": 1.6727, |
| "step": 24470 |
| }, |
| { |
| "epoch": 2.741321388577828, |
| "grad_norm": 11.2632417678833, |
| "learning_rate": 4.31317655841732e-06, |
| "loss": 1.7408, |
| "step": 24480 |
| }, |
| { |
| "epoch": 2.742441209406495, |
| "grad_norm": 5.795408725738525, |
| "learning_rate": 4.29451287793953e-06, |
| "loss": 1.7775, |
| "step": 24490 |
| }, |
| { |
| "epoch": 2.7435610302351625, |
| "grad_norm": 11.392952919006348, |
| "learning_rate": 4.27584919746174e-06, |
| "loss": 2.0474, |
| "step": 24500 |
| }, |
| { |
| "epoch": 2.74468085106383, |
| "grad_norm": 13.791424751281738, |
| "learning_rate": 4.25718551698395e-06, |
| "loss": 1.8472, |
| "step": 24510 |
| }, |
| { |
| "epoch": 2.745800671892497, |
| "grad_norm": 5.293121337890625, |
| "learning_rate": 4.238521836506159e-06, |
| "loss": 1.7227, |
| "step": 24520 |
| }, |
| { |
| "epoch": 2.7469204927211646, |
| "grad_norm": 12.450265884399414, |
| "learning_rate": 4.219858156028369e-06, |
| "loss": 1.8579, |
| "step": 24530 |
| }, |
| { |
| "epoch": 2.748040313549832, |
| "grad_norm": 15.723026275634766, |
| "learning_rate": 4.201194475550579e-06, |
| "loss": 1.8304, |
| "step": 24540 |
| }, |
| { |
| "epoch": 2.7491601343784993, |
| "grad_norm": 9.901751518249512, |
| "learning_rate": 4.182530795072788e-06, |
| "loss": 1.7486, |
| "step": 24550 |
| }, |
| { |
| "epoch": 2.7502799552071666, |
| "grad_norm": 4.100079536437988, |
| "learning_rate": 4.163867114594998e-06, |
| "loss": 1.9, |
| "step": 24560 |
| }, |
| { |
| "epoch": 2.7513997760358344, |
| "grad_norm": 12.591753005981445, |
| "learning_rate": 4.145203434117208e-06, |
| "loss": 1.8743, |
| "step": 24570 |
| }, |
| { |
| "epoch": 2.7525195968645018, |
| "grad_norm": 15.76544189453125, |
| "learning_rate": 4.1265397536394174e-06, |
| "loss": 1.8585, |
| "step": 24580 |
| }, |
| { |
| "epoch": 2.753639417693169, |
| "grad_norm": 16.583786010742188, |
| "learning_rate": 4.1078760731616274e-06, |
| "loss": 2.0335, |
| "step": 24590 |
| }, |
| { |
| "epoch": 2.7547592385218365, |
| "grad_norm": 4.9236741065979, |
| "learning_rate": 4.0892123926838375e-06, |
| "loss": 1.7419, |
| "step": 24600 |
| }, |
| { |
| "epoch": 2.755879059350504, |
| "grad_norm": 11.055110931396484, |
| "learning_rate": 4.0705487122060475e-06, |
| "loss": 1.8933, |
| "step": 24610 |
| }, |
| { |
| "epoch": 2.7569988801791716, |
| "grad_norm": 5.0752153396606445, |
| "learning_rate": 4.0518850317282575e-06, |
| "loss": 2.1183, |
| "step": 24620 |
| }, |
| { |
| "epoch": 2.758118701007839, |
| "grad_norm": 16.526071548461914, |
| "learning_rate": 4.0332213512504675e-06, |
| "loss": 2.0595, |
| "step": 24630 |
| }, |
| { |
| "epoch": 2.7592385218365063, |
| "grad_norm": 9.256998062133789, |
| "learning_rate": 4.014557670772677e-06, |
| "loss": 2.0778, |
| "step": 24640 |
| }, |
| { |
| "epoch": 2.7603583426651737, |
| "grad_norm": 5.081698894500732, |
| "learning_rate": 3.995893990294887e-06, |
| "loss": 2.3567, |
| "step": 24650 |
| }, |
| { |
| "epoch": 2.761478163493841, |
| "grad_norm": 6.34022855758667, |
| "learning_rate": 3.977230309817096e-06, |
| "loss": 1.7605, |
| "step": 24660 |
| }, |
| { |
| "epoch": 2.7625979843225084, |
| "grad_norm": 13.629969596862793, |
| "learning_rate": 3.958566629339306e-06, |
| "loss": 1.7667, |
| "step": 24670 |
| }, |
| { |
| "epoch": 2.7637178051511757, |
| "grad_norm": 2.7139463424682617, |
| "learning_rate": 3.939902948861516e-06, |
| "loss": 1.9269, |
| "step": 24680 |
| }, |
| { |
| "epoch": 2.764837625979843, |
| "grad_norm": 8.121241569519043, |
| "learning_rate": 3.921239268383725e-06, |
| "loss": 2.0188, |
| "step": 24690 |
| }, |
| { |
| "epoch": 2.7659574468085104, |
| "grad_norm": 8.049278259277344, |
| "learning_rate": 3.902575587905935e-06, |
| "loss": 1.6694, |
| "step": 24700 |
| }, |
| { |
| "epoch": 2.767077267637178, |
| "grad_norm": 11.107040405273438, |
| "learning_rate": 3.883911907428145e-06, |
| "loss": 2.3154, |
| "step": 24710 |
| }, |
| { |
| "epoch": 2.7681970884658456, |
| "grad_norm": 6.505083084106445, |
| "learning_rate": 3.865248226950355e-06, |
| "loss": 1.8864, |
| "step": 24720 |
| }, |
| { |
| "epoch": 2.769316909294513, |
| "grad_norm": 10.971221923828125, |
| "learning_rate": 3.846584546472565e-06, |
| "loss": 1.9134, |
| "step": 24730 |
| }, |
| { |
| "epoch": 2.7704367301231803, |
| "grad_norm": 4.81821870803833, |
| "learning_rate": 3.827920865994775e-06, |
| "loss": 1.8286, |
| "step": 24740 |
| }, |
| { |
| "epoch": 2.7715565509518476, |
| "grad_norm": 11.385892868041992, |
| "learning_rate": 3.8092571855169837e-06, |
| "loss": 2.0084, |
| "step": 24750 |
| }, |
| { |
| "epoch": 2.772676371780515, |
| "grad_norm": 5.368199825286865, |
| "learning_rate": 3.7905935050391937e-06, |
| "loss": 1.6912, |
| "step": 24760 |
| }, |
| { |
| "epoch": 2.7737961926091828, |
| "grad_norm": 8.81826400756836, |
| "learning_rate": 3.7719298245614037e-06, |
| "loss": 1.9957, |
| "step": 24770 |
| }, |
| { |
| "epoch": 2.77491601343785, |
| "grad_norm": 11.901360511779785, |
| "learning_rate": 3.7532661440836137e-06, |
| "loss": 2.0144, |
| "step": 24780 |
| }, |
| { |
| "epoch": 2.7760358342665175, |
| "grad_norm": 4.292434215545654, |
| "learning_rate": 3.7346024636058233e-06, |
| "loss": 1.7567, |
| "step": 24790 |
| }, |
| { |
| "epoch": 2.777155655095185, |
| "grad_norm": 11.836398124694824, |
| "learning_rate": 3.7159387831280333e-06, |
| "loss": 2.2476, |
| "step": 24800 |
| }, |
| { |
| "epoch": 2.778275475923852, |
| "grad_norm": 5.869718551635742, |
| "learning_rate": 3.6972751026502425e-06, |
| "loss": 1.7245, |
| "step": 24810 |
| }, |
| { |
| "epoch": 2.7793952967525195, |
| "grad_norm": 17.10307502746582, |
| "learning_rate": 3.6786114221724525e-06, |
| "loss": 1.8758, |
| "step": 24820 |
| }, |
| { |
| "epoch": 2.780515117581187, |
| "grad_norm": 12.18902587890625, |
| "learning_rate": 3.6599477416946625e-06, |
| "loss": 1.7381, |
| "step": 24830 |
| }, |
| { |
| "epoch": 2.781634938409854, |
| "grad_norm": 6.835455894470215, |
| "learning_rate": 3.641284061216872e-06, |
| "loss": 1.7884, |
| "step": 24840 |
| }, |
| { |
| "epoch": 2.782754759238522, |
| "grad_norm": 4.9272541999816895, |
| "learning_rate": 3.622620380739082e-06, |
| "loss": 1.7644, |
| "step": 24850 |
| }, |
| { |
| "epoch": 2.7838745800671894, |
| "grad_norm": 8.834504127502441, |
| "learning_rate": 3.603956700261292e-06, |
| "loss": 1.7862, |
| "step": 24860 |
| }, |
| { |
| "epoch": 2.7849944008958567, |
| "grad_norm": 4.071360111236572, |
| "learning_rate": 3.5852930197835012e-06, |
| "loss": 1.8609, |
| "step": 24870 |
| }, |
| { |
| "epoch": 2.786114221724524, |
| "grad_norm": 10.676229476928711, |
| "learning_rate": 3.5666293393057112e-06, |
| "loss": 2.1334, |
| "step": 24880 |
| }, |
| { |
| "epoch": 2.7872340425531914, |
| "grad_norm": 15.198076248168945, |
| "learning_rate": 3.547965658827921e-06, |
| "loss": 2.2799, |
| "step": 24890 |
| }, |
| { |
| "epoch": 2.7883538633818588, |
| "grad_norm": 6.402156829833984, |
| "learning_rate": 3.529301978350131e-06, |
| "loss": 2.0125, |
| "step": 24900 |
| }, |
| { |
| "epoch": 2.7894736842105265, |
| "grad_norm": 6.411627292633057, |
| "learning_rate": 3.510638297872341e-06, |
| "loss": 1.7944, |
| "step": 24910 |
| }, |
| { |
| "epoch": 2.790593505039194, |
| "grad_norm": 8.75882625579834, |
| "learning_rate": 3.491974617394551e-06, |
| "loss": 1.9815, |
| "step": 24920 |
| }, |
| { |
| "epoch": 2.7917133258678613, |
| "grad_norm": 7.310870170593262, |
| "learning_rate": 3.47331093691676e-06, |
| "loss": 1.8766, |
| "step": 24930 |
| }, |
| { |
| "epoch": 2.7928331466965286, |
| "grad_norm": 10.259556770324707, |
| "learning_rate": 3.45464725643897e-06, |
| "loss": 1.7763, |
| "step": 24940 |
| }, |
| { |
| "epoch": 2.793952967525196, |
| "grad_norm": 16.690889358520508, |
| "learning_rate": 3.4359835759611796e-06, |
| "loss": 1.8072, |
| "step": 24950 |
| }, |
| { |
| "epoch": 2.7950727883538633, |
| "grad_norm": 3.8095781803131104, |
| "learning_rate": 3.4173198954833896e-06, |
| "loss": 1.7118, |
| "step": 24960 |
| }, |
| { |
| "epoch": 2.7961926091825307, |
| "grad_norm": 14.5068941116333, |
| "learning_rate": 3.3986562150055996e-06, |
| "loss": 1.7467, |
| "step": 24970 |
| }, |
| { |
| "epoch": 2.797312430011198, |
| "grad_norm": 6.723850250244141, |
| "learning_rate": 3.3799925345278087e-06, |
| "loss": 1.9952, |
| "step": 24980 |
| }, |
| { |
| "epoch": 2.7984322508398654, |
| "grad_norm": 11.487224578857422, |
| "learning_rate": 3.3613288540500187e-06, |
| "loss": 2.0564, |
| "step": 24990 |
| }, |
| { |
| "epoch": 2.799552071668533, |
| "grad_norm": 8.214585304260254, |
| "learning_rate": 3.3426651735722283e-06, |
| "loss": 1.7448, |
| "step": 25000 |
| }, |
| { |
| "epoch": 2.8006718924972005, |
| "grad_norm": 17.024662017822266, |
| "learning_rate": 3.3240014930944383e-06, |
| "loss": 1.5265, |
| "step": 25010 |
| }, |
| { |
| "epoch": 2.801791713325868, |
| "grad_norm": 8.459342002868652, |
| "learning_rate": 3.3053378126166483e-06, |
| "loss": 1.9152, |
| "step": 25020 |
| }, |
| { |
| "epoch": 2.802911534154535, |
| "grad_norm": 8.775456428527832, |
| "learning_rate": 3.2866741321388583e-06, |
| "loss": 2.038, |
| "step": 25030 |
| }, |
| { |
| "epoch": 2.8040313549832026, |
| "grad_norm": 5.435427665710449, |
| "learning_rate": 3.2680104516610675e-06, |
| "loss": 1.9425, |
| "step": 25040 |
| }, |
| { |
| "epoch": 2.8051511758118703, |
| "grad_norm": 11.797080993652344, |
| "learning_rate": 3.2493467711832775e-06, |
| "loss": 2.1394, |
| "step": 25050 |
| }, |
| { |
| "epoch": 2.8062709966405377, |
| "grad_norm": 15.878313064575195, |
| "learning_rate": 3.230683090705487e-06, |
| "loss": 2.2649, |
| "step": 25060 |
| }, |
| { |
| "epoch": 2.807390817469205, |
| "grad_norm": 11.612027168273926, |
| "learning_rate": 3.212019410227697e-06, |
| "loss": 1.4949, |
| "step": 25070 |
| }, |
| { |
| "epoch": 2.8085106382978724, |
| "grad_norm": 14.034370422363281, |
| "learning_rate": 3.193355729749907e-06, |
| "loss": 1.2324, |
| "step": 25080 |
| }, |
| { |
| "epoch": 2.8096304591265397, |
| "grad_norm": 19.293161392211914, |
| "learning_rate": 3.174692049272117e-06, |
| "loss": 2.2584, |
| "step": 25090 |
| }, |
| { |
| "epoch": 2.810750279955207, |
| "grad_norm": 6.628214359283447, |
| "learning_rate": 3.1560283687943263e-06, |
| "loss": 1.5751, |
| "step": 25100 |
| }, |
| { |
| "epoch": 2.8118701007838744, |
| "grad_norm": 16.124217987060547, |
| "learning_rate": 3.137364688316536e-06, |
| "loss": 2.0243, |
| "step": 25110 |
| }, |
| { |
| "epoch": 2.812989921612542, |
| "grad_norm": 6.106673717498779, |
| "learning_rate": 3.118701007838746e-06, |
| "loss": 1.9384, |
| "step": 25120 |
| }, |
| { |
| "epoch": 2.814109742441209, |
| "grad_norm": 8.48365306854248, |
| "learning_rate": 3.100037327360956e-06, |
| "loss": 1.7521, |
| "step": 25130 |
| }, |
| { |
| "epoch": 2.815229563269877, |
| "grad_norm": 6.083116054534912, |
| "learning_rate": 3.0813736468831654e-06, |
| "loss": 1.5675, |
| "step": 25140 |
| }, |
| { |
| "epoch": 2.8163493840985443, |
| "grad_norm": 16.414230346679688, |
| "learning_rate": 3.0627099664053754e-06, |
| "loss": 1.8823, |
| "step": 25150 |
| }, |
| { |
| "epoch": 2.8174692049272116, |
| "grad_norm": 9.585153579711914, |
| "learning_rate": 3.044046285927585e-06, |
| "loss": 2.0315, |
| "step": 25160 |
| }, |
| { |
| "epoch": 2.818589025755879, |
| "grad_norm": 10.281465530395508, |
| "learning_rate": 3.0253826054497946e-06, |
| "loss": 1.8152, |
| "step": 25170 |
| }, |
| { |
| "epoch": 2.8197088465845463, |
| "grad_norm": 4.486020565032959, |
| "learning_rate": 3.0067189249720046e-06, |
| "loss": 1.5388, |
| "step": 25180 |
| }, |
| { |
| "epoch": 2.820828667413214, |
| "grad_norm": 9.82872200012207, |
| "learning_rate": 2.9880552444942146e-06, |
| "loss": 2.1895, |
| "step": 25190 |
| }, |
| { |
| "epoch": 2.8219484882418815, |
| "grad_norm": 13.074673652648926, |
| "learning_rate": 2.969391564016424e-06, |
| "loss": 2.0497, |
| "step": 25200 |
| }, |
| { |
| "epoch": 2.823068309070549, |
| "grad_norm": 14.259294509887695, |
| "learning_rate": 2.9507278835386338e-06, |
| "loss": 2.1442, |
| "step": 25210 |
| }, |
| { |
| "epoch": 2.824188129899216, |
| "grad_norm": 4.93138313293457, |
| "learning_rate": 2.9320642030608438e-06, |
| "loss": 1.7481, |
| "step": 25220 |
| }, |
| { |
| "epoch": 2.8253079507278835, |
| "grad_norm": 6.078362941741943, |
| "learning_rate": 2.9134005225830533e-06, |
| "loss": 1.7641, |
| "step": 25230 |
| }, |
| { |
| "epoch": 2.826427771556551, |
| "grad_norm": 12.861568450927734, |
| "learning_rate": 2.8947368421052634e-06, |
| "loss": 1.8018, |
| "step": 25240 |
| }, |
| { |
| "epoch": 2.8275475923852182, |
| "grad_norm": 8.707850456237793, |
| "learning_rate": 2.8760731616274734e-06, |
| "loss": 1.8521, |
| "step": 25250 |
| }, |
| { |
| "epoch": 2.8286674132138856, |
| "grad_norm": 12.562678337097168, |
| "learning_rate": 2.8574094811496825e-06, |
| "loss": 1.7405, |
| "step": 25260 |
| }, |
| { |
| "epoch": 2.829787234042553, |
| "grad_norm": 18.912614822387695, |
| "learning_rate": 2.8387458006718925e-06, |
| "loss": 1.7982, |
| "step": 25270 |
| }, |
| { |
| "epoch": 2.8309070548712207, |
| "grad_norm": 6.430877208709717, |
| "learning_rate": 2.8200821201941025e-06, |
| "loss": 1.924, |
| "step": 25280 |
| }, |
| { |
| "epoch": 2.832026875699888, |
| "grad_norm": 5.357717990875244, |
| "learning_rate": 2.801418439716312e-06, |
| "loss": 1.9311, |
| "step": 25290 |
| }, |
| { |
| "epoch": 2.8331466965285554, |
| "grad_norm": 13.666546821594238, |
| "learning_rate": 2.782754759238522e-06, |
| "loss": 1.6254, |
| "step": 25300 |
| }, |
| { |
| "epoch": 2.834266517357223, |
| "grad_norm": 3.9082486629486084, |
| "learning_rate": 2.7640910787607317e-06, |
| "loss": 1.9293, |
| "step": 25310 |
| }, |
| { |
| "epoch": 2.83538633818589, |
| "grad_norm": 5.5125732421875, |
| "learning_rate": 2.7454273982829413e-06, |
| "loss": 2.0829, |
| "step": 25320 |
| }, |
| { |
| "epoch": 2.836506159014558, |
| "grad_norm": 5.76453971862793, |
| "learning_rate": 2.7267637178051513e-06, |
| "loss": 1.9197, |
| "step": 25330 |
| }, |
| { |
| "epoch": 2.8376259798432253, |
| "grad_norm": 7.775246620178223, |
| "learning_rate": 2.7081000373273613e-06, |
| "loss": 1.8856, |
| "step": 25340 |
| }, |
| { |
| "epoch": 2.8387458006718926, |
| "grad_norm": 8.506511688232422, |
| "learning_rate": 2.689436356849571e-06, |
| "loss": 1.5753, |
| "step": 25350 |
| }, |
| { |
| "epoch": 2.83986562150056, |
| "grad_norm": 15.149114608764648, |
| "learning_rate": 2.670772676371781e-06, |
| "loss": 1.8317, |
| "step": 25360 |
| }, |
| { |
| "epoch": 2.8409854423292273, |
| "grad_norm": 11.696993827819824, |
| "learning_rate": 2.6521089958939904e-06, |
| "loss": 1.6111, |
| "step": 25370 |
| }, |
| { |
| "epoch": 2.8421052631578947, |
| "grad_norm": 4.9371209144592285, |
| "learning_rate": 2.6334453154162e-06, |
| "loss": 1.7954, |
| "step": 25380 |
| }, |
| { |
| "epoch": 2.843225083986562, |
| "grad_norm": 18.963680267333984, |
| "learning_rate": 2.61478163493841e-06, |
| "loss": 2.4235, |
| "step": 25390 |
| }, |
| { |
| "epoch": 2.8443449048152294, |
| "grad_norm": 11.938720703125, |
| "learning_rate": 2.59611795446062e-06, |
| "loss": 1.667, |
| "step": 25400 |
| }, |
| { |
| "epoch": 2.8454647256438967, |
| "grad_norm": 8.081610679626465, |
| "learning_rate": 2.5774542739828296e-06, |
| "loss": 1.9183, |
| "step": 25410 |
| }, |
| { |
| "epoch": 2.8465845464725645, |
| "grad_norm": 6.90621280670166, |
| "learning_rate": 2.558790593505039e-06, |
| "loss": 1.9125, |
| "step": 25420 |
| }, |
| { |
| "epoch": 2.847704367301232, |
| "grad_norm": 11.308591842651367, |
| "learning_rate": 2.540126913027249e-06, |
| "loss": 2.2834, |
| "step": 25430 |
| }, |
| { |
| "epoch": 2.8488241881298992, |
| "grad_norm": 12.649473190307617, |
| "learning_rate": 2.5214632325494588e-06, |
| "loss": 2.0221, |
| "step": 25440 |
| }, |
| { |
| "epoch": 2.8499440089585666, |
| "grad_norm": 2.6453304290771484, |
| "learning_rate": 2.502799552071669e-06, |
| "loss": 1.7468, |
| "step": 25450 |
| }, |
| { |
| "epoch": 2.851063829787234, |
| "grad_norm": 10.066814422607422, |
| "learning_rate": 2.484135871593879e-06, |
| "loss": 1.706, |
| "step": 25460 |
| }, |
| { |
| "epoch": 2.8521836506159017, |
| "grad_norm": 11.059213638305664, |
| "learning_rate": 2.465472191116088e-06, |
| "loss": 2.4896, |
| "step": 25470 |
| }, |
| { |
| "epoch": 2.853303471444569, |
| "grad_norm": 5.4998016357421875, |
| "learning_rate": 2.446808510638298e-06, |
| "loss": 1.8076, |
| "step": 25480 |
| }, |
| { |
| "epoch": 2.8544232922732364, |
| "grad_norm": 5.691712379455566, |
| "learning_rate": 2.4281448301605075e-06, |
| "loss": 1.9473, |
| "step": 25490 |
| }, |
| { |
| "epoch": 2.855543113101904, |
| "grad_norm": 15.464776992797852, |
| "learning_rate": 2.4094811496827175e-06, |
| "loss": 2.2398, |
| "step": 25500 |
| }, |
| { |
| "epoch": 2.856662933930571, |
| "grad_norm": 3.6245803833007812, |
| "learning_rate": 2.3908174692049275e-06, |
| "loss": 1.9342, |
| "step": 25510 |
| }, |
| { |
| "epoch": 2.8577827547592385, |
| "grad_norm": 8.159955978393555, |
| "learning_rate": 2.372153788727137e-06, |
| "loss": 1.6683, |
| "step": 25520 |
| }, |
| { |
| "epoch": 2.858902575587906, |
| "grad_norm": 11.253711700439453, |
| "learning_rate": 2.3534901082493467e-06, |
| "loss": 1.6143, |
| "step": 25530 |
| }, |
| { |
| "epoch": 2.860022396416573, |
| "grad_norm": 14.729676246643066, |
| "learning_rate": 2.3348264277715567e-06, |
| "loss": 1.8251, |
| "step": 25540 |
| }, |
| { |
| "epoch": 2.8611422172452405, |
| "grad_norm": 8.878775596618652, |
| "learning_rate": 2.3161627472937663e-06, |
| "loss": 1.8646, |
| "step": 25550 |
| }, |
| { |
| "epoch": 2.8622620380739083, |
| "grad_norm": 14.987347602844238, |
| "learning_rate": 2.2974990668159763e-06, |
| "loss": 1.9119, |
| "step": 25560 |
| }, |
| { |
| "epoch": 2.8633818589025757, |
| "grad_norm": 5.109477519989014, |
| "learning_rate": 2.2788353863381863e-06, |
| "loss": 2.2621, |
| "step": 25570 |
| }, |
| { |
| "epoch": 2.864501679731243, |
| "grad_norm": 15.59926700592041, |
| "learning_rate": 2.2601717058603955e-06, |
| "loss": 1.9845, |
| "step": 25580 |
| }, |
| { |
| "epoch": 2.8656215005599104, |
| "grad_norm": 4.6907057762146, |
| "learning_rate": 2.2415080253826055e-06, |
| "loss": 2.0773, |
| "step": 25590 |
| }, |
| { |
| "epoch": 2.8667413213885777, |
| "grad_norm": 6.02996826171875, |
| "learning_rate": 2.2228443449048155e-06, |
| "loss": 2.0783, |
| "step": 25600 |
| }, |
| { |
| "epoch": 2.867861142217245, |
| "grad_norm": 14.389623641967773, |
| "learning_rate": 2.204180664427025e-06, |
| "loss": 1.9496, |
| "step": 25610 |
| }, |
| { |
| "epoch": 2.868980963045913, |
| "grad_norm": 5.188795566558838, |
| "learning_rate": 2.185516983949235e-06, |
| "loss": 1.5794, |
| "step": 25620 |
| }, |
| { |
| "epoch": 2.8701007838745802, |
| "grad_norm": 11.492018699645996, |
| "learning_rate": 2.1668533034714446e-06, |
| "loss": 1.5701, |
| "step": 25630 |
| }, |
| { |
| "epoch": 2.8712206047032476, |
| "grad_norm": 7.7545366287231445, |
| "learning_rate": 2.1481896229936542e-06, |
| "loss": 1.9405, |
| "step": 25640 |
| }, |
| { |
| "epoch": 2.872340425531915, |
| "grad_norm": 6.0428314208984375, |
| "learning_rate": 2.1295259425158642e-06, |
| "loss": 1.6884, |
| "step": 25650 |
| }, |
| { |
| "epoch": 2.8734602463605823, |
| "grad_norm": 15.78642749786377, |
| "learning_rate": 2.1108622620380742e-06, |
| "loss": 1.9008, |
| "step": 25660 |
| }, |
| { |
| "epoch": 2.8745800671892496, |
| "grad_norm": 19.360076904296875, |
| "learning_rate": 2.092198581560284e-06, |
| "loss": 2.1805, |
| "step": 25670 |
| }, |
| { |
| "epoch": 2.875699888017917, |
| "grad_norm": 10.965484619140625, |
| "learning_rate": 2.0735349010824934e-06, |
| "loss": 1.8339, |
| "step": 25680 |
| }, |
| { |
| "epoch": 2.8768197088465843, |
| "grad_norm": 13.197468757629395, |
| "learning_rate": 2.0548712206047034e-06, |
| "loss": 1.7847, |
| "step": 25690 |
| }, |
| { |
| "epoch": 2.8779395296752517, |
| "grad_norm": 8.53710651397705, |
| "learning_rate": 2.036207540126913e-06, |
| "loss": 1.6177, |
| "step": 25700 |
| }, |
| { |
| "epoch": 2.8790593505039195, |
| "grad_norm": 5.556687355041504, |
| "learning_rate": 2.017543859649123e-06, |
| "loss": 1.8142, |
| "step": 25710 |
| }, |
| { |
| "epoch": 2.880179171332587, |
| "grad_norm": 8.486677169799805, |
| "learning_rate": 1.998880179171333e-06, |
| "loss": 2.013, |
| "step": 25720 |
| }, |
| { |
| "epoch": 2.881298992161254, |
| "grad_norm": 12.919852256774902, |
| "learning_rate": 1.9802164986935426e-06, |
| "loss": 1.7, |
| "step": 25730 |
| }, |
| { |
| "epoch": 2.8824188129899215, |
| "grad_norm": 9.491839408874512, |
| "learning_rate": 1.961552818215752e-06, |
| "loss": 1.881, |
| "step": 25740 |
| }, |
| { |
| "epoch": 2.883538633818589, |
| "grad_norm": 15.983736991882324, |
| "learning_rate": 1.942889137737962e-06, |
| "loss": 2.1679, |
| "step": 25750 |
| }, |
| { |
| "epoch": 2.8846584546472567, |
| "grad_norm": 5.595208644866943, |
| "learning_rate": 1.9242254572601717e-06, |
| "loss": 1.8901, |
| "step": 25760 |
| }, |
| { |
| "epoch": 2.885778275475924, |
| "grad_norm": 10.111395835876465, |
| "learning_rate": 1.9055617767823815e-06, |
| "loss": 2.3119, |
| "step": 25770 |
| }, |
| { |
| "epoch": 2.8868980963045914, |
| "grad_norm": 18.772340774536133, |
| "learning_rate": 1.8868980963045915e-06, |
| "loss": 1.9794, |
| "step": 25780 |
| }, |
| { |
| "epoch": 2.8880179171332587, |
| "grad_norm": 4.584385395050049, |
| "learning_rate": 1.8682344158268011e-06, |
| "loss": 1.7576, |
| "step": 25790 |
| }, |
| { |
| "epoch": 2.889137737961926, |
| "grad_norm": 17.171106338500977, |
| "learning_rate": 1.849570735349011e-06, |
| "loss": 2.2186, |
| "step": 25800 |
| }, |
| { |
| "epoch": 2.8902575587905934, |
| "grad_norm": 18.548585891723633, |
| "learning_rate": 1.8309070548712205e-06, |
| "loss": 2.2678, |
| "step": 25810 |
| }, |
| { |
| "epoch": 2.891377379619261, |
| "grad_norm": 4.9082417488098145, |
| "learning_rate": 1.8122433743934305e-06, |
| "loss": 1.9042, |
| "step": 25820 |
| }, |
| { |
| "epoch": 2.892497200447928, |
| "grad_norm": 15.952796936035156, |
| "learning_rate": 1.7935796939156403e-06, |
| "loss": 1.7798, |
| "step": 25830 |
| }, |
| { |
| "epoch": 2.8936170212765955, |
| "grad_norm": 12.888788223266602, |
| "learning_rate": 1.7749160134378499e-06, |
| "loss": 1.648, |
| "step": 25840 |
| }, |
| { |
| "epoch": 2.8947368421052633, |
| "grad_norm": 5.010805606842041, |
| "learning_rate": 1.7562523329600599e-06, |
| "loss": 1.8376, |
| "step": 25850 |
| }, |
| { |
| "epoch": 2.8958566629339306, |
| "grad_norm": 9.813081741333008, |
| "learning_rate": 1.7375886524822697e-06, |
| "loss": 1.846, |
| "step": 25860 |
| }, |
| { |
| "epoch": 2.896976483762598, |
| "grad_norm": 11.167447090148926, |
| "learning_rate": 1.7189249720044792e-06, |
| "loss": 1.8414, |
| "step": 25870 |
| }, |
| { |
| "epoch": 2.8980963045912653, |
| "grad_norm": 5.086580276489258, |
| "learning_rate": 1.700261291526689e-06, |
| "loss": 1.4907, |
| "step": 25880 |
| }, |
| { |
| "epoch": 2.8992161254199327, |
| "grad_norm": 12.3839693069458, |
| "learning_rate": 1.681597611048899e-06, |
| "loss": 1.8576, |
| "step": 25890 |
| }, |
| { |
| "epoch": 2.9003359462486005, |
| "grad_norm": 6.351990222930908, |
| "learning_rate": 1.6629339305711086e-06, |
| "loss": 1.8742, |
| "step": 25900 |
| }, |
| { |
| "epoch": 2.901455767077268, |
| "grad_norm": 5.9348978996276855, |
| "learning_rate": 1.6442702500933184e-06, |
| "loss": 2.1135, |
| "step": 25910 |
| }, |
| { |
| "epoch": 2.902575587905935, |
| "grad_norm": 6.191033363342285, |
| "learning_rate": 1.6256065696155284e-06, |
| "loss": 1.6505, |
| "step": 25920 |
| }, |
| { |
| "epoch": 2.9036954087346025, |
| "grad_norm": 6.510402202606201, |
| "learning_rate": 1.606942889137738e-06, |
| "loss": 1.8007, |
| "step": 25930 |
| }, |
| { |
| "epoch": 2.90481522956327, |
| "grad_norm": 7.332479953765869, |
| "learning_rate": 1.5882792086599478e-06, |
| "loss": 1.9839, |
| "step": 25940 |
| }, |
| { |
| "epoch": 2.9059350503919372, |
| "grad_norm": 3.8477463722229004, |
| "learning_rate": 1.5696155281821578e-06, |
| "loss": 2.1467, |
| "step": 25950 |
| }, |
| { |
| "epoch": 2.9070548712206046, |
| "grad_norm": 11.45783805847168, |
| "learning_rate": 1.5509518477043674e-06, |
| "loss": 1.9373, |
| "step": 25960 |
| }, |
| { |
| "epoch": 2.908174692049272, |
| "grad_norm": 18.16033363342285, |
| "learning_rate": 1.5322881672265772e-06, |
| "loss": 1.9102, |
| "step": 25970 |
| }, |
| { |
| "epoch": 2.9092945128779393, |
| "grad_norm": 14.75328254699707, |
| "learning_rate": 1.513624486748787e-06, |
| "loss": 1.5849, |
| "step": 25980 |
| }, |
| { |
| "epoch": 2.910414333706607, |
| "grad_norm": 7.447336196899414, |
| "learning_rate": 1.4949608062709968e-06, |
| "loss": 1.187, |
| "step": 25990 |
| }, |
| { |
| "epoch": 2.9115341545352744, |
| "grad_norm": 6.396903038024902, |
| "learning_rate": 1.4762971257932065e-06, |
| "loss": 1.6073, |
| "step": 26000 |
| }, |
| { |
| "epoch": 2.9126539753639418, |
| "grad_norm": 13.001298904418945, |
| "learning_rate": 1.4576334453154161e-06, |
| "loss": 2.1491, |
| "step": 26010 |
| }, |
| { |
| "epoch": 2.913773796192609, |
| "grad_norm": 12.818767547607422, |
| "learning_rate": 1.4389697648376261e-06, |
| "loss": 1.6316, |
| "step": 26020 |
| }, |
| { |
| "epoch": 2.9148936170212765, |
| "grad_norm": 11.24215030670166, |
| "learning_rate": 1.420306084359836e-06, |
| "loss": 2.058, |
| "step": 26030 |
| }, |
| { |
| "epoch": 2.9160134378499443, |
| "grad_norm": 5.04683780670166, |
| "learning_rate": 1.4016424038820455e-06, |
| "loss": 1.6477, |
| "step": 26040 |
| }, |
| { |
| "epoch": 2.9171332586786116, |
| "grad_norm": 8.69908618927002, |
| "learning_rate": 1.3829787234042553e-06, |
| "loss": 1.7106, |
| "step": 26050 |
| }, |
| { |
| "epoch": 2.918253079507279, |
| "grad_norm": 10.91391372680664, |
| "learning_rate": 1.364315042926465e-06, |
| "loss": 1.878, |
| "step": 26060 |
| }, |
| { |
| "epoch": 2.9193729003359463, |
| "grad_norm": 15.811108589172363, |
| "learning_rate": 1.3456513624486749e-06, |
| "loss": 2.1953, |
| "step": 26070 |
| }, |
| { |
| "epoch": 2.9204927211646137, |
| "grad_norm": 12.007214546203613, |
| "learning_rate": 1.3269876819708847e-06, |
| "loss": 2.0814, |
| "step": 26080 |
| }, |
| { |
| "epoch": 2.921612541993281, |
| "grad_norm": 11.011650085449219, |
| "learning_rate": 1.3083240014930945e-06, |
| "loss": 1.5842, |
| "step": 26090 |
| }, |
| { |
| "epoch": 2.9227323628219484, |
| "grad_norm": 8.247806549072266, |
| "learning_rate": 1.2896603210153043e-06, |
| "loss": 1.6496, |
| "step": 26100 |
| }, |
| { |
| "epoch": 2.9238521836506157, |
| "grad_norm": 8.519516944885254, |
| "learning_rate": 1.270996640537514e-06, |
| "loss": 2.2153, |
| "step": 26110 |
| }, |
| { |
| "epoch": 2.924972004479283, |
| "grad_norm": 8.969647407531738, |
| "learning_rate": 1.2523329600597239e-06, |
| "loss": 2.1363, |
| "step": 26120 |
| }, |
| { |
| "epoch": 2.926091825307951, |
| "grad_norm": 11.865436553955078, |
| "learning_rate": 1.2336692795819336e-06, |
| "loss": 2.0425, |
| "step": 26130 |
| }, |
| { |
| "epoch": 2.927211646136618, |
| "grad_norm": 20.383333206176758, |
| "learning_rate": 1.2150055991041434e-06, |
| "loss": 2.0145, |
| "step": 26140 |
| }, |
| { |
| "epoch": 2.9283314669652856, |
| "grad_norm": 5.632237434387207, |
| "learning_rate": 1.1963419186263532e-06, |
| "loss": 1.9828, |
| "step": 26150 |
| }, |
| { |
| "epoch": 2.929451287793953, |
| "grad_norm": 7.68386697769165, |
| "learning_rate": 1.177678238148563e-06, |
| "loss": 2.4198, |
| "step": 26160 |
| }, |
| { |
| "epoch": 2.9305711086226203, |
| "grad_norm": 5.8563151359558105, |
| "learning_rate": 1.1590145576707726e-06, |
| "loss": 1.6861, |
| "step": 26170 |
| }, |
| { |
| "epoch": 2.931690929451288, |
| "grad_norm": 20.610515594482422, |
| "learning_rate": 1.1403508771929826e-06, |
| "loss": 2.0284, |
| "step": 26180 |
| }, |
| { |
| "epoch": 2.9328107502799554, |
| "grad_norm": 5.223932266235352, |
| "learning_rate": 1.1216871967151924e-06, |
| "loss": 2.0917, |
| "step": 26190 |
| }, |
| { |
| "epoch": 2.9339305711086228, |
| "grad_norm": 5.224584579467773, |
| "learning_rate": 1.103023516237402e-06, |
| "loss": 1.6706, |
| "step": 26200 |
| }, |
| { |
| "epoch": 2.93505039193729, |
| "grad_norm": 6.829593181610107, |
| "learning_rate": 1.084359835759612e-06, |
| "loss": 2.0043, |
| "step": 26210 |
| }, |
| { |
| "epoch": 2.9361702127659575, |
| "grad_norm": 5.992856979370117, |
| "learning_rate": 1.0656961552818216e-06, |
| "loss": 1.4484, |
| "step": 26220 |
| }, |
| { |
| "epoch": 2.937290033594625, |
| "grad_norm": 9.6854829788208, |
| "learning_rate": 1.0470324748040314e-06, |
| "loss": 2.1745, |
| "step": 26230 |
| }, |
| { |
| "epoch": 2.938409854423292, |
| "grad_norm": 5.901656627655029, |
| "learning_rate": 1.0283687943262412e-06, |
| "loss": 2.2163, |
| "step": 26240 |
| }, |
| { |
| "epoch": 2.9395296752519595, |
| "grad_norm": 11.751072883605957, |
| "learning_rate": 1.009705113848451e-06, |
| "loss": 2.2047, |
| "step": 26250 |
| }, |
| { |
| "epoch": 2.940649496080627, |
| "grad_norm": 6.053452491760254, |
| "learning_rate": 9.910414333706607e-07, |
| "loss": 1.5099, |
| "step": 26260 |
| }, |
| { |
| "epoch": 2.9417693169092947, |
| "grad_norm": 9.445327758789062, |
| "learning_rate": 9.723777528928705e-07, |
| "loss": 2.0437, |
| "step": 26270 |
| }, |
| { |
| "epoch": 2.942889137737962, |
| "grad_norm": 12.053526878356934, |
| "learning_rate": 9.537140724150803e-07, |
| "loss": 2.1465, |
| "step": 26280 |
| }, |
| { |
| "epoch": 2.9440089585666294, |
| "grad_norm": 10.680662155151367, |
| "learning_rate": 9.350503919372901e-07, |
| "loss": 1.8593, |
| "step": 26290 |
| }, |
| { |
| "epoch": 2.9451287793952967, |
| "grad_norm": 13.565116882324219, |
| "learning_rate": 9.163867114594998e-07, |
| "loss": 2.0493, |
| "step": 26300 |
| }, |
| { |
| "epoch": 2.946248600223964, |
| "grad_norm": 6.383924961090088, |
| "learning_rate": 8.977230309817097e-07, |
| "loss": 2.2825, |
| "step": 26310 |
| }, |
| { |
| "epoch": 2.9473684210526314, |
| "grad_norm": 19.03361701965332, |
| "learning_rate": 8.790593505039194e-07, |
| "loss": 1.7299, |
| "step": 26320 |
| }, |
| { |
| "epoch": 2.948488241881299, |
| "grad_norm": 10.854887962341309, |
| "learning_rate": 8.603956700261292e-07, |
| "loss": 1.7987, |
| "step": 26330 |
| }, |
| { |
| "epoch": 2.9496080627099666, |
| "grad_norm": 11.263670921325684, |
| "learning_rate": 8.417319895483391e-07, |
| "loss": 1.9531, |
| "step": 26340 |
| }, |
| { |
| "epoch": 2.950727883538634, |
| "grad_norm": 6.675166606903076, |
| "learning_rate": 8.230683090705488e-07, |
| "loss": 2.1583, |
| "step": 26350 |
| }, |
| { |
| "epoch": 2.9518477043673013, |
| "grad_norm": 10.624512672424316, |
| "learning_rate": 8.044046285927585e-07, |
| "loss": 1.9506, |
| "step": 26360 |
| }, |
| { |
| "epoch": 2.9529675251959686, |
| "grad_norm": 8.63731575012207, |
| "learning_rate": 7.857409481149684e-07, |
| "loss": 1.802, |
| "step": 26370 |
| }, |
| { |
| "epoch": 2.954087346024636, |
| "grad_norm": 8.122550964355469, |
| "learning_rate": 7.670772676371781e-07, |
| "loss": 1.5223, |
| "step": 26380 |
| }, |
| { |
| "epoch": 2.9552071668533033, |
| "grad_norm": 16.51909065246582, |
| "learning_rate": 7.484135871593878e-07, |
| "loss": 1.8571, |
| "step": 26390 |
| }, |
| { |
| "epoch": 2.9563269876819707, |
| "grad_norm": 7.450239658355713, |
| "learning_rate": 7.297499066815976e-07, |
| "loss": 1.5009, |
| "step": 26400 |
| }, |
| { |
| "epoch": 2.9574468085106385, |
| "grad_norm": 14.5098237991333, |
| "learning_rate": 7.110862262038074e-07, |
| "loss": 1.9467, |
| "step": 26410 |
| }, |
| { |
| "epoch": 2.958566629339306, |
| "grad_norm": 6.5906782150268555, |
| "learning_rate": 6.924225457260172e-07, |
| "loss": 1.8331, |
| "step": 26420 |
| }, |
| { |
| "epoch": 2.959686450167973, |
| "grad_norm": 5.737934112548828, |
| "learning_rate": 6.73758865248227e-07, |
| "loss": 2.0944, |
| "step": 26430 |
| }, |
| { |
| "epoch": 2.9608062709966405, |
| "grad_norm": 11.81939697265625, |
| "learning_rate": 6.550951847704367e-07, |
| "loss": 1.6853, |
| "step": 26440 |
| }, |
| { |
| "epoch": 2.961926091825308, |
| "grad_norm": 7.474795341491699, |
| "learning_rate": 6.364315042926465e-07, |
| "loss": 1.4531, |
| "step": 26450 |
| }, |
| { |
| "epoch": 2.963045912653975, |
| "grad_norm": 10.875497817993164, |
| "learning_rate": 6.177678238148564e-07, |
| "loss": 2.3634, |
| "step": 26460 |
| }, |
| { |
| "epoch": 2.964165733482643, |
| "grad_norm": 7.887001991271973, |
| "learning_rate": 5.991041433370661e-07, |
| "loss": 1.9128, |
| "step": 26470 |
| }, |
| { |
| "epoch": 2.9652855543113104, |
| "grad_norm": 6.30941915512085, |
| "learning_rate": 5.804404628592759e-07, |
| "loss": 2.0812, |
| "step": 26480 |
| }, |
| { |
| "epoch": 2.9664053751399777, |
| "grad_norm": 6.238934516906738, |
| "learning_rate": 5.617767823814857e-07, |
| "loss": 1.7906, |
| "step": 26490 |
| }, |
| { |
| "epoch": 2.967525195968645, |
| "grad_norm": 7.524406909942627, |
| "learning_rate": 5.431131019036955e-07, |
| "loss": 1.9674, |
| "step": 26500 |
| }, |
| { |
| "epoch": 2.9686450167973124, |
| "grad_norm": 7.257613658905029, |
| "learning_rate": 5.244494214259052e-07, |
| "loss": 1.9621, |
| "step": 26510 |
| }, |
| { |
| "epoch": 2.9697648376259798, |
| "grad_norm": 16.3148136138916, |
| "learning_rate": 5.057857409481149e-07, |
| "loss": 1.6907, |
| "step": 26520 |
| }, |
| { |
| "epoch": 2.970884658454647, |
| "grad_norm": 5.2545952796936035, |
| "learning_rate": 4.871220604703247e-07, |
| "loss": 1.7305, |
| "step": 26530 |
| }, |
| { |
| "epoch": 2.9720044792833145, |
| "grad_norm": 16.13141632080078, |
| "learning_rate": 4.6845837999253457e-07, |
| "loss": 1.9891, |
| "step": 26540 |
| }, |
| { |
| "epoch": 2.973124300111982, |
| "grad_norm": 9.418984413146973, |
| "learning_rate": 4.497946995147443e-07, |
| "loss": 1.7309, |
| "step": 26550 |
| }, |
| { |
| "epoch": 2.9742441209406496, |
| "grad_norm": 3.6016457080841064, |
| "learning_rate": 4.311310190369541e-07, |
| "loss": 1.6292, |
| "step": 26560 |
| }, |
| { |
| "epoch": 2.975363941769317, |
| "grad_norm": 7.472093105316162, |
| "learning_rate": 4.1246733855916395e-07, |
| "loss": 1.8515, |
| "step": 26570 |
| }, |
| { |
| "epoch": 2.9764837625979843, |
| "grad_norm": 6.936214923858643, |
| "learning_rate": 3.9380365808137364e-07, |
| "loss": 1.7409, |
| "step": 26580 |
| }, |
| { |
| "epoch": 2.9776035834266517, |
| "grad_norm": 6.333293437957764, |
| "learning_rate": 3.751399776035835e-07, |
| "loss": 1.8553, |
| "step": 26590 |
| }, |
| { |
| "epoch": 2.978723404255319, |
| "grad_norm": 5.685304164886475, |
| "learning_rate": 3.564762971257932e-07, |
| "loss": 1.7563, |
| "step": 26600 |
| }, |
| { |
| "epoch": 2.979843225083987, |
| "grad_norm": 7.443325996398926, |
| "learning_rate": 3.37812616648003e-07, |
| "loss": 2.0484, |
| "step": 26610 |
| }, |
| { |
| "epoch": 2.980963045912654, |
| "grad_norm": 6.894618034362793, |
| "learning_rate": 3.1914893617021275e-07, |
| "loss": 1.6829, |
| "step": 26620 |
| }, |
| { |
| "epoch": 2.9820828667413215, |
| "grad_norm": 18.16074562072754, |
| "learning_rate": 3.004852556924226e-07, |
| "loss": 2.1276, |
| "step": 26630 |
| }, |
| { |
| "epoch": 2.983202687569989, |
| "grad_norm": 13.232165336608887, |
| "learning_rate": 2.8182157521463234e-07, |
| "loss": 1.8867, |
| "step": 26640 |
| }, |
| { |
| "epoch": 2.984322508398656, |
| "grad_norm": 3.8035480976104736, |
| "learning_rate": 2.6315789473684213e-07, |
| "loss": 1.9104, |
| "step": 26650 |
| }, |
| { |
| "epoch": 2.9854423292273236, |
| "grad_norm": 6.058941841125488, |
| "learning_rate": 2.4449421425905187e-07, |
| "loss": 1.6504, |
| "step": 26660 |
| }, |
| { |
| "epoch": 2.986562150055991, |
| "grad_norm": 8.38434886932373, |
| "learning_rate": 2.258305337812617e-07, |
| "loss": 1.4973, |
| "step": 26670 |
| }, |
| { |
| "epoch": 2.9876819708846583, |
| "grad_norm": 16.152746200561523, |
| "learning_rate": 2.0716685330347146e-07, |
| "loss": 1.6986, |
| "step": 26680 |
| }, |
| { |
| "epoch": 2.9888017917133256, |
| "grad_norm": 5.802217960357666, |
| "learning_rate": 1.8850317282568122e-07, |
| "loss": 1.6346, |
| "step": 26690 |
| }, |
| { |
| "epoch": 2.9899216125419934, |
| "grad_norm": 18.607810974121094, |
| "learning_rate": 1.6983949234789102e-07, |
| "loss": 1.9916, |
| "step": 26700 |
| }, |
| { |
| "epoch": 2.9910414333706608, |
| "grad_norm": 5.028476715087891, |
| "learning_rate": 1.5117581187010078e-07, |
| "loss": 1.7811, |
| "step": 26710 |
| }, |
| { |
| "epoch": 2.992161254199328, |
| "grad_norm": 6.0024824142456055, |
| "learning_rate": 1.3251213139231058e-07, |
| "loss": 2.2338, |
| "step": 26720 |
| }, |
| { |
| "epoch": 2.9932810750279955, |
| "grad_norm": 5.444699287414551, |
| "learning_rate": 1.1384845091452034e-07, |
| "loss": 2.1604, |
| "step": 26730 |
| }, |
| { |
| "epoch": 2.994400895856663, |
| "grad_norm": 12.872075080871582, |
| "learning_rate": 9.518477043673014e-08, |
| "loss": 1.2952, |
| "step": 26740 |
| }, |
| { |
| "epoch": 2.9955207166853306, |
| "grad_norm": 10.56808853149414, |
| "learning_rate": 7.652108995893992e-08, |
| "loss": 1.8126, |
| "step": 26750 |
| }, |
| { |
| "epoch": 2.996640537513998, |
| "grad_norm": 5.550817966461182, |
| "learning_rate": 5.785740948114969e-08, |
| "loss": 2.0958, |
| "step": 26760 |
| }, |
| { |
| "epoch": 2.9977603583426653, |
| "grad_norm": 6.287519454956055, |
| "learning_rate": 3.919372900335946e-08, |
| "loss": 1.8855, |
| "step": 26770 |
| }, |
| { |
| "epoch": 2.9988801791713326, |
| "grad_norm": 8.283456802368164, |
| "learning_rate": 2.0530048525569244e-08, |
| "loss": 1.916, |
| "step": 26780 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 7.1067328453063965, |
| "learning_rate": 1.866368047779022e-09, |
| "loss": 1.6709, |
| "step": 26790 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 26790, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.457535008768e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
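For reference, a `trainer_state.json` like the one above can be inspected directly with the Python standard library. The following is a minimal sketch, not part of the checkpoint itself: the `checkpoint-26790/` path is an assumption (point it at your own checkpoint directory), and it simply compares the mean logged training loss at the start and end of the run using the `log_history`, `global_step`, and `max_steps` fields that appear in this file.

```python
# Minimal sketch for summarizing a trainer_state.json (stdlib only).
# NOTE: the path below is an assumption; adjust it to your checkpoint dir.
import json
from statistics import mean

with open("checkpoint-26790/trainer_state.json") as f:
    state = json.load(f)

# One dict per logging event (logging_steps = 10 in this run).
history = state["log_history"]

# Keep only entries carrying a training loss; the guard also skips any
# eval records that may be interleaved in other runs.
losses = [(e["step"], e["loss"]) for e in history if "loss" in e]

first_k = mean(l for _, l in losses[:50])   # mean loss over the first 50 logs
last_k = mean(l for _, l in losses[-50:])   # mean loss over the last 50 logs

print(f"logged points : {len(losses)}")
print(f"final step    : {state['global_step']} / {state['max_steps']}")
print(f"mean loss, first 50 logs: {first_k:.3f}")
print(f"mean loss, last 50 logs : {last_k:.3f}")
```

On this log the last entries sit around a loss of 1.7-2.0 versus roughly 3.0 at the start, which is the kind of drop the script above surfaces at a glance; the same `losses` list can be fed to a plotting library if a loss curve is wanted.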