diff --git "a/qwen3-vl-4b-agentnet-bsz384-step3922/trainer_state.json" "b/qwen3-vl-4b-agentnet-bsz384-step3922/trainer_state.json" new file mode 100644--- /dev/null +++ "b/qwen3-vl-4b-agentnet-bsz384-step3922/trainer_state.json" @@ -0,0 +1,27488 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 10000.0, + "global_step": 3922, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00025497195308516065, + "grad_norm": 14.721250534057617, + "learning_rate": 1.0152284263959391e-07, + "loss": 1.1753778457641602, + "step": 1 + }, + { + "epoch": 0.0005099439061703213, + "grad_norm": 14.536355018615723, + "learning_rate": 2.0304568527918783e-07, + "loss": 1.176823377609253, + "step": 2 + }, + { + "epoch": 0.0007649158592554819, + "grad_norm": 14.321669578552246, + "learning_rate": 3.0456852791878176e-07, + "loss": 1.1615561246871948, + "step": 3 + }, + { + "epoch": 0.0010198878123406426, + "grad_norm": 14.656594276428223, + "learning_rate": 4.0609137055837566e-07, + "loss": 1.1877045631408691, + "step": 4 + }, + { + "epoch": 0.0012748597654258032, + "grad_norm": 14.882043838500977, + "learning_rate": 5.076142131979696e-07, + "loss": 1.1730620861053467, + "step": 5 + }, + { + "epoch": 0.0015298317185109638, + "grad_norm": 14.535433769226074, + "learning_rate": 6.091370558375635e-07, + "loss": 1.1784110069274902, + "step": 6 + }, + { + "epoch": 0.0017848036715961244, + "grad_norm": 14.390504837036133, + "learning_rate": 7.106598984771574e-07, + "loss": 1.169837474822998, + "step": 7 + }, + { + "epoch": 0.002039775624681285, + "grad_norm": 14.107206344604492, + "learning_rate": 8.121827411167513e-07, + "loss": 1.1590073108673096, + "step": 8 + }, + { + "epoch": 0.0022947475777664456, + "grad_norm": 14.10217571258545, + "learning_rate": 9.137055837563452e-07, + "loss": 1.1481646299362183, + "step": 9 + }, + { + "epoch": 0.0025497195308516064, + "grad_norm": 13.8338623046875, + "learning_rate": 1.0152284263959392e-06, + "loss": 1.1406148672103882, + "step": 10 + }, + { + "epoch": 0.0028046914839367667, + "grad_norm": 12.136486053466797, + "learning_rate": 1.116751269035533e-06, + "loss": 1.0856387615203857, + "step": 11 + }, + { + "epoch": 0.0030596634370219276, + "grad_norm": 11.842436790466309, + "learning_rate": 1.218274111675127e-06, + "loss": 1.0627188682556152, + "step": 12 + }, + { + "epoch": 0.0033146353901070884, + "grad_norm": 11.395700454711914, + "learning_rate": 1.319796954314721e-06, + "loss": 1.0452030897140503, + "step": 13 + }, + { + "epoch": 0.0035696073431922487, + "grad_norm": 7.274648666381836, + "learning_rate": 1.4213197969543148e-06, + "loss": 0.9214948415756226, + "step": 14 + }, + { + "epoch": 0.0038245792962774095, + "grad_norm": 6.850907325744629, + "learning_rate": 1.5228426395939088e-06, + "loss": 0.8989903330802917, + "step": 15 + }, + { + "epoch": 0.00407955124936257, + "grad_norm": 6.552920818328857, + "learning_rate": 1.6243654822335026e-06, + "loss": 0.8858917951583862, + "step": 16 + }, + { + "epoch": 0.004334523202447731, + "grad_norm": 5.687392711639404, + "learning_rate": 1.7258883248730964e-06, + "loss": 0.8578486442565918, + "step": 17 + }, + { + "epoch": 0.004589495155532891, + "grad_norm": 5.373569965362549, + "learning_rate": 1.8274111675126904e-06, + "loss": 0.8581478595733643, + "step": 18 + }, + { + "epoch": 0.004844467108618052, + "grad_norm": 2.3742711544036865, + "learning_rate": 1.9289340101522844e-06, + "loss": 0.7883254289627075, + "step": 19 + }, + { + "epoch": 0.005099439061703213, + "grad_norm": 1.9187192916870117, + "learning_rate": 2.0304568527918785e-06, + "loss": 0.7528367042541504, + "step": 20 + }, + { + "epoch": 0.0053544110147883735, + "grad_norm": 1.869828701019287, + "learning_rate": 2.1319796954314725e-06, + "loss": 0.7535017728805542, + "step": 21 + }, + { + "epoch": 0.0056093829678735335, + "grad_norm": 1.7534024715423584, + "learning_rate": 2.233502538071066e-06, + "loss": 0.7501562833786011, + "step": 22 + }, + { + "epoch": 0.005864354920958694, + "grad_norm": 1.589906096458435, + "learning_rate": 2.33502538071066e-06, + "loss": 0.7379674911499023, + "step": 23 + }, + { + "epoch": 0.006119326874043855, + "grad_norm": 1.4798340797424316, + "learning_rate": 2.436548223350254e-06, + "loss": 0.7441307902336121, + "step": 24 + }, + { + "epoch": 0.006374298827129016, + "grad_norm": 1.3907454013824463, + "learning_rate": 2.5380710659898476e-06, + "loss": 0.7364725470542908, + "step": 25 + }, + { + "epoch": 0.006629270780214177, + "grad_norm": 1.217887043952942, + "learning_rate": 2.639593908629442e-06, + "loss": 0.7361563444137573, + "step": 26 + }, + { + "epoch": 0.006884242733299337, + "grad_norm": 1.6351842880249023, + "learning_rate": 2.7411167512690357e-06, + "loss": 0.7294541597366333, + "step": 27 + }, + { + "epoch": 0.0071392146863844975, + "grad_norm": 1.6783027648925781, + "learning_rate": 2.8426395939086297e-06, + "loss": 0.7308826446533203, + "step": 28 + }, + { + "epoch": 0.007394186639469658, + "grad_norm": 1.4227818250656128, + "learning_rate": 2.9441624365482237e-06, + "loss": 0.7099924087524414, + "step": 29 + }, + { + "epoch": 0.007649158592554819, + "grad_norm": 1.152917742729187, + "learning_rate": 3.0456852791878177e-06, + "loss": 0.705260157585144, + "step": 30 + }, + { + "epoch": 0.00790413054563998, + "grad_norm": 0.9976029992103577, + "learning_rate": 3.1472081218274113e-06, + "loss": 0.6943796873092651, + "step": 31 + }, + { + "epoch": 0.00815910249872514, + "grad_norm": 0.9264460206031799, + "learning_rate": 3.2487309644670053e-06, + "loss": 0.6871784925460815, + "step": 32 + }, + { + "epoch": 0.008414074451810302, + "grad_norm": 0.8817992806434631, + "learning_rate": 3.3502538071065993e-06, + "loss": 0.7034109830856323, + "step": 33 + }, + { + "epoch": 0.008669046404895462, + "grad_norm": 0.8824509382247925, + "learning_rate": 3.451776649746193e-06, + "loss": 0.6865764856338501, + "step": 34 + }, + { + "epoch": 0.008924018357980621, + "grad_norm": 0.8971969485282898, + "learning_rate": 3.5532994923857873e-06, + "loss": 0.6814626455307007, + "step": 35 + }, + { + "epoch": 0.009178990311065782, + "grad_norm": 0.8440399169921875, + "learning_rate": 3.654822335025381e-06, + "loss": 0.6798086166381836, + "step": 36 + }, + { + "epoch": 0.009433962264150943, + "grad_norm": 0.7719917297363281, + "learning_rate": 3.756345177664975e-06, + "loss": 0.670407235622406, + "step": 37 + }, + { + "epoch": 0.009688934217236104, + "grad_norm": 0.7223917841911316, + "learning_rate": 3.857868020304569e-06, + "loss": 0.6843222379684448, + "step": 38 + }, + { + "epoch": 0.009943906170321265, + "grad_norm": 0.8157918453216553, + "learning_rate": 3.959390862944163e-06, + "loss": 0.6840213537216187, + "step": 39 + }, + { + "epoch": 0.010198878123406425, + "grad_norm": 0.8798688650131226, + "learning_rate": 4.060913705583757e-06, + "loss": 0.6696931719779968, + "step": 40 + }, + { + "epoch": 0.010453850076491586, + "grad_norm": 0.9106945991516113, + "learning_rate": 4.162436548223351e-06, + "loss": 0.6799577474594116, + "step": 41 + }, + { + "epoch": 0.010708822029576747, + "grad_norm": 0.7262415289878845, + "learning_rate": 4.263959390862945e-06, + "loss": 0.6715244650840759, + "step": 42 + }, + { + "epoch": 0.010963793982661908, + "grad_norm": 0.7152193784713745, + "learning_rate": 4.365482233502538e-06, + "loss": 0.6724027395248413, + "step": 43 + }, + { + "epoch": 0.011218765935747067, + "grad_norm": 0.7244555950164795, + "learning_rate": 4.467005076142132e-06, + "loss": 0.6676362752914429, + "step": 44 + }, + { + "epoch": 0.011473737888832228, + "grad_norm": 0.7677158117294312, + "learning_rate": 4.568527918781726e-06, + "loss": 0.6664789319038391, + "step": 45 + }, + { + "epoch": 0.011728709841917389, + "grad_norm": 0.7524646520614624, + "learning_rate": 4.67005076142132e-06, + "loss": 0.6637834310531616, + "step": 46 + }, + { + "epoch": 0.01198368179500255, + "grad_norm": 0.7234571576118469, + "learning_rate": 4.771573604060914e-06, + "loss": 0.6616507768630981, + "step": 47 + }, + { + "epoch": 0.01223865374808771, + "grad_norm": 0.637627363204956, + "learning_rate": 4.873096446700508e-06, + "loss": 0.6517107486724854, + "step": 48 + }, + { + "epoch": 0.012493625701172871, + "grad_norm": 0.6411532759666443, + "learning_rate": 4.974619289340102e-06, + "loss": 0.6510401368141174, + "step": 49 + }, + { + "epoch": 0.012748597654258032, + "grad_norm": 0.6801655292510986, + "learning_rate": 5.076142131979695e-06, + "loss": 0.6529415845870972, + "step": 50 + }, + { + "epoch": 0.013003569607343193, + "grad_norm": 0.6227138042449951, + "learning_rate": 5.17766497461929e-06, + "loss": 0.6479157209396362, + "step": 51 + }, + { + "epoch": 0.013258541560428353, + "grad_norm": 0.6181749105453491, + "learning_rate": 5.279187817258884e-06, + "loss": 0.655814528465271, + "step": 52 + }, + { + "epoch": 0.013513513513513514, + "grad_norm": 0.6531626582145691, + "learning_rate": 5.380710659898477e-06, + "loss": 0.648429811000824, + "step": 53 + }, + { + "epoch": 0.013768485466598673, + "grad_norm": 0.6198768019676208, + "learning_rate": 5.482233502538071e-06, + "loss": 0.6356717348098755, + "step": 54 + }, + { + "epoch": 0.014023457419683834, + "grad_norm": 0.5874183177947998, + "learning_rate": 5.583756345177665e-06, + "loss": 0.6472568511962891, + "step": 55 + }, + { + "epoch": 0.014278429372768995, + "grad_norm": 0.5928446054458618, + "learning_rate": 5.685279187817259e-06, + "loss": 0.6416783332824707, + "step": 56 + }, + { + "epoch": 0.014533401325854156, + "grad_norm": 0.6353596448898315, + "learning_rate": 5.7868020304568525e-06, + "loss": 0.6454662084579468, + "step": 57 + }, + { + "epoch": 0.014788373278939317, + "grad_norm": 0.6318172216415405, + "learning_rate": 5.888324873096447e-06, + "loss": 0.6328722238540649, + "step": 58 + }, + { + "epoch": 0.015043345232024477, + "grad_norm": 0.6352128982543945, + "learning_rate": 5.989847715736041e-06, + "loss": 0.6377559900283813, + "step": 59 + }, + { + "epoch": 0.015298317185109638, + "grad_norm": 0.5992292761802673, + "learning_rate": 6.091370558375635e-06, + "loss": 0.6464930772781372, + "step": 60 + }, + { + "epoch": 0.015553289138194799, + "grad_norm": 0.6220185160636902, + "learning_rate": 6.1928934010152285e-06, + "loss": 0.6422460079193115, + "step": 61 + }, + { + "epoch": 0.01580826109127996, + "grad_norm": 0.5953255891799927, + "learning_rate": 6.2944162436548225e-06, + "loss": 0.6442855596542358, + "step": 62 + }, + { + "epoch": 0.01606323304436512, + "grad_norm": 0.6460153460502625, + "learning_rate": 6.395939086294417e-06, + "loss": 0.6416395902633667, + "step": 63 + }, + { + "epoch": 0.01631820499745028, + "grad_norm": 0.6249935626983643, + "learning_rate": 6.4974619289340105e-06, + "loss": 0.6346004605293274, + "step": 64 + }, + { + "epoch": 0.01657317695053544, + "grad_norm": 0.7142345309257507, + "learning_rate": 6.5989847715736045e-06, + "loss": 0.6336624622344971, + "step": 65 + }, + { + "epoch": 0.016828148903620603, + "grad_norm": 0.6218382716178894, + "learning_rate": 6.7005076142131985e-06, + "loss": 0.6198103427886963, + "step": 66 + }, + { + "epoch": 0.017083120856705762, + "grad_norm": 0.6140885353088379, + "learning_rate": 6.8020304568527926e-06, + "loss": 0.6337085962295532, + "step": 67 + }, + { + "epoch": 0.017338092809790925, + "grad_norm": 0.6162137985229492, + "learning_rate": 6.903553299492386e-06, + "loss": 0.6449379920959473, + "step": 68 + }, + { + "epoch": 0.017593064762876084, + "grad_norm": 0.6531879901885986, + "learning_rate": 7.0050761421319806e-06, + "loss": 0.6236452460289001, + "step": 69 + }, + { + "epoch": 0.017848036715961243, + "grad_norm": 0.6065159440040588, + "learning_rate": 7.106598984771575e-06, + "loss": 0.6327449679374695, + "step": 70 + }, + { + "epoch": 0.018103008669046405, + "grad_norm": 0.6237702965736389, + "learning_rate": 7.208121827411169e-06, + "loss": 0.620902955532074, + "step": 71 + }, + { + "epoch": 0.018357980622131564, + "grad_norm": 0.6284878849983215, + "learning_rate": 7.309644670050762e-06, + "loss": 0.6234478950500488, + "step": 72 + }, + { + "epoch": 0.018612952575216727, + "grad_norm": 0.6469556093215942, + "learning_rate": 7.411167512690356e-06, + "loss": 0.6349846124649048, + "step": 73 + }, + { + "epoch": 0.018867924528301886, + "grad_norm": 0.6482848525047302, + "learning_rate": 7.51269035532995e-06, + "loss": 0.633674144744873, + "step": 74 + }, + { + "epoch": 0.01912289648138705, + "grad_norm": 0.6971201300621033, + "learning_rate": 7.614213197969543e-06, + "loss": 0.6282392740249634, + "step": 75 + }, + { + "epoch": 0.019377868434472208, + "grad_norm": 0.6171407699584961, + "learning_rate": 7.715736040609138e-06, + "loss": 0.614886462688446, + "step": 76 + }, + { + "epoch": 0.01963284038755737, + "grad_norm": 0.6748046278953552, + "learning_rate": 7.817258883248731e-06, + "loss": 0.6317158937454224, + "step": 77 + }, + { + "epoch": 0.01988781234064253, + "grad_norm": 0.6388630270957947, + "learning_rate": 7.918781725888326e-06, + "loss": 0.6277610063552856, + "step": 78 + }, + { + "epoch": 0.02014278429372769, + "grad_norm": 0.6394343972206116, + "learning_rate": 8.020304568527919e-06, + "loss": 0.6236151456832886, + "step": 79 + }, + { + "epoch": 0.02039775624681285, + "grad_norm": 0.6329667568206787, + "learning_rate": 8.121827411167514e-06, + "loss": 0.6225125789642334, + "step": 80 + }, + { + "epoch": 0.02065272819989801, + "grad_norm": 0.6687252521514893, + "learning_rate": 8.223350253807107e-06, + "loss": 0.6116753816604614, + "step": 81 + }, + { + "epoch": 0.020907700152983173, + "grad_norm": 0.646135151386261, + "learning_rate": 8.324873096446702e-06, + "loss": 0.6251896023750305, + "step": 82 + }, + { + "epoch": 0.02116267210606833, + "grad_norm": 0.6251548528671265, + "learning_rate": 8.426395939086295e-06, + "loss": 0.6129327416419983, + "step": 83 + }, + { + "epoch": 0.021417644059153494, + "grad_norm": 0.6150680780410767, + "learning_rate": 8.52791878172589e-06, + "loss": 0.6178444623947144, + "step": 84 + }, + { + "epoch": 0.021672616012238653, + "grad_norm": 0.645462691783905, + "learning_rate": 8.629441624365483e-06, + "loss": 0.6154327392578125, + "step": 85 + }, + { + "epoch": 0.021927587965323816, + "grad_norm": 0.6240130662918091, + "learning_rate": 8.730964467005076e-06, + "loss": 0.6073774695396423, + "step": 86 + }, + { + "epoch": 0.022182559918408975, + "grad_norm": 0.6194576621055603, + "learning_rate": 8.832487309644671e-06, + "loss": 0.6132628321647644, + "step": 87 + }, + { + "epoch": 0.022437531871494134, + "grad_norm": 0.6547966599464417, + "learning_rate": 8.934010152284264e-06, + "loss": 0.6120939254760742, + "step": 88 + }, + { + "epoch": 0.022692503824579296, + "grad_norm": 0.6388934850692749, + "learning_rate": 9.035532994923859e-06, + "loss": 0.6320643424987793, + "step": 89 + }, + { + "epoch": 0.022947475777664456, + "grad_norm": 0.6568025350570679, + "learning_rate": 9.137055837563452e-06, + "loss": 0.6370781064033508, + "step": 90 + }, + { + "epoch": 0.023202447730749618, + "grad_norm": 0.6334405541419983, + "learning_rate": 9.238578680203047e-06, + "loss": 0.6066848039627075, + "step": 91 + }, + { + "epoch": 0.023457419683834777, + "grad_norm": 0.6498663425445557, + "learning_rate": 9.34010152284264e-06, + "loss": 0.6138375401496887, + "step": 92 + }, + { + "epoch": 0.02371239163691994, + "grad_norm": 0.634401261806488, + "learning_rate": 9.441624365482235e-06, + "loss": 0.611154317855835, + "step": 93 + }, + { + "epoch": 0.0239673635900051, + "grad_norm": 0.6656029224395752, + "learning_rate": 9.543147208121828e-06, + "loss": 0.6110818386077881, + "step": 94 + }, + { + "epoch": 0.02422233554309026, + "grad_norm": 0.652365505695343, + "learning_rate": 9.644670050761421e-06, + "loss": 0.6210362911224365, + "step": 95 + }, + { + "epoch": 0.02447730749617542, + "grad_norm": 0.6741998791694641, + "learning_rate": 9.746192893401016e-06, + "loss": 0.6129822134971619, + "step": 96 + }, + { + "epoch": 0.024732279449260583, + "grad_norm": 0.6117521524429321, + "learning_rate": 9.84771573604061e-06, + "loss": 0.6152582168579102, + "step": 97 + }, + { + "epoch": 0.024987251402345742, + "grad_norm": 0.6950223445892334, + "learning_rate": 9.949238578680204e-06, + "loss": 0.6205112934112549, + "step": 98 + }, + { + "epoch": 0.0252422233554309, + "grad_norm": 0.6633530259132385, + "learning_rate": 1.0050761421319797e-05, + "loss": 0.6119738221168518, + "step": 99 + }, + { + "epoch": 0.025497195308516064, + "grad_norm": 0.668982744216919, + "learning_rate": 1.015228426395939e-05, + "loss": 0.6128796935081482, + "step": 100 + }, + { + "epoch": 0.025752167261601223, + "grad_norm": 0.754180371761322, + "learning_rate": 1.0253807106598985e-05, + "loss": 0.6129236221313477, + "step": 101 + }, + { + "epoch": 0.026007139214686385, + "grad_norm": 0.6716321706771851, + "learning_rate": 1.035532994923858e-05, + "loss": 0.6249738931655884, + "step": 102 + }, + { + "epoch": 0.026262111167771544, + "grad_norm": 0.6696272492408752, + "learning_rate": 1.0456852791878173e-05, + "loss": 0.6208000779151917, + "step": 103 + }, + { + "epoch": 0.026517083120856707, + "grad_norm": 0.7047821283340454, + "learning_rate": 1.0558375634517768e-05, + "loss": 0.6148910522460938, + "step": 104 + }, + { + "epoch": 0.026772055073941866, + "grad_norm": 0.715707004070282, + "learning_rate": 1.0659898477157361e-05, + "loss": 0.6176806688308716, + "step": 105 + }, + { + "epoch": 0.02702702702702703, + "grad_norm": 0.674106776714325, + "learning_rate": 1.0761421319796955e-05, + "loss": 0.6050000786781311, + "step": 106 + }, + { + "epoch": 0.027281998980112188, + "grad_norm": 0.7002929449081421, + "learning_rate": 1.0862944162436548e-05, + "loss": 0.5905170440673828, + "step": 107 + }, + { + "epoch": 0.027536970933197347, + "grad_norm": 0.7115480899810791, + "learning_rate": 1.0964467005076143e-05, + "loss": 0.6017693281173706, + "step": 108 + }, + { + "epoch": 0.02779194288628251, + "grad_norm": 0.6301277279853821, + "learning_rate": 1.1065989847715737e-05, + "loss": 0.5972022414207458, + "step": 109 + }, + { + "epoch": 0.02804691483936767, + "grad_norm": 0.6617989540100098, + "learning_rate": 1.116751269035533e-05, + "loss": 0.6111524105072021, + "step": 110 + }, + { + "epoch": 0.02830188679245283, + "grad_norm": 0.6753233075141907, + "learning_rate": 1.1269035532994925e-05, + "loss": 0.6101921796798706, + "step": 111 + }, + { + "epoch": 0.02855685874553799, + "grad_norm": 0.6855331063270569, + "learning_rate": 1.1370558375634519e-05, + "loss": 0.6058908700942993, + "step": 112 + }, + { + "epoch": 0.028811830698623152, + "grad_norm": 0.6627272367477417, + "learning_rate": 1.1472081218274113e-05, + "loss": 0.61765456199646, + "step": 113 + }, + { + "epoch": 0.02906680265170831, + "grad_norm": 0.7235305309295654, + "learning_rate": 1.1573604060913705e-05, + "loss": 0.6114434599876404, + "step": 114 + }, + { + "epoch": 0.029321774604793474, + "grad_norm": 0.6944801211357117, + "learning_rate": 1.16751269035533e-05, + "loss": 0.5895316004753113, + "step": 115 + }, + { + "epoch": 0.029576746557878633, + "grad_norm": 0.6143687963485718, + "learning_rate": 1.1776649746192895e-05, + "loss": 0.6181043982505798, + "step": 116 + }, + { + "epoch": 0.029831718510963796, + "grad_norm": 0.702218770980835, + "learning_rate": 1.1878172588832488e-05, + "loss": 0.6035462021827698, + "step": 117 + }, + { + "epoch": 0.030086690464048955, + "grad_norm": 0.649609386920929, + "learning_rate": 1.1979695431472083e-05, + "loss": 0.6008034348487854, + "step": 118 + }, + { + "epoch": 0.030341662417134114, + "grad_norm": 0.7478456497192383, + "learning_rate": 1.2081218274111678e-05, + "loss": 0.6118278503417969, + "step": 119 + }, + { + "epoch": 0.030596634370219276, + "grad_norm": 0.6522828936576843, + "learning_rate": 1.218274111675127e-05, + "loss": 0.6109780669212341, + "step": 120 + }, + { + "epoch": 0.030851606323304435, + "grad_norm": 0.6617293953895569, + "learning_rate": 1.2284263959390864e-05, + "loss": 0.6059006452560425, + "step": 121 + }, + { + "epoch": 0.031106578276389598, + "grad_norm": 0.7125791311264038, + "learning_rate": 1.2385786802030457e-05, + "loss": 0.5911107063293457, + "step": 122 + }, + { + "epoch": 0.03136155022947476, + "grad_norm": 0.7545065879821777, + "learning_rate": 1.2487309644670052e-05, + "loss": 0.6011978983879089, + "step": 123 + }, + { + "epoch": 0.03161652218255992, + "grad_norm": 0.7025015950202942, + "learning_rate": 1.2588832487309645e-05, + "loss": 0.6012612581253052, + "step": 124 + }, + { + "epoch": 0.03187149413564508, + "grad_norm": 0.7487797737121582, + "learning_rate": 1.269035532994924e-05, + "loss": 0.5970184803009033, + "step": 125 + }, + { + "epoch": 0.03212646608873024, + "grad_norm": 0.6467530727386475, + "learning_rate": 1.2791878172588835e-05, + "loss": 0.5995411276817322, + "step": 126 + }, + { + "epoch": 0.0323814380418154, + "grad_norm": 0.6963929533958435, + "learning_rate": 1.2893401015228428e-05, + "loss": 0.6060649752616882, + "step": 127 + }, + { + "epoch": 0.03263640999490056, + "grad_norm": 0.7099882364273071, + "learning_rate": 1.2994923857868021e-05, + "loss": 0.6047376394271851, + "step": 128 + }, + { + "epoch": 0.03289138194798572, + "grad_norm": 0.6539604067802429, + "learning_rate": 1.3096446700507614e-05, + "loss": 0.586577832698822, + "step": 129 + }, + { + "epoch": 0.03314635390107088, + "grad_norm": 0.6586759686470032, + "learning_rate": 1.3197969543147209e-05, + "loss": 0.5893585681915283, + "step": 130 + }, + { + "epoch": 0.03340132585415604, + "grad_norm": 0.6708022356033325, + "learning_rate": 1.3299492385786802e-05, + "loss": 0.5953554511070251, + "step": 131 + }, + { + "epoch": 0.033656297807241206, + "grad_norm": 0.669730007648468, + "learning_rate": 1.3401015228426397e-05, + "loss": 0.6061270236968994, + "step": 132 + }, + { + "epoch": 0.033911269760326365, + "grad_norm": 0.7589663863182068, + "learning_rate": 1.3502538071065992e-05, + "loss": 0.6139975786209106, + "step": 133 + }, + { + "epoch": 0.034166241713411524, + "grad_norm": 0.7068342566490173, + "learning_rate": 1.3604060913705585e-05, + "loss": 0.5982658267021179, + "step": 134 + }, + { + "epoch": 0.03442121366649668, + "grad_norm": 0.7072777152061462, + "learning_rate": 1.3705583756345178e-05, + "loss": 0.5927161574363708, + "step": 135 + }, + { + "epoch": 0.03467618561958185, + "grad_norm": 0.8180981874465942, + "learning_rate": 1.3807106598984771e-05, + "loss": 0.6063701510429382, + "step": 136 + }, + { + "epoch": 0.03493115757266701, + "grad_norm": 0.601114809513092, + "learning_rate": 1.3908629441624366e-05, + "loss": 0.574968159198761, + "step": 137 + }, + { + "epoch": 0.03518612952575217, + "grad_norm": 0.7802360653877258, + "learning_rate": 1.4010152284263961e-05, + "loss": 0.5933843851089478, + "step": 138 + }, + { + "epoch": 0.03544110147883733, + "grad_norm": 0.7044690847396851, + "learning_rate": 1.4111675126903554e-05, + "loss": 0.5892487168312073, + "step": 139 + }, + { + "epoch": 0.035696073431922486, + "grad_norm": 0.7081224918365479, + "learning_rate": 1.421319796954315e-05, + "loss": 0.602599024772644, + "step": 140 + }, + { + "epoch": 0.03595104538500765, + "grad_norm": 0.6481499075889587, + "learning_rate": 1.4314720812182742e-05, + "loss": 0.6026634573936462, + "step": 141 + }, + { + "epoch": 0.03620601733809281, + "grad_norm": 0.7358949184417725, + "learning_rate": 1.4416243654822337e-05, + "loss": 0.609551191329956, + "step": 142 + }, + { + "epoch": 0.03646098929117797, + "grad_norm": 0.6168767809867859, + "learning_rate": 1.4517766497461929e-05, + "loss": 0.591394305229187, + "step": 143 + }, + { + "epoch": 0.03671596124426313, + "grad_norm": 0.6998642086982727, + "learning_rate": 1.4619289340101523e-05, + "loss": 0.606634795665741, + "step": 144 + }, + { + "epoch": 0.036970933197348295, + "grad_norm": 0.6324466466903687, + "learning_rate": 1.4720812182741118e-05, + "loss": 0.6037495136260986, + "step": 145 + }, + { + "epoch": 0.037225905150433454, + "grad_norm": 0.7579569220542908, + "learning_rate": 1.4822335025380712e-05, + "loss": 0.5949714779853821, + "step": 146 + }, + { + "epoch": 0.03748087710351861, + "grad_norm": 0.708786129951477, + "learning_rate": 1.4923857868020306e-05, + "loss": 0.5930920839309692, + "step": 147 + }, + { + "epoch": 0.03773584905660377, + "grad_norm": 0.6507843732833862, + "learning_rate": 1.50253807106599e-05, + "loss": 0.6192787885665894, + "step": 148 + }, + { + "epoch": 0.03799082100968893, + "grad_norm": 0.6853079795837402, + "learning_rate": 1.5126903553299494e-05, + "loss": 0.5902525782585144, + "step": 149 + }, + { + "epoch": 0.0382457929627741, + "grad_norm": 0.7084800601005554, + "learning_rate": 1.5228426395939086e-05, + "loss": 0.5898470878601074, + "step": 150 + }, + { + "epoch": 0.038500764915859256, + "grad_norm": 0.6946215033531189, + "learning_rate": 1.5329949238578682e-05, + "loss": 0.6009114384651184, + "step": 151 + }, + { + "epoch": 0.038755736868944415, + "grad_norm": 0.6725452542304993, + "learning_rate": 1.5431472081218276e-05, + "loss": 0.5899134874343872, + "step": 152 + }, + { + "epoch": 0.039010708822029574, + "grad_norm": 0.7391459345817566, + "learning_rate": 1.553299492385787e-05, + "loss": 0.6052982211112976, + "step": 153 + }, + { + "epoch": 0.03926568077511474, + "grad_norm": 0.653334379196167, + "learning_rate": 1.5634517766497462e-05, + "loss": 0.5881596803665161, + "step": 154 + }, + { + "epoch": 0.0395206527281999, + "grad_norm": 0.70075923204422, + "learning_rate": 1.573604060913706e-05, + "loss": 0.5922362208366394, + "step": 155 + }, + { + "epoch": 0.03977562468128506, + "grad_norm": 0.6499072313308716, + "learning_rate": 1.583756345177665e-05, + "loss": 0.6015427708625793, + "step": 156 + }, + { + "epoch": 0.04003059663437022, + "grad_norm": 0.6751073002815247, + "learning_rate": 1.5939086294416245e-05, + "loss": 0.5883609652519226, + "step": 157 + }, + { + "epoch": 0.04028556858745538, + "grad_norm": 0.6795080900192261, + "learning_rate": 1.6040609137055838e-05, + "loss": 0.6162182688713074, + "step": 158 + }, + { + "epoch": 0.04054054054054054, + "grad_norm": 0.7047825455665588, + "learning_rate": 1.614213197969543e-05, + "loss": 0.5969679355621338, + "step": 159 + }, + { + "epoch": 0.0407955124936257, + "grad_norm": 0.6710919737815857, + "learning_rate": 1.6243654822335028e-05, + "loss": 0.5879135131835938, + "step": 160 + }, + { + "epoch": 0.04105048444671086, + "grad_norm": 0.6726529002189636, + "learning_rate": 1.634517766497462e-05, + "loss": 0.5928366184234619, + "step": 161 + }, + { + "epoch": 0.04130545639979602, + "grad_norm": 0.6386929154396057, + "learning_rate": 1.6446700507614214e-05, + "loss": 0.6050669550895691, + "step": 162 + }, + { + "epoch": 0.041560428352881186, + "grad_norm": 0.7748963832855225, + "learning_rate": 1.654822335025381e-05, + "loss": 0.5833348035812378, + "step": 163 + }, + { + "epoch": 0.041815400305966345, + "grad_norm": 0.7063996195793152, + "learning_rate": 1.6649746192893404e-05, + "loss": 0.588036298751831, + "step": 164 + }, + { + "epoch": 0.042070372259051504, + "grad_norm": 0.663123369216919, + "learning_rate": 1.6751269035532997e-05, + "loss": 0.5778357982635498, + "step": 165 + }, + { + "epoch": 0.04232534421213666, + "grad_norm": 0.715749204158783, + "learning_rate": 1.685279187817259e-05, + "loss": 0.5950402617454529, + "step": 166 + }, + { + "epoch": 0.04258031616522182, + "grad_norm": 0.657593309879303, + "learning_rate": 1.6954314720812183e-05, + "loss": 0.5839961767196655, + "step": 167 + }, + { + "epoch": 0.04283528811830699, + "grad_norm": 0.7044532299041748, + "learning_rate": 1.705583756345178e-05, + "loss": 0.5974175930023193, + "step": 168 + }, + { + "epoch": 0.04309026007139215, + "grad_norm": 0.678438663482666, + "learning_rate": 1.7157360406091373e-05, + "loss": 0.5922316312789917, + "step": 169 + }, + { + "epoch": 0.043345232024477306, + "grad_norm": 0.7534765601158142, + "learning_rate": 1.7258883248730966e-05, + "loss": 0.5797853469848633, + "step": 170 + }, + { + "epoch": 0.043600203977562466, + "grad_norm": 0.6486764550209045, + "learning_rate": 1.736040609137056e-05, + "loss": 0.5776516795158386, + "step": 171 + }, + { + "epoch": 0.04385517593064763, + "grad_norm": 0.6965651512145996, + "learning_rate": 1.7461928934010152e-05, + "loss": 0.5867475271224976, + "step": 172 + }, + { + "epoch": 0.04411014788373279, + "grad_norm": 0.7084111571311951, + "learning_rate": 1.7563451776649745e-05, + "loss": 0.5911486744880676, + "step": 173 + }, + { + "epoch": 0.04436511983681795, + "grad_norm": 0.6508429646492004, + "learning_rate": 1.7664974619289342e-05, + "loss": 0.5861883163452148, + "step": 174 + }, + { + "epoch": 0.04462009178990311, + "grad_norm": 0.6957427859306335, + "learning_rate": 1.7766497461928935e-05, + "loss": 0.5958301424980164, + "step": 175 + }, + { + "epoch": 0.04487506374298827, + "grad_norm": 0.6256112456321716, + "learning_rate": 1.786802030456853e-05, + "loss": 0.5757142901420593, + "step": 176 + }, + { + "epoch": 0.045130035696073434, + "grad_norm": 0.6769145727157593, + "learning_rate": 1.7969543147208125e-05, + "loss": 0.6127809286117554, + "step": 177 + }, + { + "epoch": 0.04538500764915859, + "grad_norm": 0.6851696372032166, + "learning_rate": 1.8071065989847718e-05, + "loss": 0.5945026874542236, + "step": 178 + }, + { + "epoch": 0.04563997960224375, + "grad_norm": 0.6712672114372253, + "learning_rate": 1.817258883248731e-05, + "loss": 0.5810881853103638, + "step": 179 + }, + { + "epoch": 0.04589495155532891, + "grad_norm": 0.6257203817367554, + "learning_rate": 1.8274111675126904e-05, + "loss": 0.5861809253692627, + "step": 180 + }, + { + "epoch": 0.04614992350841408, + "grad_norm": 0.7018057107925415, + "learning_rate": 1.8375634517766498e-05, + "loss": 0.5822906494140625, + "step": 181 + }, + { + "epoch": 0.046404895461499236, + "grad_norm": 0.6754285097122192, + "learning_rate": 1.8477157360406094e-05, + "loss": 0.5803866386413574, + "step": 182 + }, + { + "epoch": 0.046659867414584395, + "grad_norm": 0.6747144460678101, + "learning_rate": 1.8578680203045687e-05, + "loss": 0.5928809642791748, + "step": 183 + }, + { + "epoch": 0.046914839367669554, + "grad_norm": 0.6424155235290527, + "learning_rate": 1.868020304568528e-05, + "loss": 0.6039294004440308, + "step": 184 + }, + { + "epoch": 0.04716981132075472, + "grad_norm": 0.6336119771003723, + "learning_rate": 1.8781725888324877e-05, + "loss": 0.5872094631195068, + "step": 185 + }, + { + "epoch": 0.04742478327383988, + "grad_norm": 0.8231200575828552, + "learning_rate": 1.888324873096447e-05, + "loss": 0.5870229005813599, + "step": 186 + }, + { + "epoch": 0.04767975522692504, + "grad_norm": 0.6766306161880493, + "learning_rate": 1.8984771573604063e-05, + "loss": 0.596156120300293, + "step": 187 + }, + { + "epoch": 0.0479347271800102, + "grad_norm": 0.7034800052642822, + "learning_rate": 1.9086294416243656e-05, + "loss": 0.5882968306541443, + "step": 188 + }, + { + "epoch": 0.04818969913309536, + "grad_norm": 0.6791765093803406, + "learning_rate": 1.918781725888325e-05, + "loss": 0.5723391771316528, + "step": 189 + }, + { + "epoch": 0.04844467108618052, + "grad_norm": 0.7587200999259949, + "learning_rate": 1.9289340101522843e-05, + "loss": 0.5900762677192688, + "step": 190 + }, + { + "epoch": 0.04869964303926568, + "grad_norm": 0.7111191749572754, + "learning_rate": 1.939086294416244e-05, + "loss": 0.5732899308204651, + "step": 191 + }, + { + "epoch": 0.04895461499235084, + "grad_norm": 0.6762776374816895, + "learning_rate": 1.9492385786802032e-05, + "loss": 0.5881006717681885, + "step": 192 + }, + { + "epoch": 0.049209586945436, + "grad_norm": 0.6622965335845947, + "learning_rate": 1.9593908629441626e-05, + "loss": 0.5785661935806274, + "step": 193 + }, + { + "epoch": 0.049464558898521166, + "grad_norm": 0.6711365580558777, + "learning_rate": 1.969543147208122e-05, + "loss": 0.5604966282844543, + "step": 194 + }, + { + "epoch": 0.049719530851606325, + "grad_norm": 0.6800675392150879, + "learning_rate": 1.9796954314720812e-05, + "loss": 0.5834771394729614, + "step": 195 + }, + { + "epoch": 0.049974502804691484, + "grad_norm": 0.7173908352851868, + "learning_rate": 1.989847715736041e-05, + "loss": 0.5812022686004639, + "step": 196 + }, + { + "epoch": 0.05022947475777664, + "grad_norm": 0.7289883494377136, + "learning_rate": 2e-05, + "loss": 0.5865119099617004, + "step": 197 + }, + { + "epoch": 0.0504844467108618, + "grad_norm": 0.6825205087661743, + "learning_rate": 1.9999996443546215e-05, + "loss": 0.583699643611908, + "step": 198 + }, + { + "epoch": 0.05073941866394697, + "grad_norm": 0.7007641196250916, + "learning_rate": 1.9999985774187383e-05, + "loss": 0.5956920981407166, + "step": 199 + }, + { + "epoch": 0.05099439061703213, + "grad_norm": 0.6625441312789917, + "learning_rate": 1.9999967991931094e-05, + "loss": 0.5949492454528809, + "step": 200 + }, + { + "epoch": 0.051249362570117286, + "grad_norm": 0.6780283451080322, + "learning_rate": 1.999994309679e-05, + "loss": 0.5827687978744507, + "step": 201 + }, + { + "epoch": 0.051504334523202445, + "grad_norm": 0.6601573824882507, + "learning_rate": 1.9999911088781804e-05, + "loss": 0.5779587626457214, + "step": 202 + }, + { + "epoch": 0.05175930647628761, + "grad_norm": 0.6573741436004639, + "learning_rate": 1.9999871967929278e-05, + "loss": 0.599594235420227, + "step": 203 + }, + { + "epoch": 0.05201427842937277, + "grad_norm": 0.639542281627655, + "learning_rate": 1.9999825734260247e-05, + "loss": 0.5684338808059692, + "step": 204 + }, + { + "epoch": 0.05226925038245793, + "grad_norm": 0.6658681035041809, + "learning_rate": 1.9999772387807592e-05, + "loss": 0.5895668268203735, + "step": 205 + }, + { + "epoch": 0.05252422233554309, + "grad_norm": 0.6262486577033997, + "learning_rate": 1.9999711928609265e-05, + "loss": 0.5960357189178467, + "step": 206 + }, + { + "epoch": 0.05277919428862825, + "grad_norm": 0.6094916462898254, + "learning_rate": 1.9999644356708262e-05, + "loss": 0.5768816471099854, + "step": 207 + }, + { + "epoch": 0.053034166241713414, + "grad_norm": 0.6605136394500732, + "learning_rate": 1.999956967215265e-05, + "loss": 0.5881094932556152, + "step": 208 + }, + { + "epoch": 0.05328913819479857, + "grad_norm": 0.7232625484466553, + "learning_rate": 1.9999487874995556e-05, + "loss": 0.5898884534835815, + "step": 209 + }, + { + "epoch": 0.05354411014788373, + "grad_norm": 0.5892757773399353, + "learning_rate": 1.9999398965295157e-05, + "loss": 0.5826902985572815, + "step": 210 + }, + { + "epoch": 0.05379908210096889, + "grad_norm": 0.6755984425544739, + "learning_rate": 1.9999302943114692e-05, + "loss": 0.5842537879943848, + "step": 211 + }, + { + "epoch": 0.05405405405405406, + "grad_norm": 0.6419636011123657, + "learning_rate": 1.999919980852246e-05, + "loss": 0.5654566287994385, + "step": 212 + }, + { + "epoch": 0.054309026007139216, + "grad_norm": 0.7050777673721313, + "learning_rate": 1.9999089561591826e-05, + "loss": 0.5780625343322754, + "step": 213 + }, + { + "epoch": 0.054563997960224375, + "grad_norm": 0.6014391183853149, + "learning_rate": 1.9998972202401206e-05, + "loss": 0.5929491519927979, + "step": 214 + }, + { + "epoch": 0.054818969913309534, + "grad_norm": 0.662752628326416, + "learning_rate": 1.9998847731034073e-05, + "loss": 0.5813001990318298, + "step": 215 + }, + { + "epoch": 0.05507394186639469, + "grad_norm": 0.6744083166122437, + "learning_rate": 1.9998716147578963e-05, + "loss": 0.5822510123252869, + "step": 216 + }, + { + "epoch": 0.05532891381947986, + "grad_norm": 0.6255607008934021, + "learning_rate": 1.999857745212947e-05, + "loss": 0.5750855803489685, + "step": 217 + }, + { + "epoch": 0.05558388577256502, + "grad_norm": 0.7068489789962769, + "learning_rate": 1.999843164478425e-05, + "loss": 0.5958958864212036, + "step": 218 + }, + { + "epoch": 0.05583885772565018, + "grad_norm": 0.6492578983306885, + "learning_rate": 1.9998278725647015e-05, + "loss": 0.5895745754241943, + "step": 219 + }, + { + "epoch": 0.05609382967873534, + "grad_norm": 0.6811429262161255, + "learning_rate": 1.9998118694826524e-05, + "loss": 0.5909900665283203, + "step": 220 + }, + { + "epoch": 0.0563488016318205, + "grad_norm": 0.6466650366783142, + "learning_rate": 1.999795155243662e-05, + "loss": 0.5822628140449524, + "step": 221 + }, + { + "epoch": 0.05660377358490566, + "grad_norm": 0.6460858583450317, + "learning_rate": 1.999777729859618e-05, + "loss": 0.5704171061515808, + "step": 222 + }, + { + "epoch": 0.05685874553799082, + "grad_norm": 0.7094568014144897, + "learning_rate": 1.999759593342916e-05, + "loss": 0.5730971693992615, + "step": 223 + }, + { + "epoch": 0.05711371749107598, + "grad_norm": 0.6798518896102905, + "learning_rate": 1.9997407457064547e-05, + "loss": 0.5954874157905579, + "step": 224 + }, + { + "epoch": 0.05736868944416114, + "grad_norm": 0.6695542335510254, + "learning_rate": 1.9997211869636417e-05, + "loss": 0.5870078802108765, + "step": 225 + }, + { + "epoch": 0.057623661397246305, + "grad_norm": 0.6144403219223022, + "learning_rate": 1.9997009171283882e-05, + "loss": 0.5849398374557495, + "step": 226 + }, + { + "epoch": 0.057878633350331464, + "grad_norm": 0.7087647318840027, + "learning_rate": 1.9996799362151124e-05, + "loss": 0.5610650777816772, + "step": 227 + }, + { + "epoch": 0.05813360530341662, + "grad_norm": 0.6279194951057434, + "learning_rate": 1.999658244238737e-05, + "loss": 0.5676144361495972, + "step": 228 + }, + { + "epoch": 0.05838857725650178, + "grad_norm": 0.7145519256591797, + "learning_rate": 1.9996358412146922e-05, + "loss": 0.5789085626602173, + "step": 229 + }, + { + "epoch": 0.05864354920958695, + "grad_norm": 0.6787282824516296, + "learning_rate": 1.999612727158913e-05, + "loss": 0.5748767852783203, + "step": 230 + }, + { + "epoch": 0.05889852116267211, + "grad_norm": 0.6754794120788574, + "learning_rate": 1.99958890208784e-05, + "loss": 0.5880950093269348, + "step": 231 + }, + { + "epoch": 0.059153493115757266, + "grad_norm": 3.1220149993896484, + "learning_rate": 1.999564366018419e-05, + "loss": 0.5785700082778931, + "step": 232 + }, + { + "epoch": 0.059408465068842425, + "grad_norm": 0.8344593644142151, + "learning_rate": 1.999539118968104e-05, + "loss": 0.5717012882232666, + "step": 233 + }, + { + "epoch": 0.05966343702192759, + "grad_norm": 0.6129970550537109, + "learning_rate": 1.9995131609548513e-05, + "loss": 0.5926172733306885, + "step": 234 + }, + { + "epoch": 0.05991840897501275, + "grad_norm": 0.7066675424575806, + "learning_rate": 1.9994864919971255e-05, + "loss": 0.5734254717826843, + "step": 235 + }, + { + "epoch": 0.06017338092809791, + "grad_norm": 0.7014307975769043, + "learning_rate": 1.9994591121138954e-05, + "loss": 0.5930376052856445, + "step": 236 + }, + { + "epoch": 0.06042835288118307, + "grad_norm": 0.6754729747772217, + "learning_rate": 1.999431021324637e-05, + "loss": 0.5725037455558777, + "step": 237 + }, + { + "epoch": 0.06068332483426823, + "grad_norm": 0.714168131351471, + "learning_rate": 1.99940221964933e-05, + "loss": 0.5748128890991211, + "step": 238 + }, + { + "epoch": 0.060938296787353394, + "grad_norm": 0.6713688969612122, + "learning_rate": 1.9993727071084613e-05, + "loss": 0.5799189805984497, + "step": 239 + }, + { + "epoch": 0.06119326874043855, + "grad_norm": 0.6011117696762085, + "learning_rate": 1.999342483723023e-05, + "loss": 0.5691922903060913, + "step": 240 + }, + { + "epoch": 0.06144824069352371, + "grad_norm": 0.6895564794540405, + "learning_rate": 1.9993115495145125e-05, + "loss": 0.5746656060218811, + "step": 241 + }, + { + "epoch": 0.06170321264660887, + "grad_norm": 0.6951008439064026, + "learning_rate": 1.999279904504933e-05, + "loss": 0.574233889579773, + "step": 242 + }, + { + "epoch": 0.06195818459969404, + "grad_norm": 0.6126100420951843, + "learning_rate": 1.999247548716793e-05, + "loss": 0.565665602684021, + "step": 243 + }, + { + "epoch": 0.062213156552779196, + "grad_norm": 0.6327571868896484, + "learning_rate": 1.9992144821731075e-05, + "loss": 0.5754146575927734, + "step": 244 + }, + { + "epoch": 0.062468128505864355, + "grad_norm": 0.6757508516311646, + "learning_rate": 1.999180704897396e-05, + "loss": 0.5776671171188354, + "step": 245 + }, + { + "epoch": 0.06272310045894952, + "grad_norm": 0.6368160843849182, + "learning_rate": 1.9991462169136843e-05, + "loss": 0.5814864635467529, + "step": 246 + }, + { + "epoch": 0.06297807241203468, + "grad_norm": 0.6689087748527527, + "learning_rate": 1.9991110182465032e-05, + "loss": 0.5699710845947266, + "step": 247 + }, + { + "epoch": 0.06323304436511984, + "grad_norm": 0.6050186157226562, + "learning_rate": 1.9990751089208894e-05, + "loss": 0.5749836564064026, + "step": 248 + }, + { + "epoch": 0.063488016318205, + "grad_norm": 0.6237219572067261, + "learning_rate": 1.999038488962384e-05, + "loss": 0.5625735521316528, + "step": 249 + }, + { + "epoch": 0.06374298827129016, + "grad_norm": 0.6202013492584229, + "learning_rate": 1.9990011583970355e-05, + "loss": 0.5654894113540649, + "step": 250 + }, + { + "epoch": 0.06399796022437532, + "grad_norm": 0.5948776602745056, + "learning_rate": 1.998963117251396e-05, + "loss": 0.5873595476150513, + "step": 251 + }, + { + "epoch": 0.06425293217746048, + "grad_norm": 0.6015293002128601, + "learning_rate": 1.9989243655525248e-05, + "loss": 0.5803345441818237, + "step": 252 + }, + { + "epoch": 0.06450790413054563, + "grad_norm": 0.5996026992797852, + "learning_rate": 1.9988849033279848e-05, + "loss": 0.5710455775260925, + "step": 253 + }, + { + "epoch": 0.0647628760836308, + "grad_norm": 0.5705514550209045, + "learning_rate": 1.998844730605845e-05, + "loss": 0.5731304883956909, + "step": 254 + }, + { + "epoch": 0.06501784803671597, + "grad_norm": 0.5985062718391418, + "learning_rate": 1.9988038474146804e-05, + "loss": 0.5627564191818237, + "step": 255 + }, + { + "epoch": 0.06527281998980113, + "grad_norm": 0.6054355502128601, + "learning_rate": 1.9987622537835708e-05, + "loss": 0.5643206834793091, + "step": 256 + }, + { + "epoch": 0.06552779194288628, + "grad_norm": 0.643243670463562, + "learning_rate": 1.998719949742101e-05, + "loss": 0.5702025890350342, + "step": 257 + }, + { + "epoch": 0.06578276389597144, + "grad_norm": 0.6212320327758789, + "learning_rate": 1.998676935320362e-05, + "loss": 0.5596565008163452, + "step": 258 + }, + { + "epoch": 0.0660377358490566, + "grad_norm": 0.6149182915687561, + "learning_rate": 1.998633210548949e-05, + "loss": 0.5833432078361511, + "step": 259 + }, + { + "epoch": 0.06629270780214176, + "grad_norm": 0.5876778960227966, + "learning_rate": 1.9985887754589634e-05, + "loss": 0.563776969909668, + "step": 260 + }, + { + "epoch": 0.06654767975522692, + "grad_norm": 0.6171606183052063, + "learning_rate": 1.9985436300820108e-05, + "loss": 0.5687385201454163, + "step": 261 + }, + { + "epoch": 0.06680265170831208, + "grad_norm": 0.6119545698165894, + "learning_rate": 1.9984977744502038e-05, + "loss": 0.5770053267478943, + "step": 262 + }, + { + "epoch": 0.06705762366139725, + "grad_norm": 0.625106155872345, + "learning_rate": 1.9984512085961584e-05, + "loss": 0.5761595964431763, + "step": 263 + }, + { + "epoch": 0.06731259561448241, + "grad_norm": 0.627996563911438, + "learning_rate": 1.9984039325529965e-05, + "loss": 0.5653480291366577, + "step": 264 + }, + { + "epoch": 0.06756756756756757, + "grad_norm": 0.6073549389839172, + "learning_rate": 1.9983559463543453e-05, + "loss": 0.5723285675048828, + "step": 265 + }, + { + "epoch": 0.06782253952065273, + "grad_norm": 0.6070031523704529, + "learning_rate": 1.9983072500343367e-05, + "loss": 0.5722025632858276, + "step": 266 + }, + { + "epoch": 0.06807751147373789, + "grad_norm": 0.6099826097488403, + "learning_rate": 1.9982578436276084e-05, + "loss": 0.5669994354248047, + "step": 267 + }, + { + "epoch": 0.06833248342682305, + "grad_norm": 0.565504252910614, + "learning_rate": 1.998207727169302e-05, + "loss": 0.5737874507904053, + "step": 268 + }, + { + "epoch": 0.06858745537990821, + "grad_norm": 0.6359668970108032, + "learning_rate": 1.9981569006950655e-05, + "loss": 0.5619522929191589, + "step": 269 + }, + { + "epoch": 0.06884242733299337, + "grad_norm": 0.5864189863204956, + "learning_rate": 1.998105364241051e-05, + "loss": 0.5720523595809937, + "step": 270 + }, + { + "epoch": 0.06909739928607853, + "grad_norm": 0.6055141091346741, + "learning_rate": 1.9980531178439157e-05, + "loss": 0.5729068517684937, + "step": 271 + }, + { + "epoch": 0.0693523712391637, + "grad_norm": 0.5796358585357666, + "learning_rate": 1.9980001615408228e-05, + "loss": 0.5904380083084106, + "step": 272 + }, + { + "epoch": 0.06960734319224886, + "grad_norm": 0.5816705822944641, + "learning_rate": 1.9979464953694388e-05, + "loss": 0.561646580696106, + "step": 273 + }, + { + "epoch": 0.06986231514533402, + "grad_norm": 0.5701183676719666, + "learning_rate": 1.9978921193679362e-05, + "loss": 0.5620859861373901, + "step": 274 + }, + { + "epoch": 0.07011728709841918, + "grad_norm": 0.640988826751709, + "learning_rate": 1.997837033574992e-05, + "loss": 0.5681776404380798, + "step": 275 + }, + { + "epoch": 0.07037225905150434, + "grad_norm": 0.5843631625175476, + "learning_rate": 1.9977812380297888e-05, + "loss": 0.5757836103439331, + "step": 276 + }, + { + "epoch": 0.0706272310045895, + "grad_norm": 0.6079508066177368, + "learning_rate": 1.9977247327720128e-05, + "loss": 0.5664551258087158, + "step": 277 + }, + { + "epoch": 0.07088220295767465, + "grad_norm": 0.6057885885238647, + "learning_rate": 1.9976675178418562e-05, + "loss": 0.5635764598846436, + "step": 278 + }, + { + "epoch": 0.07113717491075981, + "grad_norm": 0.5704780220985413, + "learning_rate": 1.997609593280015e-05, + "loss": 0.5763840675354004, + "step": 279 + }, + { + "epoch": 0.07139214686384497, + "grad_norm": 0.6236803531646729, + "learning_rate": 1.9975509591276902e-05, + "loss": 0.5770968794822693, + "step": 280 + }, + { + "epoch": 0.07164711881693014, + "grad_norm": 0.5445568561553955, + "learning_rate": 1.9974916154265887e-05, + "loss": 0.5587365627288818, + "step": 281 + }, + { + "epoch": 0.0719020907700153, + "grad_norm": 0.5997340679168701, + "learning_rate": 1.99743156221892e-05, + "loss": 0.5820911526679993, + "step": 282 + }, + { + "epoch": 0.07215706272310046, + "grad_norm": 0.6064010858535767, + "learning_rate": 1.9973707995474e-05, + "loss": 0.5616083145141602, + "step": 283 + }, + { + "epoch": 0.07241203467618562, + "grad_norm": 0.5523048043251038, + "learning_rate": 1.997309327455249e-05, + "loss": 0.5786893367767334, + "step": 284 + }, + { + "epoch": 0.07266700662927078, + "grad_norm": 0.544638454914093, + "learning_rate": 1.9972471459861908e-05, + "loss": 0.5746819972991943, + "step": 285 + }, + { + "epoch": 0.07292197858235594, + "grad_norm": 0.5817301273345947, + "learning_rate": 1.997184255184455e-05, + "loss": 0.5628771185874939, + "step": 286 + }, + { + "epoch": 0.0731769505354411, + "grad_norm": 0.5777922868728638, + "learning_rate": 1.9971206550947748e-05, + "loss": 0.5575648546218872, + "step": 287 + }, + { + "epoch": 0.07343192248852626, + "grad_norm": 0.591174840927124, + "learning_rate": 1.9970563457623885e-05, + "loss": 0.5619316101074219, + "step": 288 + }, + { + "epoch": 0.07368689444161142, + "grad_norm": 0.6272574067115784, + "learning_rate": 1.996991327233039e-05, + "loss": 0.554466962814331, + "step": 289 + }, + { + "epoch": 0.07394186639469659, + "grad_norm": 0.6420910954475403, + "learning_rate": 1.996925599552973e-05, + "loss": 0.5560216903686523, + "step": 290 + }, + { + "epoch": 0.07419683834778175, + "grad_norm": 0.6134066581726074, + "learning_rate": 1.9968591627689428e-05, + "loss": 0.5638004541397095, + "step": 291 + }, + { + "epoch": 0.07445181030086691, + "grad_norm": 0.5705217123031616, + "learning_rate": 1.996792016928203e-05, + "loss": 0.5734155178070068, + "step": 292 + }, + { + "epoch": 0.07470678225395207, + "grad_norm": 0.6209928393363953, + "learning_rate": 1.9967241620785148e-05, + "loss": 0.5502181053161621, + "step": 293 + }, + { + "epoch": 0.07496175420703723, + "grad_norm": 0.5950906276702881, + "learning_rate": 1.9966555982681418e-05, + "loss": 0.5590144991874695, + "step": 294 + }, + { + "epoch": 0.07521672616012239, + "grad_norm": 0.5660822987556458, + "learning_rate": 1.996586325545854e-05, + "loss": 0.5804231762886047, + "step": 295 + }, + { + "epoch": 0.07547169811320754, + "grad_norm": 0.6057717204093933, + "learning_rate": 1.996516343960924e-05, + "loss": 0.5541191101074219, + "step": 296 + }, + { + "epoch": 0.0757266700662927, + "grad_norm": 0.6694291234016418, + "learning_rate": 1.9964456535631287e-05, + "loss": 0.5697247385978699, + "step": 297 + }, + { + "epoch": 0.07598164201937786, + "grad_norm": 0.6713988184928894, + "learning_rate": 1.9963742544027495e-05, + "loss": 0.5601903200149536, + "step": 298 + }, + { + "epoch": 0.07623661397246304, + "grad_norm": 0.5943032503128052, + "learning_rate": 1.9963021465305724e-05, + "loss": 0.5566859841346741, + "step": 299 + }, + { + "epoch": 0.0764915859255482, + "grad_norm": 0.6334525942802429, + "learning_rate": 1.996229329997887e-05, + "loss": 0.5652881860733032, + "step": 300 + }, + { + "epoch": 0.07674655787863335, + "grad_norm": 0.6462168097496033, + "learning_rate": 1.996155804856487e-05, + "loss": 0.568797767162323, + "step": 301 + }, + { + "epoch": 0.07700152983171851, + "grad_norm": 0.5707945227622986, + "learning_rate": 1.9960815711586695e-05, + "loss": 0.5688931345939636, + "step": 302 + }, + { + "epoch": 0.07725650178480367, + "grad_norm": 0.5915647745132446, + "learning_rate": 1.996006628957237e-05, + "loss": 0.5620356798171997, + "step": 303 + }, + { + "epoch": 0.07751147373788883, + "grad_norm": 0.6422096490859985, + "learning_rate": 1.995930978305495e-05, + "loss": 0.5672441720962524, + "step": 304 + }, + { + "epoch": 0.07776644569097399, + "grad_norm": 0.5536637306213379, + "learning_rate": 1.9958546192572535e-05, + "loss": 0.567467987537384, + "step": 305 + }, + { + "epoch": 0.07802141764405915, + "grad_norm": 0.5747365355491638, + "learning_rate": 1.995777551866825e-05, + "loss": 0.5674901008605957, + "step": 306 + }, + { + "epoch": 0.07827638959714431, + "grad_norm": 0.5945401191711426, + "learning_rate": 1.9956997761890277e-05, + "loss": 0.5611063838005066, + "step": 307 + }, + { + "epoch": 0.07853136155022948, + "grad_norm": 0.5844597816467285, + "learning_rate": 1.9956212922791825e-05, + "loss": 0.5652987360954285, + "step": 308 + }, + { + "epoch": 0.07878633350331464, + "grad_norm": 0.5997607111930847, + "learning_rate": 1.995542100193114e-05, + "loss": 0.5385614633560181, + "step": 309 + }, + { + "epoch": 0.0790413054563998, + "grad_norm": 0.5956833362579346, + "learning_rate": 1.9954621999871512e-05, + "loss": 0.5502104759216309, + "step": 310 + }, + { + "epoch": 0.07929627740948496, + "grad_norm": 0.6196988821029663, + "learning_rate": 1.9953815917181258e-05, + "loss": 0.554365336894989, + "step": 311 + }, + { + "epoch": 0.07955124936257012, + "grad_norm": 0.5872536897659302, + "learning_rate": 1.9953002754433743e-05, + "loss": 0.5669418573379517, + "step": 312 + }, + { + "epoch": 0.07980622131565528, + "grad_norm": 0.5234706401824951, + "learning_rate": 1.995218251220736e-05, + "loss": 0.5619447231292725, + "step": 313 + }, + { + "epoch": 0.08006119326874044, + "grad_norm": 0.6450950503349304, + "learning_rate": 1.995135519108554e-05, + "loss": 0.5421331524848938, + "step": 314 + }, + { + "epoch": 0.0803161652218256, + "grad_norm": 0.5748299956321716, + "learning_rate": 1.995052079165675e-05, + "loss": 0.5626130700111389, + "step": 315 + }, + { + "epoch": 0.08057113717491075, + "grad_norm": 0.5997022390365601, + "learning_rate": 1.994967931451449e-05, + "loss": 0.5518707036972046, + "step": 316 + }, + { + "epoch": 0.08082610912799593, + "grad_norm": 0.6092442274093628, + "learning_rate": 1.994883076025729e-05, + "loss": 0.5542733073234558, + "step": 317 + }, + { + "epoch": 0.08108108108108109, + "grad_norm": 0.5561699271202087, + "learning_rate": 1.9947975129488726e-05, + "loss": 0.5553290247917175, + "step": 318 + }, + { + "epoch": 0.08133605303416624, + "grad_norm": 0.5849249362945557, + "learning_rate": 1.9947112422817397e-05, + "loss": 0.562719464302063, + "step": 319 + }, + { + "epoch": 0.0815910249872514, + "grad_norm": 0.5908464193344116, + "learning_rate": 1.994624264085694e-05, + "loss": 0.5615310668945312, + "step": 320 + }, + { + "epoch": 0.08184599694033656, + "grad_norm": 0.5735774040222168, + "learning_rate": 1.9945365784226018e-05, + "loss": 0.5658733248710632, + "step": 321 + }, + { + "epoch": 0.08210096889342172, + "grad_norm": 0.5955777168273926, + "learning_rate": 1.9944481853548335e-05, + "loss": 0.5692462921142578, + "step": 322 + }, + { + "epoch": 0.08235594084650688, + "grad_norm": 0.5768380761146545, + "learning_rate": 1.994359084945262e-05, + "loss": 0.5534395575523376, + "step": 323 + }, + { + "epoch": 0.08261091279959204, + "grad_norm": 0.5331956744194031, + "learning_rate": 1.9942692772572645e-05, + "loss": 0.561090886592865, + "step": 324 + }, + { + "epoch": 0.0828658847526772, + "grad_norm": 0.564062237739563, + "learning_rate": 1.9941787623547194e-05, + "loss": 0.5530338287353516, + "step": 325 + }, + { + "epoch": 0.08312085670576237, + "grad_norm": 0.5807541012763977, + "learning_rate": 1.9940875403020095e-05, + "loss": 0.5750647783279419, + "step": 326 + }, + { + "epoch": 0.08337582865884753, + "grad_norm": 0.5979731678962708, + "learning_rate": 1.9939956111640198e-05, + "loss": 0.5668591260910034, + "step": 327 + }, + { + "epoch": 0.08363080061193269, + "grad_norm": 0.5643814206123352, + "learning_rate": 1.993902975006139e-05, + "loss": 0.5625386834144592, + "step": 328 + }, + { + "epoch": 0.08388577256501785, + "grad_norm": 0.7797948122024536, + "learning_rate": 1.9938096318942583e-05, + "loss": 0.5680009722709656, + "step": 329 + }, + { + "epoch": 0.08414074451810301, + "grad_norm": 0.6033815741539001, + "learning_rate": 1.993715581894772e-05, + "loss": 0.551729679107666, + "step": 330 + }, + { + "epoch": 0.08439571647118817, + "grad_norm": 0.7249321937561035, + "learning_rate": 1.9936208250745767e-05, + "loss": 0.5621654987335205, + "step": 331 + }, + { + "epoch": 0.08465068842427333, + "grad_norm": 0.5542919635772705, + "learning_rate": 1.9935253615010718e-05, + "loss": 0.5623407363891602, + "step": 332 + }, + { + "epoch": 0.08490566037735849, + "grad_norm": 0.5352950692176819, + "learning_rate": 1.9934291912421602e-05, + "loss": 0.5668007135391235, + "step": 333 + }, + { + "epoch": 0.08516063233044364, + "grad_norm": 0.58268803358078, + "learning_rate": 1.993332314366247e-05, + "loss": 0.5632613301277161, + "step": 334 + }, + { + "epoch": 0.08541560428352882, + "grad_norm": 0.5689071416854858, + "learning_rate": 1.9932347309422393e-05, + "loss": 0.5548432469367981, + "step": 335 + }, + { + "epoch": 0.08567057623661398, + "grad_norm": 0.5977305173873901, + "learning_rate": 1.9931364410395474e-05, + "loss": 0.5616758465766907, + "step": 336 + }, + { + "epoch": 0.08592554818969914, + "grad_norm": 0.5791174173355103, + "learning_rate": 1.9930374447280844e-05, + "loss": 0.5640117526054382, + "step": 337 + }, + { + "epoch": 0.0861805201427843, + "grad_norm": 0.5817056894302368, + "learning_rate": 1.992937742078265e-05, + "loss": 0.5501354932785034, + "step": 338 + }, + { + "epoch": 0.08643549209586945, + "grad_norm": 0.555757462978363, + "learning_rate": 1.9928373331610068e-05, + "loss": 0.5771246552467346, + "step": 339 + }, + { + "epoch": 0.08669046404895461, + "grad_norm": 0.5622315406799316, + "learning_rate": 1.99273621804773e-05, + "loss": 0.5560314655303955, + "step": 340 + }, + { + "epoch": 0.08694543600203977, + "grad_norm": 0.5567899942398071, + "learning_rate": 1.992634396810357e-05, + "loss": 0.5638811588287354, + "step": 341 + }, + { + "epoch": 0.08720040795512493, + "grad_norm": 0.9306987524032593, + "learning_rate": 1.992531869521312e-05, + "loss": 0.5746092796325684, + "step": 342 + }, + { + "epoch": 0.08745537990821009, + "grad_norm": 0.6356567740440369, + "learning_rate": 1.9924286362535214e-05, + "loss": 0.5542607307434082, + "step": 343 + }, + { + "epoch": 0.08771035186129526, + "grad_norm": 0.5900321006774902, + "learning_rate": 1.9923246970804148e-05, + "loss": 0.5701124668121338, + "step": 344 + }, + { + "epoch": 0.08796532381438042, + "grad_norm": 0.5773775577545166, + "learning_rate": 1.9922200520759224e-05, + "loss": 0.567479133605957, + "step": 345 + }, + { + "epoch": 0.08822029576746558, + "grad_norm": 0.6096591949462891, + "learning_rate": 1.9921147013144782e-05, + "loss": 0.5689170956611633, + "step": 346 + }, + { + "epoch": 0.08847526772055074, + "grad_norm": 0.5603031516075134, + "learning_rate": 1.9920086448710162e-05, + "loss": 0.5633706450462341, + "step": 347 + }, + { + "epoch": 0.0887302396736359, + "grad_norm": 0.5907047986984253, + "learning_rate": 1.9919018828209737e-05, + "loss": 0.5522236227989197, + "step": 348 + }, + { + "epoch": 0.08898521162672106, + "grad_norm": 0.5694087743759155, + "learning_rate": 1.99179441524029e-05, + "loss": 0.5575593113899231, + "step": 349 + }, + { + "epoch": 0.08924018357980622, + "grad_norm": 0.5818880796432495, + "learning_rate": 1.9916862422054053e-05, + "loss": 0.5685945749282837, + "step": 350 + }, + { + "epoch": 0.08949515553289138, + "grad_norm": 0.5598475933074951, + "learning_rate": 1.991577363793262e-05, + "loss": 0.5636742115020752, + "step": 351 + }, + { + "epoch": 0.08975012748597654, + "grad_norm": 0.5648708939552307, + "learning_rate": 1.991467780081305e-05, + "loss": 0.549278974533081, + "step": 352 + }, + { + "epoch": 0.09000509943906171, + "grad_norm": 0.5766149163246155, + "learning_rate": 1.9913574911474795e-05, + "loss": 0.5545766353607178, + "step": 353 + }, + { + "epoch": 0.09026007139214687, + "grad_norm": 0.5259166955947876, + "learning_rate": 1.9912464970702334e-05, + "loss": 0.5719385147094727, + "step": 354 + }, + { + "epoch": 0.09051504334523203, + "grad_norm": 0.5788517594337463, + "learning_rate": 1.9911347979285157e-05, + "loss": 0.5588270425796509, + "step": 355 + }, + { + "epoch": 0.09077001529831719, + "grad_norm": 0.5590032935142517, + "learning_rate": 1.9910223938017768e-05, + "loss": 0.5672649145126343, + "step": 356 + }, + { + "epoch": 0.09102498725140235, + "grad_norm": 0.5529116988182068, + "learning_rate": 1.9909092847699684e-05, + "loss": 0.5570537447929382, + "step": 357 + }, + { + "epoch": 0.0912799592044875, + "grad_norm": 0.5393619537353516, + "learning_rate": 1.9907954709135445e-05, + "loss": 0.5653344988822937, + "step": 358 + }, + { + "epoch": 0.09153493115757266, + "grad_norm": 0.5495200753211975, + "learning_rate": 1.9906809523134597e-05, + "loss": 0.5471888780593872, + "step": 359 + }, + { + "epoch": 0.09178990311065782, + "grad_norm": 0.5408692955970764, + "learning_rate": 1.9905657290511698e-05, + "loss": 0.5618144273757935, + "step": 360 + }, + { + "epoch": 0.09204487506374298, + "grad_norm": 0.5768983364105225, + "learning_rate": 1.9904498012086327e-05, + "loss": 0.556114673614502, + "step": 361 + }, + { + "epoch": 0.09229984701682815, + "grad_norm": 0.5828980207443237, + "learning_rate": 1.990333168868306e-05, + "loss": 0.5702318549156189, + "step": 362 + }, + { + "epoch": 0.09255481896991331, + "grad_norm": 0.5826273560523987, + "learning_rate": 1.9902158321131494e-05, + "loss": 0.5587303638458252, + "step": 363 + }, + { + "epoch": 0.09280979092299847, + "grad_norm": 0.5333880186080933, + "learning_rate": 1.9900977910266235e-05, + "loss": 0.5579195618629456, + "step": 364 + }, + { + "epoch": 0.09306476287608363, + "grad_norm": 0.5813481211662292, + "learning_rate": 1.98997904569269e-05, + "loss": 0.5583409070968628, + "step": 365 + }, + { + "epoch": 0.09331973482916879, + "grad_norm": 0.5596633553504944, + "learning_rate": 1.9898595961958112e-05, + "loss": 0.5528003573417664, + "step": 366 + }, + { + "epoch": 0.09357470678225395, + "grad_norm": 0.536656379699707, + "learning_rate": 1.9897394426209507e-05, + "loss": 0.5665175914764404, + "step": 367 + }, + { + "epoch": 0.09382967873533911, + "grad_norm": 0.561928391456604, + "learning_rate": 1.989618585053572e-05, + "loss": 0.5620167851448059, + "step": 368 + }, + { + "epoch": 0.09408465068842427, + "grad_norm": 0.6040554046630859, + "learning_rate": 1.9894970235796408e-05, + "loss": 0.5658797025680542, + "step": 369 + }, + { + "epoch": 0.09433962264150944, + "grad_norm": 0.5419930815696716, + "learning_rate": 1.9893747582856216e-05, + "loss": 0.5657435655593872, + "step": 370 + }, + { + "epoch": 0.0945945945945946, + "grad_norm": 0.5847061276435852, + "learning_rate": 1.989251789258482e-05, + "loss": 0.5543770790100098, + "step": 371 + }, + { + "epoch": 0.09484956654767976, + "grad_norm": 0.5925385355949402, + "learning_rate": 1.9891281165856876e-05, + "loss": 0.5622945427894592, + "step": 372 + }, + { + "epoch": 0.09510453850076492, + "grad_norm": 0.5780953168869019, + "learning_rate": 1.9890037403552055e-05, + "loss": 0.5627380609512329, + "step": 373 + }, + { + "epoch": 0.09535951045385008, + "grad_norm": 0.5803493857383728, + "learning_rate": 1.9888786606555045e-05, + "loss": 0.564932107925415, + "step": 374 + }, + { + "epoch": 0.09561448240693524, + "grad_norm": 0.5735729336738586, + "learning_rate": 1.9887528775755517e-05, + "loss": 0.5499950647354126, + "step": 375 + }, + { + "epoch": 0.0958694543600204, + "grad_norm": 0.5783923268318176, + "learning_rate": 1.9886263912048156e-05, + "loss": 0.5513893961906433, + "step": 376 + }, + { + "epoch": 0.09612442631310555, + "grad_norm": 0.544529378414154, + "learning_rate": 1.988499201633265e-05, + "loss": 0.5571684837341309, + "step": 377 + }, + { + "epoch": 0.09637939826619071, + "grad_norm": 0.547946035861969, + "learning_rate": 1.9883713089513686e-05, + "loss": 0.5583531856536865, + "step": 378 + }, + { + "epoch": 0.09663437021927589, + "grad_norm": 0.6041200160980225, + "learning_rate": 1.988242713250095e-05, + "loss": 0.5552366971969604, + "step": 379 + }, + { + "epoch": 0.09688934217236105, + "grad_norm": 0.5608474016189575, + "learning_rate": 1.9881134146209137e-05, + "loss": 0.5538177490234375, + "step": 380 + }, + { + "epoch": 0.0971443141254462, + "grad_norm": 0.5594263076782227, + "learning_rate": 1.987983413155793e-05, + "loss": 0.5568601489067078, + "step": 381 + }, + { + "epoch": 0.09739928607853136, + "grad_norm": 0.5792958736419678, + "learning_rate": 1.987852708947202e-05, + "loss": 0.541644811630249, + "step": 382 + }, + { + "epoch": 0.09765425803161652, + "grad_norm": 0.6069263219833374, + "learning_rate": 1.9877213020881096e-05, + "loss": 0.5578107833862305, + "step": 383 + }, + { + "epoch": 0.09790922998470168, + "grad_norm": 0.5543441772460938, + "learning_rate": 1.9875891926719838e-05, + "loss": 0.564591109752655, + "step": 384 + }, + { + "epoch": 0.09816420193778684, + "grad_norm": 0.5760945677757263, + "learning_rate": 1.9874563807927936e-05, + "loss": 0.5550767779350281, + "step": 385 + }, + { + "epoch": 0.098419173890872, + "grad_norm": 0.5751408934593201, + "learning_rate": 1.987322866545006e-05, + "loss": 0.5435432195663452, + "step": 386 + }, + { + "epoch": 0.09867414584395716, + "grad_norm": 0.5727622509002686, + "learning_rate": 1.987188650023589e-05, + "loss": 0.5572792291641235, + "step": 387 + }, + { + "epoch": 0.09892911779704233, + "grad_norm": 0.5712685585021973, + "learning_rate": 1.9870537313240096e-05, + "loss": 0.5545316934585571, + "step": 388 + }, + { + "epoch": 0.09918408975012749, + "grad_norm": 0.5742475986480713, + "learning_rate": 1.9869181105422335e-05, + "loss": 0.5356405973434448, + "step": 389 + }, + { + "epoch": 0.09943906170321265, + "grad_norm": 0.5529100298881531, + "learning_rate": 1.9867817877747276e-05, + "loss": 0.5502123236656189, + "step": 390 + }, + { + "epoch": 0.09969403365629781, + "grad_norm": 0.5420888662338257, + "learning_rate": 1.9866447631184556e-05, + "loss": 0.548579752445221, + "step": 391 + }, + { + "epoch": 0.09994900560938297, + "grad_norm": 0.6033974885940552, + "learning_rate": 1.9865070366708835e-05, + "loss": 0.551939845085144, + "step": 392 + }, + { + "epoch": 0.10020397756246813, + "grad_norm": 0.5301747918128967, + "learning_rate": 1.986368608529974e-05, + "loss": 0.5500297546386719, + "step": 393 + }, + { + "epoch": 0.10045894951555329, + "grad_norm": 0.6244176626205444, + "learning_rate": 1.9862294787941897e-05, + "loss": 0.5556297302246094, + "step": 394 + }, + { + "epoch": 0.10071392146863845, + "grad_norm": 0.5527774691581726, + "learning_rate": 1.9860896475624925e-05, + "loss": 0.5543330311775208, + "step": 395 + }, + { + "epoch": 0.1009688934217236, + "grad_norm": 0.5964891314506531, + "learning_rate": 1.985949114934343e-05, + "loss": 0.5608077049255371, + "step": 396 + }, + { + "epoch": 0.10122386537480878, + "grad_norm": 0.5965831875801086, + "learning_rate": 1.9858078810097004e-05, + "loss": 0.5453798770904541, + "step": 397 + }, + { + "epoch": 0.10147883732789394, + "grad_norm": 0.5628226399421692, + "learning_rate": 1.9856659458890237e-05, + "loss": 0.5567731857299805, + "step": 398 + }, + { + "epoch": 0.1017338092809791, + "grad_norm": 0.5935264825820923, + "learning_rate": 1.98552330967327e-05, + "loss": 0.5467360615730286, + "step": 399 + }, + { + "epoch": 0.10198878123406425, + "grad_norm": 0.5754925012588501, + "learning_rate": 1.9853799724638943e-05, + "loss": 0.5451920032501221, + "step": 400 + }, + { + "epoch": 0.10224375318714941, + "grad_norm": 0.5660573244094849, + "learning_rate": 1.985235934362852e-05, + "loss": 0.5535790324211121, + "step": 401 + }, + { + "epoch": 0.10249872514023457, + "grad_norm": 0.6029878854751587, + "learning_rate": 1.985091195472596e-05, + "loss": 0.5511131286621094, + "step": 402 + }, + { + "epoch": 0.10275369709331973, + "grad_norm": 0.5555292963981628, + "learning_rate": 1.984945755896077e-05, + "loss": 0.5386227369308472, + "step": 403 + }, + { + "epoch": 0.10300866904640489, + "grad_norm": 0.6450228095054626, + "learning_rate": 1.9847996157367455e-05, + "loss": 0.5452139973640442, + "step": 404 + }, + { + "epoch": 0.10326364099949005, + "grad_norm": 0.5396519303321838, + "learning_rate": 1.984652775098549e-05, + "loss": 0.5553629398345947, + "step": 405 + }, + { + "epoch": 0.10351861295257522, + "grad_norm": 1.392758846282959, + "learning_rate": 1.984505234085935e-05, + "loss": 0.5525428652763367, + "step": 406 + }, + { + "epoch": 0.10377358490566038, + "grad_norm": 0.593498706817627, + "learning_rate": 1.9843569928038468e-05, + "loss": 0.5546976327896118, + "step": 407 + }, + { + "epoch": 0.10402855685874554, + "grad_norm": 0.5546035170555115, + "learning_rate": 1.984208051357728e-05, + "loss": 0.5391968488693237, + "step": 408 + }, + { + "epoch": 0.1042835288118307, + "grad_norm": 0.5852994918823242, + "learning_rate": 1.9840584098535184e-05, + "loss": 0.5535978078842163, + "step": 409 + }, + { + "epoch": 0.10453850076491586, + "grad_norm": 0.5861825346946716, + "learning_rate": 1.9839080683976574e-05, + "loss": 0.5517117977142334, + "step": 410 + }, + { + "epoch": 0.10479347271800102, + "grad_norm": 0.5767414569854736, + "learning_rate": 1.983757027097081e-05, + "loss": 0.5635597109794617, + "step": 411 + }, + { + "epoch": 0.10504844467108618, + "grad_norm": 0.5809765458106995, + "learning_rate": 1.9836052860592237e-05, + "loss": 0.5357990264892578, + "step": 412 + }, + { + "epoch": 0.10530341662417134, + "grad_norm": 0.601608157157898, + "learning_rate": 1.9834528453920177e-05, + "loss": 0.549247145652771, + "step": 413 + }, + { + "epoch": 0.1055583885772565, + "grad_norm": 0.6048389673233032, + "learning_rate": 1.9832997052038922e-05, + "loss": 0.5468675494194031, + "step": 414 + }, + { + "epoch": 0.10581336053034167, + "grad_norm": 0.5551306009292603, + "learning_rate": 1.9831458656037748e-05, + "loss": 0.5442308187484741, + "step": 415 + }, + { + "epoch": 0.10606833248342683, + "grad_norm": 0.5358591675758362, + "learning_rate": 1.9829913267010898e-05, + "loss": 0.5525547862052917, + "step": 416 + }, + { + "epoch": 0.10632330443651199, + "grad_norm": 0.5656468272209167, + "learning_rate": 1.9828360886057597e-05, + "loss": 0.5506001710891724, + "step": 417 + }, + { + "epoch": 0.10657827638959715, + "grad_norm": 0.5704612135887146, + "learning_rate": 1.982680151428203e-05, + "loss": 0.5432718992233276, + "step": 418 + }, + { + "epoch": 0.1068332483426823, + "grad_norm": 0.5470494031906128, + "learning_rate": 1.982523515279338e-05, + "loss": 0.5554221868515015, + "step": 419 + }, + { + "epoch": 0.10708822029576746, + "grad_norm": 0.6086367964744568, + "learning_rate": 1.9823661802705773e-05, + "loss": 0.5490662455558777, + "step": 420 + }, + { + "epoch": 0.10734319224885262, + "grad_norm": 0.5634582042694092, + "learning_rate": 1.982208146513832e-05, + "loss": 0.5513579845428467, + "step": 421 + }, + { + "epoch": 0.10759816420193778, + "grad_norm": 0.5439919233322144, + "learning_rate": 1.98204941412151e-05, + "loss": 0.5450636744499207, + "step": 422 + }, + { + "epoch": 0.10785313615502294, + "grad_norm": 0.5421820878982544, + "learning_rate": 1.9818899832065172e-05, + "loss": 0.5400946736335754, + "step": 423 + }, + { + "epoch": 0.10810810810810811, + "grad_norm": 0.5724503397941589, + "learning_rate": 1.981729853882254e-05, + "loss": 0.5543707609176636, + "step": 424 + }, + { + "epoch": 0.10836308006119327, + "grad_norm": 0.5568701028823853, + "learning_rate": 1.9815690262626194e-05, + "loss": 0.5440797805786133, + "step": 425 + }, + { + "epoch": 0.10861805201427843, + "grad_norm": 0.5401669144630432, + "learning_rate": 1.981407500462009e-05, + "loss": 0.5601478815078735, + "step": 426 + }, + { + "epoch": 0.10887302396736359, + "grad_norm": 0.5365921854972839, + "learning_rate": 1.9812452765953137e-05, + "loss": 0.5376150012016296, + "step": 427 + }, + { + "epoch": 0.10912799592044875, + "grad_norm": 0.5326974391937256, + "learning_rate": 1.9810823547779226e-05, + "loss": 0.5423977375030518, + "step": 428 + }, + { + "epoch": 0.10938296787353391, + "grad_norm": 0.6022288203239441, + "learning_rate": 1.9809187351257205e-05, + "loss": 0.5444180965423584, + "step": 429 + }, + { + "epoch": 0.10963793982661907, + "grad_norm": 0.5397586226463318, + "learning_rate": 1.9807544177550882e-05, + "loss": 0.548419713973999, + "step": 430 + }, + { + "epoch": 0.10989291177970423, + "grad_norm": 0.5493386387825012, + "learning_rate": 1.980589402782903e-05, + "loss": 0.5459563732147217, + "step": 431 + }, + { + "epoch": 0.11014788373278939, + "grad_norm": 0.5432290434837341, + "learning_rate": 1.980423690326539e-05, + "loss": 0.5504255294799805, + "step": 432 + }, + { + "epoch": 0.11040285568587456, + "grad_norm": 0.5319405198097229, + "learning_rate": 1.9802572805038655e-05, + "loss": 0.5571215152740479, + "step": 433 + }, + { + "epoch": 0.11065782763895972, + "grad_norm": 0.5671195387840271, + "learning_rate": 1.9800901734332488e-05, + "loss": 0.5390557050704956, + "step": 434 + }, + { + "epoch": 0.11091279959204488, + "grad_norm": 0.5227031707763672, + "learning_rate": 1.97992236923355e-05, + "loss": 0.5487812757492065, + "step": 435 + }, + { + "epoch": 0.11116777154513004, + "grad_norm": 0.6271248459815979, + "learning_rate": 1.979753868024127e-05, + "loss": 0.5339798331260681, + "step": 436 + }, + { + "epoch": 0.1114227434982152, + "grad_norm": 0.5489307045936584, + "learning_rate": 1.979584669924833e-05, + "loss": 0.5359088182449341, + "step": 437 + }, + { + "epoch": 0.11167771545130036, + "grad_norm": 0.5491774082183838, + "learning_rate": 1.9794147750560176e-05, + "loss": 0.5513111352920532, + "step": 438 + }, + { + "epoch": 0.11193268740438551, + "grad_norm": 0.5912401676177979, + "learning_rate": 1.9792441835385247e-05, + "loss": 0.5447284579277039, + "step": 439 + }, + { + "epoch": 0.11218765935747067, + "grad_norm": 0.5678877830505371, + "learning_rate": 1.979072895493695e-05, + "loss": 0.5309037566184998, + "step": 440 + }, + { + "epoch": 0.11244263131055583, + "grad_norm": 0.5953914523124695, + "learning_rate": 1.9789009110433636e-05, + "loss": 0.5362557172775269, + "step": 441 + }, + { + "epoch": 0.112697603263641, + "grad_norm": 0.5275906920433044, + "learning_rate": 1.9787282303098618e-05, + "loss": 0.5531718730926514, + "step": 442 + }, + { + "epoch": 0.11295257521672616, + "grad_norm": 0.5663650631904602, + "learning_rate": 1.978554853416016e-05, + "loss": 0.5560038089752197, + "step": 443 + }, + { + "epoch": 0.11320754716981132, + "grad_norm": 0.5504028797149658, + "learning_rate": 1.978380780485147e-05, + "loss": 0.5488899946212769, + "step": 444 + }, + { + "epoch": 0.11346251912289648, + "grad_norm": 0.5290588736534119, + "learning_rate": 1.9782060116410718e-05, + "loss": 0.5435810089111328, + "step": 445 + }, + { + "epoch": 0.11371749107598164, + "grad_norm": 0.5408740639686584, + "learning_rate": 1.9780305470081017e-05, + "loss": 0.5513544082641602, + "step": 446 + }, + { + "epoch": 0.1139724630290668, + "grad_norm": 0.5024471879005432, + "learning_rate": 1.9778543867110428e-05, + "loss": 0.5466337203979492, + "step": 447 + }, + { + "epoch": 0.11422743498215196, + "grad_norm": 0.539569616317749, + "learning_rate": 1.9776775308751964e-05, + "loss": 0.5356583595275879, + "step": 448 + }, + { + "epoch": 0.11448240693523712, + "grad_norm": 0.525956392288208, + "learning_rate": 1.9774999796263588e-05, + "loss": 0.5408869385719299, + "step": 449 + }, + { + "epoch": 0.11473737888832228, + "grad_norm": 0.5690717697143555, + "learning_rate": 1.9773217330908205e-05, + "loss": 0.5401495695114136, + "step": 450 + }, + { + "epoch": 0.11499235084140745, + "grad_norm": 0.5895713567733765, + "learning_rate": 1.9771427913953662e-05, + "loss": 0.5400240421295166, + "step": 451 + }, + { + "epoch": 0.11524732279449261, + "grad_norm": 0.6181904077529907, + "learning_rate": 1.9769631546672756e-05, + "loss": 0.5605360269546509, + "step": 452 + }, + { + "epoch": 0.11550229474757777, + "grad_norm": 0.5505849123001099, + "learning_rate": 1.9767828230343228e-05, + "loss": 0.5433098077774048, + "step": 453 + }, + { + "epoch": 0.11575726670066293, + "grad_norm": 0.6274793744087219, + "learning_rate": 1.9766017966247763e-05, + "loss": 0.5562180876731873, + "step": 454 + }, + { + "epoch": 0.11601223865374809, + "grad_norm": 0.5555331110954285, + "learning_rate": 1.9764200755673976e-05, + "loss": 0.5439101457595825, + "step": 455 + }, + { + "epoch": 0.11626721060683325, + "grad_norm": 0.5220648050308228, + "learning_rate": 1.976237659991444e-05, + "loss": 0.5579456090927124, + "step": 456 + }, + { + "epoch": 0.1165221825599184, + "grad_norm": 0.5761458277702332, + "learning_rate": 1.9760545500266656e-05, + "loss": 0.5493845343589783, + "step": 457 + }, + { + "epoch": 0.11677715451300356, + "grad_norm": 0.5373028516769409, + "learning_rate": 1.9758707458033073e-05, + "loss": 0.5442485809326172, + "step": 458 + }, + { + "epoch": 0.11703212646608872, + "grad_norm": 0.4915338456630707, + "learning_rate": 1.975686247452107e-05, + "loss": 0.5558599233627319, + "step": 459 + }, + { + "epoch": 0.1172870984191739, + "grad_norm": 0.5607727766036987, + "learning_rate": 1.975501055104297e-05, + "loss": 0.5386903882026672, + "step": 460 + }, + { + "epoch": 0.11754207037225906, + "grad_norm": 0.5363933444023132, + "learning_rate": 1.9753151688916025e-05, + "loss": 0.5452768802642822, + "step": 461 + }, + { + "epoch": 0.11779704232534421, + "grad_norm": 0.5088923573493958, + "learning_rate": 1.9751285889462424e-05, + "loss": 0.5445141196250916, + "step": 462 + }, + { + "epoch": 0.11805201427842937, + "grad_norm": 0.5186890959739685, + "learning_rate": 1.97494131540093e-05, + "loss": 0.5501380562782288, + "step": 463 + }, + { + "epoch": 0.11830698623151453, + "grad_norm": 0.5275077223777771, + "learning_rate": 1.9747533483888707e-05, + "loss": 0.5424678325653076, + "step": 464 + }, + { + "epoch": 0.11856195818459969, + "grad_norm": 0.5187524557113647, + "learning_rate": 1.9745646880437642e-05, + "loss": 0.5477076768875122, + "step": 465 + }, + { + "epoch": 0.11881693013768485, + "grad_norm": 0.46981313824653625, + "learning_rate": 1.9743753344998024e-05, + "loss": 0.5471561551094055, + "step": 466 + }, + { + "epoch": 0.11907190209077001, + "grad_norm": 0.62347012758255, + "learning_rate": 1.974185287891671e-05, + "loss": 0.5401801466941833, + "step": 467 + }, + { + "epoch": 0.11932687404385518, + "grad_norm": 0.5400267243385315, + "learning_rate": 1.973994548354548e-05, + "loss": 0.5439719557762146, + "step": 468 + }, + { + "epoch": 0.11958184599694034, + "grad_norm": 0.5315633416175842, + "learning_rate": 1.9738031160241054e-05, + "loss": 0.5322132110595703, + "step": 469 + }, + { + "epoch": 0.1198368179500255, + "grad_norm": 0.5405152440071106, + "learning_rate": 1.9736109910365066e-05, + "loss": 0.5460124015808105, + "step": 470 + }, + { + "epoch": 0.12009178990311066, + "grad_norm": 0.5464207530021667, + "learning_rate": 1.9734181735284083e-05, + "loss": 0.547633171081543, + "step": 471 + }, + { + "epoch": 0.12034676185619582, + "grad_norm": 0.49561744928359985, + "learning_rate": 1.9732246636369605e-05, + "loss": 0.5439903736114502, + "step": 472 + }, + { + "epoch": 0.12060173380928098, + "grad_norm": 0.526726245880127, + "learning_rate": 1.9730304614998043e-05, + "loss": 0.5577775239944458, + "step": 473 + }, + { + "epoch": 0.12085670576236614, + "grad_norm": 0.5296305418014526, + "learning_rate": 1.9728355672550743e-05, + "loss": 0.5440423488616943, + "step": 474 + }, + { + "epoch": 0.1211116777154513, + "grad_norm": 0.5025987029075623, + "learning_rate": 1.9726399810413967e-05, + "loss": 0.5496869683265686, + "step": 475 + }, + { + "epoch": 0.12136664966853646, + "grad_norm": 0.5133289694786072, + "learning_rate": 1.9724437029978903e-05, + "loss": 0.5514233112335205, + "step": 476 + }, + { + "epoch": 0.12162162162162163, + "grad_norm": 0.5568787455558777, + "learning_rate": 1.9722467332641656e-05, + "loss": 0.5393109321594238, + "step": 477 + }, + { + "epoch": 0.12187659357470679, + "grad_norm": 0.5228671431541443, + "learning_rate": 1.972049071980326e-05, + "loss": 0.525666356086731, + "step": 478 + }, + { + "epoch": 0.12213156552779195, + "grad_norm": 0.5343871712684631, + "learning_rate": 1.9718507192869656e-05, + "loss": 0.5391042828559875, + "step": 479 + }, + { + "epoch": 0.1223865374808771, + "grad_norm": 0.5969329476356506, + "learning_rate": 1.971651675325171e-05, + "loss": 0.561370313167572, + "step": 480 + }, + { + "epoch": 0.12264150943396226, + "grad_norm": 0.5148950219154358, + "learning_rate": 1.97145194023652e-05, + "loss": 0.5295721888542175, + "step": 481 + }, + { + "epoch": 0.12289648138704742, + "grad_norm": 0.5520386099815369, + "learning_rate": 1.971251514163083e-05, + "loss": 0.5467032790184021, + "step": 482 + }, + { + "epoch": 0.12315145334013258, + "grad_norm": 0.5354210138320923, + "learning_rate": 1.9710503972474205e-05, + "loss": 0.5428868532180786, + "step": 483 + }, + { + "epoch": 0.12340642529321774, + "grad_norm": 0.5027364492416382, + "learning_rate": 1.970848589632586e-05, + "loss": 0.5456262826919556, + "step": 484 + }, + { + "epoch": 0.1236613972463029, + "grad_norm": 0.5828258395195007, + "learning_rate": 1.9706460914621225e-05, + "loss": 0.5422083735466003, + "step": 485 + }, + { + "epoch": 0.12391636919938807, + "grad_norm": 0.5776222348213196, + "learning_rate": 1.970442902880065e-05, + "loss": 0.5387598276138306, + "step": 486 + }, + { + "epoch": 0.12417134115247323, + "grad_norm": 0.5405538082122803, + "learning_rate": 1.9702390240309402e-05, + "loss": 0.5427982807159424, + "step": 487 + }, + { + "epoch": 0.12442631310555839, + "grad_norm": 0.5617440938949585, + "learning_rate": 1.9700344550597656e-05, + "loss": 0.5464503765106201, + "step": 488 + }, + { + "epoch": 0.12468128505864355, + "grad_norm": 0.5526654124259949, + "learning_rate": 1.9698291961120485e-05, + "loss": 0.5374647378921509, + "step": 489 + }, + { + "epoch": 0.12493625701172871, + "grad_norm": 0.5415617227554321, + "learning_rate": 1.9696232473337878e-05, + "loss": 0.5434882044792175, + "step": 490 + }, + { + "epoch": 0.12519122896481388, + "grad_norm": 0.555722713470459, + "learning_rate": 1.9694166088714733e-05, + "loss": 0.5455443859100342, + "step": 491 + }, + { + "epoch": 0.12544620091789904, + "grad_norm": 0.5384701490402222, + "learning_rate": 1.9692092808720846e-05, + "loss": 0.5436517000198364, + "step": 492 + }, + { + "epoch": 0.1257011728709842, + "grad_norm": 0.5429685711860657, + "learning_rate": 1.9690012634830925e-05, + "loss": 0.5500330924987793, + "step": 493 + }, + { + "epoch": 0.12595614482406936, + "grad_norm": 0.547223687171936, + "learning_rate": 1.9687925568524577e-05, + "loss": 0.5479034781455994, + "step": 494 + }, + { + "epoch": 0.12621111677715452, + "grad_norm": 0.5522466897964478, + "learning_rate": 1.9685831611286312e-05, + "loss": 0.550029993057251, + "step": 495 + }, + { + "epoch": 0.12646608873023968, + "grad_norm": 0.5701102614402771, + "learning_rate": 1.9683730764605547e-05, + "loss": 0.5450952053070068, + "step": 496 + }, + { + "epoch": 0.12672106068332484, + "grad_norm": 0.5533367395401001, + "learning_rate": 1.968162302997659e-05, + "loss": 0.5455372333526611, + "step": 497 + }, + { + "epoch": 0.12697603263641, + "grad_norm": 0.5364789962768555, + "learning_rate": 1.9679508408898656e-05, + "loss": 0.5422064065933228, + "step": 498 + }, + { + "epoch": 0.12723100458949516, + "grad_norm": 0.5047568678855896, + "learning_rate": 1.967738690287585e-05, + "loss": 0.5391058921813965, + "step": 499 + }, + { + "epoch": 0.12748597654258031, + "grad_norm": 0.5302197933197021, + "learning_rate": 1.967525851341719e-05, + "loss": 0.5551446676254272, + "step": 500 + }, + { + "epoch": 0.12774094849566547, + "grad_norm": 0.4994960129261017, + "learning_rate": 1.967312324203657e-05, + "loss": 0.5429335832595825, + "step": 501 + }, + { + "epoch": 0.12799592044875063, + "grad_norm": 0.5566155314445496, + "learning_rate": 1.9670981090252794e-05, + "loss": 0.5487710237503052, + "step": 502 + }, + { + "epoch": 0.1282508924018358, + "grad_norm": 0.5283164381980896, + "learning_rate": 1.9668832059589552e-05, + "loss": 0.5358626842498779, + "step": 503 + }, + { + "epoch": 0.12850586435492095, + "grad_norm": 0.5359975099563599, + "learning_rate": 1.966667615157543e-05, + "loss": 0.5301843881607056, + "step": 504 + }, + { + "epoch": 0.1287608363080061, + "grad_norm": 0.48307645320892334, + "learning_rate": 1.9664513367743908e-05, + "loss": 0.5526771545410156, + "step": 505 + }, + { + "epoch": 0.12901580826109127, + "grad_norm": 0.5109233260154724, + "learning_rate": 1.9662343709633352e-05, + "loss": 0.5420863628387451, + "step": 506 + }, + { + "epoch": 0.12927078021417643, + "grad_norm": 0.4935479164123535, + "learning_rate": 1.966016717878702e-05, + "loss": 0.5470730066299438, + "step": 507 + }, + { + "epoch": 0.1295257521672616, + "grad_norm": 0.4998207986354828, + "learning_rate": 1.965798377675306e-05, + "loss": 0.5346579551696777, + "step": 508 + }, + { + "epoch": 0.12978072412034677, + "grad_norm": 0.5249969959259033, + "learning_rate": 1.9655793505084503e-05, + "loss": 0.5412853956222534, + "step": 509 + }, + { + "epoch": 0.13003569607343193, + "grad_norm": 0.5389821529388428, + "learning_rate": 1.965359636533927e-05, + "loss": 0.5390466451644897, + "step": 510 + }, + { + "epoch": 0.1302906680265171, + "grad_norm": 0.539438009262085, + "learning_rate": 1.9651392359080164e-05, + "loss": 0.5388988852500916, + "step": 511 + }, + { + "epoch": 0.13054563997960225, + "grad_norm": 0.483715295791626, + "learning_rate": 1.964918148787488e-05, + "loss": 0.5375275611877441, + "step": 512 + }, + { + "epoch": 0.1308006119326874, + "grad_norm": 0.5151199102401733, + "learning_rate": 1.9646963753295987e-05, + "loss": 0.5366615653038025, + "step": 513 + }, + { + "epoch": 0.13105558388577257, + "grad_norm": 0.5603130459785461, + "learning_rate": 1.964473915692094e-05, + "loss": 0.5489786863327026, + "step": 514 + }, + { + "epoch": 0.13131055583885773, + "grad_norm": 0.5356683731079102, + "learning_rate": 1.9642507700332068e-05, + "loss": 0.5374578833580017, + "step": 515 + }, + { + "epoch": 0.1315655277919429, + "grad_norm": 0.4916025996208191, + "learning_rate": 1.964026938511659e-05, + "loss": 0.5560331344604492, + "step": 516 + }, + { + "epoch": 0.13182049974502805, + "grad_norm": 0.5539834499359131, + "learning_rate": 1.9638024212866606e-05, + "loss": 0.5416618585586548, + "step": 517 + }, + { + "epoch": 0.1320754716981132, + "grad_norm": 0.5391684174537659, + "learning_rate": 1.9635772185179074e-05, + "loss": 0.5419684052467346, + "step": 518 + }, + { + "epoch": 0.13233044365119837, + "grad_norm": 0.5286181569099426, + "learning_rate": 1.963351330365585e-05, + "loss": 0.5357035398483276, + "step": 519 + }, + { + "epoch": 0.13258541560428352, + "grad_norm": 0.5388613343238831, + "learning_rate": 1.963124756990365e-05, + "loss": 0.5408596396446228, + "step": 520 + }, + { + "epoch": 0.13284038755736868, + "grad_norm": 0.5261210203170776, + "learning_rate": 1.962897498553407e-05, + "loss": 0.5369002819061279, + "step": 521 + }, + { + "epoch": 0.13309535951045384, + "grad_norm": 0.5403190851211548, + "learning_rate": 1.962669555216358e-05, + "loss": 0.5399401783943176, + "step": 522 + }, + { + "epoch": 0.133350331463539, + "grad_norm": 0.5058906078338623, + "learning_rate": 1.962440927141352e-05, + "loss": 0.5308097004890442, + "step": 523 + }, + { + "epoch": 0.13360530341662416, + "grad_norm": 0.5357410907745361, + "learning_rate": 1.9622116144910095e-05, + "loss": 0.5340980887413025, + "step": 524 + }, + { + "epoch": 0.13386027536970932, + "grad_norm": 0.5216020345687866, + "learning_rate": 1.9619816174284393e-05, + "loss": 0.5346090793609619, + "step": 525 + }, + { + "epoch": 0.1341152473227945, + "grad_norm": 0.5459076762199402, + "learning_rate": 1.9617509361172356e-05, + "loss": 0.53227698802948, + "step": 526 + }, + { + "epoch": 0.13437021927587967, + "grad_norm": 0.5702166557312012, + "learning_rate": 1.9615195707214805e-05, + "loss": 0.5388787984848022, + "step": 527 + }, + { + "epoch": 0.13462519122896482, + "grad_norm": 0.49151238799095154, + "learning_rate": 1.9612875214057414e-05, + "loss": 0.5381410121917725, + "step": 528 + }, + { + "epoch": 0.13488016318204998, + "grad_norm": 0.5661936402320862, + "learning_rate": 1.961054788335073e-05, + "loss": 0.55195152759552, + "step": 529 + }, + { + "epoch": 0.13513513513513514, + "grad_norm": 0.5514591932296753, + "learning_rate": 1.9608213716750164e-05, + "loss": 0.5301691293716431, + "step": 530 + }, + { + "epoch": 0.1353901070882203, + "grad_norm": 0.5307884216308594, + "learning_rate": 1.9605872715915986e-05, + "loss": 0.5460130572319031, + "step": 531 + }, + { + "epoch": 0.13564507904130546, + "grad_norm": 0.5439223647117615, + "learning_rate": 1.9603524882513326e-05, + "loss": 0.5416542887687683, + "step": 532 + }, + { + "epoch": 0.13590005099439062, + "grad_norm": 0.5474928021430969, + "learning_rate": 1.9601170218212183e-05, + "loss": 0.5408943891525269, + "step": 533 + }, + { + "epoch": 0.13615502294747578, + "grad_norm": 0.5338258743286133, + "learning_rate": 1.9598808724687398e-05, + "loss": 0.5379087924957275, + "step": 534 + }, + { + "epoch": 0.13640999490056094, + "grad_norm": 0.5032938122749329, + "learning_rate": 1.9596440403618688e-05, + "loss": 0.5256127715110779, + "step": 535 + }, + { + "epoch": 0.1366649668536461, + "grad_norm": 0.5296608805656433, + "learning_rate": 1.9594065256690614e-05, + "loss": 0.5323258638381958, + "step": 536 + }, + { + "epoch": 0.13691993880673126, + "grad_norm": 0.549721896648407, + "learning_rate": 1.9591683285592595e-05, + "loss": 0.5364140868186951, + "step": 537 + }, + { + "epoch": 0.13717491075981642, + "grad_norm": 0.5119733214378357, + "learning_rate": 1.9589294492018907e-05, + "loss": 0.5263822078704834, + "step": 538 + }, + { + "epoch": 0.13742988271290157, + "grad_norm": 0.5301274657249451, + "learning_rate": 1.9586898877668677e-05, + "loss": 0.5366970896720886, + "step": 539 + }, + { + "epoch": 0.13768485466598673, + "grad_norm": 0.5832563638687134, + "learning_rate": 1.9584496444245885e-05, + "loss": 0.5429117679595947, + "step": 540 + }, + { + "epoch": 0.1379398266190719, + "grad_norm": 0.5166734457015991, + "learning_rate": 1.9582087193459355e-05, + "loss": 0.5296197533607483, + "step": 541 + }, + { + "epoch": 0.13819479857215705, + "grad_norm": 0.5472302436828613, + "learning_rate": 1.957967112702277e-05, + "loss": 0.5465075969696045, + "step": 542 + }, + { + "epoch": 0.1384497705252422, + "grad_norm": 0.5155672430992126, + "learning_rate": 1.9577248246654653e-05, + "loss": 0.5279982089996338, + "step": 543 + }, + { + "epoch": 0.1387047424783274, + "grad_norm": 0.5144238471984863, + "learning_rate": 1.9574818554078373e-05, + "loss": 0.5338395833969116, + "step": 544 + }, + { + "epoch": 0.13895971443141256, + "grad_norm": 0.5261866450309753, + "learning_rate": 1.9572382051022154e-05, + "loss": 0.5367433428764343, + "step": 545 + }, + { + "epoch": 0.13921468638449772, + "grad_norm": 0.5071731209754944, + "learning_rate": 1.9569938739219054e-05, + "loss": 0.5295122265815735, + "step": 546 + }, + { + "epoch": 0.13946965833758287, + "grad_norm": 0.5251842737197876, + "learning_rate": 1.9567488620406984e-05, + "loss": 0.5354647636413574, + "step": 547 + }, + { + "epoch": 0.13972463029066803, + "grad_norm": 0.5434141159057617, + "learning_rate": 1.956503169632868e-05, + "loss": 0.5488410592079163, + "step": 548 + }, + { + "epoch": 0.1399796022437532, + "grad_norm": 0.5540900826454163, + "learning_rate": 1.9562567968731742e-05, + "loss": 0.5251233577728271, + "step": 549 + }, + { + "epoch": 0.14023457419683835, + "grad_norm": 0.4901406466960907, + "learning_rate": 1.9560097439368585e-05, + "loss": 0.5400822758674622, + "step": 550 + }, + { + "epoch": 0.1404895461499235, + "grad_norm": 0.5357276797294617, + "learning_rate": 1.9557620109996483e-05, + "loss": 0.528943657875061, + "step": 551 + }, + { + "epoch": 0.14074451810300867, + "grad_norm": 0.5773533582687378, + "learning_rate": 1.955513598237753e-05, + "loss": 0.5278090238571167, + "step": 552 + }, + { + "epoch": 0.14099949005609383, + "grad_norm": 0.4954776167869568, + "learning_rate": 1.9552645058278668e-05, + "loss": 0.5397365093231201, + "step": 553 + }, + { + "epoch": 0.141254462009179, + "grad_norm": 0.5578538775444031, + "learning_rate": 1.955014733947167e-05, + "loss": 0.5337741374969482, + "step": 554 + }, + { + "epoch": 0.14150943396226415, + "grad_norm": 0.5055522918701172, + "learning_rate": 1.9547642827733136e-05, + "loss": 0.5458861589431763, + "step": 555 + }, + { + "epoch": 0.1417644059153493, + "grad_norm": 0.4946596324443817, + "learning_rate": 1.9545131524844506e-05, + "loss": 0.5277013778686523, + "step": 556 + }, + { + "epoch": 0.14201937786843447, + "grad_norm": 0.5347071290016174, + "learning_rate": 1.954261343259204e-05, + "loss": 0.5519477128982544, + "step": 557 + }, + { + "epoch": 0.14227434982151962, + "grad_norm": 0.5242138504981995, + "learning_rate": 1.9540088552766837e-05, + "loss": 0.5345370173454285, + "step": 558 + }, + { + "epoch": 0.14252932177460478, + "grad_norm": 0.5394492149353027, + "learning_rate": 1.9537556887164823e-05, + "loss": 0.5402896404266357, + "step": 559 + }, + { + "epoch": 0.14278429372768994, + "grad_norm": 0.5234394073486328, + "learning_rate": 1.953501843758675e-05, + "loss": 0.5392587184906006, + "step": 560 + }, + { + "epoch": 0.1430392656807751, + "grad_norm": 0.511301577091217, + "learning_rate": 1.9532473205838184e-05, + "loss": 0.543667197227478, + "step": 561 + }, + { + "epoch": 0.1432942376338603, + "grad_norm": 0.5079246759414673, + "learning_rate": 1.9529921193729535e-05, + "loss": 0.5325132608413696, + "step": 562 + }, + { + "epoch": 0.14354920958694545, + "grad_norm": 0.5344441533088684, + "learning_rate": 1.9527362403076022e-05, + "loss": 0.5426052808761597, + "step": 563 + }, + { + "epoch": 0.1438041815400306, + "grad_norm": 0.5662505626678467, + "learning_rate": 1.9524796835697687e-05, + "loss": 0.5501530766487122, + "step": 564 + }, + { + "epoch": 0.14405915349311577, + "grad_norm": 0.479183554649353, + "learning_rate": 1.9522224493419396e-05, + "loss": 0.5392322540283203, + "step": 565 + }, + { + "epoch": 0.14431412544620092, + "grad_norm": 0.5352718234062195, + "learning_rate": 1.9519645378070833e-05, + "loss": 0.5259209871292114, + "step": 566 + }, + { + "epoch": 0.14456909739928608, + "grad_norm": 0.49504008889198303, + "learning_rate": 1.95170594914865e-05, + "loss": 0.5283001065254211, + "step": 567 + }, + { + "epoch": 0.14482406935237124, + "grad_norm": 0.5184426307678223, + "learning_rate": 1.9514466835505714e-05, + "loss": 0.5301646590232849, + "step": 568 + }, + { + "epoch": 0.1450790413054564, + "grad_norm": 0.550871729850769, + "learning_rate": 1.9511867411972606e-05, + "loss": 0.5460808873176575, + "step": 569 + }, + { + "epoch": 0.14533401325854156, + "grad_norm": 0.49909305572509766, + "learning_rate": 1.950926122273612e-05, + "loss": 0.5394014120101929, + "step": 570 + }, + { + "epoch": 0.14558898521162672, + "grad_norm": 0.5513647794723511, + "learning_rate": 1.9506648269650017e-05, + "loss": 0.5427008867263794, + "step": 571 + }, + { + "epoch": 0.14584395716471188, + "grad_norm": 0.48180994391441345, + "learning_rate": 1.9504028554572865e-05, + "loss": 0.5257720351219177, + "step": 572 + }, + { + "epoch": 0.14609892911779704, + "grad_norm": 0.6737922430038452, + "learning_rate": 1.9501402079368042e-05, + "loss": 0.5394344329833984, + "step": 573 + }, + { + "epoch": 0.1463539010708822, + "grad_norm": 0.48611074686050415, + "learning_rate": 1.949876884590374e-05, + "loss": 0.5424251556396484, + "step": 574 + }, + { + "epoch": 0.14660887302396736, + "grad_norm": 0.5731044411659241, + "learning_rate": 1.949612885605295e-05, + "loss": 0.5467298030853271, + "step": 575 + }, + { + "epoch": 0.14686384497705252, + "grad_norm": 0.4976442754268646, + "learning_rate": 1.9493482111693474e-05, + "loss": 0.5349081754684448, + "step": 576 + }, + { + "epoch": 0.14711881693013767, + "grad_norm": 0.49011242389678955, + "learning_rate": 1.9490828614707917e-05, + "loss": 0.5175978541374207, + "step": 577 + }, + { + "epoch": 0.14737378888322283, + "grad_norm": 0.5349713563919067, + "learning_rate": 1.9488168366983685e-05, + "loss": 0.5363706350326538, + "step": 578 + }, + { + "epoch": 0.147628760836308, + "grad_norm": 0.5079620480537415, + "learning_rate": 1.9485501370412988e-05, + "loss": 0.5362746119499207, + "step": 579 + }, + { + "epoch": 0.14788373278939318, + "grad_norm": 0.5510601997375488, + "learning_rate": 1.9482827626892838e-05, + "loss": 0.5300568342208862, + "step": 580 + }, + { + "epoch": 0.14813870474247834, + "grad_norm": 0.48822200298309326, + "learning_rate": 1.948014713832504e-05, + "loss": 0.5431665778160095, + "step": 581 + }, + { + "epoch": 0.1483936766955635, + "grad_norm": 0.5097545385360718, + "learning_rate": 1.947745990661621e-05, + "loss": 0.5365676879882812, + "step": 582 + }, + { + "epoch": 0.14864864864864866, + "grad_norm": 0.524516761302948, + "learning_rate": 1.9474765933677736e-05, + "loss": 0.5253903865814209, + "step": 583 + }, + { + "epoch": 0.14890362060173382, + "grad_norm": 0.5344324707984924, + "learning_rate": 1.9472065221425827e-05, + "loss": 0.5223650932312012, + "step": 584 + }, + { + "epoch": 0.14915859255481898, + "grad_norm": 0.5210974812507629, + "learning_rate": 1.9469357771781476e-05, + "loss": 0.5488340854644775, + "step": 585 + }, + { + "epoch": 0.14941356450790413, + "grad_norm": 0.5254884958267212, + "learning_rate": 1.946664358667046e-05, + "loss": 0.5375260710716248, + "step": 586 + }, + { + "epoch": 0.1496685364609893, + "grad_norm": 0.5536079406738281, + "learning_rate": 1.9463922668023358e-05, + "loss": 0.5502488017082214, + "step": 587 + }, + { + "epoch": 0.14992350841407445, + "grad_norm": 0.5063897967338562, + "learning_rate": 1.9461195017775534e-05, + "loss": 0.5332539081573486, + "step": 588 + }, + { + "epoch": 0.1501784803671596, + "grad_norm": 0.5346553921699524, + "learning_rate": 1.9458460637867137e-05, + "loss": 0.5380452871322632, + "step": 589 + }, + { + "epoch": 0.15043345232024477, + "grad_norm": 0.573293149471283, + "learning_rate": 1.945571953024311e-05, + "loss": 0.5183770656585693, + "step": 590 + }, + { + "epoch": 0.15068842427332993, + "grad_norm": 0.5627990365028381, + "learning_rate": 1.9452971696853175e-05, + "loss": 0.5426141619682312, + "step": 591 + }, + { + "epoch": 0.1509433962264151, + "grad_norm": 0.49688220024108887, + "learning_rate": 1.9450217139651845e-05, + "loss": 0.5325013399124146, + "step": 592 + }, + { + "epoch": 0.15119836817950025, + "grad_norm": 0.5401874780654907, + "learning_rate": 1.9447455860598407e-05, + "loss": 0.5306686758995056, + "step": 593 + }, + { + "epoch": 0.1514533401325854, + "grad_norm": 0.49251788854599, + "learning_rate": 1.9444687861656932e-05, + "loss": 0.5364015102386475, + "step": 594 + }, + { + "epoch": 0.15170831208567057, + "grad_norm": 0.4938039183616638, + "learning_rate": 1.9441913144796274e-05, + "loss": 0.5307292342185974, + "step": 595 + }, + { + "epoch": 0.15196328403875572, + "grad_norm": 0.5310829281806946, + "learning_rate": 1.9439131711990064e-05, + "loss": 0.524756669998169, + "step": 596 + }, + { + "epoch": 0.15221825599184088, + "grad_norm": 0.5013731718063354, + "learning_rate": 1.943634356521671e-05, + "loss": 0.5259018540382385, + "step": 597 + }, + { + "epoch": 0.15247322794492607, + "grad_norm": 0.49433621764183044, + "learning_rate": 1.9433548706459394e-05, + "loss": 0.540558934211731, + "step": 598 + }, + { + "epoch": 0.15272819989801123, + "grad_norm": 0.47782808542251587, + "learning_rate": 1.9430747137706075e-05, + "loss": 0.5381788015365601, + "step": 599 + }, + { + "epoch": 0.1529831718510964, + "grad_norm": 0.4966195821762085, + "learning_rate": 1.942793886094948e-05, + "loss": 0.5175549983978271, + "step": 600 + }, + { + "epoch": 0.15323814380418155, + "grad_norm": 0.5325585007667542, + "learning_rate": 1.942512387818711e-05, + "loss": 0.5350815653800964, + "step": 601 + }, + { + "epoch": 0.1534931157572667, + "grad_norm": 0.4902491867542267, + "learning_rate": 1.942230219142124e-05, + "loss": 0.5346603393554688, + "step": 602 + }, + { + "epoch": 0.15374808771035187, + "grad_norm": 0.5321222543716431, + "learning_rate": 1.941947380265891e-05, + "loss": 0.5547575950622559, + "step": 603 + }, + { + "epoch": 0.15400305966343703, + "grad_norm": 0.4908081591129303, + "learning_rate": 1.9416638713911918e-05, + "loss": 0.5460398197174072, + "step": 604 + }, + { + "epoch": 0.15425803161652218, + "grad_norm": 0.5266767144203186, + "learning_rate": 1.9413796927196843e-05, + "loss": 0.5413973331451416, + "step": 605 + }, + { + "epoch": 0.15451300356960734, + "grad_norm": 0.5171524882316589, + "learning_rate": 1.9410948444535026e-05, + "loss": 0.5335320234298706, + "step": 606 + }, + { + "epoch": 0.1547679755226925, + "grad_norm": 0.5076289176940918, + "learning_rate": 1.940809326795256e-05, + "loss": 0.5222631692886353, + "step": 607 + }, + { + "epoch": 0.15502294747577766, + "grad_norm": 0.5298922061920166, + "learning_rate": 1.9405231399480308e-05, + "loss": 0.5390610694885254, + "step": 608 + }, + { + "epoch": 0.15527791942886282, + "grad_norm": 0.4918667674064636, + "learning_rate": 1.9402362841153887e-05, + "loss": 0.5259802937507629, + "step": 609 + }, + { + "epoch": 0.15553289138194798, + "grad_norm": 0.49876874685287476, + "learning_rate": 1.9399487595013683e-05, + "loss": 0.5446267127990723, + "step": 610 + }, + { + "epoch": 0.15578786333503314, + "grad_norm": 0.5575383901596069, + "learning_rate": 1.9396605663104825e-05, + "loss": 0.5347251296043396, + "step": 611 + }, + { + "epoch": 0.1560428352881183, + "grad_norm": 0.47660186886787415, + "learning_rate": 1.9393717047477213e-05, + "loss": 0.5166308879852295, + "step": 612 + }, + { + "epoch": 0.15629780724120346, + "grad_norm": 0.5122316479682922, + "learning_rate": 1.9390821750185482e-05, + "loss": 0.5235748291015625, + "step": 613 + }, + { + "epoch": 0.15655277919428862, + "grad_norm": 0.5255897045135498, + "learning_rate": 1.9387919773289036e-05, + "loss": 0.531277060508728, + "step": 614 + }, + { + "epoch": 0.15680775114737378, + "grad_norm": 0.5205806493759155, + "learning_rate": 1.9385011118852023e-05, + "loss": 0.5403878688812256, + "step": 615 + }, + { + "epoch": 0.15706272310045896, + "grad_norm": 0.52583247423172, + "learning_rate": 1.9382095788943345e-05, + "loss": 0.5407297015190125, + "step": 616 + }, + { + "epoch": 0.15731769505354412, + "grad_norm": 0.49851149320602417, + "learning_rate": 1.9379173785636647e-05, + "loss": 0.5462212562561035, + "step": 617 + }, + { + "epoch": 0.15757266700662928, + "grad_norm": 0.5226295590400696, + "learning_rate": 1.9376245111010324e-05, + "loss": 0.5349191427230835, + "step": 618 + }, + { + "epoch": 0.15782763895971444, + "grad_norm": 0.4985732436180115, + "learning_rate": 1.937330976714751e-05, + "loss": 0.5181032419204712, + "step": 619 + }, + { + "epoch": 0.1580826109127996, + "grad_norm": 0.5089516043663025, + "learning_rate": 1.9370367756136093e-05, + "loss": 0.5246825218200684, + "step": 620 + }, + { + "epoch": 0.15833758286588476, + "grad_norm": 0.5574972629547119, + "learning_rate": 1.93674190800687e-05, + "loss": 0.5245094299316406, + "step": 621 + }, + { + "epoch": 0.15859255481896992, + "grad_norm": 0.5276218056678772, + "learning_rate": 1.9364463741042694e-05, + "loss": 0.542697012424469, + "step": 622 + }, + { + "epoch": 0.15884752677205508, + "grad_norm": 0.5665618777275085, + "learning_rate": 1.936150174116018e-05, + "loss": 0.5375609397888184, + "step": 623 + }, + { + "epoch": 0.15910249872514023, + "grad_norm": 0.5423240661621094, + "learning_rate": 1.9358533082528e-05, + "loss": 0.5459458827972412, + "step": 624 + }, + { + "epoch": 0.1593574706782254, + "grad_norm": 0.5253497958183289, + "learning_rate": 1.9355557767257738e-05, + "loss": 0.5167371034622192, + "step": 625 + }, + { + "epoch": 0.15961244263131055, + "grad_norm": 0.5296688675880432, + "learning_rate": 1.9352575797465704e-05, + "loss": 0.5493143200874329, + "step": 626 + }, + { + "epoch": 0.1598674145843957, + "grad_norm": 0.7724140882492065, + "learning_rate": 1.934958717527295e-05, + "loss": 0.5376444458961487, + "step": 627 + }, + { + "epoch": 0.16012238653748087, + "grad_norm": 0.5141907334327698, + "learning_rate": 1.9346591902805246e-05, + "loss": 0.5336338877677917, + "step": 628 + }, + { + "epoch": 0.16037735849056603, + "grad_norm": 0.49341434240341187, + "learning_rate": 1.9343589982193113e-05, + "loss": 0.529479444026947, + "step": 629 + }, + { + "epoch": 0.1606323304436512, + "grad_norm": 0.5087915062904358, + "learning_rate": 1.9340581415571783e-05, + "loss": 0.5335008502006531, + "step": 630 + }, + { + "epoch": 0.16088730239673635, + "grad_norm": 0.5580664873123169, + "learning_rate": 1.9337566205081225e-05, + "loss": 0.530624508857727, + "step": 631 + }, + { + "epoch": 0.1611422743498215, + "grad_norm": 0.49169811606407166, + "learning_rate": 1.9334544352866126e-05, + "loss": 0.5459813475608826, + "step": 632 + }, + { + "epoch": 0.1613972463029067, + "grad_norm": 0.49668213725090027, + "learning_rate": 1.9331515861075907e-05, + "loss": 0.5162781476974487, + "step": 633 + }, + { + "epoch": 0.16165221825599185, + "grad_norm": 0.5077573657035828, + "learning_rate": 1.9328480731864698e-05, + "loss": 0.5343323349952698, + "step": 634 + }, + { + "epoch": 0.161907190209077, + "grad_norm": 0.5330377817153931, + "learning_rate": 1.932543896739137e-05, + "loss": 0.5407090187072754, + "step": 635 + }, + { + "epoch": 0.16216216216216217, + "grad_norm": 0.5178587436676025, + "learning_rate": 1.932239056981949e-05, + "loss": 0.5372127890586853, + "step": 636 + }, + { + "epoch": 0.16241713411524733, + "grad_norm": 0.4959794878959656, + "learning_rate": 1.9319335541317363e-05, + "loss": 0.5220591425895691, + "step": 637 + }, + { + "epoch": 0.1626721060683325, + "grad_norm": 0.4854819178581238, + "learning_rate": 1.9316273884058e-05, + "loss": 0.544111430644989, + "step": 638 + }, + { + "epoch": 0.16292707802141765, + "grad_norm": 0.49497857689857483, + "learning_rate": 1.931320560021913e-05, + "loss": 0.5263684988021851, + "step": 639 + }, + { + "epoch": 0.1631820499745028, + "grad_norm": 0.5102571249008179, + "learning_rate": 1.9310130691983192e-05, + "loss": 0.5366497039794922, + "step": 640 + }, + { + "epoch": 0.16343702192758797, + "grad_norm": 0.5193189978599548, + "learning_rate": 1.9307049161537346e-05, + "loss": 0.5210189819335938, + "step": 641 + }, + { + "epoch": 0.16369199388067313, + "grad_norm": 0.5115967988967896, + "learning_rate": 1.9303961011073448e-05, + "loss": 0.532012403011322, + "step": 642 + }, + { + "epoch": 0.16394696583375828, + "grad_norm": 0.5676260590553284, + "learning_rate": 1.930086624278808e-05, + "loss": 0.523317813873291, + "step": 643 + }, + { + "epoch": 0.16420193778684344, + "grad_norm": 0.5026125907897949, + "learning_rate": 1.9297764858882516e-05, + "loss": 0.539628267288208, + "step": 644 + }, + { + "epoch": 0.1644569097399286, + "grad_norm": 0.520626962184906, + "learning_rate": 1.929465686156274e-05, + "loss": 0.5272216200828552, + "step": 645 + }, + { + "epoch": 0.16471188169301376, + "grad_norm": 0.4822489619255066, + "learning_rate": 1.929154225303945e-05, + "loss": 0.5315180420875549, + "step": 646 + }, + { + "epoch": 0.16496685364609892, + "grad_norm": 0.5027172565460205, + "learning_rate": 1.928842103552803e-05, + "loss": 0.5313969254493713, + "step": 647 + }, + { + "epoch": 0.16522182559918408, + "grad_norm": 0.5247225761413574, + "learning_rate": 1.9285293211248578e-05, + "loss": 0.5298991203308105, + "step": 648 + }, + { + "epoch": 0.16547679755226924, + "grad_norm": 0.5090324878692627, + "learning_rate": 1.9282158782425884e-05, + "loss": 0.5201632976531982, + "step": 649 + }, + { + "epoch": 0.1657317695053544, + "grad_norm": 0.5293415784835815, + "learning_rate": 1.9279017751289438e-05, + "loss": 0.5382990837097168, + "step": 650 + }, + { + "epoch": 0.16598674145843959, + "grad_norm": 0.5205861330032349, + "learning_rate": 1.9275870120073426e-05, + "loss": 0.5317997932434082, + "step": 651 + }, + { + "epoch": 0.16624171341152474, + "grad_norm": 0.5162908434867859, + "learning_rate": 1.9272715891016736e-05, + "loss": 0.5323415994644165, + "step": 652 + }, + { + "epoch": 0.1664966853646099, + "grad_norm": 0.5015779733657837, + "learning_rate": 1.926955506636293e-05, + "loss": 0.5262338519096375, + "step": 653 + }, + { + "epoch": 0.16675165731769506, + "grad_norm": 0.49459603428840637, + "learning_rate": 1.9266387648360285e-05, + "loss": 0.5312216281890869, + "step": 654 + }, + { + "epoch": 0.16700662927078022, + "grad_norm": 0.5384743213653564, + "learning_rate": 1.926321363926175e-05, + "loss": 0.5280896425247192, + "step": 655 + }, + { + "epoch": 0.16726160122386538, + "grad_norm": 0.49762842059135437, + "learning_rate": 1.9260033041324964e-05, + "loss": 0.5348333120346069, + "step": 656 + }, + { + "epoch": 0.16751657317695054, + "grad_norm": 0.5026784539222717, + "learning_rate": 1.9256845856812266e-05, + "loss": 0.5293280482292175, + "step": 657 + }, + { + "epoch": 0.1677715451300357, + "grad_norm": 0.5365166068077087, + "learning_rate": 1.9253652087990667e-05, + "loss": 0.5373440980911255, + "step": 658 + }, + { + "epoch": 0.16802651708312086, + "grad_norm": 0.5215746164321899, + "learning_rate": 1.9250451737131866e-05, + "loss": 0.536878228187561, + "step": 659 + }, + { + "epoch": 0.16828148903620602, + "grad_norm": 0.4788154363632202, + "learning_rate": 1.9247244806512244e-05, + "loss": 0.5261363983154297, + "step": 660 + }, + { + "epoch": 0.16853646098929118, + "grad_norm": 0.4960463345050812, + "learning_rate": 1.9244031298412857e-05, + "loss": 0.5358573198318481, + "step": 661 + }, + { + "epoch": 0.16879143294237633, + "grad_norm": 0.5835654735565186, + "learning_rate": 1.9240811215119447e-05, + "loss": 0.5243600606918335, + "step": 662 + }, + { + "epoch": 0.1690464048954615, + "grad_norm": 0.4894512891769409, + "learning_rate": 1.9237584558922425e-05, + "loss": 0.5246574878692627, + "step": 663 + }, + { + "epoch": 0.16930137684854665, + "grad_norm": 0.519819974899292, + "learning_rate": 1.9234351332116888e-05, + "loss": 0.52531898021698, + "step": 664 + }, + { + "epoch": 0.1695563488016318, + "grad_norm": 0.46499520540237427, + "learning_rate": 1.92311115370026e-05, + "loss": 0.5293796062469482, + "step": 665 + }, + { + "epoch": 0.16981132075471697, + "grad_norm": 0.484613835811615, + "learning_rate": 1.9227865175883993e-05, + "loss": 0.5152720212936401, + "step": 666 + }, + { + "epoch": 0.17006629270780213, + "grad_norm": 0.49043798446655273, + "learning_rate": 1.9224612251070176e-05, + "loss": 0.5403691530227661, + "step": 667 + }, + { + "epoch": 0.1703212646608873, + "grad_norm": 0.5324785113334656, + "learning_rate": 1.9221352764874923e-05, + "loss": 0.5379540920257568, + "step": 668 + }, + { + "epoch": 0.17057623661397248, + "grad_norm": 0.4996960163116455, + "learning_rate": 1.9218086719616678e-05, + "loss": 0.5312554240226746, + "step": 669 + }, + { + "epoch": 0.17083120856705764, + "grad_norm": 0.46016421914100647, + "learning_rate": 1.9214814117618547e-05, + "loss": 0.5201725363731384, + "step": 670 + }, + { + "epoch": 0.1710861805201428, + "grad_norm": 0.499889999628067, + "learning_rate": 1.9211534961208303e-05, + "loss": 0.5231727957725525, + "step": 671 + }, + { + "epoch": 0.17134115247322795, + "grad_norm": 0.6887993216514587, + "learning_rate": 1.920824925271838e-05, + "loss": 0.5276427268981934, + "step": 672 + }, + { + "epoch": 0.1715961244263131, + "grad_norm": 0.5240216851234436, + "learning_rate": 1.9204956994485872e-05, + "loss": 0.5339590311050415, + "step": 673 + }, + { + "epoch": 0.17185109637939827, + "grad_norm": 0.5527732372283936, + "learning_rate": 1.920165818885253e-05, + "loss": 0.5293232798576355, + "step": 674 + }, + { + "epoch": 0.17210606833248343, + "grad_norm": 0.5374642610549927, + "learning_rate": 1.9198352838164767e-05, + "loss": 0.539370059967041, + "step": 675 + }, + { + "epoch": 0.1723610402855686, + "grad_norm": 0.658922553062439, + "learning_rate": 1.9195040944773645e-05, + "loss": 0.5308823585510254, + "step": 676 + }, + { + "epoch": 0.17261601223865375, + "grad_norm": 0.5424158573150635, + "learning_rate": 1.9191722511034884e-05, + "loss": 0.5404328107833862, + "step": 677 + }, + { + "epoch": 0.1728709841917389, + "grad_norm": 0.5352630019187927, + "learning_rate": 1.918839753930886e-05, + "loss": 0.5331385135650635, + "step": 678 + }, + { + "epoch": 0.17312595614482407, + "grad_norm": 0.5205698013305664, + "learning_rate": 1.9185066031960587e-05, + "loss": 0.5314624309539795, + "step": 679 + }, + { + "epoch": 0.17338092809790923, + "grad_norm": 0.5151124596595764, + "learning_rate": 1.918172799135974e-05, + "loss": 0.5420811176300049, + "step": 680 + }, + { + "epoch": 0.17363590005099439, + "grad_norm": 0.522510290145874, + "learning_rate": 1.9178383419880635e-05, + "loss": 0.5371588468551636, + "step": 681 + }, + { + "epoch": 0.17389087200407954, + "grad_norm": 0.5063507556915283, + "learning_rate": 1.9175032319902236e-05, + "loss": 0.5372850298881531, + "step": 682 + }, + { + "epoch": 0.1741458439571647, + "grad_norm": 0.5166487693786621, + "learning_rate": 1.917167469380815e-05, + "loss": 0.5451241731643677, + "step": 683 + }, + { + "epoch": 0.17440081591024986, + "grad_norm": 0.5515632033348083, + "learning_rate": 1.916831054398662e-05, + "loss": 0.5475760698318481, + "step": 684 + }, + { + "epoch": 0.17465578786333502, + "grad_norm": 0.5165284872055054, + "learning_rate": 1.9164939872830542e-05, + "loss": 0.5366044044494629, + "step": 685 + }, + { + "epoch": 0.17491075981642018, + "grad_norm": 0.5325884222984314, + "learning_rate": 1.916156268273744e-05, + "loss": 0.5388437509536743, + "step": 686 + }, + { + "epoch": 0.17516573176950537, + "grad_norm": 0.5325320959091187, + "learning_rate": 1.9158178976109475e-05, + "loss": 0.5334592461585999, + "step": 687 + }, + { + "epoch": 0.17542070372259053, + "grad_norm": 0.5373082160949707, + "learning_rate": 1.9154788755353454e-05, + "loss": 0.5266925692558289, + "step": 688 + }, + { + "epoch": 0.17567567567567569, + "grad_norm": 0.5505048036575317, + "learning_rate": 1.9151392022880802e-05, + "loss": 0.5321172475814819, + "step": 689 + }, + { + "epoch": 0.17593064762876084, + "grad_norm": 0.5130444169044495, + "learning_rate": 1.9147988781107588e-05, + "loss": 0.5240427851676941, + "step": 690 + }, + { + "epoch": 0.176185619581846, + "grad_norm": 0.5584560036659241, + "learning_rate": 1.9144579032454503e-05, + "loss": 0.5241004228591919, + "step": 691 + }, + { + "epoch": 0.17644059153493116, + "grad_norm": 0.46925339102745056, + "learning_rate": 1.9141162779346875e-05, + "loss": 0.5358484983444214, + "step": 692 + }, + { + "epoch": 0.17669556348801632, + "grad_norm": 0.5069420337677002, + "learning_rate": 1.9137740024214647e-05, + "loss": 0.518207311630249, + "step": 693 + }, + { + "epoch": 0.17695053544110148, + "grad_norm": 0.5575243234634399, + "learning_rate": 1.91343107694924e-05, + "loss": 0.5154577493667603, + "step": 694 + }, + { + "epoch": 0.17720550739418664, + "grad_norm": 0.510741651058197, + "learning_rate": 1.913087501761932e-05, + "loss": 0.5157490968704224, + "step": 695 + }, + { + "epoch": 0.1774604793472718, + "grad_norm": 0.5032539367675781, + "learning_rate": 1.912743277103924e-05, + "loss": 0.5181795954704285, + "step": 696 + }, + { + "epoch": 0.17771545130035696, + "grad_norm": 0.49168163537979126, + "learning_rate": 1.9123984032200586e-05, + "loss": 0.5217337608337402, + "step": 697 + }, + { + "epoch": 0.17797042325344212, + "grad_norm": 0.5148937702178955, + "learning_rate": 1.912052880355642e-05, + "loss": 0.5191341638565063, + "step": 698 + }, + { + "epoch": 0.17822539520652728, + "grad_norm": 0.5000565052032471, + "learning_rate": 1.9117067087564413e-05, + "loss": 0.5333778262138367, + "step": 699 + }, + { + "epoch": 0.17848036715961244, + "grad_norm": 0.5110312700271606, + "learning_rate": 1.9113598886686856e-05, + "loss": 0.5120112895965576, + "step": 700 + }, + { + "epoch": 0.1787353391126976, + "grad_norm": 0.5078974962234497, + "learning_rate": 1.911012420339064e-05, + "loss": 0.5289940237998962, + "step": 701 + }, + { + "epoch": 0.17899031106578275, + "grad_norm": 0.5486764311790466, + "learning_rate": 1.910664304014728e-05, + "loss": 0.5259402990341187, + "step": 702 + }, + { + "epoch": 0.1792452830188679, + "grad_norm": 0.5564773678779602, + "learning_rate": 1.910315539943289e-05, + "loss": 0.5347332954406738, + "step": 703 + }, + { + "epoch": 0.17950025497195307, + "grad_norm": 0.6226455569267273, + "learning_rate": 1.9099661283728207e-05, + "loss": 0.5321149826049805, + "step": 704 + }, + { + "epoch": 0.17975522692503826, + "grad_norm": 0.5020618438720703, + "learning_rate": 1.9096160695518552e-05, + "loss": 0.5225280523300171, + "step": 705 + }, + { + "epoch": 0.18001019887812342, + "grad_norm": 0.5472486019134521, + "learning_rate": 1.9092653637293873e-05, + "loss": 0.5333778858184814, + "step": 706 + }, + { + "epoch": 0.18026517083120858, + "grad_norm": 0.5323438048362732, + "learning_rate": 1.9089140111548695e-05, + "loss": 0.5389878153800964, + "step": 707 + }, + { + "epoch": 0.18052014278429374, + "grad_norm": 0.5173847079277039, + "learning_rate": 1.9085620120782165e-05, + "loss": 0.5251544713973999, + "step": 708 + }, + { + "epoch": 0.1807751147373789, + "grad_norm": 0.53608238697052, + "learning_rate": 1.9082093667498018e-05, + "loss": 0.5316119194030762, + "step": 709 + }, + { + "epoch": 0.18103008669046405, + "grad_norm": 0.5072662234306335, + "learning_rate": 1.907856075420459e-05, + "loss": 0.5360819101333618, + "step": 710 + }, + { + "epoch": 0.1812850586435492, + "grad_norm": 0.5388319492340088, + "learning_rate": 1.9075021383414804e-05, + "loss": 0.5342385172843933, + "step": 711 + }, + { + "epoch": 0.18154003059663437, + "grad_norm": 1.0042855739593506, + "learning_rate": 1.907147555764618e-05, + "loss": 0.5308269262313843, + "step": 712 + }, + { + "epoch": 0.18179500254971953, + "grad_norm": 0.5531407594680786, + "learning_rate": 1.9067923279420842e-05, + "loss": 0.5368529558181763, + "step": 713 + }, + { + "epoch": 0.1820499745028047, + "grad_norm": 0.5203389525413513, + "learning_rate": 1.9064364551265483e-05, + "loss": 0.5198056101799011, + "step": 714 + }, + { + "epoch": 0.18230494645588985, + "grad_norm": 0.5286828279495239, + "learning_rate": 1.9060799375711397e-05, + "loss": 0.5205039381980896, + "step": 715 + }, + { + "epoch": 0.182559918408975, + "grad_norm": 0.5192756056785583, + "learning_rate": 1.905722775529446e-05, + "loss": 0.5289533138275146, + "step": 716 + }, + { + "epoch": 0.18281489036206017, + "grad_norm": 0.5478755235671997, + "learning_rate": 1.9053649692555135e-05, + "loss": 0.5210616588592529, + "step": 717 + }, + { + "epoch": 0.18306986231514533, + "grad_norm": 0.7221400141716003, + "learning_rate": 1.905006519003846e-05, + "loss": 0.5364406108856201, + "step": 718 + }, + { + "epoch": 0.18332483426823049, + "grad_norm": 0.5132589340209961, + "learning_rate": 1.9046474250294062e-05, + "loss": 0.5295343399047852, + "step": 719 + }, + { + "epoch": 0.18357980622131564, + "grad_norm": 0.513512909412384, + "learning_rate": 1.9042876875876145e-05, + "loss": 0.5361637473106384, + "step": 720 + }, + { + "epoch": 0.1838347781744008, + "grad_norm": 0.5017802715301514, + "learning_rate": 1.9039273069343482e-05, + "loss": 0.514184296131134, + "step": 721 + }, + { + "epoch": 0.18408975012748596, + "grad_norm": 0.5017067193984985, + "learning_rate": 1.9035662833259433e-05, + "loss": 0.5232157707214355, + "step": 722 + }, + { + "epoch": 0.18434472208057115, + "grad_norm": 0.5659171938896179, + "learning_rate": 1.903204617019192e-05, + "loss": 0.5253515839576721, + "step": 723 + }, + { + "epoch": 0.1845996940336563, + "grad_norm": 0.5655650496482849, + "learning_rate": 1.902842308271345e-05, + "loss": 0.532926082611084, + "step": 724 + }, + { + "epoch": 0.18485466598674147, + "grad_norm": 0.6155320405960083, + "learning_rate": 1.9024793573401087e-05, + "loss": 0.5291966795921326, + "step": 725 + }, + { + "epoch": 0.18510963793982663, + "grad_norm": 0.5772908926010132, + "learning_rate": 1.9021157644836465e-05, + "loss": 0.5200927257537842, + "step": 726 + }, + { + "epoch": 0.18536460989291179, + "grad_norm": 0.4874703884124756, + "learning_rate": 1.901751529960579e-05, + "loss": 0.5172476768493652, + "step": 727 + }, + { + "epoch": 0.18561958184599694, + "grad_norm": 0.5260182619094849, + "learning_rate": 1.9013866540299825e-05, + "loss": 0.5145378112792969, + "step": 728 + }, + { + "epoch": 0.1858745537990821, + "grad_norm": 0.48625022172927856, + "learning_rate": 1.9010211369513905e-05, + "loss": 0.5356984734535217, + "step": 729 + }, + { + "epoch": 0.18612952575216726, + "grad_norm": 0.5057938098907471, + "learning_rate": 1.9006549789847912e-05, + "loss": 0.5326985120773315, + "step": 730 + }, + { + "epoch": 0.18638449770525242, + "grad_norm": 0.5073822140693665, + "learning_rate": 1.90028818039063e-05, + "loss": 0.5180009007453918, + "step": 731 + }, + { + "epoch": 0.18663946965833758, + "grad_norm": 0.47638195753097534, + "learning_rate": 1.899920741429807e-05, + "loss": 0.5337942838668823, + "step": 732 + }, + { + "epoch": 0.18689444161142274, + "grad_norm": 0.4857496917247772, + "learning_rate": 1.899552662363678e-05, + "loss": 0.5289947986602783, + "step": 733 + }, + { + "epoch": 0.1871494135645079, + "grad_norm": 0.483283668756485, + "learning_rate": 1.8991839434540543e-05, + "loss": 0.5122606754302979, + "step": 734 + }, + { + "epoch": 0.18740438551759306, + "grad_norm": 0.5082046985626221, + "learning_rate": 1.898814584963203e-05, + "loss": 0.5219171047210693, + "step": 735 + }, + { + "epoch": 0.18765935747067822, + "grad_norm": 0.7273216247558594, + "learning_rate": 1.8984445871538444e-05, + "loss": 0.5157103538513184, + "step": 736 + }, + { + "epoch": 0.18791432942376338, + "grad_norm": 0.49670591950416565, + "learning_rate": 1.8980739502891548e-05, + "loss": 0.5254105925559998, + "step": 737 + }, + { + "epoch": 0.18816930137684854, + "grad_norm": 0.47980067133903503, + "learning_rate": 1.897702674632765e-05, + "loss": 0.5299422740936279, + "step": 738 + }, + { + "epoch": 0.1884242733299337, + "grad_norm": 0.49665823578834534, + "learning_rate": 1.89733076044876e-05, + "loss": 0.5355211496353149, + "step": 739 + }, + { + "epoch": 0.18867924528301888, + "grad_norm": 0.4740928113460541, + "learning_rate": 1.8969582080016783e-05, + "loss": 0.5235376358032227, + "step": 740 + }, + { + "epoch": 0.18893421723610404, + "grad_norm": 0.49630486965179443, + "learning_rate": 1.8965850175565136e-05, + "loss": 0.5099401473999023, + "step": 741 + }, + { + "epoch": 0.1891891891891892, + "grad_norm": 0.47335001826286316, + "learning_rate": 1.896211189378713e-05, + "loss": 0.5353489518165588, + "step": 742 + }, + { + "epoch": 0.18944416114227436, + "grad_norm": 0.5141160488128662, + "learning_rate": 1.8958367237341763e-05, + "loss": 0.5258992314338684, + "step": 743 + }, + { + "epoch": 0.18969913309535952, + "grad_norm": 0.4717693030834198, + "learning_rate": 1.895461620889258e-05, + "loss": 0.5221823453903198, + "step": 744 + }, + { + "epoch": 0.18995410504844468, + "grad_norm": 0.49536171555519104, + "learning_rate": 1.8950858811107654e-05, + "loss": 0.5208817720413208, + "step": 745 + }, + { + "epoch": 0.19020907700152984, + "grad_norm": 0.482179194688797, + "learning_rate": 1.894709504665958e-05, + "loss": 0.5294069051742554, + "step": 746 + }, + { + "epoch": 0.190464048954615, + "grad_norm": 0.49157753586769104, + "learning_rate": 1.8943324918225495e-05, + "loss": 0.5172535181045532, + "step": 747 + }, + { + "epoch": 0.19071902090770015, + "grad_norm": 0.47853702306747437, + "learning_rate": 1.8939548428487055e-05, + "loss": 0.5317491292953491, + "step": 748 + }, + { + "epoch": 0.1909739928607853, + "grad_norm": 0.5791231989860535, + "learning_rate": 1.8935765580130443e-05, + "loss": 0.5277394652366638, + "step": 749 + }, + { + "epoch": 0.19122896481387047, + "grad_norm": 0.5110471248626709, + "learning_rate": 1.8931976375846363e-05, + "loss": 0.5369372367858887, + "step": 750 + }, + { + "epoch": 0.19148393676695563, + "grad_norm": 0.5030074119567871, + "learning_rate": 1.892818081833004e-05, + "loss": 0.5264465808868408, + "step": 751 + }, + { + "epoch": 0.1917389087200408, + "grad_norm": 0.554189145565033, + "learning_rate": 1.892437891028122e-05, + "loss": 0.5304807424545288, + "step": 752 + }, + { + "epoch": 0.19199388067312595, + "grad_norm": 0.518322229385376, + "learning_rate": 1.8920570654404168e-05, + "loss": 0.510017454624176, + "step": 753 + }, + { + "epoch": 0.1922488526262111, + "grad_norm": 0.47035935521125793, + "learning_rate": 1.8916756053407658e-05, + "loss": 0.518936276435852, + "step": 754 + }, + { + "epoch": 0.19250382457929627, + "grad_norm": 0.5004586577415466, + "learning_rate": 1.8912935110004977e-05, + "loss": 0.5358216762542725, + "step": 755 + }, + { + "epoch": 0.19275879653238143, + "grad_norm": 0.5754180550575256, + "learning_rate": 1.8909107826913933e-05, + "loss": 0.521419107913971, + "step": 756 + }, + { + "epoch": 0.19301376848546659, + "grad_norm": 0.4933522939682007, + "learning_rate": 1.890527420685684e-05, + "loss": 0.5265296697616577, + "step": 757 + }, + { + "epoch": 0.19326874043855177, + "grad_norm": 0.48939067125320435, + "learning_rate": 1.8901434252560503e-05, + "loss": 0.5180717706680298, + "step": 758 + }, + { + "epoch": 0.19352371239163693, + "grad_norm": 0.5148706436157227, + "learning_rate": 1.8897587966756258e-05, + "loss": 0.5236243009567261, + "step": 759 + }, + { + "epoch": 0.1937786843447221, + "grad_norm": 0.5004563331604004, + "learning_rate": 1.889373535217993e-05, + "loss": 0.509172797203064, + "step": 760 + }, + { + "epoch": 0.19403365629780725, + "grad_norm": 0.46056416630744934, + "learning_rate": 1.8889876411571847e-05, + "loss": 0.5105539560317993, + "step": 761 + }, + { + "epoch": 0.1942886282508924, + "grad_norm": 0.4904102385044098, + "learning_rate": 1.8886011147676835e-05, + "loss": 0.5207321047782898, + "step": 762 + }, + { + "epoch": 0.19454360020397757, + "grad_norm": 0.48944613337516785, + "learning_rate": 1.8882139563244225e-05, + "loss": 0.5243609547615051, + "step": 763 + }, + { + "epoch": 0.19479857215706273, + "grad_norm": 0.4759974777698517, + "learning_rate": 1.8878261661027838e-05, + "loss": 0.5158101916313171, + "step": 764 + }, + { + "epoch": 0.1950535441101479, + "grad_norm": 0.525944709777832, + "learning_rate": 1.8874377443785986e-05, + "loss": 0.5378414392471313, + "step": 765 + }, + { + "epoch": 0.19530851606323305, + "grad_norm": 0.5273529887199402, + "learning_rate": 1.887048691428148e-05, + "loss": 0.5196689963340759, + "step": 766 + }, + { + "epoch": 0.1955634880163182, + "grad_norm": 0.4938674569129944, + "learning_rate": 1.8866590075281624e-05, + "loss": 0.5200862288475037, + "step": 767 + }, + { + "epoch": 0.19581845996940336, + "grad_norm": 0.5003823041915894, + "learning_rate": 1.8862686929558195e-05, + "loss": 0.5321277976036072, + "step": 768 + }, + { + "epoch": 0.19607343192248852, + "grad_norm": 0.4937255084514618, + "learning_rate": 1.8858777479887465e-05, + "loss": 0.5314494967460632, + "step": 769 + }, + { + "epoch": 0.19632840387557368, + "grad_norm": 0.5254429578781128, + "learning_rate": 1.8854861729050194e-05, + "loss": 0.5185518860816956, + "step": 770 + }, + { + "epoch": 0.19658337582865884, + "grad_norm": 0.4962460994720459, + "learning_rate": 1.8850939679831614e-05, + "loss": 0.5256330966949463, + "step": 771 + }, + { + "epoch": 0.196838347781744, + "grad_norm": 0.49898064136505127, + "learning_rate": 1.8847011335021447e-05, + "loss": 0.5047429800033569, + "step": 772 + }, + { + "epoch": 0.19709331973482916, + "grad_norm": 0.4960572123527527, + "learning_rate": 1.8843076697413888e-05, + "loss": 0.5207228660583496, + "step": 773 + }, + { + "epoch": 0.19734829168791432, + "grad_norm": 0.5059972405433655, + "learning_rate": 1.8839135769807605e-05, + "loss": 0.5412514209747314, + "step": 774 + }, + { + "epoch": 0.19760326364099948, + "grad_norm": 0.4811801016330719, + "learning_rate": 1.8835188555005744e-05, + "loss": 0.52277010679245, + "step": 775 + }, + { + "epoch": 0.19785823559408466, + "grad_norm": 0.5361527800559998, + "learning_rate": 1.8831235055815927e-05, + "loss": 0.5182678699493408, + "step": 776 + }, + { + "epoch": 0.19811320754716982, + "grad_norm": 0.48322221636772156, + "learning_rate": 1.8827275275050233e-05, + "loss": 0.513138473033905, + "step": 777 + }, + { + "epoch": 0.19836817950025498, + "grad_norm": 0.4969165027141571, + "learning_rate": 1.8823309215525227e-05, + "loss": 0.5136932134628296, + "step": 778 + }, + { + "epoch": 0.19862315145334014, + "grad_norm": 0.4899584949016571, + "learning_rate": 1.8819336880061926e-05, + "loss": 0.5196957588195801, + "step": 779 + }, + { + "epoch": 0.1988781234064253, + "grad_norm": 0.49217143654823303, + "learning_rate": 1.8815358271485814e-05, + "loss": 0.5142471790313721, + "step": 780 + }, + { + "epoch": 0.19913309535951046, + "grad_norm": 0.4637305736541748, + "learning_rate": 1.881137339262684e-05, + "loss": 0.5263211131095886, + "step": 781 + }, + { + "epoch": 0.19938806731259562, + "grad_norm": 0.5127052664756775, + "learning_rate": 1.8807382246319413e-05, + "loss": 0.5331568717956543, + "step": 782 + }, + { + "epoch": 0.19964303926568078, + "grad_norm": 0.4882904589176178, + "learning_rate": 1.8803384835402395e-05, + "loss": 0.5182445049285889, + "step": 783 + }, + { + "epoch": 0.19989801121876594, + "grad_norm": 0.511396586894989, + "learning_rate": 1.879938116271911e-05, + "loss": 0.529838502407074, + "step": 784 + }, + { + "epoch": 0.2001529831718511, + "grad_norm": 0.476744681596756, + "learning_rate": 1.8795371231117334e-05, + "loss": 0.5218862295150757, + "step": 785 + }, + { + "epoch": 0.20040795512493625, + "grad_norm": 0.46153387427330017, + "learning_rate": 1.879135504344929e-05, + "loss": 0.5229381322860718, + "step": 786 + }, + { + "epoch": 0.2006629270780214, + "grad_norm": 0.5047249794006348, + "learning_rate": 1.8787332602571663e-05, + "loss": 0.5238070487976074, + "step": 787 + }, + { + "epoch": 0.20091789903110657, + "grad_norm": 0.48882344365119934, + "learning_rate": 1.878330391134557e-05, + "loss": 0.5123788714408875, + "step": 788 + }, + { + "epoch": 0.20117287098419173, + "grad_norm": 0.5000123977661133, + "learning_rate": 1.8779268972636587e-05, + "loss": 0.5192744731903076, + "step": 789 + }, + { + "epoch": 0.2014278429372769, + "grad_norm": 0.4846855700016022, + "learning_rate": 1.8775227789314723e-05, + "loss": 0.5199167728424072, + "step": 790 + }, + { + "epoch": 0.20168281489036205, + "grad_norm": 0.4933943450450897, + "learning_rate": 1.8771180364254443e-05, + "loss": 0.513075590133667, + "step": 791 + }, + { + "epoch": 0.2019377868434472, + "grad_norm": 0.4882349371910095, + "learning_rate": 1.8767126700334633e-05, + "loss": 0.5088496804237366, + "step": 792 + }, + { + "epoch": 0.20219275879653237, + "grad_norm": 0.5605803728103638, + "learning_rate": 1.8763066800438638e-05, + "loss": 0.5345818996429443, + "step": 793 + }, + { + "epoch": 0.20244773074961755, + "grad_norm": 0.6046520471572876, + "learning_rate": 1.875900066745422e-05, + "loss": 0.522911787033081, + "step": 794 + }, + { + "epoch": 0.20270270270270271, + "grad_norm": 0.49711018800735474, + "learning_rate": 1.875492830427358e-05, + "loss": 0.5213525295257568, + "step": 795 + }, + { + "epoch": 0.20295767465578787, + "grad_norm": 0.4825911521911621, + "learning_rate": 1.8750849713793354e-05, + "loss": 0.5204763412475586, + "step": 796 + }, + { + "epoch": 0.20321264660887303, + "grad_norm": 0.4894961416721344, + "learning_rate": 1.874676489891461e-05, + "loss": 0.5249648690223694, + "step": 797 + }, + { + "epoch": 0.2034676185619582, + "grad_norm": 0.5199370980262756, + "learning_rate": 1.8742673862542834e-05, + "loss": 0.5049653053283691, + "step": 798 + }, + { + "epoch": 0.20372259051504335, + "grad_norm": 0.5881942510604858, + "learning_rate": 1.8738576607587946e-05, + "loss": 0.5105040073394775, + "step": 799 + }, + { + "epoch": 0.2039775624681285, + "grad_norm": 0.5093399882316589, + "learning_rate": 1.8734473136964283e-05, + "loss": 0.5335594415664673, + "step": 800 + }, + { + "epoch": 0.20423253442121367, + "grad_norm": 0.5274905562400818, + "learning_rate": 1.8730363453590607e-05, + "loss": 0.539413571357727, + "step": 801 + }, + { + "epoch": 0.20448750637429883, + "grad_norm": 0.4588984251022339, + "learning_rate": 1.87262475603901e-05, + "loss": 0.5119473934173584, + "step": 802 + }, + { + "epoch": 0.204742478327384, + "grad_norm": 0.5083139538764954, + "learning_rate": 1.872212546029035e-05, + "loss": 0.523451566696167, + "step": 803 + }, + { + "epoch": 0.20499745028046915, + "grad_norm": 0.5172069072723389, + "learning_rate": 1.8717997156223383e-05, + "loss": 0.49723517894744873, + "step": 804 + }, + { + "epoch": 0.2052524222335543, + "grad_norm": 0.5106121301651001, + "learning_rate": 1.8713862651125607e-05, + "loss": 0.5193416476249695, + "step": 805 + }, + { + "epoch": 0.20550739418663946, + "grad_norm": 0.5006569027900696, + "learning_rate": 1.870972194793787e-05, + "loss": 0.5207656621932983, + "step": 806 + }, + { + "epoch": 0.20576236613972462, + "grad_norm": 0.4751335680484772, + "learning_rate": 1.8705575049605415e-05, + "loss": 0.5251712799072266, + "step": 807 + }, + { + "epoch": 0.20601733809280978, + "grad_norm": 0.4934970438480377, + "learning_rate": 1.8701421959077884e-05, + "loss": 0.5144690871238708, + "step": 808 + }, + { + "epoch": 0.20627231004589494, + "grad_norm": 0.5313510298728943, + "learning_rate": 1.8697262679309335e-05, + "loss": 0.5420048236846924, + "step": 809 + }, + { + "epoch": 0.2065272819989801, + "grad_norm": 0.4750503599643707, + "learning_rate": 1.8693097213258235e-05, + "loss": 0.5261687636375427, + "step": 810 + }, + { + "epoch": 0.20678225395206526, + "grad_norm": 0.4985625743865967, + "learning_rate": 1.868892556388743e-05, + "loss": 0.5317679643630981, + "step": 811 + }, + { + "epoch": 0.20703722590515045, + "grad_norm": 0.5090624094009399, + "learning_rate": 1.8684747734164177e-05, + "loss": 0.5190541744232178, + "step": 812 + }, + { + "epoch": 0.2072921978582356, + "grad_norm": 0.48770126700401306, + "learning_rate": 1.8680563727060133e-05, + "loss": 0.5069824457168579, + "step": 813 + }, + { + "epoch": 0.20754716981132076, + "grad_norm": 0.49879923462867737, + "learning_rate": 1.867637354555134e-05, + "loss": 0.5261608362197876, + "step": 814 + }, + { + "epoch": 0.20780214176440592, + "grad_norm": 0.5008533000946045, + "learning_rate": 1.8672177192618234e-05, + "loss": 0.5157227516174316, + "step": 815 + }, + { + "epoch": 0.20805711371749108, + "grad_norm": 0.5074318051338196, + "learning_rate": 1.8667974671245645e-05, + "loss": 0.530713677406311, + "step": 816 + }, + { + "epoch": 0.20831208567057624, + "grad_norm": 0.48070028424263, + "learning_rate": 1.866376598442279e-05, + "loss": 0.5272051692008972, + "step": 817 + }, + { + "epoch": 0.2085670576236614, + "grad_norm": 0.5248457789421082, + "learning_rate": 1.8659551135143262e-05, + "loss": 0.5113994479179382, + "step": 818 + }, + { + "epoch": 0.20882202957674656, + "grad_norm": 0.5021365284919739, + "learning_rate": 1.865533012640505e-05, + "loss": 0.5125217437744141, + "step": 819 + }, + { + "epoch": 0.20907700152983172, + "grad_norm": 0.4999274015426636, + "learning_rate": 1.8651102961210516e-05, + "loss": 0.4997989535331726, + "step": 820 + }, + { + "epoch": 0.20933197348291688, + "grad_norm": 0.5285374522209167, + "learning_rate": 1.86468696425664e-05, + "loss": 0.5188378691673279, + "step": 821 + }, + { + "epoch": 0.20958694543600204, + "grad_norm": 0.49890968203544617, + "learning_rate": 1.8642630173483832e-05, + "loss": 0.5256074666976929, + "step": 822 + }, + { + "epoch": 0.2098419173890872, + "grad_norm": 0.49642476439476013, + "learning_rate": 1.8638384556978302e-05, + "loss": 0.5300315618515015, + "step": 823 + }, + { + "epoch": 0.21009688934217235, + "grad_norm": 0.52839595079422, + "learning_rate": 1.8634132796069674e-05, + "loss": 0.5289017558097839, + "step": 824 + }, + { + "epoch": 0.21035186129525751, + "grad_norm": 0.4960280656814575, + "learning_rate": 1.8629874893782195e-05, + "loss": 0.5117241144180298, + "step": 825 + }, + { + "epoch": 0.21060683324834267, + "grad_norm": 0.47426652908325195, + "learning_rate": 1.8625610853144463e-05, + "loss": 0.5090838670730591, + "step": 826 + }, + { + "epoch": 0.21086180520142783, + "grad_norm": 0.5177625417709351, + "learning_rate": 1.862134067718945e-05, + "loss": 0.5097435116767883, + "step": 827 + }, + { + "epoch": 0.211116777154513, + "grad_norm": 0.4960584044456482, + "learning_rate": 1.86170643689545e-05, + "loss": 0.5163490772247314, + "step": 828 + }, + { + "epoch": 0.21137174910759818, + "grad_norm": 0.5274242758750916, + "learning_rate": 1.861278193148131e-05, + "loss": 0.51758873462677, + "step": 829 + }, + { + "epoch": 0.21162672106068334, + "grad_norm": 0.533641517162323, + "learning_rate": 1.8608493367815933e-05, + "loss": 0.5184922218322754, + "step": 830 + }, + { + "epoch": 0.2118816930137685, + "grad_norm": 0.48885729908943176, + "learning_rate": 1.8604198681008793e-05, + "loss": 0.5227895975112915, + "step": 831 + }, + { + "epoch": 0.21213666496685366, + "grad_norm": 0.5027369856834412, + "learning_rate": 1.859989787411465e-05, + "loss": 0.5039633512496948, + "step": 832 + }, + { + "epoch": 0.21239163691993881, + "grad_norm": 0.49498140811920166, + "learning_rate": 1.859559095019264e-05, + "loss": 0.5215380191802979, + "step": 833 + }, + { + "epoch": 0.21264660887302397, + "grad_norm": 0.5174261331558228, + "learning_rate": 1.859127791230623e-05, + "loss": 0.5233101844787598, + "step": 834 + }, + { + "epoch": 0.21290158082610913, + "grad_norm": 0.49725356698036194, + "learning_rate": 1.8586958763523246e-05, + "loss": 0.5073238015174866, + "step": 835 + }, + { + "epoch": 0.2131565527791943, + "grad_norm": 0.48951879143714905, + "learning_rate": 1.858263350691586e-05, + "loss": 0.5236285924911499, + "step": 836 + }, + { + "epoch": 0.21341152473227945, + "grad_norm": 0.5008646845817566, + "learning_rate": 1.8578302145560586e-05, + "loss": 0.51987624168396, + "step": 837 + }, + { + "epoch": 0.2136664966853646, + "grad_norm": 0.5107278823852539, + "learning_rate": 1.857396468253828e-05, + "loss": 0.5350760221481323, + "step": 838 + }, + { + "epoch": 0.21392146863844977, + "grad_norm": 0.491146445274353, + "learning_rate": 1.856962112093414e-05, + "loss": 0.5397478342056274, + "step": 839 + }, + { + "epoch": 0.21417644059153493, + "grad_norm": 0.4717349708080292, + "learning_rate": 1.8565271463837706e-05, + "loss": 0.5235544443130493, + "step": 840 + }, + { + "epoch": 0.2144314125446201, + "grad_norm": 0.48198896646499634, + "learning_rate": 1.8560915714342842e-05, + "loss": 0.5057838559150696, + "step": 841 + }, + { + "epoch": 0.21468638449770525, + "grad_norm": 0.48079413175582886, + "learning_rate": 1.8556553875547755e-05, + "loss": 0.5073414444923401, + "step": 842 + }, + { + "epoch": 0.2149413564507904, + "grad_norm": 0.4871809780597687, + "learning_rate": 1.855218595055498e-05, + "loss": 0.5244035124778748, + "step": 843 + }, + { + "epoch": 0.21519632840387556, + "grad_norm": 0.47666579484939575, + "learning_rate": 1.8547811942471384e-05, + "loss": 0.515140950679779, + "step": 844 + }, + { + "epoch": 0.21545130035696072, + "grad_norm": 0.48149263858795166, + "learning_rate": 1.854343185440816e-05, + "loss": 0.5190153121948242, + "step": 845 + }, + { + "epoch": 0.21570627231004588, + "grad_norm": 0.5033882856369019, + "learning_rate": 1.8539045689480817e-05, + "loss": 0.5240796804428101, + "step": 846 + }, + { + "epoch": 0.21596124426313107, + "grad_norm": 0.47844886779785156, + "learning_rate": 1.85346534508092e-05, + "loss": 0.5184129476547241, + "step": 847 + }, + { + "epoch": 0.21621621621621623, + "grad_norm": 0.48677003383636475, + "learning_rate": 1.8530255141517465e-05, + "loss": 0.5040110349655151, + "step": 848 + }, + { + "epoch": 0.2164711881693014, + "grad_norm": 0.5304367542266846, + "learning_rate": 1.852585076473409e-05, + "loss": 0.504118800163269, + "step": 849 + }, + { + "epoch": 0.21672616012238655, + "grad_norm": 0.4905112087726593, + "learning_rate": 1.8521440323591866e-05, + "loss": 0.5151620507240295, + "step": 850 + }, + { + "epoch": 0.2169811320754717, + "grad_norm": 0.549328088760376, + "learning_rate": 1.85170238212279e-05, + "loss": 0.5425221920013428, + "step": 851 + }, + { + "epoch": 0.21723610402855686, + "grad_norm": 0.6872383952140808, + "learning_rate": 1.851260126078361e-05, + "loss": 0.5106749534606934, + "step": 852 + }, + { + "epoch": 0.21749107598164202, + "grad_norm": 0.4876777231693268, + "learning_rate": 1.850817264540472e-05, + "loss": 0.5124921202659607, + "step": 853 + }, + { + "epoch": 0.21774604793472718, + "grad_norm": 0.488199383020401, + "learning_rate": 1.8503737978241263e-05, + "loss": 0.5182828903198242, + "step": 854 + }, + { + "epoch": 0.21800101988781234, + "grad_norm": 0.4671249985694885, + "learning_rate": 1.849929726244758e-05, + "loss": 0.495769202709198, + "step": 855 + }, + { + "epoch": 0.2182559918408975, + "grad_norm": 0.5009353756904602, + "learning_rate": 1.849485050118231e-05, + "loss": 0.5188135504722595, + "step": 856 + }, + { + "epoch": 0.21851096379398266, + "grad_norm": 0.4932180643081665, + "learning_rate": 1.8490397697608394e-05, + "loss": 0.5201120972633362, + "step": 857 + }, + { + "epoch": 0.21876593574706782, + "grad_norm": 0.4824739992618561, + "learning_rate": 1.8485938854893067e-05, + "loss": 0.5199398398399353, + "step": 858 + }, + { + "epoch": 0.21902090770015298, + "grad_norm": 0.4766797721385956, + "learning_rate": 1.8481473976207866e-05, + "loss": 0.5209866762161255, + "step": 859 + }, + { + "epoch": 0.21927587965323814, + "grad_norm": 0.517713725566864, + "learning_rate": 1.8477003064728616e-05, + "loss": 0.5150636434555054, + "step": 860 + }, + { + "epoch": 0.2195308516063233, + "grad_norm": 0.4794352948665619, + "learning_rate": 1.8472526123635434e-05, + "loss": 0.5228415727615356, + "step": 861 + }, + { + "epoch": 0.21978582355940846, + "grad_norm": 0.5226784944534302, + "learning_rate": 1.846804315611273e-05, + "loss": 0.5156341791152954, + "step": 862 + }, + { + "epoch": 0.22004079551249361, + "grad_norm": 0.49431315064430237, + "learning_rate": 1.8463554165349195e-05, + "loss": 0.5137113928794861, + "step": 863 + }, + { + "epoch": 0.22029576746557877, + "grad_norm": 0.5030642151832581, + "learning_rate": 1.8459059154537806e-05, + "loss": 0.5218013525009155, + "step": 864 + }, + { + "epoch": 0.22055073941866396, + "grad_norm": 0.5052620768547058, + "learning_rate": 1.8454558126875825e-05, + "loss": 0.5037291646003723, + "step": 865 + }, + { + "epoch": 0.22080571137174912, + "grad_norm": 0.465748131275177, + "learning_rate": 1.845005108556479e-05, + "loss": 0.5203166007995605, + "step": 866 + }, + { + "epoch": 0.22106068332483428, + "grad_norm": 0.5093213319778442, + "learning_rate": 1.8445538033810515e-05, + "loss": 0.5137592554092407, + "step": 867 + }, + { + "epoch": 0.22131565527791944, + "grad_norm": 0.501781165599823, + "learning_rate": 1.8441018974823098e-05, + "loss": 0.5238536596298218, + "step": 868 + }, + { + "epoch": 0.2215706272310046, + "grad_norm": 0.48056527972221375, + "learning_rate": 1.8436493911816903e-05, + "loss": 0.5207821130752563, + "step": 869 + }, + { + "epoch": 0.22182559918408976, + "grad_norm": 0.4888818562030792, + "learning_rate": 1.8431962848010557e-05, + "loss": 0.5104517936706543, + "step": 870 + }, + { + "epoch": 0.22208057113717491, + "grad_norm": 0.5200516581535339, + "learning_rate": 1.8427425786626976e-05, + "loss": 0.5080585479736328, + "step": 871 + }, + { + "epoch": 0.22233554309026007, + "grad_norm": 0.5427224636077881, + "learning_rate": 1.8422882730893323e-05, + "loss": 0.4997643828392029, + "step": 872 + }, + { + "epoch": 0.22259051504334523, + "grad_norm": 0.4931401014328003, + "learning_rate": 1.8418333684041033e-05, + "loss": 0.5171279311180115, + "step": 873 + }, + { + "epoch": 0.2228454869964304, + "grad_norm": 0.5080557465553284, + "learning_rate": 1.8413778649305797e-05, + "loss": 0.5060961246490479, + "step": 874 + }, + { + "epoch": 0.22310045894951555, + "grad_norm": 0.5039787888526917, + "learning_rate": 1.8409217629927577e-05, + "loss": 0.5168291926383972, + "step": 875 + }, + { + "epoch": 0.2233554309026007, + "grad_norm": 0.4833467900753021, + "learning_rate": 1.8404650629150578e-05, + "loss": 0.5225592851638794, + "step": 876 + }, + { + "epoch": 0.22361040285568587, + "grad_norm": 0.5088043212890625, + "learning_rate": 1.8400077650223264e-05, + "loss": 0.5346219539642334, + "step": 877 + }, + { + "epoch": 0.22386537480877103, + "grad_norm": 0.5302433371543884, + "learning_rate": 1.839549869639836e-05, + "loss": 0.5219231843948364, + "step": 878 + }, + { + "epoch": 0.2241203467618562, + "grad_norm": 0.561053454875946, + "learning_rate": 1.8390913770932827e-05, + "loss": 0.5408127307891846, + "step": 879 + }, + { + "epoch": 0.22437531871494135, + "grad_norm": 0.47593629360198975, + "learning_rate": 1.8386322877087883e-05, + "loss": 0.5122065544128418, + "step": 880 + }, + { + "epoch": 0.2246302906680265, + "grad_norm": 0.5070580840110779, + "learning_rate": 1.838172601812899e-05, + "loss": 0.5167946815490723, + "step": 881 + }, + { + "epoch": 0.22488526262111166, + "grad_norm": 0.49819034337997437, + "learning_rate": 1.8377123197325843e-05, + "loss": 0.5193842649459839, + "step": 882 + }, + { + "epoch": 0.22514023457419685, + "grad_norm": 0.6639063358306885, + "learning_rate": 1.8372514417952398e-05, + "loss": 0.5219736099243164, + "step": 883 + }, + { + "epoch": 0.225395206527282, + "grad_norm": 0.5022265315055847, + "learning_rate": 1.8367899683286826e-05, + "loss": 0.5150482654571533, + "step": 884 + }, + { + "epoch": 0.22565017848036717, + "grad_norm": 0.5262812376022339, + "learning_rate": 1.8363278996611553e-05, + "loss": 0.5040405988693237, + "step": 885 + }, + { + "epoch": 0.22590515043345233, + "grad_norm": 0.48911675810813904, + "learning_rate": 1.8358652361213226e-05, + "loss": 0.5164714455604553, + "step": 886 + }, + { + "epoch": 0.2261601223865375, + "grad_norm": 0.48677948117256165, + "learning_rate": 1.8354019780382736e-05, + "loss": 0.514830470085144, + "step": 887 + }, + { + "epoch": 0.22641509433962265, + "grad_norm": 0.49844464659690857, + "learning_rate": 1.8349381257415185e-05, + "loss": 0.5102154016494751, + "step": 888 + }, + { + "epoch": 0.2266700662927078, + "grad_norm": 0.5603659152984619, + "learning_rate": 1.834473679560991e-05, + "loss": 0.5293639302253723, + "step": 889 + }, + { + "epoch": 0.22692503824579296, + "grad_norm": 0.538626492023468, + "learning_rate": 1.8340086398270486e-05, + "loss": 0.502727746963501, + "step": 890 + }, + { + "epoch": 0.22718001019887812, + "grad_norm": 0.4659905433654785, + "learning_rate": 1.8335430068704688e-05, + "loss": 0.5082594156265259, + "step": 891 + }, + { + "epoch": 0.22743498215196328, + "grad_norm": 0.4687235653400421, + "learning_rate": 1.8330767810224525e-05, + "loss": 0.502903938293457, + "step": 892 + }, + { + "epoch": 0.22768995410504844, + "grad_norm": 0.49669066071510315, + "learning_rate": 1.8326099626146214e-05, + "loss": 0.5199105143547058, + "step": 893 + }, + { + "epoch": 0.2279449260581336, + "grad_norm": 0.5097357034683228, + "learning_rate": 1.8321425519790193e-05, + "loss": 0.502683162689209, + "step": 894 + }, + { + "epoch": 0.22819989801121876, + "grad_norm": 0.5846214890480042, + "learning_rate": 1.8316745494481114e-05, + "loss": 0.5099455714225769, + "step": 895 + }, + { + "epoch": 0.22845486996430392, + "grad_norm": 0.4794066548347473, + "learning_rate": 1.8312059553547833e-05, + "loss": 0.5095470547676086, + "step": 896 + }, + { + "epoch": 0.22870984191738908, + "grad_norm": 0.5125945806503296, + "learning_rate": 1.8307367700323412e-05, + "loss": 0.5077476501464844, + "step": 897 + }, + { + "epoch": 0.22896481387047424, + "grad_norm": 0.46401146054267883, + "learning_rate": 1.830266993814513e-05, + "loss": 0.521671712398529, + "step": 898 + }, + { + "epoch": 0.2292197858235594, + "grad_norm": 0.46971526741981506, + "learning_rate": 1.829796627035446e-05, + "loss": 0.5065335035324097, + "step": 899 + }, + { + "epoch": 0.22947475777664456, + "grad_norm": 0.5173994898796082, + "learning_rate": 1.8293256700297072e-05, + "loss": 0.5077659487724304, + "step": 900 + }, + { + "epoch": 0.22972972972972974, + "grad_norm": 0.4828406572341919, + "learning_rate": 1.8288541231322853e-05, + "loss": 0.506639838218689, + "step": 901 + }, + { + "epoch": 0.2299847016828149, + "grad_norm": 0.4878862202167511, + "learning_rate": 1.8283819866785855e-05, + "loss": 0.5134996771812439, + "step": 902 + }, + { + "epoch": 0.23023967363590006, + "grad_norm": 0.5095584392547607, + "learning_rate": 1.827909261004435e-05, + "loss": 0.5173805356025696, + "step": 903 + }, + { + "epoch": 0.23049464558898522, + "grad_norm": 0.48897939920425415, + "learning_rate": 1.8274359464460796e-05, + "loss": 0.524322509765625, + "step": 904 + }, + { + "epoch": 0.23074961754207038, + "grad_norm": 0.47126173973083496, + "learning_rate": 1.826962043340183e-05, + "loss": 0.5017592310905457, + "step": 905 + }, + { + "epoch": 0.23100458949515554, + "grad_norm": 0.49605822563171387, + "learning_rate": 1.826487552023828e-05, + "loss": 0.5261249542236328, + "step": 906 + }, + { + "epoch": 0.2312595614482407, + "grad_norm": 0.5319313406944275, + "learning_rate": 1.8260124728345163e-05, + "loss": 0.5224013328552246, + "step": 907 + }, + { + "epoch": 0.23151453340132586, + "grad_norm": 0.47262606024742126, + "learning_rate": 1.825536806110167e-05, + "loss": 0.511128306388855, + "step": 908 + }, + { + "epoch": 0.23176950535441102, + "grad_norm": 0.4793703556060791, + "learning_rate": 1.825060552189118e-05, + "loss": 0.5372747778892517, + "step": 909 + }, + { + "epoch": 0.23202447730749617, + "grad_norm": 0.5034754872322083, + "learning_rate": 1.8245837114101235e-05, + "loss": 0.5220484733581543, + "step": 910 + }, + { + "epoch": 0.23227944926058133, + "grad_norm": 0.49909234046936035, + "learning_rate": 1.8241062841123564e-05, + "loss": 0.5091926455497742, + "step": 911 + }, + { + "epoch": 0.2325344212136665, + "grad_norm": 0.5578997731208801, + "learning_rate": 1.8236282706354064e-05, + "loss": 0.5100309252738953, + "step": 912 + }, + { + "epoch": 0.23278939316675165, + "grad_norm": 0.5156607031822205, + "learning_rate": 1.82314967131928e-05, + "loss": 0.5231087803840637, + "step": 913 + }, + { + "epoch": 0.2330443651198368, + "grad_norm": 0.478680819272995, + "learning_rate": 1.8226704865044e-05, + "loss": 0.508722722530365, + "step": 914 + }, + { + "epoch": 0.23329933707292197, + "grad_norm": 0.5336467027664185, + "learning_rate": 1.822190716531607e-05, + "loss": 0.5129567980766296, + "step": 915 + }, + { + "epoch": 0.23355430902600713, + "grad_norm": 0.471403032541275, + "learning_rate": 1.821710361742156e-05, + "loss": 0.5059640407562256, + "step": 916 + }, + { + "epoch": 0.2338092809790923, + "grad_norm": 0.48177075386047363, + "learning_rate": 1.82122942247772e-05, + "loss": 0.5262762308120728, + "step": 917 + }, + { + "epoch": 0.23406425293217745, + "grad_norm": 0.5124832391738892, + "learning_rate": 1.820747899080386e-05, + "loss": 0.5098938345909119, + "step": 918 + }, + { + "epoch": 0.23431922488526263, + "grad_norm": 0.4705912470817566, + "learning_rate": 1.8202657918926566e-05, + "loss": 0.49921292066574097, + "step": 919 + }, + { + "epoch": 0.2345741968383478, + "grad_norm": 0.5206021070480347, + "learning_rate": 1.819783101257451e-05, + "loss": 0.525272011756897, + "step": 920 + }, + { + "epoch": 0.23482916879143295, + "grad_norm": 0.5261361598968506, + "learning_rate": 1.8192998275181026e-05, + "loss": 0.5254808664321899, + "step": 921 + }, + { + "epoch": 0.2350841407445181, + "grad_norm": 0.5328448414802551, + "learning_rate": 1.8188159710183595e-05, + "loss": 0.5316905379295349, + "step": 922 + }, + { + "epoch": 0.23533911269760327, + "grad_norm": 0.4848942756652832, + "learning_rate": 1.8183315321023837e-05, + "loss": 0.5214201211929321, + "step": 923 + }, + { + "epoch": 0.23559408465068843, + "grad_norm": 0.49192479252815247, + "learning_rate": 1.817846511114753e-05, + "loss": 0.5270960330963135, + "step": 924 + }, + { + "epoch": 0.2358490566037736, + "grad_norm": 0.49427223205566406, + "learning_rate": 1.8173609084004577e-05, + "loss": 0.5243868827819824, + "step": 925 + }, + { + "epoch": 0.23610402855685875, + "grad_norm": 0.47979867458343506, + "learning_rate": 1.8168747243049026e-05, + "loss": 0.5152573585510254, + "step": 926 + }, + { + "epoch": 0.2363590005099439, + "grad_norm": 0.5014990568161011, + "learning_rate": 1.8163879591739067e-05, + "loss": 0.5148708820343018, + "step": 927 + }, + { + "epoch": 0.23661397246302907, + "grad_norm": 0.5115143656730652, + "learning_rate": 1.8159006133537004e-05, + "loss": 0.5039348602294922, + "step": 928 + }, + { + "epoch": 0.23686894441611422, + "grad_norm": 0.48674800992012024, + "learning_rate": 1.8154126871909296e-05, + "loss": 0.5124222636222839, + "step": 929 + }, + { + "epoch": 0.23712391636919938, + "grad_norm": 0.5349897146224976, + "learning_rate": 1.81492418103265e-05, + "loss": 0.5278897881507874, + "step": 930 + }, + { + "epoch": 0.23737888832228454, + "grad_norm": 0.5227882862091064, + "learning_rate": 1.814435095226333e-05, + "loss": 0.5197198390960693, + "step": 931 + }, + { + "epoch": 0.2376338602753697, + "grad_norm": 0.5830060839653015, + "learning_rate": 1.81394543011986e-05, + "loss": 0.5197795629501343, + "step": 932 + }, + { + "epoch": 0.23788883222845486, + "grad_norm": 0.4716755747795105, + "learning_rate": 1.8134551860615256e-05, + "loss": 0.5050005316734314, + "step": 933 + }, + { + "epoch": 0.23814380418154002, + "grad_norm": 0.4908246397972107, + "learning_rate": 1.8129643634000357e-05, + "loss": 0.5052043795585632, + "step": 934 + }, + { + "epoch": 0.23839877613462518, + "grad_norm": 0.49863147735595703, + "learning_rate": 1.8124729624845077e-05, + "loss": 0.5203200578689575, + "step": 935 + }, + { + "epoch": 0.23865374808771037, + "grad_norm": 0.49130505323410034, + "learning_rate": 1.8119809836644715e-05, + "loss": 0.511245846748352, + "step": 936 + }, + { + "epoch": 0.23890872004079552, + "grad_norm": 0.4879443347454071, + "learning_rate": 1.8114884272898662e-05, + "loss": 0.5171563029289246, + "step": 937 + }, + { + "epoch": 0.23916369199388068, + "grad_norm": 0.4863815903663635, + "learning_rate": 1.8109952937110425e-05, + "loss": 0.5202454328536987, + "step": 938 + }, + { + "epoch": 0.23941866394696584, + "grad_norm": 0.5152843594551086, + "learning_rate": 1.8105015832787623e-05, + "loss": 0.5006053447723389, + "step": 939 + }, + { + "epoch": 0.239673635900051, + "grad_norm": 0.533755898475647, + "learning_rate": 1.810007296344197e-05, + "loss": 0.5041182637214661, + "step": 940 + }, + { + "epoch": 0.23992860785313616, + "grad_norm": 0.4880644679069519, + "learning_rate": 1.8095124332589284e-05, + "loss": 0.5128837823867798, + "step": 941 + }, + { + "epoch": 0.24018357980622132, + "grad_norm": 0.5024210810661316, + "learning_rate": 1.8090169943749477e-05, + "loss": 0.5149589776992798, + "step": 942 + }, + { + "epoch": 0.24043855175930648, + "grad_norm": 0.4898494780063629, + "learning_rate": 1.8085209800446564e-05, + "loss": 0.519150972366333, + "step": 943 + }, + { + "epoch": 0.24069352371239164, + "grad_norm": 0.5067214369773865, + "learning_rate": 1.808024390620865e-05, + "loss": 0.4951241910457611, + "step": 944 + }, + { + "epoch": 0.2409484956654768, + "grad_norm": 0.5119669437408447, + "learning_rate": 1.8075272264567925e-05, + "loss": 0.5104391574859619, + "step": 945 + }, + { + "epoch": 0.24120346761856196, + "grad_norm": 0.47356361150741577, + "learning_rate": 1.8070294879060678e-05, + "loss": 0.5130054354667664, + "step": 946 + }, + { + "epoch": 0.24145843957164712, + "grad_norm": 0.48387038707733154, + "learning_rate": 1.8065311753227272e-05, + "loss": 0.5082079768180847, + "step": 947 + }, + { + "epoch": 0.24171341152473227, + "grad_norm": 0.4830719530582428, + "learning_rate": 1.806032289061216e-05, + "loss": 0.5125942826271057, + "step": 948 + }, + { + "epoch": 0.24196838347781743, + "grad_norm": 0.4798039495944977, + "learning_rate": 1.8055328294763874e-05, + "loss": 0.5218983888626099, + "step": 949 + }, + { + "epoch": 0.2422233554309026, + "grad_norm": 0.47178006172180176, + "learning_rate": 1.8050327969235024e-05, + "loss": 0.5177539587020874, + "step": 950 + }, + { + "epoch": 0.24247832738398775, + "grad_norm": 0.5267336964607239, + "learning_rate": 1.8045321917582293e-05, + "loss": 0.5238089561462402, + "step": 951 + }, + { + "epoch": 0.2427332993370729, + "grad_norm": 0.4602765738964081, + "learning_rate": 1.8040310143366447e-05, + "loss": 0.4993072748184204, + "step": 952 + }, + { + "epoch": 0.24298827129015807, + "grad_norm": 0.4600813686847687, + "learning_rate": 1.8035292650152303e-05, + "loss": 0.5135704874992371, + "step": 953 + }, + { + "epoch": 0.24324324324324326, + "grad_norm": 0.475060373544693, + "learning_rate": 1.803026944150877e-05, + "loss": 0.5125201940536499, + "step": 954 + }, + { + "epoch": 0.24349821519632842, + "grad_norm": 0.48474636673927307, + "learning_rate": 1.8025240521008798e-05, + "loss": 0.4982396364212036, + "step": 955 + }, + { + "epoch": 0.24375318714941357, + "grad_norm": 0.5275590419769287, + "learning_rate": 1.802020589222942e-05, + "loss": 0.5201438069343567, + "step": 956 + }, + { + "epoch": 0.24400815910249873, + "grad_norm": 0.48619845509529114, + "learning_rate": 1.801516555875172e-05, + "loss": 0.521652102470398, + "step": 957 + }, + { + "epoch": 0.2442631310555839, + "grad_norm": 0.4878775477409363, + "learning_rate": 1.8010119524160834e-05, + "loss": 0.5095280408859253, + "step": 958 + }, + { + "epoch": 0.24451810300866905, + "grad_norm": 0.49116265773773193, + "learning_rate": 1.8005067792045965e-05, + "loss": 0.4987580180168152, + "step": 959 + }, + { + "epoch": 0.2447730749617542, + "grad_norm": 0.4573895335197449, + "learning_rate": 1.8000010366000366e-05, + "loss": 0.5119349956512451, + "step": 960 + }, + { + "epoch": 0.24502804691483937, + "grad_norm": 0.5173366069793701, + "learning_rate": 1.7994947249621333e-05, + "loss": 0.5049844980239868, + "step": 961 + }, + { + "epoch": 0.24528301886792453, + "grad_norm": 0.48248374462127686, + "learning_rate": 1.7989878446510215e-05, + "loss": 0.505333423614502, + "step": 962 + }, + { + "epoch": 0.2455379908210097, + "grad_norm": 0.46847787499427795, + "learning_rate": 1.7984803960272405e-05, + "loss": 0.4996873736381531, + "step": 963 + }, + { + "epoch": 0.24579296277409485, + "grad_norm": 0.4857884645462036, + "learning_rate": 1.7979723794517338e-05, + "loss": 0.5094532370567322, + "step": 964 + }, + { + "epoch": 0.24604793472718, + "grad_norm": 0.47273755073547363, + "learning_rate": 1.7974637952858493e-05, + "loss": 0.5089906454086304, + "step": 965 + }, + { + "epoch": 0.24630290668026517, + "grad_norm": 0.48037955164909363, + "learning_rate": 1.796954643891337e-05, + "loss": 0.5090777277946472, + "step": 966 + }, + { + "epoch": 0.24655787863335032, + "grad_norm": 0.4806813597679138, + "learning_rate": 1.796444925630353e-05, + "loss": 0.5150277614593506, + "step": 967 + }, + { + "epoch": 0.24681285058643548, + "grad_norm": 0.47713831067085266, + "learning_rate": 1.7959346408654547e-05, + "loss": 0.5063602924346924, + "step": 968 + }, + { + "epoch": 0.24706782253952064, + "grad_norm": 0.463236927986145, + "learning_rate": 1.7954237899596027e-05, + "loss": 0.5151035189628601, + "step": 969 + }, + { + "epoch": 0.2473227944926058, + "grad_norm": 0.496243417263031, + "learning_rate": 1.7949123732761603e-05, + "loss": 0.5035381317138672, + "step": 970 + }, + { + "epoch": 0.24757776644569096, + "grad_norm": 0.5023394227027893, + "learning_rate": 1.7944003911788943e-05, + "loss": 0.5111215710639954, + "step": 971 + }, + { + "epoch": 0.24783273839877615, + "grad_norm": 0.48129037022590637, + "learning_rate": 1.7938878440319722e-05, + "loss": 0.5170261263847351, + "step": 972 + }, + { + "epoch": 0.2480877103518613, + "grad_norm": 0.493413507938385, + "learning_rate": 1.7933747321999642e-05, + "loss": 0.5111275911331177, + "step": 973 + }, + { + "epoch": 0.24834268230494647, + "grad_norm": 0.4867372214794159, + "learning_rate": 1.792861056047842e-05, + "loss": 0.5078480243682861, + "step": 974 + }, + { + "epoch": 0.24859765425803163, + "grad_norm": 0.509955883026123, + "learning_rate": 1.7923468159409786e-05, + "loss": 0.5027446150779724, + "step": 975 + }, + { + "epoch": 0.24885262621111678, + "grad_norm": 0.4628150463104248, + "learning_rate": 1.7918320122451486e-05, + "loss": 0.5124740600585938, + "step": 976 + }, + { + "epoch": 0.24910759816420194, + "grad_norm": 0.45736798644065857, + "learning_rate": 1.7913166453265264e-05, + "loss": 0.509383499622345, + "step": 977 + }, + { + "epoch": 0.2493625701172871, + "grad_norm": 0.5170514583587646, + "learning_rate": 1.7908007155516887e-05, + "loss": 0.5118707418441772, + "step": 978 + }, + { + "epoch": 0.24961754207037226, + "grad_norm": 0.4959959089756012, + "learning_rate": 1.7902842232876105e-05, + "loss": 0.5089007616043091, + "step": 979 + }, + { + "epoch": 0.24987251402345742, + "grad_norm": 0.4515939950942993, + "learning_rate": 1.789767168901669e-05, + "loss": 0.5101932883262634, + "step": 980 + }, + { + "epoch": 0.2501274859765426, + "grad_norm": 0.46624132990837097, + "learning_rate": 1.7892495527616397e-05, + "loss": 0.5061900615692139, + "step": 981 + }, + { + "epoch": 0.25038245792962777, + "grad_norm": 0.44490790367126465, + "learning_rate": 1.788731375235698e-05, + "loss": 0.5091356039047241, + "step": 982 + }, + { + "epoch": 0.2506374298827129, + "grad_norm": 0.4683645963668823, + "learning_rate": 1.788212636692419e-05, + "loss": 0.5200745463371277, + "step": 983 + }, + { + "epoch": 0.2508924018357981, + "grad_norm": 0.4631229639053345, + "learning_rate": 1.787693337500777e-05, + "loss": 0.5115489363670349, + "step": 984 + }, + { + "epoch": 0.25114737378888324, + "grad_norm": 0.47554415464401245, + "learning_rate": 1.787173478030144e-05, + "loss": 0.5071451663970947, + "step": 985 + }, + { + "epoch": 0.2514023457419684, + "grad_norm": 0.45346564054489136, + "learning_rate": 1.7866530586502914e-05, + "loss": 0.5114556550979614, + "step": 986 + }, + { + "epoch": 0.25165731769505356, + "grad_norm": 0.4637988209724426, + "learning_rate": 1.786132079731389e-05, + "loss": 0.5111753344535828, + "step": 987 + }, + { + "epoch": 0.2519122896481387, + "grad_norm": 0.4647560119628906, + "learning_rate": 1.7856105416440046e-05, + "loss": 0.5089646577835083, + "step": 988 + }, + { + "epoch": 0.2521672616012239, + "grad_norm": 0.47878068685531616, + "learning_rate": 1.7850884447591025e-05, + "loss": 0.5066558122634888, + "step": 989 + }, + { + "epoch": 0.25242223355430904, + "grad_norm": 0.4984032213687897, + "learning_rate": 1.784565789448046e-05, + "loss": 0.5191236734390259, + "step": 990 + }, + { + "epoch": 0.2526772055073942, + "grad_norm": 0.4744832515716553, + "learning_rate": 1.784042576082595e-05, + "loss": 0.48514270782470703, + "step": 991 + }, + { + "epoch": 0.25293217746047936, + "grad_norm": 0.4698421061038971, + "learning_rate": 1.7835188050349064e-05, + "loss": 0.5130078196525574, + "step": 992 + }, + { + "epoch": 0.2531871494135645, + "grad_norm": 0.4446653127670288, + "learning_rate": 1.7829944766775333e-05, + "loss": 0.5099542140960693, + "step": 993 + }, + { + "epoch": 0.2534421213666497, + "grad_norm": 0.48649677634239197, + "learning_rate": 1.7824695913834257e-05, + "loss": 0.5056322813034058, + "step": 994 + }, + { + "epoch": 0.25369709331973483, + "grad_norm": 0.45543864369392395, + "learning_rate": 1.7819441495259297e-05, + "loss": 0.5074928998947144, + "step": 995 + }, + { + "epoch": 0.25395206527282, + "grad_norm": 0.4768550992012024, + "learning_rate": 1.7814181514787873e-05, + "loss": 0.5242055058479309, + "step": 996 + }, + { + "epoch": 0.25420703722590515, + "grad_norm": 0.48187726736068726, + "learning_rate": 1.7808915976161364e-05, + "loss": 0.5093708634376526, + "step": 997 + }, + { + "epoch": 0.2544620091789903, + "grad_norm": 0.4623137414455414, + "learning_rate": 1.7803644883125093e-05, + "loss": 0.49990314245224, + "step": 998 + }, + { + "epoch": 0.25471698113207547, + "grad_norm": 0.437720388174057, + "learning_rate": 1.779836823942834e-05, + "loss": 0.50358647108078, + "step": 999 + }, + { + "epoch": 0.25497195308516063, + "grad_norm": 0.4898304045200348, + "learning_rate": 1.779308604882434e-05, + "loss": 0.5073832869529724, + "step": 1000 + }, + { + "epoch": 0.2552269250382458, + "grad_norm": 0.4843921363353729, + "learning_rate": 1.7787798315070263e-05, + "loss": 0.494332492351532, + "step": 1001 + }, + { + "epoch": 0.25548189699133095, + "grad_norm": 0.48126739263534546, + "learning_rate": 1.7782505041927218e-05, + "loss": 0.5016703009605408, + "step": 1002 + }, + { + "epoch": 0.2557368689444161, + "grad_norm": 0.4517021179199219, + "learning_rate": 1.777720623316027e-05, + "loss": 0.49795588850975037, + "step": 1003 + }, + { + "epoch": 0.25599184089750127, + "grad_norm": 0.49286332726478577, + "learning_rate": 1.777190189253841e-05, + "loss": 0.4952079951763153, + "step": 1004 + }, + { + "epoch": 0.2562468128505864, + "grad_norm": 0.4811077117919922, + "learning_rate": 1.7766592023834567e-05, + "loss": 0.5128995776176453, + "step": 1005 + }, + { + "epoch": 0.2565017848036716, + "grad_norm": 0.4894014000892639, + "learning_rate": 1.77612766308256e-05, + "loss": 0.5113130807876587, + "step": 1006 + }, + { + "epoch": 0.25675675675675674, + "grad_norm": 0.47017577290534973, + "learning_rate": 1.77559557172923e-05, + "loss": 0.5148601531982422, + "step": 1007 + }, + { + "epoch": 0.2570117287098419, + "grad_norm": 0.4653587341308594, + "learning_rate": 1.775062928701938e-05, + "loss": 0.49841389060020447, + "step": 1008 + }, + { + "epoch": 0.25726670066292706, + "grad_norm": 0.5072566270828247, + "learning_rate": 1.7745297343795487e-05, + "loss": 0.5057612657546997, + "step": 1009 + }, + { + "epoch": 0.2575216726160122, + "grad_norm": 0.48639681935310364, + "learning_rate": 1.7739959891413175e-05, + "loss": 0.5132037997245789, + "step": 1010 + }, + { + "epoch": 0.2577766445690974, + "grad_norm": 0.4910087287425995, + "learning_rate": 1.7734616933668936e-05, + "loss": 0.5040243864059448, + "step": 1011 + }, + { + "epoch": 0.25803161652218254, + "grad_norm": 0.4876771569252014, + "learning_rate": 1.7729268474363156e-05, + "loss": 0.5224146842956543, + "step": 1012 + }, + { + "epoch": 0.2582865884752677, + "grad_norm": 0.4741784930229187, + "learning_rate": 1.772391451730015e-05, + "loss": 0.49925094842910767, + "step": 1013 + }, + { + "epoch": 0.25854156042835286, + "grad_norm": 0.48314356803894043, + "learning_rate": 1.7718555066288138e-05, + "loss": 0.4966428279876709, + "step": 1014 + }, + { + "epoch": 0.258796532381438, + "grad_norm": 0.48094186186790466, + "learning_rate": 1.7713190125139243e-05, + "loss": 0.5235569477081299, + "step": 1015 + }, + { + "epoch": 0.2590515043345232, + "grad_norm": 0.47423312067985535, + "learning_rate": 1.7707819697669504e-05, + "loss": 0.5283186435699463, + "step": 1016 + }, + { + "epoch": 0.2593064762876084, + "grad_norm": 0.49974769353866577, + "learning_rate": 1.7702443787698855e-05, + "loss": 0.5045661926269531, + "step": 1017 + }, + { + "epoch": 0.25956144824069355, + "grad_norm": 0.4792061448097229, + "learning_rate": 1.7697062399051126e-05, + "loss": 0.5071189999580383, + "step": 1018 + }, + { + "epoch": 0.2598164201937787, + "grad_norm": 0.47046658396720886, + "learning_rate": 1.7691675535554057e-05, + "loss": 0.5016986727714539, + "step": 1019 + }, + { + "epoch": 0.26007139214686387, + "grad_norm": 0.540090024471283, + "learning_rate": 1.768628320103927e-05, + "loss": 0.5116336345672607, + "step": 1020 + }, + { + "epoch": 0.260326364099949, + "grad_norm": 0.47801148891448975, + "learning_rate": 1.7680885399342283e-05, + "loss": 0.5074282884597778, + "step": 1021 + }, + { + "epoch": 0.2605813360530342, + "grad_norm": 0.4674408435821533, + "learning_rate": 1.7675482134302503e-05, + "loss": 0.5008707046508789, + "step": 1022 + }, + { + "epoch": 0.26083630800611934, + "grad_norm": 0.4926578104496002, + "learning_rate": 1.7670073409763218e-05, + "loss": 0.5130398273468018, + "step": 1023 + }, + { + "epoch": 0.2610912799592045, + "grad_norm": 0.49176183342933655, + "learning_rate": 1.7664659229571612e-05, + "loss": 0.509769082069397, + "step": 1024 + }, + { + "epoch": 0.26134625191228966, + "grad_norm": 0.49057114124298096, + "learning_rate": 1.7659239597578738e-05, + "loss": 0.49287480115890503, + "step": 1025 + }, + { + "epoch": 0.2616012238653748, + "grad_norm": 0.46402209997177124, + "learning_rate": 1.7653814517639525e-05, + "loss": 0.516800045967102, + "step": 1026 + }, + { + "epoch": 0.26185619581846, + "grad_norm": 0.5012035965919495, + "learning_rate": 1.764838399361279e-05, + "loss": 0.5082567930221558, + "step": 1027 + }, + { + "epoch": 0.26211116777154514, + "grad_norm": 0.47564834356307983, + "learning_rate": 1.764294802936121e-05, + "loss": 0.5159832835197449, + "step": 1028 + }, + { + "epoch": 0.2623661397246303, + "grad_norm": 0.4816213548183441, + "learning_rate": 1.7637506628751335e-05, + "loss": 0.5017706155776978, + "step": 1029 + }, + { + "epoch": 0.26262111167771546, + "grad_norm": 0.4737376868724823, + "learning_rate": 1.763205979565359e-05, + "loss": 0.501492977142334, + "step": 1030 + }, + { + "epoch": 0.2628760836308006, + "grad_norm": 0.49498605728149414, + "learning_rate": 1.762660753394225e-05, + "loss": 0.5140003561973572, + "step": 1031 + }, + { + "epoch": 0.2631310555838858, + "grad_norm": 0.46646806597709656, + "learning_rate": 1.7621149847495458e-05, + "loss": 0.5102156400680542, + "step": 1032 + }, + { + "epoch": 0.26338602753697093, + "grad_norm": 0.4754180908203125, + "learning_rate": 1.761568674019522e-05, + "loss": 0.49969398975372314, + "step": 1033 + }, + { + "epoch": 0.2636409994900561, + "grad_norm": 0.46612799167633057, + "learning_rate": 1.7610218215927393e-05, + "loss": 0.5041083097457886, + "step": 1034 + }, + { + "epoch": 0.26389597144314125, + "grad_norm": 0.46223193407058716, + "learning_rate": 1.760474427858169e-05, + "loss": 0.488689661026001, + "step": 1035 + }, + { + "epoch": 0.2641509433962264, + "grad_norm": 0.4570687711238861, + "learning_rate": 1.7599264932051664e-05, + "loss": 0.4999356269836426, + "step": 1036 + }, + { + "epoch": 0.26440591534931157, + "grad_norm": 0.4681054949760437, + "learning_rate": 1.759378018023473e-05, + "loss": 0.504220187664032, + "step": 1037 + }, + { + "epoch": 0.26466088730239673, + "grad_norm": 0.48575031757354736, + "learning_rate": 1.7588290027032137e-05, + "loss": 0.5085967779159546, + "step": 1038 + }, + { + "epoch": 0.2649158592554819, + "grad_norm": 0.4551243185997009, + "learning_rate": 1.758279447634899e-05, + "loss": 0.5101237297058105, + "step": 1039 + }, + { + "epoch": 0.26517083120856705, + "grad_norm": 0.455987811088562, + "learning_rate": 1.757729353209421e-05, + "loss": 0.5142706632614136, + "step": 1040 + }, + { + "epoch": 0.2654258031616522, + "grad_norm": 0.47160667181015015, + "learning_rate": 1.7571787198180578e-05, + "loss": 0.5077152252197266, + "step": 1041 + }, + { + "epoch": 0.26568077511473737, + "grad_norm": 0.45185431838035583, + "learning_rate": 1.7566275478524694e-05, + "loss": 0.5094373226165771, + "step": 1042 + }, + { + "epoch": 0.2659357470678225, + "grad_norm": 0.4466405212879181, + "learning_rate": 1.7560758377046994e-05, + "loss": 0.4995154142379761, + "step": 1043 + }, + { + "epoch": 0.2661907190209077, + "grad_norm": 0.4542228877544403, + "learning_rate": 1.755523589767174e-05, + "loss": 0.5100736618041992, + "step": 1044 + }, + { + "epoch": 0.26644569097399284, + "grad_norm": 0.4614347517490387, + "learning_rate": 1.7549708044327024e-05, + "loss": 0.5141627788543701, + "step": 1045 + }, + { + "epoch": 0.266700662927078, + "grad_norm": 0.44910943508148193, + "learning_rate": 1.7544174820944752e-05, + "loss": 0.5136409997940063, + "step": 1046 + }, + { + "epoch": 0.26695563488016316, + "grad_norm": 0.46518927812576294, + "learning_rate": 1.753863623146066e-05, + "loss": 0.4939352869987488, + "step": 1047 + }, + { + "epoch": 0.2672106068332483, + "grad_norm": 0.4545368552207947, + "learning_rate": 1.753309227981429e-05, + "loss": 0.5089648962020874, + "step": 1048 + }, + { + "epoch": 0.2674655787863335, + "grad_norm": 0.4595126509666443, + "learning_rate": 1.7527542969949008e-05, + "loss": 0.5027369260787964, + "step": 1049 + }, + { + "epoch": 0.26772055073941864, + "grad_norm": 0.45682191848754883, + "learning_rate": 1.7521988305811986e-05, + "loss": 0.5061858892440796, + "step": 1050 + }, + { + "epoch": 0.2679755226925038, + "grad_norm": 0.5684783458709717, + "learning_rate": 1.7516428291354205e-05, + "loss": 0.4948621690273285, + "step": 1051 + }, + { + "epoch": 0.268230494645589, + "grad_norm": 0.4799656271934509, + "learning_rate": 1.751086293053045e-05, + "loss": 0.5029177665710449, + "step": 1052 + }, + { + "epoch": 0.26848546659867417, + "grad_norm": 0.48182642459869385, + "learning_rate": 1.7505292227299314e-05, + "loss": 0.5024133920669556, + "step": 1053 + }, + { + "epoch": 0.26874043855175933, + "grad_norm": 0.4441515803337097, + "learning_rate": 1.7499716185623183e-05, + "loss": 0.5072953104972839, + "step": 1054 + }, + { + "epoch": 0.2689954105048445, + "grad_norm": 0.46608737111091614, + "learning_rate": 1.7494134809468248e-05, + "loss": 0.5079753398895264, + "step": 1055 + }, + { + "epoch": 0.26925038245792965, + "grad_norm": 0.47361519932746887, + "learning_rate": 1.7488548102804482e-05, + "loss": 0.5090345144271851, + "step": 1056 + }, + { + "epoch": 0.2695053544110148, + "grad_norm": 0.5031419992446899, + "learning_rate": 1.748295606960567e-05, + "loss": 0.4993833899497986, + "step": 1057 + }, + { + "epoch": 0.26976032636409997, + "grad_norm": 0.46434053778648376, + "learning_rate": 1.7477358713849368e-05, + "loss": 0.5092716813087463, + "step": 1058 + }, + { + "epoch": 0.2700152983171851, + "grad_norm": 0.4608798027038574, + "learning_rate": 1.7471756039516917e-05, + "loss": 0.5093937516212463, + "step": 1059 + }, + { + "epoch": 0.2702702702702703, + "grad_norm": 0.47563883662223816, + "learning_rate": 1.7466148050593458e-05, + "loss": 0.5046992897987366, + "step": 1060 + }, + { + "epoch": 0.27052524222335544, + "grad_norm": 0.5088484287261963, + "learning_rate": 1.74605347510679e-05, + "loss": 0.509278416633606, + "step": 1061 + }, + { + "epoch": 0.2707802141764406, + "grad_norm": 0.4759383797645569, + "learning_rate": 1.7454916144932923e-05, + "loss": 0.500342845916748, + "step": 1062 + }, + { + "epoch": 0.27103518612952576, + "grad_norm": 0.4903341233730316, + "learning_rate": 1.7449292236184994e-05, + "loss": 0.5136889219284058, + "step": 1063 + }, + { + "epoch": 0.2712901580826109, + "grad_norm": 0.4696923494338989, + "learning_rate": 1.744366302882435e-05, + "loss": 0.5082424283027649, + "step": 1064 + }, + { + "epoch": 0.2715451300356961, + "grad_norm": 0.46577882766723633, + "learning_rate": 1.7438028526854993e-05, + "loss": 0.5139687657356262, + "step": 1065 + }, + { + "epoch": 0.27180010198878124, + "grad_norm": 0.49244529008865356, + "learning_rate": 1.743238873428469e-05, + "loss": 0.5190452933311462, + "step": 1066 + }, + { + "epoch": 0.2720550739418664, + "grad_norm": 0.4551467001438141, + "learning_rate": 1.7426743655124973e-05, + "loss": 0.5102889537811279, + "step": 1067 + }, + { + "epoch": 0.27231004589495156, + "grad_norm": 0.46808892488479614, + "learning_rate": 1.742109329339114e-05, + "loss": 0.49744510650634766, + "step": 1068 + }, + { + "epoch": 0.2725650178480367, + "grad_norm": 0.4782489836215973, + "learning_rate": 1.7415437653102237e-05, + "loss": 0.502158522605896, + "step": 1069 + }, + { + "epoch": 0.2728199898011219, + "grad_norm": 0.5074446201324463, + "learning_rate": 1.7409776738281067e-05, + "loss": 0.5154489278793335, + "step": 1070 + }, + { + "epoch": 0.27307496175420704, + "grad_norm": 0.4884415566921234, + "learning_rate": 1.740411055295419e-05, + "loss": 0.5210711359977722, + "step": 1071 + }, + { + "epoch": 0.2733299337072922, + "grad_norm": 0.4918287694454193, + "learning_rate": 1.7398439101151908e-05, + "loss": 0.49791979789733887, + "step": 1072 + }, + { + "epoch": 0.27358490566037735, + "grad_norm": 0.5024614334106445, + "learning_rate": 1.7392762386908275e-05, + "loss": 0.5004453063011169, + "step": 1073 + }, + { + "epoch": 0.2738398776134625, + "grad_norm": 0.4789541959762573, + "learning_rate": 1.7387080414261084e-05, + "loss": 0.49946314096450806, + "step": 1074 + }, + { + "epoch": 0.27409484956654767, + "grad_norm": 0.5202962160110474, + "learning_rate": 1.738139318725187e-05, + "loss": 0.5039060711860657, + "step": 1075 + }, + { + "epoch": 0.27434982151963283, + "grad_norm": 0.4789374768733978, + "learning_rate": 1.7375700709925903e-05, + "loss": 0.5128728151321411, + "step": 1076 + }, + { + "epoch": 0.274604793472718, + "grad_norm": 0.46692997217178345, + "learning_rate": 1.7370002986332195e-05, + "loss": 0.5212869644165039, + "step": 1077 + }, + { + "epoch": 0.27485976542580315, + "grad_norm": 0.45039069652557373, + "learning_rate": 1.7364300020523475e-05, + "loss": 0.5023295879364014, + "step": 1078 + }, + { + "epoch": 0.2751147373788883, + "grad_norm": 0.44396907091140747, + "learning_rate": 1.7358591816556223e-05, + "loss": 0.5003005266189575, + "step": 1079 + }, + { + "epoch": 0.27536970933197347, + "grad_norm": 0.5632413625717163, + "learning_rate": 1.7352878378490622e-05, + "loss": 0.4984963834285736, + "step": 1080 + }, + { + "epoch": 0.2756246812850586, + "grad_norm": 0.4821857810020447, + "learning_rate": 1.7347159710390585e-05, + "loss": 0.5095998048782349, + "step": 1081 + }, + { + "epoch": 0.2758796532381438, + "grad_norm": 0.509071409702301, + "learning_rate": 1.7341435816323756e-05, + "loss": 0.5028913617134094, + "step": 1082 + }, + { + "epoch": 0.27613462519122894, + "grad_norm": 0.4965466558933258, + "learning_rate": 1.7335706700361488e-05, + "loss": 0.5158098936080933, + "step": 1083 + }, + { + "epoch": 0.2763895971443141, + "grad_norm": 0.4649892747402191, + "learning_rate": 1.732997236657884e-05, + "loss": 0.5070985555648804, + "step": 1084 + }, + { + "epoch": 0.27664456909739926, + "grad_norm": 0.46375176310539246, + "learning_rate": 1.73242328190546e-05, + "loss": 0.5019338130950928, + "step": 1085 + }, + { + "epoch": 0.2768995410504844, + "grad_norm": 0.4885120689868927, + "learning_rate": 1.7318488061871245e-05, + "loss": 0.5054546594619751, + "step": 1086 + }, + { + "epoch": 0.2771545130035696, + "grad_norm": 0.47661104798316956, + "learning_rate": 1.7312738099114973e-05, + "loss": 0.49401187896728516, + "step": 1087 + }, + { + "epoch": 0.2774094849566548, + "grad_norm": 0.4707930088043213, + "learning_rate": 1.7306982934875684e-05, + "loss": 0.5095722079277039, + "step": 1088 + }, + { + "epoch": 0.27766445690973995, + "grad_norm": 0.45717406272888184, + "learning_rate": 1.730122257324697e-05, + "loss": 0.5120978951454163, + "step": 1089 + }, + { + "epoch": 0.2779194288628251, + "grad_norm": 0.46347159147262573, + "learning_rate": 1.7295457018326117e-05, + "loss": 0.4990503191947937, + "step": 1090 + }, + { + "epoch": 0.27817440081591027, + "grad_norm": 0.4729735255241394, + "learning_rate": 1.7289686274214116e-05, + "loss": 0.5116077661514282, + "step": 1091 + }, + { + "epoch": 0.27842937276899543, + "grad_norm": 0.4576144516468048, + "learning_rate": 1.7283910345015645e-05, + "loss": 0.5087347030639648, + "step": 1092 + }, + { + "epoch": 0.2786843447220806, + "grad_norm": 0.4772886335849762, + "learning_rate": 1.727812923483907e-05, + "loss": 0.5092043280601501, + "step": 1093 + }, + { + "epoch": 0.27893931667516575, + "grad_norm": 0.4713665246963501, + "learning_rate": 1.7272342947796433e-05, + "loss": 0.5062289834022522, + "step": 1094 + }, + { + "epoch": 0.2791942886282509, + "grad_norm": 0.4943859875202179, + "learning_rate": 1.7266551488003478e-05, + "loss": 0.5019166469573975, + "step": 1095 + }, + { + "epoch": 0.27944926058133607, + "grad_norm": 0.49818283319473267, + "learning_rate": 1.726075485957961e-05, + "loss": 0.5120074152946472, + "step": 1096 + }, + { + "epoch": 0.2797042325344212, + "grad_norm": 0.4867881238460541, + "learning_rate": 1.7254953066647915e-05, + "loss": 0.4957153797149658, + "step": 1097 + }, + { + "epoch": 0.2799592044875064, + "grad_norm": 0.49564328789711, + "learning_rate": 1.724914611333516e-05, + "loss": 0.4979622960090637, + "step": 1098 + }, + { + "epoch": 0.28021417644059154, + "grad_norm": 0.47495225071907043, + "learning_rate": 1.7243334003771778e-05, + "loss": 0.4982151985168457, + "step": 1099 + }, + { + "epoch": 0.2804691483936767, + "grad_norm": 0.45300769805908203, + "learning_rate": 1.7237516742091863e-05, + "loss": 0.5014975070953369, + "step": 1100 + }, + { + "epoch": 0.28072412034676186, + "grad_norm": 0.44557687640190125, + "learning_rate": 1.7231694332433184e-05, + "loss": 0.4973270297050476, + "step": 1101 + }, + { + "epoch": 0.280979092299847, + "grad_norm": 0.4794977009296417, + "learning_rate": 1.7225866778937168e-05, + "loss": 0.5073505640029907, + "step": 1102 + }, + { + "epoch": 0.2812340642529322, + "grad_norm": 0.4475419819355011, + "learning_rate": 1.722003408574889e-05, + "loss": 0.5049219131469727, + "step": 1103 + }, + { + "epoch": 0.28148903620601734, + "grad_norm": 0.4797416925430298, + "learning_rate": 1.7214196257017106e-05, + "loss": 0.5003278851509094, + "step": 1104 + }, + { + "epoch": 0.2817440081591025, + "grad_norm": 0.47968360781669617, + "learning_rate": 1.72083532968942e-05, + "loss": 0.5042279958724976, + "step": 1105 + }, + { + "epoch": 0.28199898011218766, + "grad_norm": 0.4770395755767822, + "learning_rate": 1.720250520953622e-05, + "loss": 0.5177443623542786, + "step": 1106 + }, + { + "epoch": 0.2822539520652728, + "grad_norm": 0.4603266417980194, + "learning_rate": 1.719665199910285e-05, + "loss": 0.5057562589645386, + "step": 1107 + }, + { + "epoch": 0.282508924018358, + "grad_norm": 0.46381697058677673, + "learning_rate": 1.7190793669757427e-05, + "loss": 0.5043715238571167, + "step": 1108 + }, + { + "epoch": 0.28276389597144314, + "grad_norm": 0.4711867868900299, + "learning_rate": 1.7184930225666932e-05, + "loss": 0.5236672163009644, + "step": 1109 + }, + { + "epoch": 0.2830188679245283, + "grad_norm": 0.4568409025669098, + "learning_rate": 1.7179061671001974e-05, + "loss": 0.5128495097160339, + "step": 1110 + }, + { + "epoch": 0.28327383987761345, + "grad_norm": 0.45788177847862244, + "learning_rate": 1.71731880099368e-05, + "loss": 0.5006764531135559, + "step": 1111 + }, + { + "epoch": 0.2835288118306986, + "grad_norm": 0.44360288977622986, + "learning_rate": 1.7167309246649298e-05, + "loss": 0.49972233176231384, + "step": 1112 + }, + { + "epoch": 0.28378378378378377, + "grad_norm": 0.4582159221172333, + "learning_rate": 1.716142538532097e-05, + "loss": 0.5080081224441528, + "step": 1113 + }, + { + "epoch": 0.28403875573686893, + "grad_norm": 0.45554617047309875, + "learning_rate": 1.7155536430136958e-05, + "loss": 0.5192968845367432, + "step": 1114 + }, + { + "epoch": 0.2842937276899541, + "grad_norm": 0.46286189556121826, + "learning_rate": 1.7149642385286015e-05, + "loss": 0.5007770657539368, + "step": 1115 + }, + { + "epoch": 0.28454869964303925, + "grad_norm": 0.45503437519073486, + "learning_rate": 1.7143743254960526e-05, + "loss": 0.49312448501586914, + "step": 1116 + }, + { + "epoch": 0.2848036715961244, + "grad_norm": 0.4485381841659546, + "learning_rate": 1.7137839043356486e-05, + "loss": 0.5041566491127014, + "step": 1117 + }, + { + "epoch": 0.28505864354920957, + "grad_norm": 0.5287951231002808, + "learning_rate": 1.7131929754673505e-05, + "loss": 0.4854404926300049, + "step": 1118 + }, + { + "epoch": 0.2853136155022947, + "grad_norm": 0.48092010617256165, + "learning_rate": 1.712601539311481e-05, + "loss": 0.5001811981201172, + "step": 1119 + }, + { + "epoch": 0.2855685874553799, + "grad_norm": 0.4755527377128601, + "learning_rate": 1.7120095962887226e-05, + "loss": 0.5057350397109985, + "step": 1120 + }, + { + "epoch": 0.28582355940846504, + "grad_norm": 0.5042078495025635, + "learning_rate": 1.7114171468201192e-05, + "loss": 0.5058793425559998, + "step": 1121 + }, + { + "epoch": 0.2860785313615502, + "grad_norm": 0.46610692143440247, + "learning_rate": 1.710824191327075e-05, + "loss": 0.5130771398544312, + "step": 1122 + }, + { + "epoch": 0.28633350331463536, + "grad_norm": 0.4715431332588196, + "learning_rate": 1.710230730231353e-05, + "loss": 0.5075538158416748, + "step": 1123 + }, + { + "epoch": 0.2865884752677206, + "grad_norm": 0.4927000403404236, + "learning_rate": 1.7096367639550767e-05, + "loss": 0.5113017559051514, + "step": 1124 + }, + { + "epoch": 0.28684344722080574, + "grad_norm": 0.4718981683254242, + "learning_rate": 1.7090422929207295e-05, + "loss": 0.49854955077171326, + "step": 1125 + }, + { + "epoch": 0.2870984191738909, + "grad_norm": 0.49637195467948914, + "learning_rate": 1.7084473175511527e-05, + "loss": 0.4942783713340759, + "step": 1126 + }, + { + "epoch": 0.28735339112697605, + "grad_norm": 0.48915430903434753, + "learning_rate": 1.7078518382695467e-05, + "loss": 0.49409493803977966, + "step": 1127 + }, + { + "epoch": 0.2876083630800612, + "grad_norm": 0.5150572061538696, + "learning_rate": 1.7072558554994705e-05, + "loss": 0.5223121047019958, + "step": 1128 + }, + { + "epoch": 0.2878633350331464, + "grad_norm": 0.4622846841812134, + "learning_rate": 1.7066593696648408e-05, + "loss": 0.5007497668266296, + "step": 1129 + }, + { + "epoch": 0.28811830698623153, + "grad_norm": 0.48057541251182556, + "learning_rate": 1.7060623811899332e-05, + "loss": 0.49329742789268494, + "step": 1130 + }, + { + "epoch": 0.2883732789393167, + "grad_norm": 0.47378435730934143, + "learning_rate": 1.7054648904993795e-05, + "loss": 0.5099858045578003, + "step": 1131 + }, + { + "epoch": 0.28862825089240185, + "grad_norm": 0.4834018647670746, + "learning_rate": 1.7048668980181696e-05, + "loss": 0.5121385455131531, + "step": 1132 + }, + { + "epoch": 0.288883222845487, + "grad_norm": 0.47347232699394226, + "learning_rate": 1.70426840417165e-05, + "loss": 0.5076555609703064, + "step": 1133 + }, + { + "epoch": 0.28913819479857217, + "grad_norm": 0.48978927731513977, + "learning_rate": 1.7036694093855238e-05, + "loss": 0.5149083137512207, + "step": 1134 + }, + { + "epoch": 0.2893931667516573, + "grad_norm": 0.4732295572757721, + "learning_rate": 1.7030699140858503e-05, + "loss": 0.49698835611343384, + "step": 1135 + }, + { + "epoch": 0.2896481387047425, + "grad_norm": 0.4847556948661804, + "learning_rate": 1.702469918699045e-05, + "loss": 0.5097410678863525, + "step": 1136 + }, + { + "epoch": 0.28990311065782765, + "grad_norm": 0.45591381192207336, + "learning_rate": 1.7018694236518792e-05, + "loss": 0.5003314018249512, + "step": 1137 + }, + { + "epoch": 0.2901580826109128, + "grad_norm": 0.49350666999816895, + "learning_rate": 1.7012684293714797e-05, + "loss": 0.5001873970031738, + "step": 1138 + }, + { + "epoch": 0.29041305456399796, + "grad_norm": 0.467124879360199, + "learning_rate": 1.7006669362853275e-05, + "loss": 0.48504096269607544, + "step": 1139 + }, + { + "epoch": 0.2906680265170831, + "grad_norm": 0.45942822098731995, + "learning_rate": 1.7000649448212595e-05, + "loss": 0.49284881353378296, + "step": 1140 + }, + { + "epoch": 0.2909229984701683, + "grad_norm": 0.4662016034126282, + "learning_rate": 1.6994624554074667e-05, + "loss": 0.49966710805892944, + "step": 1141 + }, + { + "epoch": 0.29117797042325344, + "grad_norm": 0.46195200085639954, + "learning_rate": 1.6988594684724947e-05, + "loss": 0.49967098236083984, + "step": 1142 + }, + { + "epoch": 0.2914329423763386, + "grad_norm": 0.4631154537200928, + "learning_rate": 1.6982559844452414e-05, + "loss": 0.4934503436088562, + "step": 1143 + }, + { + "epoch": 0.29168791432942376, + "grad_norm": 0.46881651878356934, + "learning_rate": 1.69765200375496e-05, + "loss": 0.4958922863006592, + "step": 1144 + }, + { + "epoch": 0.2919428862825089, + "grad_norm": 0.47307780385017395, + "learning_rate": 1.6970475268312568e-05, + "loss": 0.49355167150497437, + "step": 1145 + }, + { + "epoch": 0.2921978582355941, + "grad_norm": 0.48002174496650696, + "learning_rate": 1.6964425541040904e-05, + "loss": 0.4948916435241699, + "step": 1146 + }, + { + "epoch": 0.29245283018867924, + "grad_norm": 0.45839452743530273, + "learning_rate": 1.695837086003772e-05, + "loss": 0.5025448203086853, + "step": 1147 + }, + { + "epoch": 0.2927078021417644, + "grad_norm": 0.4786364436149597, + "learning_rate": 1.6952311229609656e-05, + "loss": 0.49452894926071167, + "step": 1148 + }, + { + "epoch": 0.29296277409484955, + "grad_norm": 0.5036335587501526, + "learning_rate": 1.694624665406687e-05, + "loss": 0.4942960739135742, + "step": 1149 + }, + { + "epoch": 0.2932177460479347, + "grad_norm": 0.47374340891838074, + "learning_rate": 1.6940177137723046e-05, + "loss": 0.48950761556625366, + "step": 1150 + }, + { + "epoch": 0.29347271800101987, + "grad_norm": 0.4503117799758911, + "learning_rate": 1.693410268489536e-05, + "loss": 0.4947274327278137, + "step": 1151 + }, + { + "epoch": 0.29372768995410503, + "grad_norm": 0.49066653847694397, + "learning_rate": 1.692802329990453e-05, + "loss": 0.5052638053894043, + "step": 1152 + }, + { + "epoch": 0.2939826619071902, + "grad_norm": 0.5150389671325684, + "learning_rate": 1.6921938987074756e-05, + "loss": 0.5081547498703003, + "step": 1153 + }, + { + "epoch": 0.29423763386027535, + "grad_norm": 0.4666706621646881, + "learning_rate": 1.691584975073376e-05, + "loss": 0.5027748346328735, + "step": 1154 + }, + { + "epoch": 0.2944926058133605, + "grad_norm": 0.5015774369239807, + "learning_rate": 1.6909755595212755e-05, + "loss": 0.5036760568618774, + "step": 1155 + }, + { + "epoch": 0.29474757776644567, + "grad_norm": 0.472493439912796, + "learning_rate": 1.6903656524846458e-05, + "loss": 0.5006581544876099, + "step": 1156 + }, + { + "epoch": 0.2950025497195308, + "grad_norm": 0.5009905099868774, + "learning_rate": 1.6897552543973084e-05, + "loss": 0.5129563212394714, + "step": 1157 + }, + { + "epoch": 0.295257521672616, + "grad_norm": 0.46842071413993835, + "learning_rate": 1.6891443656934336e-05, + "loss": 0.4934506416320801, + "step": 1158 + }, + { + "epoch": 0.2955124936257012, + "grad_norm": 0.46922436356544495, + "learning_rate": 1.688532986807541e-05, + "loss": 0.5039920806884766, + "step": 1159 + }, + { + "epoch": 0.29576746557878636, + "grad_norm": 0.4793666899204254, + "learning_rate": 1.6879211181744986e-05, + "loss": 0.4972963333129883, + "step": 1160 + }, + { + "epoch": 0.2960224375318715, + "grad_norm": 0.47085341811180115, + "learning_rate": 1.6873087602295232e-05, + "loss": 0.5129951238632202, + "step": 1161 + }, + { + "epoch": 0.2962774094849567, + "grad_norm": 0.45663273334503174, + "learning_rate": 1.686695913408179e-05, + "loss": 0.4943583607673645, + "step": 1162 + }, + { + "epoch": 0.29653238143804184, + "grad_norm": 0.5101933479309082, + "learning_rate": 1.6860825781463783e-05, + "loss": 0.49064767360687256, + "step": 1163 + }, + { + "epoch": 0.296787353391127, + "grad_norm": 0.5012008547782898, + "learning_rate": 1.685468754880381e-05, + "loss": 0.49900519847869873, + "step": 1164 + }, + { + "epoch": 0.29704232534421215, + "grad_norm": 0.4591823220252991, + "learning_rate": 1.684854444046794e-05, + "loss": 0.49930524826049805, + "step": 1165 + }, + { + "epoch": 0.2972972972972973, + "grad_norm": 0.48761099576950073, + "learning_rate": 1.6842396460825706e-05, + "loss": 0.4923129975795746, + "step": 1166 + }, + { + "epoch": 0.2975522692503825, + "grad_norm": 0.45887067914009094, + "learning_rate": 1.6836243614250113e-05, + "loss": 0.5167244672775269, + "step": 1167 + }, + { + "epoch": 0.29780724120346763, + "grad_norm": 0.48813778162002563, + "learning_rate": 1.683008590511762e-05, + "loss": 0.4933728575706482, + "step": 1168 + }, + { + "epoch": 0.2980622131565528, + "grad_norm": 0.4750381410121918, + "learning_rate": 1.682392333780815e-05, + "loss": 0.4995809495449066, + "step": 1169 + }, + { + "epoch": 0.29831718510963795, + "grad_norm": 0.47015345096588135, + "learning_rate": 1.6817755916705084e-05, + "loss": 0.49063003063201904, + "step": 1170 + }, + { + "epoch": 0.2985721570627231, + "grad_norm": 0.47086119651794434, + "learning_rate": 1.6811583646195242e-05, + "loss": 0.5013208985328674, + "step": 1171 + }, + { + "epoch": 0.29882712901580827, + "grad_norm": 0.48746731877326965, + "learning_rate": 1.680540653066891e-05, + "loss": 0.5028801560401917, + "step": 1172 + }, + { + "epoch": 0.2990821009688934, + "grad_norm": 0.4941359758377075, + "learning_rate": 1.6799224574519818e-05, + "loss": 0.5044407844543457, + "step": 1173 + }, + { + "epoch": 0.2993370729219786, + "grad_norm": 0.47060495615005493, + "learning_rate": 1.6793037782145122e-05, + "loss": 0.5153166055679321, + "step": 1174 + }, + { + "epoch": 0.29959204487506375, + "grad_norm": 0.4782000482082367, + "learning_rate": 1.6786846157945437e-05, + "loss": 0.49174898862838745, + "step": 1175 + }, + { + "epoch": 0.2998470168281489, + "grad_norm": 0.47206997871398926, + "learning_rate": 1.678064970632481e-05, + "loss": 0.49420687556266785, + "step": 1176 + }, + { + "epoch": 0.30010198878123406, + "grad_norm": 0.5003753304481506, + "learning_rate": 1.677444843169072e-05, + "loss": 0.5016045570373535, + "step": 1177 + }, + { + "epoch": 0.3003569607343192, + "grad_norm": 0.4960240423679352, + "learning_rate": 1.676824233845407e-05, + "loss": 0.4992179572582245, + "step": 1178 + }, + { + "epoch": 0.3006119326874044, + "grad_norm": 0.4680156409740448, + "learning_rate": 1.6762031431029203e-05, + "loss": 0.5058457255363464, + "step": 1179 + }, + { + "epoch": 0.30086690464048954, + "grad_norm": 0.5267618894577026, + "learning_rate": 1.6755815713833876e-05, + "loss": 0.4940145015716553, + "step": 1180 + }, + { + "epoch": 0.3011218765935747, + "grad_norm": 0.4637565612792969, + "learning_rate": 1.674959519128927e-05, + "loss": 0.5075904130935669, + "step": 1181 + }, + { + "epoch": 0.30137684854665986, + "grad_norm": 0.6613916754722595, + "learning_rate": 1.674336986781999e-05, + "loss": 0.5021583437919617, + "step": 1182 + }, + { + "epoch": 0.301631820499745, + "grad_norm": 0.5022047758102417, + "learning_rate": 1.6737139747854055e-05, + "loss": 0.49105164408683777, + "step": 1183 + }, + { + "epoch": 0.3018867924528302, + "grad_norm": 0.5665522813796997, + "learning_rate": 1.673090483582288e-05, + "loss": 0.49535173177719116, + "step": 1184 + }, + { + "epoch": 0.30214176440591534, + "grad_norm": 0.4763643145561218, + "learning_rate": 1.6724665136161305e-05, + "loss": 0.496861070394516, + "step": 1185 + }, + { + "epoch": 0.3023967363590005, + "grad_norm": 0.49327611923217773, + "learning_rate": 1.6718420653307578e-05, + "loss": 0.5091848373413086, + "step": 1186 + }, + { + "epoch": 0.30265170831208565, + "grad_norm": 0.44212591648101807, + "learning_rate": 1.6712171391703332e-05, + "loss": 0.49904412031173706, + "step": 1187 + }, + { + "epoch": 0.3029066802651708, + "grad_norm": 0.48958224058151245, + "learning_rate": 1.670591735579361e-05, + "loss": 0.4978852868080139, + "step": 1188 + }, + { + "epoch": 0.303161652218256, + "grad_norm": 0.4796984791755676, + "learning_rate": 1.6699658550026855e-05, + "loss": 0.49702033400535583, + "step": 1189 + }, + { + "epoch": 0.30341662417134113, + "grad_norm": 0.6042547225952148, + "learning_rate": 1.6693394978854898e-05, + "loss": 0.5002682209014893, + "step": 1190 + }, + { + "epoch": 0.3036715961244263, + "grad_norm": 0.4669792652130127, + "learning_rate": 1.6687126646732955e-05, + "loss": 0.4984990060329437, + "step": 1191 + }, + { + "epoch": 0.30392656807751145, + "grad_norm": 0.451721727848053, + "learning_rate": 1.668085355811963e-05, + "loss": 0.501367449760437, + "step": 1192 + }, + { + "epoch": 0.3041815400305966, + "grad_norm": 0.44385984539985657, + "learning_rate": 1.6674575717476923e-05, + "loss": 0.4959375262260437, + "step": 1193 + }, + { + "epoch": 0.30443651198368177, + "grad_norm": 0.47235265374183655, + "learning_rate": 1.66682931292702e-05, + "loss": 0.49185383319854736, + "step": 1194 + }, + { + "epoch": 0.304691483936767, + "grad_norm": 0.46303853392601013, + "learning_rate": 1.66620057979682e-05, + "loss": 0.5002027153968811, + "step": 1195 + }, + { + "epoch": 0.30494645588985214, + "grad_norm": 0.4476638734340668, + "learning_rate": 1.6655713728043056e-05, + "loss": 0.49557623267173767, + "step": 1196 + }, + { + "epoch": 0.3052014278429373, + "grad_norm": 0.47302544116973877, + "learning_rate": 1.6649416923970248e-05, + "loss": 0.4934016466140747, + "step": 1197 + }, + { + "epoch": 0.30545639979602246, + "grad_norm": 0.4599985182285309, + "learning_rate": 1.6643115390228642e-05, + "loss": 0.5042760372161865, + "step": 1198 + }, + { + "epoch": 0.3057113717491076, + "grad_norm": 0.47824129462242126, + "learning_rate": 1.663680913130046e-05, + "loss": 0.4973282516002655, + "step": 1199 + }, + { + "epoch": 0.3059663437021928, + "grad_norm": 0.4603481590747833, + "learning_rate": 1.6630498151671282e-05, + "loss": 0.485217422246933, + "step": 1200 + }, + { + "epoch": 0.30622131565527794, + "grad_norm": 0.48893457651138306, + "learning_rate": 1.662418245583005e-05, + "loss": 0.4935639798641205, + "step": 1201 + }, + { + "epoch": 0.3064762876083631, + "grad_norm": 0.4877832233905792, + "learning_rate": 1.6617862048269065e-05, + "loss": 0.5135349631309509, + "step": 1202 + }, + { + "epoch": 0.30673125956144826, + "grad_norm": 0.45736855268478394, + "learning_rate": 1.6611536933483966e-05, + "loss": 0.48187631368637085, + "step": 1203 + }, + { + "epoch": 0.3069862315145334, + "grad_norm": 0.48175039887428284, + "learning_rate": 1.660520711597376e-05, + "loss": 0.5004374384880066, + "step": 1204 + }, + { + "epoch": 0.3072412034676186, + "grad_norm": 0.45422685146331787, + "learning_rate": 1.659887260024078e-05, + "loss": 0.4838033616542816, + "step": 1205 + }, + { + "epoch": 0.30749617542070373, + "grad_norm": 0.49426570534706116, + "learning_rate": 1.659253339079071e-05, + "loss": 0.5089007616043091, + "step": 1206 + }, + { + "epoch": 0.3077511473737889, + "grad_norm": 0.48816773295402527, + "learning_rate": 1.6586189492132567e-05, + "loss": 0.4888179898262024, + "step": 1207 + }, + { + "epoch": 0.30800611932687405, + "grad_norm": 0.4737013876438141, + "learning_rate": 1.6579840908778714e-05, + "loss": 0.5024397373199463, + "step": 1208 + }, + { + "epoch": 0.3082610912799592, + "grad_norm": 0.4843386709690094, + "learning_rate": 1.6573487645244838e-05, + "loss": 0.5023159980773926, + "step": 1209 + }, + { + "epoch": 0.30851606323304437, + "grad_norm": 0.4812295436859131, + "learning_rate": 1.6567129706049953e-05, + "loss": 0.5020912885665894, + "step": 1210 + }, + { + "epoch": 0.3087710351861295, + "grad_norm": 0.5185731053352356, + "learning_rate": 1.656076709571641e-05, + "loss": 0.4940309226512909, + "step": 1211 + }, + { + "epoch": 0.3090260071392147, + "grad_norm": 0.5270474553108215, + "learning_rate": 1.655439981876987e-05, + "loss": 0.5050083994865417, + "step": 1212 + }, + { + "epoch": 0.30928097909229985, + "grad_norm": 0.4855048358440399, + "learning_rate": 1.6548027879739314e-05, + "loss": 0.5098885297775269, + "step": 1213 + }, + { + "epoch": 0.309535951045385, + "grad_norm": 0.4971138536930084, + "learning_rate": 1.654165128315705e-05, + "loss": 0.5043885707855225, + "step": 1214 + }, + { + "epoch": 0.30979092299847016, + "grad_norm": 0.47204726934432983, + "learning_rate": 1.6535270033558692e-05, + "loss": 0.5070247054100037, + "step": 1215 + }, + { + "epoch": 0.3100458949515553, + "grad_norm": 0.8165231347084045, + "learning_rate": 1.652888413548316e-05, + "loss": 0.49824613332748413, + "step": 1216 + }, + { + "epoch": 0.3103008669046405, + "grad_norm": 0.4515669047832489, + "learning_rate": 1.6522493593472683e-05, + "loss": 0.4811893403530121, + "step": 1217 + }, + { + "epoch": 0.31055583885772564, + "grad_norm": 0.4702485501766205, + "learning_rate": 1.65160984120728e-05, + "loss": 0.4961361289024353, + "step": 1218 + }, + { + "epoch": 0.3108108108108108, + "grad_norm": 0.4543362855911255, + "learning_rate": 1.6509698595832343e-05, + "loss": 0.49158987402915955, + "step": 1219 + }, + { + "epoch": 0.31106578276389596, + "grad_norm": 0.4930214583873749, + "learning_rate": 1.650329414930344e-05, + "loss": 0.5052799582481384, + "step": 1220 + }, + { + "epoch": 0.3113207547169811, + "grad_norm": 0.45656049251556396, + "learning_rate": 1.649688507704152e-05, + "loss": 0.49146366119384766, + "step": 1221 + }, + { + "epoch": 0.3115757266700663, + "grad_norm": 0.4643915891647339, + "learning_rate": 1.649047138360529e-05, + "loss": 0.4719155728816986, + "step": 1222 + }, + { + "epoch": 0.31183069862315144, + "grad_norm": 0.4875204265117645, + "learning_rate": 1.648405307355675e-05, + "loss": 0.4880620241165161, + "step": 1223 + }, + { + "epoch": 0.3120856705762366, + "grad_norm": 0.8442656993865967, + "learning_rate": 1.6477630151461192e-05, + "loss": 0.5070099830627441, + "step": 1224 + }, + { + "epoch": 0.31234064252932175, + "grad_norm": 0.4763401448726654, + "learning_rate": 1.6471202621887173e-05, + "loss": 0.505492091178894, + "step": 1225 + }, + { + "epoch": 0.3125956144824069, + "grad_norm": 0.44397473335266113, + "learning_rate": 1.6464770489406543e-05, + "loss": 0.5049811601638794, + "step": 1226 + }, + { + "epoch": 0.3128505864354921, + "grad_norm": 0.470048725605011, + "learning_rate": 1.6458333758594415e-05, + "loss": 0.4918753504753113, + "step": 1227 + }, + { + "epoch": 0.31310555838857723, + "grad_norm": 0.4405840039253235, + "learning_rate": 1.6451892434029175e-05, + "loss": 0.48752328753471375, + "step": 1228 + }, + { + "epoch": 0.3133605303416624, + "grad_norm": 0.4632117450237274, + "learning_rate": 1.6445446520292477e-05, + "loss": 0.4995397925376892, + "step": 1229 + }, + { + "epoch": 0.31361550229474755, + "grad_norm": 0.44940271973609924, + "learning_rate": 1.6438996021969245e-05, + "loss": 0.49737581610679626, + "step": 1230 + }, + { + "epoch": 0.31387047424783276, + "grad_norm": 0.45170217752456665, + "learning_rate": 1.6432540943647658e-05, + "loss": 0.4811992347240448, + "step": 1231 + }, + { + "epoch": 0.3141254462009179, + "grad_norm": 0.4569378197193146, + "learning_rate": 1.6426081289919144e-05, + "loss": 0.4972044825553894, + "step": 1232 + }, + { + "epoch": 0.3143804181540031, + "grad_norm": 0.468311607837677, + "learning_rate": 1.6419617065378405e-05, + "loss": 0.5013965368270874, + "step": 1233 + }, + { + "epoch": 0.31463539010708824, + "grad_norm": 0.48914453387260437, + "learning_rate": 1.6413148274623384e-05, + "loss": 0.5007435083389282, + "step": 1234 + }, + { + "epoch": 0.3148903620601734, + "grad_norm": 0.5679212808609009, + "learning_rate": 1.6406674922255262e-05, + "loss": 0.49458086490631104, + "step": 1235 + }, + { + "epoch": 0.31514533401325856, + "grad_norm": 0.4546929597854614, + "learning_rate": 1.640019701287849e-05, + "loss": 0.4977070689201355, + "step": 1236 + }, + { + "epoch": 0.3154003059663437, + "grad_norm": 0.4733176827430725, + "learning_rate": 1.6393714551100734e-05, + "loss": 0.5150038599967957, + "step": 1237 + }, + { + "epoch": 0.3156552779194289, + "grad_norm": 0.4902767837047577, + "learning_rate": 1.6387227541532912e-05, + "loss": 0.5028160810470581, + "step": 1238 + }, + { + "epoch": 0.31591024987251404, + "grad_norm": 0.4904122054576874, + "learning_rate": 1.6380735988789172e-05, + "loss": 0.48970431089401245, + "step": 1239 + }, + { + "epoch": 0.3161652218255992, + "grad_norm": 0.4793023467063904, + "learning_rate": 1.63742398974869e-05, + "loss": 0.4952084422111511, + "step": 1240 + }, + { + "epoch": 0.31642019377868436, + "grad_norm": 0.5026772618293762, + "learning_rate": 1.6367739272246702e-05, + "loss": 0.48636317253112793, + "step": 1241 + }, + { + "epoch": 0.3166751657317695, + "grad_norm": 0.48422929644584656, + "learning_rate": 1.6361234117692414e-05, + "loss": 0.4868415296077728, + "step": 1242 + }, + { + "epoch": 0.3169301376848547, + "grad_norm": 0.4590810239315033, + "learning_rate": 1.6354724438451094e-05, + "loss": 0.49675533175468445, + "step": 1243 + }, + { + "epoch": 0.31718510963793983, + "grad_norm": 0.46764785051345825, + "learning_rate": 1.634821023915301e-05, + "loss": 0.49604731798171997, + "step": 1244 + }, + { + "epoch": 0.317440081591025, + "grad_norm": 0.47263312339782715, + "learning_rate": 1.6341691524431662e-05, + "loss": 0.5027939081192017, + "step": 1245 + }, + { + "epoch": 0.31769505354411015, + "grad_norm": 0.45748889446258545, + "learning_rate": 1.6335168298923743e-05, + "loss": 0.49228137731552124, + "step": 1246 + }, + { + "epoch": 0.3179500254971953, + "grad_norm": 0.473275363445282, + "learning_rate": 1.632864056726917e-05, + "loss": 0.4914236068725586, + "step": 1247 + }, + { + "epoch": 0.31820499745028047, + "grad_norm": 0.45597827434539795, + "learning_rate": 1.632210833411105e-05, + "loss": 0.4925191104412079, + "step": 1248 + }, + { + "epoch": 0.31845996940336563, + "grad_norm": 0.4696197211742401, + "learning_rate": 1.6315571604095706e-05, + "loss": 0.4934743046760559, + "step": 1249 + }, + { + "epoch": 0.3187149413564508, + "grad_norm": 0.4852502644062042, + "learning_rate": 1.6309030381872656e-05, + "loss": 0.496171772480011, + "step": 1250 + }, + { + "epoch": 0.31896991330953595, + "grad_norm": 0.4474034905433655, + "learning_rate": 1.6302484672094604e-05, + "loss": 0.4935624301433563, + "step": 1251 + }, + { + "epoch": 0.3192248852626211, + "grad_norm": 0.47168582677841187, + "learning_rate": 1.6295934479417454e-05, + "loss": 0.4883151054382324, + "step": 1252 + }, + { + "epoch": 0.31947985721570626, + "grad_norm": 0.4466879367828369, + "learning_rate": 1.6289379808500303e-05, + "loss": 0.4885556697845459, + "step": 1253 + }, + { + "epoch": 0.3197348291687914, + "grad_norm": 0.47400885820388794, + "learning_rate": 1.628282066400542e-05, + "loss": 0.49073100090026855, + "step": 1254 + }, + { + "epoch": 0.3199898011218766, + "grad_norm": 0.4575072228908539, + "learning_rate": 1.6276257050598273e-05, + "loss": 0.4954169988632202, + "step": 1255 + }, + { + "epoch": 0.32024477307496174, + "grad_norm": 0.4483925700187683, + "learning_rate": 1.626968897294749e-05, + "loss": 0.4948553442955017, + "step": 1256 + }, + { + "epoch": 0.3204997450280469, + "grad_norm": 0.4554179906845093, + "learning_rate": 1.626311643572489e-05, + "loss": 0.4990859925746918, + "step": 1257 + }, + { + "epoch": 0.32075471698113206, + "grad_norm": 0.5914171934127808, + "learning_rate": 1.625653944360546e-05, + "loss": 0.49615174531936646, + "step": 1258 + }, + { + "epoch": 0.3210096889342172, + "grad_norm": 0.4487502872943878, + "learning_rate": 1.6249958001267345e-05, + "loss": 0.503517746925354, + "step": 1259 + }, + { + "epoch": 0.3212646608873024, + "grad_norm": 0.4658033549785614, + "learning_rate": 1.624337211339187e-05, + "loss": 0.49343857169151306, + "step": 1260 + }, + { + "epoch": 0.32151963284038754, + "grad_norm": 0.4669674038887024, + "learning_rate": 1.623678178466352e-05, + "loss": 0.5054748058319092, + "step": 1261 + }, + { + "epoch": 0.3217746047934727, + "grad_norm": 0.4506138265132904, + "learning_rate": 1.623018701976993e-05, + "loss": 0.4952157437801361, + "step": 1262 + }, + { + "epoch": 0.32202957674655786, + "grad_norm": 0.47126540541648865, + "learning_rate": 1.622358782340189e-05, + "loss": 0.5030245780944824, + "step": 1263 + }, + { + "epoch": 0.322284548699643, + "grad_norm": 0.44352665543556213, + "learning_rate": 1.6216984200253355e-05, + "loss": 0.48594093322753906, + "step": 1264 + }, + { + "epoch": 0.3225395206527282, + "grad_norm": 0.4284340441226959, + "learning_rate": 1.621037615502142e-05, + "loss": 0.49887198209762573, + "step": 1265 + }, + { + "epoch": 0.3227944926058134, + "grad_norm": 0.44898107647895813, + "learning_rate": 1.6203763692406328e-05, + "loss": 0.48706620931625366, + "step": 1266 + }, + { + "epoch": 0.32304946455889855, + "grad_norm": 0.440380334854126, + "learning_rate": 1.619714681711146e-05, + "loss": 0.49467596411705017, + "step": 1267 + }, + { + "epoch": 0.3233044365119837, + "grad_norm": 0.46733948588371277, + "learning_rate": 1.6190525533843337e-05, + "loss": 0.5020816922187805, + "step": 1268 + }, + { + "epoch": 0.32355940846506887, + "grad_norm": 0.6229173541069031, + "learning_rate": 1.618389984731162e-05, + "loss": 0.49869582056999207, + "step": 1269 + }, + { + "epoch": 0.323814380418154, + "grad_norm": 0.48032182455062866, + "learning_rate": 1.6177269762229096e-05, + "loss": 0.4977878928184509, + "step": 1270 + }, + { + "epoch": 0.3240693523712392, + "grad_norm": 0.46267253160476685, + "learning_rate": 1.6170635283311684e-05, + "loss": 0.5060316324234009, + "step": 1271 + }, + { + "epoch": 0.32432432432432434, + "grad_norm": 0.4646252989768982, + "learning_rate": 1.6163996415278423e-05, + "loss": 0.4943225085735321, + "step": 1272 + }, + { + "epoch": 0.3245792962774095, + "grad_norm": 0.44283971190452576, + "learning_rate": 1.6157353162851486e-05, + "loss": 0.47732409834861755, + "step": 1273 + }, + { + "epoch": 0.32483426823049466, + "grad_norm": 0.48292553424835205, + "learning_rate": 1.6150705530756155e-05, + "loss": 0.5045241117477417, + "step": 1274 + }, + { + "epoch": 0.3250892401835798, + "grad_norm": 0.48017552495002747, + "learning_rate": 1.6144053523720832e-05, + "loss": 0.4933950901031494, + "step": 1275 + }, + { + "epoch": 0.325344212136665, + "grad_norm": 0.45280221104621887, + "learning_rate": 1.613739714647702e-05, + "loss": 0.49421560764312744, + "step": 1276 + }, + { + "epoch": 0.32559918408975014, + "grad_norm": 0.48675018548965454, + "learning_rate": 1.6130736403759342e-05, + "loss": 0.5033859014511108, + "step": 1277 + }, + { + "epoch": 0.3258541560428353, + "grad_norm": 0.4708247780799866, + "learning_rate": 1.6124071300305527e-05, + "loss": 0.4810166358947754, + "step": 1278 + }, + { + "epoch": 0.32610912799592046, + "grad_norm": 0.4413648545742035, + "learning_rate": 1.61174018408564e-05, + "loss": 0.49215754866600037, + "step": 1279 + }, + { + "epoch": 0.3263640999490056, + "grad_norm": 0.49567604064941406, + "learning_rate": 1.6110728030155875e-05, + "loss": 0.500187337398529, + "step": 1280 + }, + { + "epoch": 0.3266190719020908, + "grad_norm": 0.47270676493644714, + "learning_rate": 1.6104049872950988e-05, + "loss": 0.5070374011993408, + "step": 1281 + }, + { + "epoch": 0.32687404385517593, + "grad_norm": 0.4364311099052429, + "learning_rate": 1.6097367373991842e-05, + "loss": 0.5007890462875366, + "step": 1282 + }, + { + "epoch": 0.3271290158082611, + "grad_norm": 0.48195508122444153, + "learning_rate": 1.609068053803164e-05, + "loss": 0.48835358023643494, + "step": 1283 + }, + { + "epoch": 0.32738398776134625, + "grad_norm": 0.4663650691509247, + "learning_rate": 1.6083989369826664e-05, + "loss": 0.4924512803554535, + "step": 1284 + }, + { + "epoch": 0.3276389597144314, + "grad_norm": 0.4929714500904083, + "learning_rate": 1.607729387413628e-05, + "loss": 0.48393774032592773, + "step": 1285 + }, + { + "epoch": 0.32789393166751657, + "grad_norm": 0.5165911912918091, + "learning_rate": 1.607059405572293e-05, + "loss": 0.4980706572532654, + "step": 1286 + }, + { + "epoch": 0.32814890362060173, + "grad_norm": 0.44888025522232056, + "learning_rate": 1.606388991935214e-05, + "loss": 0.5012993812561035, + "step": 1287 + }, + { + "epoch": 0.3284038755736869, + "grad_norm": 0.4578009843826294, + "learning_rate": 1.6057181469792497e-05, + "loss": 0.49674296379089355, + "step": 1288 + }, + { + "epoch": 0.32865884752677205, + "grad_norm": 0.4720461368560791, + "learning_rate": 1.6050468711815658e-05, + "loss": 0.4820350110530853, + "step": 1289 + }, + { + "epoch": 0.3289138194798572, + "grad_norm": 0.4323115050792694, + "learning_rate": 1.6043751650196344e-05, + "loss": 0.48184940218925476, + "step": 1290 + }, + { + "epoch": 0.32916879143294236, + "grad_norm": 0.5161405205726624, + "learning_rate": 1.6037030289712344e-05, + "loss": 0.5068054795265198, + "step": 1291 + }, + { + "epoch": 0.3294237633860275, + "grad_norm": 0.44478678703308105, + "learning_rate": 1.6030304635144493e-05, + "loss": 0.5015352368354797, + "step": 1292 + }, + { + "epoch": 0.3296787353391127, + "grad_norm": 0.46428152918815613, + "learning_rate": 1.6023574691276693e-05, + "loss": 0.49676769971847534, + "step": 1293 + }, + { + "epoch": 0.32993370729219784, + "grad_norm": 0.45837974548339844, + "learning_rate": 1.6016840462895887e-05, + "loss": 0.5077446103096008, + "step": 1294 + }, + { + "epoch": 0.330188679245283, + "grad_norm": 0.48795589804649353, + "learning_rate": 1.6010101954792073e-05, + "loss": 0.5017638206481934, + "step": 1295 + }, + { + "epoch": 0.33044365119836816, + "grad_norm": 0.4359217882156372, + "learning_rate": 1.6003359171758286e-05, + "loss": 0.5008919835090637, + "step": 1296 + }, + { + "epoch": 0.3306986231514533, + "grad_norm": 0.4801797568798065, + "learning_rate": 1.5996612118590604e-05, + "loss": 0.5036278963088989, + "step": 1297 + }, + { + "epoch": 0.3309535951045385, + "grad_norm": 0.45398226380348206, + "learning_rate": 1.598986080008815e-05, + "loss": 0.48134273290634155, + "step": 1298 + }, + { + "epoch": 0.33120856705762364, + "grad_norm": 0.469053715467453, + "learning_rate": 1.598310522105307e-05, + "loss": 0.4934031367301941, + "step": 1299 + }, + { + "epoch": 0.3314635390107088, + "grad_norm": 0.44086888432502747, + "learning_rate": 1.5976345386290546e-05, + "loss": 0.499302476644516, + "step": 1300 + }, + { + "epoch": 0.33171851096379396, + "grad_norm": 0.47684571146965027, + "learning_rate": 1.5969581300608783e-05, + "loss": 0.4984762668609619, + "step": 1301 + }, + { + "epoch": 0.33197348291687917, + "grad_norm": 0.4727766215801239, + "learning_rate": 1.5962812968819018e-05, + "loss": 0.498884379863739, + "step": 1302 + }, + { + "epoch": 0.33222845486996433, + "grad_norm": 0.4573783874511719, + "learning_rate": 1.5956040395735498e-05, + "loss": 0.49887561798095703, + "step": 1303 + }, + { + "epoch": 0.3324834268230495, + "grad_norm": 0.4735158681869507, + "learning_rate": 1.5949263586175492e-05, + "loss": 0.49824100732803345, + "step": 1304 + }, + { + "epoch": 0.33273839877613465, + "grad_norm": 0.45007798075675964, + "learning_rate": 1.5942482544959286e-05, + "loss": 0.48702430725097656, + "step": 1305 + }, + { + "epoch": 0.3329933707292198, + "grad_norm": 0.44589129090309143, + "learning_rate": 1.593569727691017e-05, + "loss": 0.49279114603996277, + "step": 1306 + }, + { + "epoch": 0.33324834268230497, + "grad_norm": 0.4649209976196289, + "learning_rate": 1.592890778685444e-05, + "loss": 0.47921788692474365, + "step": 1307 + }, + { + "epoch": 0.3335033146353901, + "grad_norm": 0.4656672775745392, + "learning_rate": 1.59221140796214e-05, + "loss": 0.5048980712890625, + "step": 1308 + }, + { + "epoch": 0.3337582865884753, + "grad_norm": 0.434708833694458, + "learning_rate": 1.5915316160043355e-05, + "loss": 0.504131555557251, + "step": 1309 + }, + { + "epoch": 0.33401325854156044, + "grad_norm": 0.443644255399704, + "learning_rate": 1.5908514032955592e-05, + "loss": 0.4883410930633545, + "step": 1310 + }, + { + "epoch": 0.3342682304946456, + "grad_norm": 0.47313472628593445, + "learning_rate": 1.5901707703196412e-05, + "loss": 0.49209117889404297, + "step": 1311 + }, + { + "epoch": 0.33452320244773076, + "grad_norm": 0.4823039174079895, + "learning_rate": 1.589489717560709e-05, + "loss": 0.4971883296966553, + "step": 1312 + }, + { + "epoch": 0.3347781744008159, + "grad_norm": 0.4580431282520294, + "learning_rate": 1.5888082455031885e-05, + "loss": 0.49591752886772156, + "step": 1313 + }, + { + "epoch": 0.3350331463539011, + "grad_norm": 0.5018422603607178, + "learning_rate": 1.5881263546318052e-05, + "loss": 0.4978068768978119, + "step": 1314 + }, + { + "epoch": 0.33528811830698624, + "grad_norm": 0.5090048313140869, + "learning_rate": 1.5874440454315823e-05, + "loss": 0.4929397702217102, + "step": 1315 + }, + { + "epoch": 0.3355430902600714, + "grad_norm": 0.4691426157951355, + "learning_rate": 1.5867613183878387e-05, + "loss": 0.49518275260925293, + "step": 1316 + }, + { + "epoch": 0.33579806221315656, + "grad_norm": 0.48400259017944336, + "learning_rate": 1.5860781739861928e-05, + "loss": 0.48870205879211426, + "step": 1317 + }, + { + "epoch": 0.3360530341662417, + "grad_norm": 0.48520103096961975, + "learning_rate": 1.5853946127125585e-05, + "loss": 0.4775869846343994, + "step": 1318 + }, + { + "epoch": 0.3363080061193269, + "grad_norm": 0.4813341796398163, + "learning_rate": 1.584710635053147e-05, + "loss": 0.47866344451904297, + "step": 1319 + }, + { + "epoch": 0.33656297807241203, + "grad_norm": 0.46262988448143005, + "learning_rate": 1.5840262414944647e-05, + "loss": 0.48800715804100037, + "step": 1320 + }, + { + "epoch": 0.3368179500254972, + "grad_norm": 0.4679766595363617, + "learning_rate": 1.583341432523315e-05, + "loss": 0.48198017477989197, + "step": 1321 + }, + { + "epoch": 0.33707292197858235, + "grad_norm": 0.458714097738266, + "learning_rate": 1.5826562086267956e-05, + "loss": 0.4898045063018799, + "step": 1322 + }, + { + "epoch": 0.3373278939316675, + "grad_norm": 0.4584973156452179, + "learning_rate": 1.5819705702923004e-05, + "loss": 0.4798343777656555, + "step": 1323 + }, + { + "epoch": 0.33758286588475267, + "grad_norm": 0.4500448405742645, + "learning_rate": 1.5812845180075175e-05, + "loss": 0.49616456031799316, + "step": 1324 + }, + { + "epoch": 0.33783783783783783, + "grad_norm": 0.43624138832092285, + "learning_rate": 1.5805980522604295e-05, + "loss": 0.49544939398765564, + "step": 1325 + }, + { + "epoch": 0.338092809790923, + "grad_norm": 0.44867441058158875, + "learning_rate": 1.579911173539313e-05, + "loss": 0.49614858627319336, + "step": 1326 + }, + { + "epoch": 0.33834778174400815, + "grad_norm": 0.45365074276924133, + "learning_rate": 1.5792238823327387e-05, + "loss": 0.48979371786117554, + "step": 1327 + }, + { + "epoch": 0.3386027536970933, + "grad_norm": 0.4778674840927124, + "learning_rate": 1.5785361791295706e-05, + "loss": 0.4943079948425293, + "step": 1328 + }, + { + "epoch": 0.33885772565017847, + "grad_norm": 0.45407992601394653, + "learning_rate": 1.5778480644189654e-05, + "loss": 0.501994252204895, + "step": 1329 + }, + { + "epoch": 0.3391126976032636, + "grad_norm": 0.47102952003479004, + "learning_rate": 1.5771595386903728e-05, + "loss": 0.4933057725429535, + "step": 1330 + }, + { + "epoch": 0.3393676695563488, + "grad_norm": 0.5028448700904846, + "learning_rate": 1.5764706024335343e-05, + "loss": 0.48624253273010254, + "step": 1331 + }, + { + "epoch": 0.33962264150943394, + "grad_norm": 0.4622834324836731, + "learning_rate": 1.5757812561384848e-05, + "loss": 0.5023224353790283, + "step": 1332 + }, + { + "epoch": 0.3398776134625191, + "grad_norm": 0.4559676945209503, + "learning_rate": 1.5750915002955497e-05, + "loss": 0.4845350682735443, + "step": 1333 + }, + { + "epoch": 0.34013258541560426, + "grad_norm": 0.5004395246505737, + "learning_rate": 1.5744013353953455e-05, + "loss": 0.4978603720664978, + "step": 1334 + }, + { + "epoch": 0.3403875573686894, + "grad_norm": 0.4740391969680786, + "learning_rate": 1.5737107619287802e-05, + "loss": 0.4802100658416748, + "step": 1335 + }, + { + "epoch": 0.3406425293217746, + "grad_norm": 0.4601091146469116, + "learning_rate": 1.573019780387053e-05, + "loss": 0.49661630392074585, + "step": 1336 + }, + { + "epoch": 0.34089750127485974, + "grad_norm": 0.43999335169792175, + "learning_rate": 1.5723283912616515e-05, + "loss": 0.4958333373069763, + "step": 1337 + }, + { + "epoch": 0.34115247322794495, + "grad_norm": 0.4198979139328003, + "learning_rate": 1.5716365950443556e-05, + "loss": 0.49501991271972656, + "step": 1338 + }, + { + "epoch": 0.3414074451810301, + "grad_norm": 0.45959314703941345, + "learning_rate": 1.5709443922272326e-05, + "loss": 0.4792822599411011, + "step": 1339 + }, + { + "epoch": 0.34166241713411527, + "grad_norm": 0.47105738520622253, + "learning_rate": 1.5702517833026408e-05, + "loss": 0.4770154654979706, + "step": 1340 + }, + { + "epoch": 0.34191738908720043, + "grad_norm": 0.4640108048915863, + "learning_rate": 1.569558768763226e-05, + "loss": 0.4815002381801605, + "step": 1341 + }, + { + "epoch": 0.3421723610402856, + "grad_norm": 0.4672930836677551, + "learning_rate": 1.568865349101923e-05, + "loss": 0.4833976626396179, + "step": 1342 + }, + { + "epoch": 0.34242733299337075, + "grad_norm": 0.467336505651474, + "learning_rate": 1.5681715248119552e-05, + "loss": 0.4880855977535248, + "step": 1343 + }, + { + "epoch": 0.3426823049464559, + "grad_norm": 0.45184525847435, + "learning_rate": 1.5674772963868333e-05, + "loss": 0.5052072405815125, + "step": 1344 + }, + { + "epoch": 0.34293727689954107, + "grad_norm": 0.4374590516090393, + "learning_rate": 1.566782664320355e-05, + "loss": 0.4983629584312439, + "step": 1345 + }, + { + "epoch": 0.3431922488526262, + "grad_norm": 0.4904771149158478, + "learning_rate": 1.5660876291066066e-05, + "loss": 0.49454396963119507, + "step": 1346 + }, + { + "epoch": 0.3434472208057114, + "grad_norm": 0.43185144662857056, + "learning_rate": 1.565392191239959e-05, + "loss": 0.4821247458457947, + "step": 1347 + }, + { + "epoch": 0.34370219275879654, + "grad_norm": 0.4500340223312378, + "learning_rate": 1.564696351215072e-05, + "loss": 0.5049868226051331, + "step": 1348 + }, + { + "epoch": 0.3439571647118817, + "grad_norm": 0.44509389996528625, + "learning_rate": 1.5640001095268894e-05, + "loss": 0.4985738694667816, + "step": 1349 + }, + { + "epoch": 0.34421213666496686, + "grad_norm": 0.5017538666725159, + "learning_rate": 1.563303466670642e-05, + "loss": 0.47815823554992676, + "step": 1350 + }, + { + "epoch": 0.344467108618052, + "grad_norm": 0.4683600664138794, + "learning_rate": 1.562606423141845e-05, + "loss": 0.5037387609481812, + "step": 1351 + }, + { + "epoch": 0.3447220805711372, + "grad_norm": 0.46206045150756836, + "learning_rate": 1.561908979436299e-05, + "loss": 0.49229001998901367, + "step": 1352 + }, + { + "epoch": 0.34497705252422234, + "grad_norm": 0.4904536008834839, + "learning_rate": 1.561211136050089e-05, + "loss": 0.4933377206325531, + "step": 1353 + }, + { + "epoch": 0.3452320244773075, + "grad_norm": 0.47214558720588684, + "learning_rate": 1.5605128934795858e-05, + "loss": 0.4849925935268402, + "step": 1354 + }, + { + "epoch": 0.34548699643039266, + "grad_norm": 0.4356664717197418, + "learning_rate": 1.5598142522214417e-05, + "loss": 0.48362982273101807, + "step": 1355 + }, + { + "epoch": 0.3457419683834778, + "grad_norm": 0.5014368295669556, + "learning_rate": 1.559115212772594e-05, + "loss": 0.4885830283164978, + "step": 1356 + }, + { + "epoch": 0.345996940336563, + "grad_norm": 0.4560520052909851, + "learning_rate": 1.5584157756302635e-05, + "loss": 0.4855990409851074, + "step": 1357 + }, + { + "epoch": 0.34625191228964813, + "grad_norm": 0.44068634510040283, + "learning_rate": 1.5577159412919524e-05, + "loss": 0.4836045205593109, + "step": 1358 + }, + { + "epoch": 0.3465068842427333, + "grad_norm": 0.4414604604244232, + "learning_rate": 1.5570157102554475e-05, + "loss": 0.5005707144737244, + "step": 1359 + }, + { + "epoch": 0.34676185619581845, + "grad_norm": 0.47479185461997986, + "learning_rate": 1.556315083018816e-05, + "loss": 0.4892706573009491, + "step": 1360 + }, + { + "epoch": 0.3470168281489036, + "grad_norm": 0.4664304256439209, + "learning_rate": 1.5556140600804076e-05, + "loss": 0.48691055178642273, + "step": 1361 + }, + { + "epoch": 0.34727180010198877, + "grad_norm": 0.4774048626422882, + "learning_rate": 1.5549126419388538e-05, + "loss": 0.49023565649986267, + "step": 1362 + }, + { + "epoch": 0.34752677205507393, + "grad_norm": 0.4945896565914154, + "learning_rate": 1.554210829093066e-05, + "loss": 0.49875587224960327, + "step": 1363 + }, + { + "epoch": 0.3477817440081591, + "grad_norm": 0.49446311593055725, + "learning_rate": 1.5535086220422386e-05, + "loss": 0.5095691680908203, + "step": 1364 + }, + { + "epoch": 0.34803671596124425, + "grad_norm": 0.5272205471992493, + "learning_rate": 1.5528060212858436e-05, + "loss": 0.49271202087402344, + "step": 1365 + }, + { + "epoch": 0.3482916879143294, + "grad_norm": 0.45778605341911316, + "learning_rate": 1.5521030273236354e-05, + "loss": 0.4978727698326111, + "step": 1366 + }, + { + "epoch": 0.34854665986741457, + "grad_norm": 0.4778561592102051, + "learning_rate": 1.5513996406556465e-05, + "loss": 0.4920888841152191, + "step": 1367 + }, + { + "epoch": 0.3488016318204997, + "grad_norm": 0.47162318229675293, + "learning_rate": 1.55069586178219e-05, + "loss": 0.4907873570919037, + "step": 1368 + }, + { + "epoch": 0.3490566037735849, + "grad_norm": 0.4643825590610504, + "learning_rate": 1.5499916912038563e-05, + "loss": 0.4940566420555115, + "step": 1369 + }, + { + "epoch": 0.34931157572667004, + "grad_norm": 0.48091721534729004, + "learning_rate": 1.5492871294215162e-05, + "loss": 0.48381727933883667, + "step": 1370 + }, + { + "epoch": 0.3495665476797552, + "grad_norm": 0.4480636715888977, + "learning_rate": 1.5485821769363178e-05, + "loss": 0.4959699511528015, + "step": 1371 + }, + { + "epoch": 0.34982151963284036, + "grad_norm": 0.44783326983451843, + "learning_rate": 1.5478768342496872e-05, + "loss": 0.48227232694625854, + "step": 1372 + }, + { + "epoch": 0.3500764915859256, + "grad_norm": 0.45729002356529236, + "learning_rate": 1.547171101863328e-05, + "loss": 0.4862489700317383, + "step": 1373 + }, + { + "epoch": 0.35033146353901073, + "grad_norm": 0.4617900848388672, + "learning_rate": 1.5464649802792215e-05, + "loss": 0.4825901985168457, + "step": 1374 + }, + { + "epoch": 0.3505864354920959, + "grad_norm": 0.45521557331085205, + "learning_rate": 1.5457584699996253e-05, + "loss": 0.47123441100120544, + "step": 1375 + }, + { + "epoch": 0.35084140744518105, + "grad_norm": 0.49398496747016907, + "learning_rate": 1.5450515715270733e-05, + "loss": 0.4958046078681946, + "step": 1376 + }, + { + "epoch": 0.3510963793982662, + "grad_norm": 0.506957471370697, + "learning_rate": 1.5443442853643765e-05, + "loss": 0.5013394355773926, + "step": 1377 + }, + { + "epoch": 0.35135135135135137, + "grad_norm": 0.467107892036438, + "learning_rate": 1.5436366120146206e-05, + "loss": 0.48585793375968933, + "step": 1378 + }, + { + "epoch": 0.35160632330443653, + "grad_norm": 0.4466833174228668, + "learning_rate": 1.5429285519811672e-05, + "loss": 0.5096202492713928, + "step": 1379 + }, + { + "epoch": 0.3518612952575217, + "grad_norm": 0.5030296444892883, + "learning_rate": 1.5422201057676525e-05, + "loss": 0.4857207238674164, + "step": 1380 + }, + { + "epoch": 0.35211626721060685, + "grad_norm": 0.45916059613227844, + "learning_rate": 1.5415112738779887e-05, + "loss": 0.4840683937072754, + "step": 1381 + }, + { + "epoch": 0.352371239163692, + "grad_norm": 0.4854084849357605, + "learning_rate": 1.5408020568163602e-05, + "loss": 0.48948368430137634, + "step": 1382 + }, + { + "epoch": 0.35262621111677717, + "grad_norm": 0.49546507000923157, + "learning_rate": 1.540092455087227e-05, + "loss": 0.4880248010158539, + "step": 1383 + }, + { + "epoch": 0.3528811830698623, + "grad_norm": 0.47287800908088684, + "learning_rate": 1.539382469195323e-05, + "loss": 0.5022507905960083, + "step": 1384 + }, + { + "epoch": 0.3531361550229475, + "grad_norm": 0.4680861830711365, + "learning_rate": 1.5386720996456533e-05, + "loss": 0.4974364936351776, + "step": 1385 + }, + { + "epoch": 0.35339112697603264, + "grad_norm": 0.4794875383377075, + "learning_rate": 1.5379613469434983e-05, + "loss": 0.50507652759552, + "step": 1386 + }, + { + "epoch": 0.3536460989291178, + "grad_norm": 0.463410347700119, + "learning_rate": 1.537250211594409e-05, + "loss": 0.4969019889831543, + "step": 1387 + }, + { + "epoch": 0.35390107088220296, + "grad_norm": 0.4773065447807312, + "learning_rate": 1.53653869410421e-05, + "loss": 0.4828874468803406, + "step": 1388 + }, + { + "epoch": 0.3541560428352881, + "grad_norm": 0.5189251899719238, + "learning_rate": 1.5358267949789968e-05, + "loss": 0.4929976463317871, + "step": 1389 + }, + { + "epoch": 0.3544110147883733, + "grad_norm": 0.49028658866882324, + "learning_rate": 1.5351145147251364e-05, + "loss": 0.49443262815475464, + "step": 1390 + }, + { + "epoch": 0.35466598674145844, + "grad_norm": 0.47114813327789307, + "learning_rate": 1.5344018538492682e-05, + "loss": 0.494369238615036, + "step": 1391 + }, + { + "epoch": 0.3549209586945436, + "grad_norm": 0.4885484278202057, + "learning_rate": 1.5336888128583002e-05, + "loss": 0.48555788397789, + "step": 1392 + }, + { + "epoch": 0.35517593064762876, + "grad_norm": 0.47980996966362, + "learning_rate": 1.5329753922594122e-05, + "loss": 0.4955976903438568, + "step": 1393 + }, + { + "epoch": 0.3554309026007139, + "grad_norm": 0.4988878667354584, + "learning_rate": 1.532261592560054e-05, + "loss": 0.49415457248687744, + "step": 1394 + }, + { + "epoch": 0.3556858745537991, + "grad_norm": 0.49632182717323303, + "learning_rate": 1.5315474142679442e-05, + "loss": 0.4775877892971039, + "step": 1395 + }, + { + "epoch": 0.35594084650688423, + "grad_norm": 0.48797720670700073, + "learning_rate": 1.530832857891072e-05, + "loss": 0.4879951477050781, + "step": 1396 + }, + { + "epoch": 0.3561958184599694, + "grad_norm": 0.48648008704185486, + "learning_rate": 1.5301179239376936e-05, + "loss": 0.48652490973472595, + "step": 1397 + }, + { + "epoch": 0.35645079041305455, + "grad_norm": 0.46025142073631287, + "learning_rate": 1.529402612916336e-05, + "loss": 0.5025362968444824, + "step": 1398 + }, + { + "epoch": 0.3567057623661397, + "grad_norm": 0.4712684750556946, + "learning_rate": 1.5286869253357925e-05, + "loss": 0.48671525716781616, + "step": 1399 + }, + { + "epoch": 0.35696073431922487, + "grad_norm": 0.4850005507469177, + "learning_rate": 1.5279708617051258e-05, + "loss": 0.4923925995826721, + "step": 1400 + }, + { + "epoch": 0.35721570627231003, + "grad_norm": 0.455744206905365, + "learning_rate": 1.5272544225336646e-05, + "loss": 0.4913860261440277, + "step": 1401 + }, + { + "epoch": 0.3574706782253952, + "grad_norm": 0.4736967086791992, + "learning_rate": 1.5265376083310063e-05, + "loss": 0.4790980815887451, + "step": 1402 + }, + { + "epoch": 0.35772565017848035, + "grad_norm": 0.5233493447303772, + "learning_rate": 1.5258204196070136e-05, + "loss": 0.4792144298553467, + "step": 1403 + }, + { + "epoch": 0.3579806221315655, + "grad_norm": 0.45144742727279663, + "learning_rate": 1.5251028568718163e-05, + "loss": 0.4849224090576172, + "step": 1404 + }, + { + "epoch": 0.35823559408465067, + "grad_norm": 0.45398980379104614, + "learning_rate": 1.5243849206358099e-05, + "loss": 0.4965559244155884, + "step": 1405 + }, + { + "epoch": 0.3584905660377358, + "grad_norm": 0.49643367528915405, + "learning_rate": 1.5236666114096562e-05, + "loss": 0.4921931028366089, + "step": 1406 + }, + { + "epoch": 0.358745537990821, + "grad_norm": 0.4721378982067108, + "learning_rate": 1.5229479297042825e-05, + "loss": 0.49166640639305115, + "step": 1407 + }, + { + "epoch": 0.35900050994390614, + "grad_norm": 0.4492199718952179, + "learning_rate": 1.5222288760308792e-05, + "loss": 0.4912944734096527, + "step": 1408 + }, + { + "epoch": 0.35925548189699136, + "grad_norm": 0.4733271598815918, + "learning_rate": 1.521509450900903e-05, + "loss": 0.48487481474876404, + "step": 1409 + }, + { + "epoch": 0.3595104538500765, + "grad_norm": 0.45325836539268494, + "learning_rate": 1.520789654826075e-05, + "loss": 0.4862060546875, + "step": 1410 + }, + { + "epoch": 0.3597654258031617, + "grad_norm": 0.43667829036712646, + "learning_rate": 1.5200694883183787e-05, + "loss": 0.49290984869003296, + "step": 1411 + }, + { + "epoch": 0.36002039775624683, + "grad_norm": 0.4515577256679535, + "learning_rate": 1.519348951890062e-05, + "loss": 0.4928954541683197, + "step": 1412 + }, + { + "epoch": 0.360275369709332, + "grad_norm": 0.4574653208255768, + "learning_rate": 1.5186280460536358e-05, + "loss": 0.49473699927330017, + "step": 1413 + }, + { + "epoch": 0.36053034166241715, + "grad_norm": 0.4594244062900543, + "learning_rate": 1.5179067713218743e-05, + "loss": 0.4945780038833618, + "step": 1414 + }, + { + "epoch": 0.3607853136155023, + "grad_norm": 0.46252748370170593, + "learning_rate": 1.517185128207813e-05, + "loss": 0.47525572776794434, + "step": 1415 + }, + { + "epoch": 0.36104028556858747, + "grad_norm": 0.4991248846054077, + "learning_rate": 1.5164631172247501e-05, + "loss": 0.48477134108543396, + "step": 1416 + }, + { + "epoch": 0.36129525752167263, + "grad_norm": 0.45575839281082153, + "learning_rate": 1.5157407388862452e-05, + "loss": 0.4752116799354553, + "step": 1417 + }, + { + "epoch": 0.3615502294747578, + "grad_norm": 0.48317432403564453, + "learning_rate": 1.5150179937061196e-05, + "loss": 0.48912566900253296, + "step": 1418 + }, + { + "epoch": 0.36180520142784295, + "grad_norm": 0.7431215643882751, + "learning_rate": 1.5142948821984553e-05, + "loss": 0.4992019534111023, + "step": 1419 + }, + { + "epoch": 0.3620601733809281, + "grad_norm": 0.45986902713775635, + "learning_rate": 1.513571404877595e-05, + "loss": 0.498704195022583, + "step": 1420 + }, + { + "epoch": 0.36231514533401327, + "grad_norm": 0.457722544670105, + "learning_rate": 1.5128475622581404e-05, + "loss": 0.48822641372680664, + "step": 1421 + }, + { + "epoch": 0.3625701172870984, + "grad_norm": 0.4633914828300476, + "learning_rate": 1.512123354854955e-05, + "loss": 0.4874158799648285, + "step": 1422 + }, + { + "epoch": 0.3628250892401836, + "grad_norm": 0.48305845260620117, + "learning_rate": 1.5113987831831609e-05, + "loss": 0.49463486671447754, + "step": 1423 + }, + { + "epoch": 0.36308006119326874, + "grad_norm": 0.44547805190086365, + "learning_rate": 1.5106738477581388e-05, + "loss": 0.4956837296485901, + "step": 1424 + }, + { + "epoch": 0.3633350331463539, + "grad_norm": 0.4877088963985443, + "learning_rate": 1.5099485490955288e-05, + "loss": 0.47981151938438416, + "step": 1425 + }, + { + "epoch": 0.36359000509943906, + "grad_norm": 0.44127556681632996, + "learning_rate": 1.509222887711229e-05, + "loss": 0.4929065704345703, + "step": 1426 + }, + { + "epoch": 0.3638449770525242, + "grad_norm": 0.47561880946159363, + "learning_rate": 1.5084968641213958e-05, + "loss": 0.48719948530197144, + "step": 1427 + }, + { + "epoch": 0.3640999490056094, + "grad_norm": 0.4477303922176361, + "learning_rate": 1.507770478842443e-05, + "loss": 0.4831518828868866, + "step": 1428 + }, + { + "epoch": 0.36435492095869454, + "grad_norm": 0.45889630913734436, + "learning_rate": 1.5070437323910414e-05, + "loss": 0.4897419810295105, + "step": 1429 + }, + { + "epoch": 0.3646098929117797, + "grad_norm": 0.46235835552215576, + "learning_rate": 1.5063166252841197e-05, + "loss": 0.48954832553863525, + "step": 1430 + }, + { + "epoch": 0.36486486486486486, + "grad_norm": 0.4915541410446167, + "learning_rate": 1.5055891580388617e-05, + "loss": 0.49446234107017517, + "step": 1431 + }, + { + "epoch": 0.36511983681795, + "grad_norm": 0.44475996494293213, + "learning_rate": 1.5048613311727089e-05, + "loss": 0.489834725856781, + "step": 1432 + }, + { + "epoch": 0.3653748087710352, + "grad_norm": 0.5123434066772461, + "learning_rate": 1.5041331452033572e-05, + "loss": 0.48975151777267456, + "step": 1433 + }, + { + "epoch": 0.36562978072412033, + "grad_norm": 0.45602652430534363, + "learning_rate": 1.5034046006487591e-05, + "loss": 0.49672383069992065, + "step": 1434 + }, + { + "epoch": 0.3658847526772055, + "grad_norm": 0.5002124905586243, + "learning_rate": 1.502675698027121e-05, + "loss": 0.48555001616477966, + "step": 1435 + }, + { + "epoch": 0.36613972463029065, + "grad_norm": 0.46187663078308105, + "learning_rate": 1.5019464378569054e-05, + "loss": 0.4871925115585327, + "step": 1436 + }, + { + "epoch": 0.3663946965833758, + "grad_norm": 0.4582308232784271, + "learning_rate": 1.501216820656827e-05, + "loss": 0.4774112105369568, + "step": 1437 + }, + { + "epoch": 0.36664966853646097, + "grad_norm": 0.44312283396720886, + "learning_rate": 1.5004868469458572e-05, + "loss": 0.4840548038482666, + "step": 1438 + }, + { + "epoch": 0.36690464048954613, + "grad_norm": 0.4665316939353943, + "learning_rate": 1.4997565172432189e-05, + "loss": 0.5036348700523376, + "step": 1439 + }, + { + "epoch": 0.3671596124426313, + "grad_norm": 0.4862591326236725, + "learning_rate": 1.4990258320683888e-05, + "loss": 0.48736369609832764, + "step": 1440 + }, + { + "epoch": 0.36741458439571645, + "grad_norm": 0.4475901424884796, + "learning_rate": 1.4982947919410966e-05, + "loss": 0.4928306043148041, + "step": 1441 + }, + { + "epoch": 0.3676695563488016, + "grad_norm": 0.4491722285747528, + "learning_rate": 1.4975633973813244e-05, + "loss": 0.47156596183776855, + "step": 1442 + }, + { + "epoch": 0.36792452830188677, + "grad_norm": 0.4628240168094635, + "learning_rate": 1.4968316489093067e-05, + "loss": 0.5090243220329285, + "step": 1443 + }, + { + "epoch": 0.3681795002549719, + "grad_norm": 0.4485695958137512, + "learning_rate": 1.4960995470455288e-05, + "loss": 0.5036436319351196, + "step": 1444 + }, + { + "epoch": 0.36843447220805714, + "grad_norm": 0.439031720161438, + "learning_rate": 1.4953670923107284e-05, + "loss": 0.4707303047180176, + "step": 1445 + }, + { + "epoch": 0.3686894441611423, + "grad_norm": 0.4461120069026947, + "learning_rate": 1.4946342852258937e-05, + "loss": 0.4881828725337982, + "step": 1446 + }, + { + "epoch": 0.36894441611422746, + "grad_norm": 0.5070061683654785, + "learning_rate": 1.4939011263122635e-05, + "loss": 0.49580106139183044, + "step": 1447 + }, + { + "epoch": 0.3691993880673126, + "grad_norm": 0.4657633900642395, + "learning_rate": 1.4931676160913276e-05, + "loss": 0.48893028497695923, + "step": 1448 + }, + { + "epoch": 0.3694543600203978, + "grad_norm": 0.44490692019462585, + "learning_rate": 1.4924337550848238e-05, + "loss": 0.4781566262245178, + "step": 1449 + }, + { + "epoch": 0.36970933197348294, + "grad_norm": 0.4616687595844269, + "learning_rate": 1.4916995438147417e-05, + "loss": 0.4804666042327881, + "step": 1450 + }, + { + "epoch": 0.3699643039265681, + "grad_norm": 0.49199023842811584, + "learning_rate": 1.4909649828033188e-05, + "loss": 0.49582383036613464, + "step": 1451 + }, + { + "epoch": 0.37021927587965325, + "grad_norm": 0.4535580277442932, + "learning_rate": 1.4902300725730414e-05, + "loss": 0.4697763919830322, + "step": 1452 + }, + { + "epoch": 0.3704742478327384, + "grad_norm": 0.5966619253158569, + "learning_rate": 1.489494813646644e-05, + "loss": 0.48284289240837097, + "step": 1453 + }, + { + "epoch": 0.37072921978582357, + "grad_norm": 0.4896046817302704, + "learning_rate": 1.48875920654711e-05, + "loss": 0.4833661913871765, + "step": 1454 + }, + { + "epoch": 0.37098419173890873, + "grad_norm": 0.4610314965248108, + "learning_rate": 1.48802325179767e-05, + "loss": 0.48917660117149353, + "step": 1455 + }, + { + "epoch": 0.3712391636919939, + "grad_norm": 0.44801920652389526, + "learning_rate": 1.4872869499218015e-05, + "loss": 0.48730477690696716, + "step": 1456 + }, + { + "epoch": 0.37149413564507905, + "grad_norm": 0.47437381744384766, + "learning_rate": 1.4865503014432291e-05, + "loss": 0.4959720969200134, + "step": 1457 + }, + { + "epoch": 0.3717491075981642, + "grad_norm": 0.4891728162765503, + "learning_rate": 1.4858133068859245e-05, + "loss": 0.4845280349254608, + "step": 1458 + }, + { + "epoch": 0.37200407955124937, + "grad_norm": 0.46612614393234253, + "learning_rate": 1.4850759667741046e-05, + "loss": 0.4805614948272705, + "step": 1459 + }, + { + "epoch": 0.3722590515043345, + "grad_norm": 0.4583685100078583, + "learning_rate": 1.4843382816322331e-05, + "loss": 0.47898387908935547, + "step": 1460 + }, + { + "epoch": 0.3725140234574197, + "grad_norm": 0.4775019586086273, + "learning_rate": 1.4836002519850183e-05, + "loss": 0.47897419333457947, + "step": 1461 + }, + { + "epoch": 0.37276899541050484, + "grad_norm": 0.46514609456062317, + "learning_rate": 1.482861878357414e-05, + "loss": 0.4846462607383728, + "step": 1462 + }, + { + "epoch": 0.37302396736359, + "grad_norm": 0.4577699303627014, + "learning_rate": 1.4821231612746185e-05, + "loss": 0.4773539900779724, + "step": 1463 + }, + { + "epoch": 0.37327893931667516, + "grad_norm": 0.47564896941185, + "learning_rate": 1.4813841012620746e-05, + "loss": 0.48534271121025085, + "step": 1464 + }, + { + "epoch": 0.3735339112697603, + "grad_norm": 0.4468478858470917, + "learning_rate": 1.4806446988454683e-05, + "loss": 0.4761880040168762, + "step": 1465 + }, + { + "epoch": 0.3737888832228455, + "grad_norm": 0.4458772838115692, + "learning_rate": 1.4799049545507305e-05, + "loss": 0.47753477096557617, + "step": 1466 + }, + { + "epoch": 0.37404385517593064, + "grad_norm": 0.4572882354259491, + "learning_rate": 1.479164868904034e-05, + "loss": 0.4821872115135193, + "step": 1467 + }, + { + "epoch": 0.3742988271290158, + "grad_norm": 0.4762170910835266, + "learning_rate": 1.478424442431795e-05, + "loss": 0.47601696848869324, + "step": 1468 + }, + { + "epoch": 0.37455379908210096, + "grad_norm": 0.4635324776172638, + "learning_rate": 1.477683675660672e-05, + "loss": 0.4912150502204895, + "step": 1469 + }, + { + "epoch": 0.3748087710351861, + "grad_norm": 0.43993592262268066, + "learning_rate": 1.4769425691175652e-05, + "loss": 0.47345566749572754, + "step": 1470 + }, + { + "epoch": 0.3750637429882713, + "grad_norm": 0.4620898962020874, + "learning_rate": 1.4762011233296174e-05, + "loss": 0.4752131402492523, + "step": 1471 + }, + { + "epoch": 0.37531871494135643, + "grad_norm": 0.46631693840026855, + "learning_rate": 1.4754593388242117e-05, + "loss": 0.49605101346969604, + "step": 1472 + }, + { + "epoch": 0.3755736868944416, + "grad_norm": 0.43644899129867554, + "learning_rate": 1.4747172161289732e-05, + "loss": 0.4809049367904663, + "step": 1473 + }, + { + "epoch": 0.37582865884752675, + "grad_norm": 0.4631332457065582, + "learning_rate": 1.473974755771766e-05, + "loss": 0.4931192398071289, + "step": 1474 + }, + { + "epoch": 0.3760836308006119, + "grad_norm": 0.4428750276565552, + "learning_rate": 1.473231958280696e-05, + "loss": 0.48310884833335876, + "step": 1475 + }, + { + "epoch": 0.37633860275369707, + "grad_norm": 0.4648493230342865, + "learning_rate": 1.472488824184108e-05, + "loss": 0.47687384486198425, + "step": 1476 + }, + { + "epoch": 0.37659357470678223, + "grad_norm": 0.47747692465782166, + "learning_rate": 1.4717453540105862e-05, + "loss": 0.49597570300102234, + "step": 1477 + }, + { + "epoch": 0.3768485466598674, + "grad_norm": 0.45823588967323303, + "learning_rate": 1.4710015482889544e-05, + "loss": 0.4796806573867798, + "step": 1478 + }, + { + "epoch": 0.37710351861295255, + "grad_norm": 0.4600730538368225, + "learning_rate": 1.4702574075482746e-05, + "loss": 0.48539218306541443, + "step": 1479 + }, + { + "epoch": 0.37735849056603776, + "grad_norm": 0.4480803906917572, + "learning_rate": 1.469512932317847e-05, + "loss": 0.488616406917572, + "step": 1480 + }, + { + "epoch": 0.3776134625191229, + "grad_norm": 0.48798975348472595, + "learning_rate": 1.4687681231272104e-05, + "loss": 0.4985082149505615, + "step": 1481 + }, + { + "epoch": 0.3778684344722081, + "grad_norm": 0.45104506611824036, + "learning_rate": 1.4680229805061399e-05, + "loss": 0.4792388379573822, + "step": 1482 + }, + { + "epoch": 0.37812340642529324, + "grad_norm": 0.44524678587913513, + "learning_rate": 1.4672775049846498e-05, + "loss": 0.47451266646385193, + "step": 1483 + }, + { + "epoch": 0.3783783783783784, + "grad_norm": 0.46526607871055603, + "learning_rate": 1.4665316970929888e-05, + "loss": 0.48592525720596313, + "step": 1484 + }, + { + "epoch": 0.37863335033146356, + "grad_norm": 0.4819578230381012, + "learning_rate": 1.4657855573616437e-05, + "loss": 0.4838479459285736, + "step": 1485 + }, + { + "epoch": 0.3788883222845487, + "grad_norm": 0.44666457176208496, + "learning_rate": 1.4650390863213366e-05, + "loss": 0.49645447731018066, + "step": 1486 + }, + { + "epoch": 0.3791432942376339, + "grad_norm": 0.45488491654396057, + "learning_rate": 1.4642922845030258e-05, + "loss": 0.4942684769630432, + "step": 1487 + }, + { + "epoch": 0.37939826619071904, + "grad_norm": 0.47846585512161255, + "learning_rate": 1.4635451524379043e-05, + "loss": 0.48699718713760376, + "step": 1488 + }, + { + "epoch": 0.3796532381438042, + "grad_norm": 0.46078893542289734, + "learning_rate": 1.4627976906573999e-05, + "loss": 0.48280471563339233, + "step": 1489 + }, + { + "epoch": 0.37990821009688935, + "grad_norm": 0.4552493989467621, + "learning_rate": 1.462049899693176e-05, + "loss": 0.48949164152145386, + "step": 1490 + }, + { + "epoch": 0.3801631820499745, + "grad_norm": 0.4367724061012268, + "learning_rate": 1.4613017800771286e-05, + "loss": 0.49780935049057007, + "step": 1491 + }, + { + "epoch": 0.38041815400305967, + "grad_norm": 0.4415394961833954, + "learning_rate": 1.4605533323413887e-05, + "loss": 0.5012547969818115, + "step": 1492 + }, + { + "epoch": 0.38067312595614483, + "grad_norm": 0.45368072390556335, + "learning_rate": 1.4598045570183203e-05, + "loss": 0.47883474826812744, + "step": 1493 + }, + { + "epoch": 0.38092809790923, + "grad_norm": 0.4611485004425049, + "learning_rate": 1.4590554546405203e-05, + "loss": 0.4865252673625946, + "step": 1494 + }, + { + "epoch": 0.38118306986231515, + "grad_norm": 0.47810161113739014, + "learning_rate": 1.4583060257408183e-05, + "loss": 0.47490066289901733, + "step": 1495 + }, + { + "epoch": 0.3814380418154003, + "grad_norm": 0.45693275332450867, + "learning_rate": 1.4575562708522761e-05, + "loss": 0.4761950671672821, + "step": 1496 + }, + { + "epoch": 0.38169301376848547, + "grad_norm": 0.4426383972167969, + "learning_rate": 1.4568061905081874e-05, + "loss": 0.4906729459762573, + "step": 1497 + }, + { + "epoch": 0.3819479857215706, + "grad_norm": 0.45328155159950256, + "learning_rate": 1.4560557852420773e-05, + "loss": 0.4902084767818451, + "step": 1498 + }, + { + "epoch": 0.3822029576746558, + "grad_norm": 0.4416186511516571, + "learning_rate": 1.4553050555877026e-05, + "loss": 0.5001434087753296, + "step": 1499 + }, + { + "epoch": 0.38245792962774094, + "grad_norm": 0.44836804270744324, + "learning_rate": 1.45455400207905e-05, + "loss": 0.49423956871032715, + "step": 1500 + }, + { + "epoch": 0.3827129015808261, + "grad_norm": 0.4632645547389984, + "learning_rate": 1.4538026252503367e-05, + "loss": 0.47067809104919434, + "step": 1501 + }, + { + "epoch": 0.38296787353391126, + "grad_norm": 0.4599841833114624, + "learning_rate": 1.4530509256360104e-05, + "loss": 0.4771389365196228, + "step": 1502 + }, + { + "epoch": 0.3832228454869964, + "grad_norm": 0.4621497094631195, + "learning_rate": 1.4522989037707482e-05, + "loss": 0.49022993445396423, + "step": 1503 + }, + { + "epoch": 0.3834778174400816, + "grad_norm": 0.4501090347766876, + "learning_rate": 1.4515465601894561e-05, + "loss": 0.5076841115951538, + "step": 1504 + }, + { + "epoch": 0.38373278939316674, + "grad_norm": 0.4534502923488617, + "learning_rate": 1.4507938954272693e-05, + "loss": 0.4904021918773651, + "step": 1505 + }, + { + "epoch": 0.3839877613462519, + "grad_norm": 0.46439942717552185, + "learning_rate": 1.450040910019551e-05, + "loss": 0.4915975332260132, + "step": 1506 + }, + { + "epoch": 0.38424273329933706, + "grad_norm": 0.47011348605155945, + "learning_rate": 1.449287604501893e-05, + "loss": 0.4765165448188782, + "step": 1507 + }, + { + "epoch": 0.3844977052524222, + "grad_norm": 0.45521828532218933, + "learning_rate": 1.4485339794101144e-05, + "loss": 0.49013710021972656, + "step": 1508 + }, + { + "epoch": 0.3847526772055074, + "grad_norm": 0.4601080119609833, + "learning_rate": 1.4477800352802623e-05, + "loss": 0.47809481620788574, + "step": 1509 + }, + { + "epoch": 0.38500764915859254, + "grad_norm": 0.462210088968277, + "learning_rate": 1.447025772648609e-05, + "loss": 0.48346203565597534, + "step": 1510 + }, + { + "epoch": 0.3852626211116777, + "grad_norm": 0.4716843366622925, + "learning_rate": 1.4462711920516557e-05, + "loss": 0.4802989363670349, + "step": 1511 + }, + { + "epoch": 0.38551759306476285, + "grad_norm": 0.6230674386024475, + "learning_rate": 1.4455162940261284e-05, + "loss": 0.47767776250839233, + "step": 1512 + }, + { + "epoch": 0.385772565017848, + "grad_norm": 0.44257497787475586, + "learning_rate": 1.4447610791089785e-05, + "loss": 0.47903773188591003, + "step": 1513 + }, + { + "epoch": 0.38602753697093317, + "grad_norm": 0.4705200493335724, + "learning_rate": 1.4440055478373839e-05, + "loss": 0.4785939157009125, + "step": 1514 + }, + { + "epoch": 0.38628250892401833, + "grad_norm": 0.4540731906890869, + "learning_rate": 1.443249700748747e-05, + "loss": 0.47946688532829285, + "step": 1515 + }, + { + "epoch": 0.38653748087710355, + "grad_norm": 0.4631092846393585, + "learning_rate": 1.4424935383806946e-05, + "loss": 0.48664236068725586, + "step": 1516 + }, + { + "epoch": 0.3867924528301887, + "grad_norm": 0.4636557102203369, + "learning_rate": 1.441737061271078e-05, + "loss": 0.4810870289802551, + "step": 1517 + }, + { + "epoch": 0.38704742478327386, + "grad_norm": 0.4769573509693146, + "learning_rate": 1.4409802699579725e-05, + "loss": 0.4827399253845215, + "step": 1518 + }, + { + "epoch": 0.387302396736359, + "grad_norm": 0.46991199254989624, + "learning_rate": 1.4402231649796769e-05, + "loss": 0.5055210590362549, + "step": 1519 + }, + { + "epoch": 0.3875573686894442, + "grad_norm": 0.47453632950782776, + "learning_rate": 1.4394657468747129e-05, + "loss": 0.48708269000053406, + "step": 1520 + }, + { + "epoch": 0.38781234064252934, + "grad_norm": 0.44457998871803284, + "learning_rate": 1.438708016181825e-05, + "loss": 0.4817838668823242, + "step": 1521 + }, + { + "epoch": 0.3880673125956145, + "grad_norm": 0.45942017436027527, + "learning_rate": 1.4379499734399797e-05, + "loss": 0.4798167943954468, + "step": 1522 + }, + { + "epoch": 0.38832228454869966, + "grad_norm": 0.46707138419151306, + "learning_rate": 1.4371916191883663e-05, + "loss": 0.46871715784072876, + "step": 1523 + }, + { + "epoch": 0.3885772565017848, + "grad_norm": 0.4826716184616089, + "learning_rate": 1.4364329539663949e-05, + "loss": 0.4892393946647644, + "step": 1524 + }, + { + "epoch": 0.38883222845487, + "grad_norm": 0.4482455849647522, + "learning_rate": 1.4356739783136972e-05, + "loss": 0.46966874599456787, + "step": 1525 + }, + { + "epoch": 0.38908720040795514, + "grad_norm": 0.4489781856536865, + "learning_rate": 1.4349146927701252e-05, + "loss": 0.47867918014526367, + "step": 1526 + }, + { + "epoch": 0.3893421723610403, + "grad_norm": 0.47932168841362, + "learning_rate": 1.4341550978757521e-05, + "loss": 0.4873637557029724, + "step": 1527 + }, + { + "epoch": 0.38959714431412545, + "grad_norm": 0.44425198435783386, + "learning_rate": 1.4333951941708706e-05, + "loss": 0.4994814395904541, + "step": 1528 + }, + { + "epoch": 0.3898521162672106, + "grad_norm": 0.45439931750297546, + "learning_rate": 1.4326349821959933e-05, + "loss": 0.47517848014831543, + "step": 1529 + }, + { + "epoch": 0.3901070882202958, + "grad_norm": 0.4534931182861328, + "learning_rate": 1.4318744624918514e-05, + "loss": 0.48021823167800903, + "step": 1530 + }, + { + "epoch": 0.39036206017338093, + "grad_norm": 0.583962082862854, + "learning_rate": 1.4311136355993966e-05, + "loss": 0.49174126982688904, + "step": 1531 + }, + { + "epoch": 0.3906170321264661, + "grad_norm": 0.46215638518333435, + "learning_rate": 1.430352502059797e-05, + "loss": 0.4798983037471771, + "step": 1532 + }, + { + "epoch": 0.39087200407955125, + "grad_norm": 0.5120609998703003, + "learning_rate": 1.4295910624144402e-05, + "loss": 0.4900760054588318, + "step": 1533 + }, + { + "epoch": 0.3911269760326364, + "grad_norm": 0.45972445607185364, + "learning_rate": 1.4288293172049313e-05, + "loss": 0.47815102338790894, + "step": 1534 + }, + { + "epoch": 0.39138194798572157, + "grad_norm": 0.4667312800884247, + "learning_rate": 1.4280672669730927e-05, + "loss": 0.496964693069458, + "step": 1535 + }, + { + "epoch": 0.3916369199388067, + "grad_norm": 0.47719672322273254, + "learning_rate": 1.4273049122609633e-05, + "loss": 0.48423081636428833, + "step": 1536 + }, + { + "epoch": 0.3918918918918919, + "grad_norm": 0.48460742831230164, + "learning_rate": 1.4265422536107994e-05, + "loss": 0.48138755559921265, + "step": 1537 + }, + { + "epoch": 0.39214686384497704, + "grad_norm": 0.5079189538955688, + "learning_rate": 1.4257792915650728e-05, + "loss": 0.4881475567817688, + "step": 1538 + }, + { + "epoch": 0.3924018357980622, + "grad_norm": 0.4590676724910736, + "learning_rate": 1.4250160266664712e-05, + "loss": 0.49422332644462585, + "step": 1539 + }, + { + "epoch": 0.39265680775114736, + "grad_norm": 0.4414307177066803, + "learning_rate": 1.4242524594578984e-05, + "loss": 0.48674750328063965, + "step": 1540 + }, + { + "epoch": 0.3929117797042325, + "grad_norm": 0.4570107161998749, + "learning_rate": 1.4234885904824725e-05, + "loss": 0.48515844345092773, + "step": 1541 + }, + { + "epoch": 0.3931667516573177, + "grad_norm": 0.47521156072616577, + "learning_rate": 1.4227244202835258e-05, + "loss": 0.4922315776348114, + "step": 1542 + }, + { + "epoch": 0.39342172361040284, + "grad_norm": 0.4279789328575134, + "learning_rate": 1.4219599494046062e-05, + "loss": 0.47620928287506104, + "step": 1543 + }, + { + "epoch": 0.393676695563488, + "grad_norm": 0.47957128286361694, + "learning_rate": 1.4211951783894749e-05, + "loss": 0.47388386726379395, + "step": 1544 + }, + { + "epoch": 0.39393166751657316, + "grad_norm": 0.45006492733955383, + "learning_rate": 1.4204301077821059e-05, + "loss": 0.46949005126953125, + "step": 1545 + }, + { + "epoch": 0.3941866394696583, + "grad_norm": 0.5271633863449097, + "learning_rate": 1.419664738126687e-05, + "loss": 0.49612054228782654, + "step": 1546 + }, + { + "epoch": 0.3944416114227435, + "grad_norm": 0.45255032181739807, + "learning_rate": 1.4188990699676186e-05, + "loss": 0.481834352016449, + "step": 1547 + }, + { + "epoch": 0.39469658337582864, + "grad_norm": 0.46046289801597595, + "learning_rate": 1.4181331038495136e-05, + "loss": 0.4820711016654968, + "step": 1548 + }, + { + "epoch": 0.3949515553289138, + "grad_norm": 0.4877573549747467, + "learning_rate": 1.4173668403171962e-05, + "loss": 0.48516762256622314, + "step": 1549 + }, + { + "epoch": 0.39520652728199895, + "grad_norm": 0.49049946665763855, + "learning_rate": 1.4166002799157032e-05, + "loss": 0.48174232244491577, + "step": 1550 + }, + { + "epoch": 0.39546149923508417, + "grad_norm": 0.46354466676712036, + "learning_rate": 1.4158334231902812e-05, + "loss": 0.47682464122772217, + "step": 1551 + }, + { + "epoch": 0.3957164711881693, + "grad_norm": 0.49040186405181885, + "learning_rate": 1.415066270686389e-05, + "loss": 0.48382410407066345, + "step": 1552 + }, + { + "epoch": 0.3959714431412545, + "grad_norm": 0.43498021364212036, + "learning_rate": 1.4142988229496945e-05, + "loss": 0.49451208114624023, + "step": 1553 + }, + { + "epoch": 0.39622641509433965, + "grad_norm": 0.5171186923980713, + "learning_rate": 1.4135310805260766e-05, + "loss": 0.48215898871421814, + "step": 1554 + }, + { + "epoch": 0.3964813870474248, + "grad_norm": 0.4854249954223633, + "learning_rate": 1.4127630439616231e-05, + "loss": 0.48182177543640137, + "step": 1555 + }, + { + "epoch": 0.39673635900050996, + "grad_norm": 0.4441761374473572, + "learning_rate": 1.4119947138026313e-05, + "loss": 0.47478580474853516, + "step": 1556 + }, + { + "epoch": 0.3969913309535951, + "grad_norm": 0.4775533974170685, + "learning_rate": 1.4112260905956079e-05, + "loss": 0.47552570700645447, + "step": 1557 + }, + { + "epoch": 0.3972463029066803, + "grad_norm": 0.5103408694267273, + "learning_rate": 1.4104571748872667e-05, + "loss": 0.502683162689209, + "step": 1558 + }, + { + "epoch": 0.39750127485976544, + "grad_norm": 0.4726421535015106, + "learning_rate": 1.4096879672245305e-05, + "loss": 0.47500142455101013, + "step": 1559 + }, + { + "epoch": 0.3977562468128506, + "grad_norm": 0.4710962176322937, + "learning_rate": 1.4089184681545303e-05, + "loss": 0.49768611788749695, + "step": 1560 + }, + { + "epoch": 0.39801121876593576, + "grad_norm": 0.509074330329895, + "learning_rate": 1.4081486782246027e-05, + "loss": 0.4758099913597107, + "step": 1561 + }, + { + "epoch": 0.3982661907190209, + "grad_norm": 0.4465167224407196, + "learning_rate": 1.4073785979822929e-05, + "loss": 0.49819037318229675, + "step": 1562 + }, + { + "epoch": 0.3985211626721061, + "grad_norm": 0.4520632326602936, + "learning_rate": 1.4066082279753515e-05, + "loss": 0.4983295798301697, + "step": 1563 + }, + { + "epoch": 0.39877613462519124, + "grad_norm": 0.46213945746421814, + "learning_rate": 1.4058375687517355e-05, + "loss": 0.46227526664733887, + "step": 1564 + }, + { + "epoch": 0.3990311065782764, + "grad_norm": 0.4812057614326477, + "learning_rate": 1.405066620859608e-05, + "loss": 0.48829787969589233, + "step": 1565 + }, + { + "epoch": 0.39928607853136155, + "grad_norm": 0.7864221930503845, + "learning_rate": 1.4042953848473368e-05, + "loss": 0.4807780683040619, + "step": 1566 + }, + { + "epoch": 0.3995410504844467, + "grad_norm": 0.46031373739242554, + "learning_rate": 1.4035238612634951e-05, + "loss": 0.49220967292785645, + "step": 1567 + }, + { + "epoch": 0.3997960224375319, + "grad_norm": 0.478533536195755, + "learning_rate": 1.4027520506568605e-05, + "loss": 0.4768020808696747, + "step": 1568 + }, + { + "epoch": 0.40005099439061703, + "grad_norm": 0.45310717821121216, + "learning_rate": 1.401979953576415e-05, + "loss": 0.4713541865348816, + "step": 1569 + }, + { + "epoch": 0.4003059663437022, + "grad_norm": 0.4644032120704651, + "learning_rate": 1.4012075705713434e-05, + "loss": 0.4951431155204773, + "step": 1570 + }, + { + "epoch": 0.40056093829678735, + "grad_norm": 0.4290669858455658, + "learning_rate": 1.4004349021910353e-05, + "loss": 0.47089534997940063, + "step": 1571 + }, + { + "epoch": 0.4008159102498725, + "grad_norm": 0.46978506445884705, + "learning_rate": 1.3996619489850822e-05, + "loss": 0.4897878170013428, + "step": 1572 + }, + { + "epoch": 0.40107088220295767, + "grad_norm": 0.4528856873512268, + "learning_rate": 1.398888711503279e-05, + "loss": 0.48548150062561035, + "step": 1573 + }, + { + "epoch": 0.4013258541560428, + "grad_norm": 0.4511377811431885, + "learning_rate": 1.3981151902956217e-05, + "loss": 0.4705016016960144, + "step": 1574 + }, + { + "epoch": 0.401580826109128, + "grad_norm": 0.43902209401130676, + "learning_rate": 1.3973413859123092e-05, + "loss": 0.48379310965538025, + "step": 1575 + }, + { + "epoch": 0.40183579806221315, + "grad_norm": 0.45791682600975037, + "learning_rate": 1.3965672989037417e-05, + "loss": 0.48469385504722595, + "step": 1576 + }, + { + "epoch": 0.4020907700152983, + "grad_norm": 0.4580422341823578, + "learning_rate": 1.3957929298205195e-05, + "loss": 0.48687800765037537, + "step": 1577 + }, + { + "epoch": 0.40234574196838346, + "grad_norm": 0.47267118096351624, + "learning_rate": 1.3950182792134446e-05, + "loss": 0.4877247214317322, + "step": 1578 + }, + { + "epoch": 0.4026007139214686, + "grad_norm": 0.4548337757587433, + "learning_rate": 1.3942433476335184e-05, + "loss": 0.48541364073753357, + "step": 1579 + }, + { + "epoch": 0.4028556858745538, + "grad_norm": 0.43207213282585144, + "learning_rate": 1.393468135631943e-05, + "loss": 0.4798010587692261, + "step": 1580 + }, + { + "epoch": 0.40311065782763894, + "grad_norm": 0.4571256637573242, + "learning_rate": 1.3926926437601197e-05, + "loss": 0.4921513497829437, + "step": 1581 + }, + { + "epoch": 0.4033656297807241, + "grad_norm": 0.48809128999710083, + "learning_rate": 1.3919168725696478e-05, + "loss": 0.48802515864372253, + "step": 1582 + }, + { + "epoch": 0.40362060173380926, + "grad_norm": 0.45787227153778076, + "learning_rate": 1.391140822612327e-05, + "loss": 0.49700865149497986, + "step": 1583 + }, + { + "epoch": 0.4038755736868944, + "grad_norm": 0.44923320412635803, + "learning_rate": 1.3903644944401545e-05, + "loss": 0.4944605529308319, + "step": 1584 + }, + { + "epoch": 0.4041305456399796, + "grad_norm": 0.45642563700675964, + "learning_rate": 1.3895878886053246e-05, + "loss": 0.4735734462738037, + "step": 1585 + }, + { + "epoch": 0.40438551759306474, + "grad_norm": 0.4567015767097473, + "learning_rate": 1.3888110056602308e-05, + "loss": 0.4728502035140991, + "step": 1586 + }, + { + "epoch": 0.40464048954614995, + "grad_norm": 0.4450572729110718, + "learning_rate": 1.388033846157462e-05, + "loss": 0.49534106254577637, + "step": 1587 + }, + { + "epoch": 0.4048954614992351, + "grad_norm": 0.44211024045944214, + "learning_rate": 1.3872564106498047e-05, + "loss": 0.47716912627220154, + "step": 1588 + }, + { + "epoch": 0.40515043345232027, + "grad_norm": 0.4511987268924713, + "learning_rate": 1.3864786996902423e-05, + "loss": 0.4985756278038025, + "step": 1589 + }, + { + "epoch": 0.40540540540540543, + "grad_norm": 0.4538654088973999, + "learning_rate": 1.3857007138319527e-05, + "loss": 0.4770917296409607, + "step": 1590 + }, + { + "epoch": 0.4056603773584906, + "grad_norm": 0.43509507179260254, + "learning_rate": 1.3849224536283097e-05, + "loss": 0.48660558462142944, + "step": 1591 + }, + { + "epoch": 0.40591534931157575, + "grad_norm": 0.4530210494995117, + "learning_rate": 1.3841439196328836e-05, + "loss": 0.49161213636398315, + "step": 1592 + }, + { + "epoch": 0.4061703212646609, + "grad_norm": 0.47141772508621216, + "learning_rate": 1.3833651123994379e-05, + "loss": 0.4738878905773163, + "step": 1593 + }, + { + "epoch": 0.40642529321774606, + "grad_norm": 0.46208295226097107, + "learning_rate": 1.3825860324819308e-05, + "loss": 0.48128652572631836, + "step": 1594 + }, + { + "epoch": 0.4066802651708312, + "grad_norm": 0.44921064376831055, + "learning_rate": 1.3818066804345151e-05, + "loss": 0.49665015935897827, + "step": 1595 + }, + { + "epoch": 0.4069352371239164, + "grad_norm": 0.47046971321105957, + "learning_rate": 1.3810270568115366e-05, + "loss": 0.46492284536361694, + "step": 1596 + }, + { + "epoch": 0.40719020907700154, + "grad_norm": 0.4615946412086487, + "learning_rate": 1.3802471621675337e-05, + "loss": 0.48955702781677246, + "step": 1597 + }, + { + "epoch": 0.4074451810300867, + "grad_norm": 0.46036478877067566, + "learning_rate": 1.3794669970572395e-05, + "loss": 0.48940813541412354, + "step": 1598 + }, + { + "epoch": 0.40770015298317186, + "grad_norm": 0.43279993534088135, + "learning_rate": 1.378686562035577e-05, + "loss": 0.480970561504364, + "step": 1599 + }, + { + "epoch": 0.407955124936257, + "grad_norm": 0.45972615480422974, + "learning_rate": 1.377905857657663e-05, + "loss": 0.4687748849391937, + "step": 1600 + }, + { + "epoch": 0.4082100968893422, + "grad_norm": 0.4757559895515442, + "learning_rate": 1.3771248844788055e-05, + "loss": 0.4859820306301117, + "step": 1601 + }, + { + "epoch": 0.40846506884242734, + "grad_norm": 0.4991218149662018, + "learning_rate": 1.3763436430545034e-05, + "loss": 0.47516581416130066, + "step": 1602 + }, + { + "epoch": 0.4087200407955125, + "grad_norm": 0.45847487449645996, + "learning_rate": 1.3755621339404459e-05, + "loss": 0.4729556441307068, + "step": 1603 + }, + { + "epoch": 0.40897501274859765, + "grad_norm": 0.4737587571144104, + "learning_rate": 1.374780357692514e-05, + "loss": 0.48022347688674927, + "step": 1604 + }, + { + "epoch": 0.4092299847016828, + "grad_norm": 0.4732692539691925, + "learning_rate": 1.3739983148667776e-05, + "loss": 0.4782615900039673, + "step": 1605 + }, + { + "epoch": 0.409484956654768, + "grad_norm": 0.4965715706348419, + "learning_rate": 1.3732160060194968e-05, + "loss": 0.49145591259002686, + "step": 1606 + }, + { + "epoch": 0.40973992860785313, + "grad_norm": 0.4683915376663208, + "learning_rate": 1.3724334317071198e-05, + "loss": 0.4661458730697632, + "step": 1607 + }, + { + "epoch": 0.4099949005609383, + "grad_norm": 0.47402238845825195, + "learning_rate": 1.3716505924862854e-05, + "loss": 0.4833154082298279, + "step": 1608 + }, + { + "epoch": 0.41024987251402345, + "grad_norm": 0.44059866666793823, + "learning_rate": 1.3708674889138196e-05, + "loss": 0.4715484082698822, + "step": 1609 + }, + { + "epoch": 0.4105048444671086, + "grad_norm": 0.4649759531021118, + "learning_rate": 1.3700841215467367e-05, + "loss": 0.4846433103084564, + "step": 1610 + }, + { + "epoch": 0.41075981642019377, + "grad_norm": 0.45463913679122925, + "learning_rate": 1.3693004909422386e-05, + "loss": 0.4861733615398407, + "step": 1611 + }, + { + "epoch": 0.4110147883732789, + "grad_norm": 0.49009451270103455, + "learning_rate": 1.3685165976577146e-05, + "loss": 0.47389039397239685, + "step": 1612 + }, + { + "epoch": 0.4112697603263641, + "grad_norm": 0.4492635130882263, + "learning_rate": 1.3677324422507408e-05, + "loss": 0.47067707777023315, + "step": 1613 + }, + { + "epoch": 0.41152473227944925, + "grad_norm": 0.47212710976600647, + "learning_rate": 1.3669480252790797e-05, + "loss": 0.47150108218193054, + "step": 1614 + }, + { + "epoch": 0.4117797042325344, + "grad_norm": 0.4461907148361206, + "learning_rate": 1.3661633473006793e-05, + "loss": 0.48159798979759216, + "step": 1615 + }, + { + "epoch": 0.41203467618561956, + "grad_norm": 0.4642240107059479, + "learning_rate": 1.3653784088736746e-05, + "loss": 0.48190754652023315, + "step": 1616 + }, + { + "epoch": 0.4122896481387047, + "grad_norm": 0.4726830720901489, + "learning_rate": 1.3645932105563846e-05, + "loss": 0.49468010663986206, + "step": 1617 + }, + { + "epoch": 0.4125446200917899, + "grad_norm": 0.4275886118412018, + "learning_rate": 1.363807752907314e-05, + "loss": 0.4874088168144226, + "step": 1618 + }, + { + "epoch": 0.41279959204487504, + "grad_norm": 0.6437248587608337, + "learning_rate": 1.363022036485151e-05, + "loss": 0.4754694402217865, + "step": 1619 + }, + { + "epoch": 0.4130545639979602, + "grad_norm": 0.46456652879714966, + "learning_rate": 1.3622360618487687e-05, + "loss": 0.48658570647239685, + "step": 1620 + }, + { + "epoch": 0.41330953595104536, + "grad_norm": 0.44917410612106323, + "learning_rate": 1.3614498295572238e-05, + "loss": 0.4856434762477875, + "step": 1621 + }, + { + "epoch": 0.4135645079041305, + "grad_norm": 0.4353286921977997, + "learning_rate": 1.3606633401697557e-05, + "loss": 0.49146074056625366, + "step": 1622 + }, + { + "epoch": 0.41381947985721573, + "grad_norm": 0.4655471742153168, + "learning_rate": 1.3598765942457872e-05, + "loss": 0.47687792778015137, + "step": 1623 + }, + { + "epoch": 0.4140744518103009, + "grad_norm": 0.4673038125038147, + "learning_rate": 1.3590895923449234e-05, + "loss": 0.45503830909729004, + "step": 1624 + }, + { + "epoch": 0.41432942376338605, + "grad_norm": 0.4729268252849579, + "learning_rate": 1.3583023350269513e-05, + "loss": 0.47268760204315186, + "step": 1625 + }, + { + "epoch": 0.4145843957164712, + "grad_norm": 0.49650296568870544, + "learning_rate": 1.3575148228518403e-05, + "loss": 0.46334347128868103, + "step": 1626 + }, + { + "epoch": 0.41483936766955637, + "grad_norm": 0.4445992112159729, + "learning_rate": 1.3567270563797399e-05, + "loss": 0.47191429138183594, + "step": 1627 + }, + { + "epoch": 0.41509433962264153, + "grad_norm": 0.4386600852012634, + "learning_rate": 1.3559390361709815e-05, + "loss": 0.49940285086631775, + "step": 1628 + }, + { + "epoch": 0.4153493115757267, + "grad_norm": 0.4637932777404785, + "learning_rate": 1.3551507627860763e-05, + "loss": 0.48494452238082886, + "step": 1629 + }, + { + "epoch": 0.41560428352881185, + "grad_norm": 0.46620863676071167, + "learning_rate": 1.3543622367857163e-05, + "loss": 0.4677116274833679, + "step": 1630 + }, + { + "epoch": 0.415859255481897, + "grad_norm": 0.4731232523918152, + "learning_rate": 1.353573458730772e-05, + "loss": 0.4884800910949707, + "step": 1631 + }, + { + "epoch": 0.41611422743498216, + "grad_norm": 0.5170514583587646, + "learning_rate": 1.3527844291822949e-05, + "loss": 0.48462021350860596, + "step": 1632 + }, + { + "epoch": 0.4163691993880673, + "grad_norm": 0.5338066816329956, + "learning_rate": 1.3519951487015139e-05, + "loss": 0.4877411723136902, + "step": 1633 + }, + { + "epoch": 0.4166241713411525, + "grad_norm": 0.6070787310600281, + "learning_rate": 1.351205617849837e-05, + "loss": 0.49505704641342163, + "step": 1634 + }, + { + "epoch": 0.41687914329423764, + "grad_norm": 0.44800975918769836, + "learning_rate": 1.3504158371888498e-05, + "loss": 0.47991299629211426, + "step": 1635 + }, + { + "epoch": 0.4171341152473228, + "grad_norm": 0.43597695231437683, + "learning_rate": 1.3496258072803166e-05, + "loss": 0.486041396856308, + "step": 1636 + }, + { + "epoch": 0.41738908720040796, + "grad_norm": 0.4952779710292816, + "learning_rate": 1.3488355286861783e-05, + "loss": 0.48333197832107544, + "step": 1637 + }, + { + "epoch": 0.4176440591534931, + "grad_norm": 0.4361210763454437, + "learning_rate": 1.3480450019685524e-05, + "loss": 0.46700620651245117, + "step": 1638 + }, + { + "epoch": 0.4178990311065783, + "grad_norm": 0.4386427700519562, + "learning_rate": 1.3472542276897334e-05, + "loss": 0.4709675908088684, + "step": 1639 + }, + { + "epoch": 0.41815400305966344, + "grad_norm": 0.47871455550193787, + "learning_rate": 1.3464632064121918e-05, + "loss": 0.48721468448638916, + "step": 1640 + }, + { + "epoch": 0.4184089750127486, + "grad_norm": 0.4435887634754181, + "learning_rate": 1.3456719386985736e-05, + "loss": 0.47918903827667236, + "step": 1641 + }, + { + "epoch": 0.41866394696583376, + "grad_norm": 0.4284169673919678, + "learning_rate": 1.3448804251117005e-05, + "loss": 0.47185349464416504, + "step": 1642 + }, + { + "epoch": 0.4189189189189189, + "grad_norm": 0.4468022882938385, + "learning_rate": 1.3440886662145684e-05, + "loss": 0.4968520998954773, + "step": 1643 + }, + { + "epoch": 0.4191738908720041, + "grad_norm": 0.4575154483318329, + "learning_rate": 1.3432966625703486e-05, + "loss": 0.48007091879844666, + "step": 1644 + }, + { + "epoch": 0.41942886282508923, + "grad_norm": 0.43185529112815857, + "learning_rate": 1.3425044147423853e-05, + "loss": 0.47791963815689087, + "step": 1645 + }, + { + "epoch": 0.4196838347781744, + "grad_norm": 0.4488309919834137, + "learning_rate": 1.3417119232941978e-05, + "loss": 0.4668620228767395, + "step": 1646 + }, + { + "epoch": 0.41993880673125955, + "grad_norm": 0.4566347301006317, + "learning_rate": 1.340919188789477e-05, + "loss": 0.49318307638168335, + "step": 1647 + }, + { + "epoch": 0.4201937786843447, + "grad_norm": 0.4377533793449402, + "learning_rate": 1.3401262117920885e-05, + "loss": 0.49428534507751465, + "step": 1648 + }, + { + "epoch": 0.42044875063742987, + "grad_norm": 0.4332873225212097, + "learning_rate": 1.3393329928660694e-05, + "loss": 0.48560136556625366, + "step": 1649 + }, + { + "epoch": 0.42070372259051503, + "grad_norm": 0.44632354378700256, + "learning_rate": 1.3385395325756285e-05, + "loss": 0.4737345576286316, + "step": 1650 + }, + { + "epoch": 0.4209586945436002, + "grad_norm": 0.44238245487213135, + "learning_rate": 1.3377458314851471e-05, + "loss": 0.47939157485961914, + "step": 1651 + }, + { + "epoch": 0.42121366649668535, + "grad_norm": 0.4579528272151947, + "learning_rate": 1.3369518901591771e-05, + "loss": 0.46565157175064087, + "step": 1652 + }, + { + "epoch": 0.4214686384497705, + "grad_norm": 0.4802386164665222, + "learning_rate": 1.3361577091624428e-05, + "loss": 0.4831387996673584, + "step": 1653 + }, + { + "epoch": 0.42172361040285566, + "grad_norm": 0.46119028329849243, + "learning_rate": 1.3353632890598362e-05, + "loss": 0.472695529460907, + "step": 1654 + }, + { + "epoch": 0.4219785823559408, + "grad_norm": 0.45306500792503357, + "learning_rate": 1.3345686304164219e-05, + "loss": 0.4780885577201843, + "step": 1655 + }, + { + "epoch": 0.422233554309026, + "grad_norm": 0.4374094009399414, + "learning_rate": 1.333773733797433e-05, + "loss": 0.4786366820335388, + "step": 1656 + }, + { + "epoch": 0.42248852626211114, + "grad_norm": 0.5166929364204407, + "learning_rate": 1.3329785997682722e-05, + "loss": 0.48381268978118896, + "step": 1657 + }, + { + "epoch": 0.42274349821519636, + "grad_norm": 0.4499305784702301, + "learning_rate": 1.3321832288945111e-05, + "loss": 0.49744629859924316, + "step": 1658 + }, + { + "epoch": 0.4229984701682815, + "grad_norm": 0.43170177936553955, + "learning_rate": 1.3313876217418893e-05, + "loss": 0.4850466251373291, + "step": 1659 + }, + { + "epoch": 0.4232534421213667, + "grad_norm": 0.46282199025154114, + "learning_rate": 1.3305917788763153e-05, + "loss": 0.47375819087028503, + "step": 1660 + }, + { + "epoch": 0.42350841407445183, + "grad_norm": 0.487903356552124, + "learning_rate": 1.3297957008638643e-05, + "loss": 0.46697551012039185, + "step": 1661 + }, + { + "epoch": 0.423763386027537, + "grad_norm": 0.4308168590068817, + "learning_rate": 1.3289993882707797e-05, + "loss": 0.4807865619659424, + "step": 1662 + }, + { + "epoch": 0.42401835798062215, + "grad_norm": 0.44673821330070496, + "learning_rate": 1.3282028416634706e-05, + "loss": 0.48854202032089233, + "step": 1663 + }, + { + "epoch": 0.4242733299337073, + "grad_norm": 0.44982901215553284, + "learning_rate": 1.3274060616085138e-05, + "loss": 0.4782748818397522, + "step": 1664 + }, + { + "epoch": 0.42452830188679247, + "grad_norm": 0.47188979387283325, + "learning_rate": 1.3266090486726517e-05, + "loss": 0.47001177072525024, + "step": 1665 + }, + { + "epoch": 0.42478327383987763, + "grad_norm": 0.4699896574020386, + "learning_rate": 1.3258118034227919e-05, + "loss": 0.46810004115104675, + "step": 1666 + }, + { + "epoch": 0.4250382457929628, + "grad_norm": 0.4989445209503174, + "learning_rate": 1.3250143264260073e-05, + "loss": 0.47544538974761963, + "step": 1667 + }, + { + "epoch": 0.42529321774604795, + "grad_norm": 0.45112210512161255, + "learning_rate": 1.3242166182495365e-05, + "loss": 0.4739255905151367, + "step": 1668 + }, + { + "epoch": 0.4255481896991331, + "grad_norm": 0.44104111194610596, + "learning_rate": 1.3234186794607818e-05, + "loss": 0.4756285548210144, + "step": 1669 + }, + { + "epoch": 0.42580316165221826, + "grad_norm": 0.47478458285331726, + "learning_rate": 1.3226205106273096e-05, + "loss": 0.47291168570518494, + "step": 1670 + }, + { + "epoch": 0.4260581336053034, + "grad_norm": 0.468546062707901, + "learning_rate": 1.32182211231685e-05, + "loss": 0.4908111095428467, + "step": 1671 + }, + { + "epoch": 0.4263131055583886, + "grad_norm": 0.4369455873966217, + "learning_rate": 1.3210234850972966e-05, + "loss": 0.484855979681015, + "step": 1672 + }, + { + "epoch": 0.42656807751147374, + "grad_norm": 0.4560834467411041, + "learning_rate": 1.320224629536705e-05, + "loss": 0.476165771484375, + "step": 1673 + }, + { + "epoch": 0.4268230494645589, + "grad_norm": 0.4688289165496826, + "learning_rate": 1.319425546203294e-05, + "loss": 0.4720986783504486, + "step": 1674 + }, + { + "epoch": 0.42707802141764406, + "grad_norm": 0.47327515482902527, + "learning_rate": 1.3186262356654446e-05, + "loss": 0.4842362403869629, + "step": 1675 + }, + { + "epoch": 0.4273329933707292, + "grad_norm": 0.45015227794647217, + "learning_rate": 1.3178266984916988e-05, + "loss": 0.46754440665245056, + "step": 1676 + }, + { + "epoch": 0.4275879653238144, + "grad_norm": 0.43983298540115356, + "learning_rate": 1.3170269352507596e-05, + "loss": 0.4953022003173828, + "step": 1677 + }, + { + "epoch": 0.42784293727689954, + "grad_norm": 0.44356074929237366, + "learning_rate": 1.3162269465114918e-05, + "loss": 0.4818527102470398, + "step": 1678 + }, + { + "epoch": 0.4280979092299847, + "grad_norm": 0.5327989459037781, + "learning_rate": 1.3154267328429195e-05, + "loss": 0.48241209983825684, + "step": 1679 + }, + { + "epoch": 0.42835288118306986, + "grad_norm": 0.46997660398483276, + "learning_rate": 1.3146262948142275e-05, + "loss": 0.4853352904319763, + "step": 1680 + }, + { + "epoch": 0.428607853136155, + "grad_norm": 0.4460082948207855, + "learning_rate": 1.31382563299476e-05, + "loss": 0.48609769344329834, + "step": 1681 + }, + { + "epoch": 0.4288628250892402, + "grad_norm": 0.4682973027229309, + "learning_rate": 1.3130247479540204e-05, + "loss": 0.47891318798065186, + "step": 1682 + }, + { + "epoch": 0.42911779704232533, + "grad_norm": 0.47408202290534973, + "learning_rate": 1.3122236402616704e-05, + "loss": 0.47502049803733826, + "step": 1683 + }, + { + "epoch": 0.4293727689954105, + "grad_norm": 0.4689554274082184, + "learning_rate": 1.3114223104875308e-05, + "loss": 0.47387951612472534, + "step": 1684 + }, + { + "epoch": 0.42962774094849565, + "grad_norm": 0.49953290820121765, + "learning_rate": 1.3106207592015805e-05, + "loss": 0.47314685583114624, + "step": 1685 + }, + { + "epoch": 0.4298827129015808, + "grad_norm": 0.4629705846309662, + "learning_rate": 1.309818986973955e-05, + "loss": 0.48018312454223633, + "step": 1686 + }, + { + "epoch": 0.43013768485466597, + "grad_norm": 0.4409901201725006, + "learning_rate": 1.3090169943749475e-05, + "loss": 0.47948044538497925, + "step": 1687 + }, + { + "epoch": 0.43039265680775113, + "grad_norm": 0.4578230082988739, + "learning_rate": 1.3082147819750082e-05, + "loss": 0.48771414160728455, + "step": 1688 + }, + { + "epoch": 0.4306476287608363, + "grad_norm": 0.4722670018672943, + "learning_rate": 1.307412350344743e-05, + "loss": 0.47841060161590576, + "step": 1689 + }, + { + "epoch": 0.43090260071392145, + "grad_norm": 0.47902294993400574, + "learning_rate": 1.3066097000549144e-05, + "loss": 0.4815452992916107, + "step": 1690 + }, + { + "epoch": 0.4311575726670066, + "grad_norm": 0.45889562368392944, + "learning_rate": 1.3058068316764398e-05, + "loss": 0.4722667634487152, + "step": 1691 + }, + { + "epoch": 0.43141254462009176, + "grad_norm": 0.43147915601730347, + "learning_rate": 1.3050037457803923e-05, + "loss": 0.4807421565055847, + "step": 1692 + }, + { + "epoch": 0.4316675165731769, + "grad_norm": 0.46262240409851074, + "learning_rate": 1.3042004429379996e-05, + "loss": 0.47799575328826904, + "step": 1693 + }, + { + "epoch": 0.43192248852626214, + "grad_norm": 0.4444118142127991, + "learning_rate": 1.3033969237206435e-05, + "loss": 0.48812946677207947, + "step": 1694 + }, + { + "epoch": 0.4321774604793473, + "grad_norm": 0.45554131269454956, + "learning_rate": 1.3025931886998596e-05, + "loss": 0.4786272943019867, + "step": 1695 + }, + { + "epoch": 0.43243243243243246, + "grad_norm": 0.4367639422416687, + "learning_rate": 1.301789238447337e-05, + "loss": 0.4648439288139343, + "step": 1696 + }, + { + "epoch": 0.4326874043855176, + "grad_norm": 0.4478955864906311, + "learning_rate": 1.300985073534919e-05, + "loss": 0.4997819662094116, + "step": 1697 + }, + { + "epoch": 0.4329423763386028, + "grad_norm": 0.44023770093917847, + "learning_rate": 1.3001806945345996e-05, + "loss": 0.47148871421813965, + "step": 1698 + }, + { + "epoch": 0.43319734829168793, + "grad_norm": 0.43281856179237366, + "learning_rate": 1.2993761020185267e-05, + "loss": 0.4947243630886078, + "step": 1699 + }, + { + "epoch": 0.4334523202447731, + "grad_norm": 0.4664633274078369, + "learning_rate": 1.2985712965589994e-05, + "loss": 0.4891164302825928, + "step": 1700 + }, + { + "epoch": 0.43370729219785825, + "grad_norm": 0.44780081510543823, + "learning_rate": 1.2977662787284689e-05, + "loss": 0.4701750874519348, + "step": 1701 + }, + { + "epoch": 0.4339622641509434, + "grad_norm": 0.45525118708610535, + "learning_rate": 1.296961049099536e-05, + "loss": 0.47872576117515564, + "step": 1702 + }, + { + "epoch": 0.43421723610402857, + "grad_norm": 0.4881942570209503, + "learning_rate": 1.2961556082449536e-05, + "loss": 0.45850667357444763, + "step": 1703 + }, + { + "epoch": 0.43447220805711373, + "grad_norm": 0.4705426096916199, + "learning_rate": 1.2953499567376244e-05, + "loss": 0.4757862091064453, + "step": 1704 + }, + { + "epoch": 0.4347271800101989, + "grad_norm": 0.4563402533531189, + "learning_rate": 1.2945440951506007e-05, + "loss": 0.483933687210083, + "step": 1705 + }, + { + "epoch": 0.43498215196328405, + "grad_norm": 0.4657118320465088, + "learning_rate": 1.2937380240570845e-05, + "loss": 0.46637582778930664, + "step": 1706 + }, + { + "epoch": 0.4352371239163692, + "grad_norm": 0.48174551129341125, + "learning_rate": 1.292931744030427e-05, + "loss": 0.48372596502304077, + "step": 1707 + }, + { + "epoch": 0.43549209586945437, + "grad_norm": 0.46783116459846497, + "learning_rate": 1.2921252556441269e-05, + "loss": 0.47157931327819824, + "step": 1708 + }, + { + "epoch": 0.4357470678225395, + "grad_norm": 0.453797310590744, + "learning_rate": 1.2913185594718325e-05, + "loss": 0.4759908616542816, + "step": 1709 + }, + { + "epoch": 0.4360020397756247, + "grad_norm": 0.4898701012134552, + "learning_rate": 1.2905116560873396e-05, + "loss": 0.49279874563217163, + "step": 1710 + }, + { + "epoch": 0.43625701172870984, + "grad_norm": 0.44953569769859314, + "learning_rate": 1.2897045460645905e-05, + "loss": 0.47002968192100525, + "step": 1711 + }, + { + "epoch": 0.436511983681795, + "grad_norm": 0.453167587518692, + "learning_rate": 1.2888972299776754e-05, + "loss": 0.4644286334514618, + "step": 1712 + }, + { + "epoch": 0.43676695563488016, + "grad_norm": 0.43823450803756714, + "learning_rate": 1.288089708400831e-05, + "loss": 0.47578439116477966, + "step": 1713 + }, + { + "epoch": 0.4370219275879653, + "grad_norm": 0.4516087770462036, + "learning_rate": 1.2872819819084397e-05, + "loss": 0.46475034952163696, + "step": 1714 + }, + { + "epoch": 0.4372768995410505, + "grad_norm": 0.4563446044921875, + "learning_rate": 1.2864740510750298e-05, + "loss": 0.46562060713768005, + "step": 1715 + }, + { + "epoch": 0.43753187149413564, + "grad_norm": 0.4217168688774109, + "learning_rate": 1.285665916475275e-05, + "loss": 0.4905102252960205, + "step": 1716 + }, + { + "epoch": 0.4377868434472208, + "grad_norm": 0.4617106318473816, + "learning_rate": 1.2848575786839943e-05, + "loss": 0.4758516550064087, + "step": 1717 + }, + { + "epoch": 0.43804181540030596, + "grad_norm": 0.4274753928184509, + "learning_rate": 1.2840490382761507e-05, + "loss": 0.47411856055259705, + "step": 1718 + }, + { + "epoch": 0.4382967873533911, + "grad_norm": 0.4601130783557892, + "learning_rate": 1.2832402958268512e-05, + "loss": 0.4765852093696594, + "step": 1719 + }, + { + "epoch": 0.4385517593064763, + "grad_norm": 0.4270618259906769, + "learning_rate": 1.2824313519113473e-05, + "loss": 0.5004135370254517, + "step": 1720 + }, + { + "epoch": 0.43880673125956143, + "grad_norm": 0.4411335289478302, + "learning_rate": 1.2816222071050333e-05, + "loss": 0.49290788173675537, + "step": 1721 + }, + { + "epoch": 0.4390617032126466, + "grad_norm": 0.4671216607093811, + "learning_rate": 1.280812861983446e-05, + "loss": 0.46440428495407104, + "step": 1722 + }, + { + "epoch": 0.43931667516573175, + "grad_norm": 0.4586823582649231, + "learning_rate": 1.2800033171222662e-05, + "loss": 0.469355046749115, + "step": 1723 + }, + { + "epoch": 0.4395716471188169, + "grad_norm": 0.47849181294441223, + "learning_rate": 1.279193573097314e-05, + "loss": 0.47547468543052673, + "step": 1724 + }, + { + "epoch": 0.43982661907190207, + "grad_norm": 0.446884423494339, + "learning_rate": 1.2783836304845541e-05, + "loss": 0.48563331365585327, + "step": 1725 + }, + { + "epoch": 0.44008159102498723, + "grad_norm": 0.4576139748096466, + "learning_rate": 1.277573489860091e-05, + "loss": 0.4876425266265869, + "step": 1726 + }, + { + "epoch": 0.4403365629780724, + "grad_norm": 0.4311068654060364, + "learning_rate": 1.2767631518001697e-05, + "loss": 0.4762778878211975, + "step": 1727 + }, + { + "epoch": 0.44059153493115755, + "grad_norm": 0.44358745217323303, + "learning_rate": 1.2759526168811767e-05, + "loss": 0.48140454292297363, + "step": 1728 + }, + { + "epoch": 0.4408465068842427, + "grad_norm": 0.45184627175331116, + "learning_rate": 1.2751418856796378e-05, + "loss": 0.46511802077293396, + "step": 1729 + }, + { + "epoch": 0.4411014788373279, + "grad_norm": 0.44608306884765625, + "learning_rate": 1.2743309587722186e-05, + "loss": 0.47363704442977905, + "step": 1730 + }, + { + "epoch": 0.4413564507904131, + "grad_norm": 0.44456541538238525, + "learning_rate": 1.2735198367357238e-05, + "loss": 0.4707756042480469, + "step": 1731 + }, + { + "epoch": 0.44161142274349824, + "grad_norm": 0.4834231436252594, + "learning_rate": 1.2727085201470972e-05, + "loss": 0.47929319739341736, + "step": 1732 + }, + { + "epoch": 0.4418663946965834, + "grad_norm": 0.49860382080078125, + "learning_rate": 1.2718970095834208e-05, + "loss": 0.47534316778182983, + "step": 1733 + }, + { + "epoch": 0.44212136664966856, + "grad_norm": 1.011425256729126, + "learning_rate": 1.2710853056219147e-05, + "loss": 0.49217113852500916, + "step": 1734 + }, + { + "epoch": 0.4423763386027537, + "grad_norm": 0.4697829782962799, + "learning_rate": 1.2702734088399363e-05, + "loss": 0.4657892882823944, + "step": 1735 + }, + { + "epoch": 0.4426313105558389, + "grad_norm": 0.48535552620887756, + "learning_rate": 1.2694613198149796e-05, + "loss": 0.47705650329589844, + "step": 1736 + }, + { + "epoch": 0.44288628250892403, + "grad_norm": 0.4486839473247528, + "learning_rate": 1.2686490391246774e-05, + "loss": 0.46023058891296387, + "step": 1737 + }, + { + "epoch": 0.4431412544620092, + "grad_norm": 0.4410373270511627, + "learning_rate": 1.2678365673467963e-05, + "loss": 0.4703472852706909, + "step": 1738 + }, + { + "epoch": 0.44339622641509435, + "grad_norm": 0.46294504404067993, + "learning_rate": 1.2670239050592403e-05, + "loss": 0.47119733691215515, + "step": 1739 + }, + { + "epoch": 0.4436511983681795, + "grad_norm": 0.46631738543510437, + "learning_rate": 1.2662110528400487e-05, + "loss": 0.47250574827194214, + "step": 1740 + }, + { + "epoch": 0.44390617032126467, + "grad_norm": 0.47729727625846863, + "learning_rate": 1.2653980112673955e-05, + "loss": 0.4722181260585785, + "step": 1741 + }, + { + "epoch": 0.44416114227434983, + "grad_norm": 0.42720356583595276, + "learning_rate": 1.2645847809195901e-05, + "loss": 0.47384095191955566, + "step": 1742 + }, + { + "epoch": 0.444416114227435, + "grad_norm": 0.4484679102897644, + "learning_rate": 1.2637713623750756e-05, + "loss": 0.4848884046077728, + "step": 1743 + }, + { + "epoch": 0.44467108618052015, + "grad_norm": 0.456752210855484, + "learning_rate": 1.2629577562124287e-05, + "loss": 0.4707961976528168, + "step": 1744 + }, + { + "epoch": 0.4449260581336053, + "grad_norm": 0.42030447721481323, + "learning_rate": 1.2621439630103602e-05, + "loss": 0.47010743618011475, + "step": 1745 + }, + { + "epoch": 0.44518103008669047, + "grad_norm": 0.4455772042274475, + "learning_rate": 1.261329983347714e-05, + "loss": 0.46104955673217773, + "step": 1746 + }, + { + "epoch": 0.4454360020397756, + "grad_norm": 0.44260451197624207, + "learning_rate": 1.2605158178034656e-05, + "loss": 0.4911135137081146, + "step": 1747 + }, + { + "epoch": 0.4456909739928608, + "grad_norm": 0.4729379713535309, + "learning_rate": 1.259701466956724e-05, + "loss": 0.47768276929855347, + "step": 1748 + }, + { + "epoch": 0.44594594594594594, + "grad_norm": 0.4576628506183624, + "learning_rate": 1.2588869313867292e-05, + "loss": 0.47084134817123413, + "step": 1749 + }, + { + "epoch": 0.4462009178990311, + "grad_norm": 0.42538702487945557, + "learning_rate": 1.258072211672853e-05, + "loss": 0.47333893179893494, + "step": 1750 + }, + { + "epoch": 0.44645588985211626, + "grad_norm": 0.4477687180042267, + "learning_rate": 1.2572573083945978e-05, + "loss": 0.4792226254940033, + "step": 1751 + }, + { + "epoch": 0.4467108618052014, + "grad_norm": 0.4507501423358917, + "learning_rate": 1.2564422221315968e-05, + "loss": 0.4824841618537903, + "step": 1752 + }, + { + "epoch": 0.4469658337582866, + "grad_norm": 0.44207245111465454, + "learning_rate": 1.2556269534636135e-05, + "loss": 0.475760281085968, + "step": 1753 + }, + { + "epoch": 0.44722080571137174, + "grad_norm": 0.4917367994785309, + "learning_rate": 1.254811502970541e-05, + "loss": 0.46215686202049255, + "step": 1754 + }, + { + "epoch": 0.4474757776644569, + "grad_norm": 0.444948673248291, + "learning_rate": 1.2539958712324014e-05, + "loss": 0.4736212491989136, + "step": 1755 + }, + { + "epoch": 0.44773074961754206, + "grad_norm": 0.45400217175483704, + "learning_rate": 1.2531800588293461e-05, + "loss": 0.4696999490261078, + "step": 1756 + }, + { + "epoch": 0.4479857215706272, + "grad_norm": 0.4441147744655609, + "learning_rate": 1.252364066341655e-05, + "loss": 0.47615838050842285, + "step": 1757 + }, + { + "epoch": 0.4482406935237124, + "grad_norm": 0.44922590255737305, + "learning_rate": 1.2515478943497361e-05, + "loss": 0.4806191325187683, + "step": 1758 + }, + { + "epoch": 0.44849566547679753, + "grad_norm": 0.44325754046440125, + "learning_rate": 1.2507315434341249e-05, + "loss": 0.46858978271484375, + "step": 1759 + }, + { + "epoch": 0.4487506374298827, + "grad_norm": 0.45428788661956787, + "learning_rate": 1.2499150141754842e-05, + "loss": 0.4710807800292969, + "step": 1760 + }, + { + "epoch": 0.44900560938296785, + "grad_norm": 0.4337548017501831, + "learning_rate": 1.2490983071546037e-05, + "loss": 0.4801885485649109, + "step": 1761 + }, + { + "epoch": 0.449260581336053, + "grad_norm": 0.4419419765472412, + "learning_rate": 1.2482814229523996e-05, + "loss": 0.4899044930934906, + "step": 1762 + }, + { + "epoch": 0.44951555328913817, + "grad_norm": 0.45944783091545105, + "learning_rate": 1.2474643621499143e-05, + "loss": 0.4820578098297119, + "step": 1763 + }, + { + "epoch": 0.44977052524222333, + "grad_norm": 0.4322916269302368, + "learning_rate": 1.2466471253283152e-05, + "loss": 0.48402005434036255, + "step": 1764 + }, + { + "epoch": 0.45002549719530854, + "grad_norm": 0.45586666464805603, + "learning_rate": 1.2458297130688956e-05, + "loss": 0.4691661298274994, + "step": 1765 + }, + { + "epoch": 0.4502804691483937, + "grad_norm": 0.45265716314315796, + "learning_rate": 1.2450121259530734e-05, + "loss": 0.46303677558898926, + "step": 1766 + }, + { + "epoch": 0.45053544110147886, + "grad_norm": 0.466658353805542, + "learning_rate": 1.2441943645623904e-05, + "loss": 0.4779145121574402, + "step": 1767 + }, + { + "epoch": 0.450790413054564, + "grad_norm": 0.4724508821964264, + "learning_rate": 1.2433764294785129e-05, + "loss": 0.47466349601745605, + "step": 1768 + }, + { + "epoch": 0.4510453850076492, + "grad_norm": 0.45049089193344116, + "learning_rate": 1.2425583212832302e-05, + "loss": 0.4861510992050171, + "step": 1769 + }, + { + "epoch": 0.45130035696073434, + "grad_norm": 0.4370516836643219, + "learning_rate": 1.2417400405584558e-05, + "loss": 0.4628305435180664, + "step": 1770 + }, + { + "epoch": 0.4515553289138195, + "grad_norm": 0.45360061526298523, + "learning_rate": 1.2409215878862249e-05, + "loss": 0.4932473599910736, + "step": 1771 + }, + { + "epoch": 0.45181030086690466, + "grad_norm": 0.45101219415664673, + "learning_rate": 1.2401029638486952e-05, + "loss": 0.47766444087028503, + "step": 1772 + }, + { + "epoch": 0.4520652728199898, + "grad_norm": 0.44970381259918213, + "learning_rate": 1.239284169028146e-05, + "loss": 0.46386319398880005, + "step": 1773 + }, + { + "epoch": 0.452320244773075, + "grad_norm": 0.44679099321365356, + "learning_rate": 1.2384652040069797e-05, + "loss": 0.4587230086326599, + "step": 1774 + }, + { + "epoch": 0.45257521672616013, + "grad_norm": 0.44836875796318054, + "learning_rate": 1.2376460693677175e-05, + "loss": 0.4732503294944763, + "step": 1775 + }, + { + "epoch": 0.4528301886792453, + "grad_norm": 0.5044194459915161, + "learning_rate": 1.2368267656930027e-05, + "loss": 0.4807708263397217, + "step": 1776 + }, + { + "epoch": 0.45308516063233045, + "grad_norm": 0.47821566462516785, + "learning_rate": 1.2360072935655982e-05, + "loss": 0.4741743505001068, + "step": 1777 + }, + { + "epoch": 0.4533401325854156, + "grad_norm": 0.45061835646629333, + "learning_rate": 1.235187653568387e-05, + "loss": 0.46865314245224, + "step": 1778 + }, + { + "epoch": 0.45359510453850077, + "grad_norm": 0.44544747471809387, + "learning_rate": 1.2343678462843719e-05, + "loss": 0.4797539710998535, + "step": 1779 + }, + { + "epoch": 0.45385007649158593, + "grad_norm": 0.4373033344745636, + "learning_rate": 1.2335478722966738e-05, + "loss": 0.4692363440990448, + "step": 1780 + }, + { + "epoch": 0.4541050484446711, + "grad_norm": 0.42586174607276917, + "learning_rate": 1.2327277321885326e-05, + "loss": 0.4837741255760193, + "step": 1781 + }, + { + "epoch": 0.45436002039775625, + "grad_norm": 0.4503430724143982, + "learning_rate": 1.2319074265433064e-05, + "loss": 0.473422110080719, + "step": 1782 + }, + { + "epoch": 0.4546149923508414, + "grad_norm": 0.43340277671813965, + "learning_rate": 1.2310869559444714e-05, + "loss": 0.4896630048751831, + "step": 1783 + }, + { + "epoch": 0.45486996430392657, + "grad_norm": 0.5077560544013977, + "learning_rate": 1.2302663209756205e-05, + "loss": 0.4597128629684448, + "step": 1784 + }, + { + "epoch": 0.4551249362570117, + "grad_norm": 0.4537369906902313, + "learning_rate": 1.2294455222204632e-05, + "loss": 0.46173548698425293, + "step": 1785 + }, + { + "epoch": 0.4553799082100969, + "grad_norm": 0.46171754598617554, + "learning_rate": 1.228624560262827e-05, + "loss": 0.4772665500640869, + "step": 1786 + }, + { + "epoch": 0.45563488016318204, + "grad_norm": 0.4706333577632904, + "learning_rate": 1.2278034356866544e-05, + "loss": 0.4807857275009155, + "step": 1787 + }, + { + "epoch": 0.4558898521162672, + "grad_norm": 0.4414377212524414, + "learning_rate": 1.2269821490760032e-05, + "loss": 0.4800296425819397, + "step": 1788 + }, + { + "epoch": 0.45614482406935236, + "grad_norm": 0.43886110186576843, + "learning_rate": 1.226160701015047e-05, + "loss": 0.46951231360435486, + "step": 1789 + }, + { + "epoch": 0.4563997960224375, + "grad_norm": 0.438742458820343, + "learning_rate": 1.2253390920880752e-05, + "loss": 0.4638747572898865, + "step": 1790 + }, + { + "epoch": 0.4566547679755227, + "grad_norm": 0.4479273855686188, + "learning_rate": 1.2245173228794895e-05, + "loss": 0.4727884531021118, + "step": 1791 + }, + { + "epoch": 0.45690973992860784, + "grad_norm": 0.4348614811897278, + "learning_rate": 1.223695393973807e-05, + "loss": 0.4840061664581299, + "step": 1792 + }, + { + "epoch": 0.457164711881693, + "grad_norm": 0.463783860206604, + "learning_rate": 1.2228733059556586e-05, + "loss": 0.4745769500732422, + "step": 1793 + }, + { + "epoch": 0.45741968383477816, + "grad_norm": 0.46853530406951904, + "learning_rate": 1.2220510594097872e-05, + "loss": 0.49177151918411255, + "step": 1794 + }, + { + "epoch": 0.4576746557878633, + "grad_norm": 0.4315093457698822, + "learning_rate": 1.22122865492105e-05, + "loss": 0.47219523787498474, + "step": 1795 + }, + { + "epoch": 0.4579296277409485, + "grad_norm": 0.44561806321144104, + "learning_rate": 1.220406093074415e-05, + "loss": 0.4777514934539795, + "step": 1796 + }, + { + "epoch": 0.45818459969403363, + "grad_norm": 0.4777575135231018, + "learning_rate": 1.219583374454963e-05, + "loss": 0.47510725259780884, + "step": 1797 + }, + { + "epoch": 0.4584395716471188, + "grad_norm": 0.4492422044277191, + "learning_rate": 1.2187604996478866e-05, + "loss": 0.47745585441589355, + "step": 1798 + }, + { + "epoch": 0.45869454360020395, + "grad_norm": 0.4303736090660095, + "learning_rate": 1.2179374692384885e-05, + "loss": 0.462444543838501, + "step": 1799 + }, + { + "epoch": 0.4589495155532891, + "grad_norm": 0.458484411239624, + "learning_rate": 1.217114283812183e-05, + "loss": 0.49000486731529236, + "step": 1800 + }, + { + "epoch": 0.4592044875063743, + "grad_norm": 0.45598477125167847, + "learning_rate": 1.2162909439544937e-05, + "loss": 0.47185465693473816, + "step": 1801 + }, + { + "epoch": 0.4594594594594595, + "grad_norm": 0.4487041234970093, + "learning_rate": 1.2154674502510556e-05, + "loss": 0.4776914715766907, + "step": 1802 + }, + { + "epoch": 0.45971443141254464, + "grad_norm": 0.4446077048778534, + "learning_rate": 1.2146438032876113e-05, + "loss": 0.47020599246025085, + "step": 1803 + }, + { + "epoch": 0.4599694033656298, + "grad_norm": 0.4974459111690521, + "learning_rate": 1.2138200036500135e-05, + "loss": 0.48040771484375, + "step": 1804 + }, + { + "epoch": 0.46022437531871496, + "grad_norm": 0.47240355610847473, + "learning_rate": 1.2129960519242235e-05, + "loss": 0.47204288840293884, + "step": 1805 + }, + { + "epoch": 0.4604793472718001, + "grad_norm": 0.46213290095329285, + "learning_rate": 1.2121719486963105e-05, + "loss": 0.4674822688102722, + "step": 1806 + }, + { + "epoch": 0.4607343192248853, + "grad_norm": 0.4440208971500397, + "learning_rate": 1.2113476945524513e-05, + "loss": 0.4881192147731781, + "step": 1807 + }, + { + "epoch": 0.46098929117797044, + "grad_norm": 0.4705323278903961, + "learning_rate": 1.2105232900789303e-05, + "loss": 0.4879416823387146, + "step": 1808 + }, + { + "epoch": 0.4612442631310556, + "grad_norm": 0.4392581582069397, + "learning_rate": 1.209698735862139e-05, + "loss": 0.48007023334503174, + "step": 1809 + }, + { + "epoch": 0.46149923508414076, + "grad_norm": 0.44329649209976196, + "learning_rate": 1.2088740324885748e-05, + "loss": 0.463228315114975, + "step": 1810 + }, + { + "epoch": 0.4617542070372259, + "grad_norm": 0.43376657366752625, + "learning_rate": 1.208049180544842e-05, + "loss": 0.4700953960418701, + "step": 1811 + }, + { + "epoch": 0.4620091789903111, + "grad_norm": 0.4467110335826874, + "learning_rate": 1.2072241806176501e-05, + "loss": 0.4704214036464691, + "step": 1812 + }, + { + "epoch": 0.46226415094339623, + "grad_norm": 0.42056575417518616, + "learning_rate": 1.2063990332938134e-05, + "loss": 0.4868830442428589, + "step": 1813 + }, + { + "epoch": 0.4625191228964814, + "grad_norm": 0.4433046579360962, + "learning_rate": 1.2055737391602522e-05, + "loss": 0.46415600180625916, + "step": 1814 + }, + { + "epoch": 0.46277409484956655, + "grad_norm": 0.4381503164768219, + "learning_rate": 1.2047482988039905e-05, + "loss": 0.4766816794872284, + "step": 1815 + }, + { + "epoch": 0.4630290668026517, + "grad_norm": 0.45899951457977295, + "learning_rate": 1.2039227128121564e-05, + "loss": 0.4798325300216675, + "step": 1816 + }, + { + "epoch": 0.46328403875573687, + "grad_norm": 0.4464300870895386, + "learning_rate": 1.203096981771981e-05, + "loss": 0.47894152998924255, + "step": 1817 + }, + { + "epoch": 0.46353901070882203, + "grad_norm": 0.4569408893585205, + "learning_rate": 1.2022711062707997e-05, + "loss": 0.4581347107887268, + "step": 1818 + }, + { + "epoch": 0.4637939826619072, + "grad_norm": 0.45413821935653687, + "learning_rate": 1.2014450868960504e-05, + "loss": 0.45865410566329956, + "step": 1819 + }, + { + "epoch": 0.46404895461499235, + "grad_norm": 0.4557012915611267, + "learning_rate": 1.2006189242352723e-05, + "loss": 0.48170050978660583, + "step": 1820 + }, + { + "epoch": 0.4643039265680775, + "grad_norm": 0.4703729748725891, + "learning_rate": 1.1997926188761077e-05, + "loss": 0.4796580672264099, + "step": 1821 + }, + { + "epoch": 0.46455889852116267, + "grad_norm": 0.44692087173461914, + "learning_rate": 1.1989661714063e-05, + "loss": 0.47521767020225525, + "step": 1822 + }, + { + "epoch": 0.4648138704742478, + "grad_norm": 0.43485578894615173, + "learning_rate": 1.1981395824136932e-05, + "loss": 0.47263312339782715, + "step": 1823 + }, + { + "epoch": 0.465068842427333, + "grad_norm": 0.44065967202186584, + "learning_rate": 1.197312852486233e-05, + "loss": 0.4700472354888916, + "step": 1824 + }, + { + "epoch": 0.46532381438041814, + "grad_norm": 0.44195377826690674, + "learning_rate": 1.1964859822119646e-05, + "loss": 0.46952304244041443, + "step": 1825 + }, + { + "epoch": 0.4655787863335033, + "grad_norm": 0.46734338998794556, + "learning_rate": 1.1956589721790329e-05, + "loss": 0.47229647636413574, + "step": 1826 + }, + { + "epoch": 0.46583375828658846, + "grad_norm": 0.4728843569755554, + "learning_rate": 1.1948318229756828e-05, + "loss": 0.47129344940185547, + "step": 1827 + }, + { + "epoch": 0.4660887302396736, + "grad_norm": 0.48862841725349426, + "learning_rate": 1.1940045351902578e-05, + "loss": 0.4701687693595886, + "step": 1828 + }, + { + "epoch": 0.4663437021927588, + "grad_norm": 0.46959683299064636, + "learning_rate": 1.1931771094111997e-05, + "loss": 0.4784017503261566, + "step": 1829 + }, + { + "epoch": 0.46659867414584394, + "grad_norm": 0.48078179359436035, + "learning_rate": 1.1923495462270494e-05, + "loss": 0.4602683186531067, + "step": 1830 + }, + { + "epoch": 0.4668536460989291, + "grad_norm": 0.43931084871292114, + "learning_rate": 1.1915218462264446e-05, + "loss": 0.4679564833641052, + "step": 1831 + }, + { + "epoch": 0.46710861805201426, + "grad_norm": 0.48635151982307434, + "learning_rate": 1.190694009998121e-05, + "loss": 0.4717889726161957, + "step": 1832 + }, + { + "epoch": 0.4673635900050994, + "grad_norm": 0.5232540369033813, + "learning_rate": 1.1898660381309104e-05, + "loss": 0.4683421552181244, + "step": 1833 + }, + { + "epoch": 0.4676185619581846, + "grad_norm": 0.44738560914993286, + "learning_rate": 1.1890379312137414e-05, + "loss": 0.4686152935028076, + "step": 1834 + }, + { + "epoch": 0.46787353391126973, + "grad_norm": 0.4688865840435028, + "learning_rate": 1.1882096898356396e-05, + "loss": 0.4768770635128021, + "step": 1835 + }, + { + "epoch": 0.4681285058643549, + "grad_norm": 0.43922460079193115, + "learning_rate": 1.187381314585725e-05, + "loss": 0.4662562310695648, + "step": 1836 + }, + { + "epoch": 0.4683834778174401, + "grad_norm": 0.8413701057434082, + "learning_rate": 1.1865528060532127e-05, + "loss": 0.4704360365867615, + "step": 1837 + }, + { + "epoch": 0.46863844977052527, + "grad_norm": 0.46233436465263367, + "learning_rate": 1.185724164827414e-05, + "loss": 0.4667252004146576, + "step": 1838 + }, + { + "epoch": 0.4688934217236104, + "grad_norm": 0.43266645073890686, + "learning_rate": 1.1848953914977335e-05, + "loss": 0.47639912366867065, + "step": 1839 + }, + { + "epoch": 0.4691483936766956, + "grad_norm": 0.4587555527687073, + "learning_rate": 1.1840664866536697e-05, + "loss": 0.47096312046051025, + "step": 1840 + }, + { + "epoch": 0.46940336562978074, + "grad_norm": 0.4797708988189697, + "learning_rate": 1.1832374508848154e-05, + "loss": 0.4757949709892273, + "step": 1841 + }, + { + "epoch": 0.4696583375828659, + "grad_norm": 0.4354809820652008, + "learning_rate": 1.1824082847808558e-05, + "loss": 0.4758743643760681, + "step": 1842 + }, + { + "epoch": 0.46991330953595106, + "grad_norm": 0.46408167481422424, + "learning_rate": 1.1815789889315691e-05, + "loss": 0.4625156819820404, + "step": 1843 + }, + { + "epoch": 0.4701682814890362, + "grad_norm": 0.4551498591899872, + "learning_rate": 1.180749563926826e-05, + "loss": 0.46774551272392273, + "step": 1844 + }, + { + "epoch": 0.4704232534421214, + "grad_norm": 0.45411378145217896, + "learning_rate": 1.1799200103565887e-05, + "loss": 0.46884870529174805, + "step": 1845 + }, + { + "epoch": 0.47067822539520654, + "grad_norm": 0.4664252996444702, + "learning_rate": 1.1790903288109106e-05, + "loss": 0.4782741963863373, + "step": 1846 + }, + { + "epoch": 0.4709331973482917, + "grad_norm": 0.4781484305858612, + "learning_rate": 1.1782605198799371e-05, + "loss": 0.4723745286464691, + "step": 1847 + }, + { + "epoch": 0.47118816930137686, + "grad_norm": 0.4496593475341797, + "learning_rate": 1.1774305841539036e-05, + "loss": 0.46488043665885925, + "step": 1848 + }, + { + "epoch": 0.471443141254462, + "grad_norm": 0.45651766657829285, + "learning_rate": 1.1766005222231351e-05, + "loss": 0.47134900093078613, + "step": 1849 + }, + { + "epoch": 0.4716981132075472, + "grad_norm": 0.47809088230133057, + "learning_rate": 1.1757703346780473e-05, + "loss": 0.48021984100341797, + "step": 1850 + }, + { + "epoch": 0.47195308516063234, + "grad_norm": 0.46262407302856445, + "learning_rate": 1.1749400221091455e-05, + "loss": 0.4492693841457367, + "step": 1851 + }, + { + "epoch": 0.4722080571137175, + "grad_norm": 0.4340875744819641, + "learning_rate": 1.1741095851070228e-05, + "loss": 0.4752400517463684, + "step": 1852 + }, + { + "epoch": 0.47246302906680265, + "grad_norm": 0.42423349618911743, + "learning_rate": 1.1732790242623612e-05, + "loss": 0.4710129201412201, + "step": 1853 + }, + { + "epoch": 0.4727180010198878, + "grad_norm": 0.48380085825920105, + "learning_rate": 1.1724483401659313e-05, + "loss": 0.47220295667648315, + "step": 1854 + }, + { + "epoch": 0.47297297297297297, + "grad_norm": 0.458454966545105, + "learning_rate": 1.1716175334085908e-05, + "loss": 0.4703748822212219, + "step": 1855 + }, + { + "epoch": 0.47322794492605813, + "grad_norm": 0.4433039426803589, + "learning_rate": 1.170786604581285e-05, + "loss": 0.4689106345176697, + "step": 1856 + }, + { + "epoch": 0.4734829168791433, + "grad_norm": 0.43104684352874756, + "learning_rate": 1.169955554275046e-05, + "loss": 0.4718170166015625, + "step": 1857 + }, + { + "epoch": 0.47373788883222845, + "grad_norm": 0.5022832155227661, + "learning_rate": 1.169124383080992e-05, + "loss": 0.4683248996734619, + "step": 1858 + }, + { + "epoch": 0.4739928607853136, + "grad_norm": 0.48713451623916626, + "learning_rate": 1.1682930915903275e-05, + "loss": 0.4653375744819641, + "step": 1859 + }, + { + "epoch": 0.47424783273839877, + "grad_norm": 0.43980905413627625, + "learning_rate": 1.1674616803943425e-05, + "loss": 0.4635535180568695, + "step": 1860 + }, + { + "epoch": 0.4745028046914839, + "grad_norm": 0.44994592666625977, + "learning_rate": 1.1666301500844117e-05, + "loss": 0.4867127537727356, + "step": 1861 + }, + { + "epoch": 0.4747577766445691, + "grad_norm": 0.45325347781181335, + "learning_rate": 1.1657985012519951e-05, + "loss": 0.4687597453594208, + "step": 1862 + }, + { + "epoch": 0.47501274859765424, + "grad_norm": 0.45354774594306946, + "learning_rate": 1.1649667344886376e-05, + "loss": 0.46883076429367065, + "step": 1863 + }, + { + "epoch": 0.4752677205507394, + "grad_norm": 0.4507564902305603, + "learning_rate": 1.1641348503859664e-05, + "loss": 0.4657594859600067, + "step": 1864 + }, + { + "epoch": 0.47552269250382456, + "grad_norm": 0.4488622844219208, + "learning_rate": 1.1633028495356928e-05, + "loss": 0.481199711561203, + "step": 1865 + }, + { + "epoch": 0.4757776644569097, + "grad_norm": 0.47723981738090515, + "learning_rate": 1.1624707325296117e-05, + "loss": 0.47260582447052, + "step": 1866 + }, + { + "epoch": 0.4760326364099949, + "grad_norm": 0.43852660059928894, + "learning_rate": 1.1616384999596006e-05, + "loss": 0.4724685847759247, + "step": 1867 + }, + { + "epoch": 0.47628760836308004, + "grad_norm": 0.45562633872032166, + "learning_rate": 1.1608061524176182e-05, + "loss": 0.4831947982311249, + "step": 1868 + }, + { + "epoch": 0.4765425803161652, + "grad_norm": 0.4537625312805176, + "learning_rate": 1.1599736904957057e-05, + "loss": 0.4736064374446869, + "step": 1869 + }, + { + "epoch": 0.47679755226925036, + "grad_norm": 0.44323453307151794, + "learning_rate": 1.1591411147859859e-05, + "loss": 0.4788297414779663, + "step": 1870 + }, + { + "epoch": 0.4770525242223355, + "grad_norm": 0.4866369962692261, + "learning_rate": 1.1583084258806621e-05, + "loss": 0.46678242087364197, + "step": 1871 + }, + { + "epoch": 0.47730749617542073, + "grad_norm": 0.45373299717903137, + "learning_rate": 1.157475624372018e-05, + "loss": 0.471998393535614, + "step": 1872 + }, + { + "epoch": 0.4775624681285059, + "grad_norm": 0.4975714087486267, + "learning_rate": 1.1566427108524179e-05, + "loss": 0.46503281593322754, + "step": 1873 + }, + { + "epoch": 0.47781744008159105, + "grad_norm": 0.440778911113739, + "learning_rate": 1.1558096859143051e-05, + "loss": 0.4698757827281952, + "step": 1874 + }, + { + "epoch": 0.4780724120346762, + "grad_norm": 0.44512689113616943, + "learning_rate": 1.1549765501502028e-05, + "loss": 0.4689352214336395, + "step": 1875 + }, + { + "epoch": 0.47832738398776137, + "grad_norm": 0.4634833335876465, + "learning_rate": 1.154143304152713e-05, + "loss": 0.468364417552948, + "step": 1876 + }, + { + "epoch": 0.4785823559408465, + "grad_norm": 0.46329474449157715, + "learning_rate": 1.1533099485145156e-05, + "loss": 0.468635618686676, + "step": 1877 + }, + { + "epoch": 0.4788373278939317, + "grad_norm": 0.4551597535610199, + "learning_rate": 1.1524764838283686e-05, + "loss": 0.46521398425102234, + "step": 1878 + }, + { + "epoch": 0.47909229984701684, + "grad_norm": 0.4328210651874542, + "learning_rate": 1.1516429106871078e-05, + "loss": 0.47337955236434937, + "step": 1879 + }, + { + "epoch": 0.479347271800102, + "grad_norm": 0.4416564106941223, + "learning_rate": 1.1508092296836468e-05, + "loss": 0.4794473350048065, + "step": 1880 + }, + { + "epoch": 0.47960224375318716, + "grad_norm": 0.4334598779678345, + "learning_rate": 1.1499754414109743e-05, + "loss": 0.47388195991516113, + "step": 1881 + }, + { + "epoch": 0.4798572157062723, + "grad_norm": 0.4369765818119049, + "learning_rate": 1.1491415464621562e-05, + "loss": 0.48022717237472534, + "step": 1882 + }, + { + "epoch": 0.4801121876593575, + "grad_norm": 0.4837839901447296, + "learning_rate": 1.148307545430335e-05, + "loss": 0.46111083030700684, + "step": 1883 + }, + { + "epoch": 0.48036715961244264, + "grad_norm": 0.43741574883461, + "learning_rate": 1.1474734389087271e-05, + "loss": 0.47851133346557617, + "step": 1884 + }, + { + "epoch": 0.4806221315655278, + "grad_norm": 0.48897045850753784, + "learning_rate": 1.1466392274906253e-05, + "loss": 0.47374314069747925, + "step": 1885 + }, + { + "epoch": 0.48087710351861296, + "grad_norm": 0.46187931299209595, + "learning_rate": 1.1458049117693963e-05, + "loss": 0.4768770933151245, + "step": 1886 + }, + { + "epoch": 0.4811320754716981, + "grad_norm": 0.4495793879032135, + "learning_rate": 1.1449704923384813e-05, + "loss": 0.4840967655181885, + "step": 1887 + }, + { + "epoch": 0.4813870474247833, + "grad_norm": 0.4757740795612335, + "learning_rate": 1.1441359697913947e-05, + "loss": 0.470970094203949, + "step": 1888 + }, + { + "epoch": 0.48164201937786844, + "grad_norm": 0.4387821853160858, + "learning_rate": 1.1433013447217254e-05, + "loss": 0.47704702615737915, + "step": 1889 + }, + { + "epoch": 0.4818969913309536, + "grad_norm": 0.6305357217788696, + "learning_rate": 1.1424666177231335e-05, + "loss": 0.473327100276947, + "step": 1890 + }, + { + "epoch": 0.48215196328403875, + "grad_norm": 0.4685002267360687, + "learning_rate": 1.1416317893893537e-05, + "loss": 0.466896653175354, + "step": 1891 + }, + { + "epoch": 0.4824069352371239, + "grad_norm": 0.46780869364738464, + "learning_rate": 1.140796860314191e-05, + "loss": 0.480934739112854, + "step": 1892 + }, + { + "epoch": 0.48266190719020907, + "grad_norm": 0.4916781187057495, + "learning_rate": 1.1399618310915228e-05, + "loss": 0.4645150601863861, + "step": 1893 + }, + { + "epoch": 0.48291687914329423, + "grad_norm": 0.4845971465110779, + "learning_rate": 1.1391267023152977e-05, + "loss": 0.46976611018180847, + "step": 1894 + }, + { + "epoch": 0.4831718510963794, + "grad_norm": 0.4984472990036011, + "learning_rate": 1.1382914745795353e-05, + "loss": 0.4808133542537689, + "step": 1895 + }, + { + "epoch": 0.48342682304946455, + "grad_norm": 0.4666406512260437, + "learning_rate": 1.137456148478325e-05, + "loss": 0.4635807275772095, + "step": 1896 + }, + { + "epoch": 0.4836817950025497, + "grad_norm": 0.44348442554473877, + "learning_rate": 1.1366207246058269e-05, + "loss": 0.47462043166160583, + "step": 1897 + }, + { + "epoch": 0.48393676695563487, + "grad_norm": 0.44801104068756104, + "learning_rate": 1.1357852035562703e-05, + "loss": 0.4728727340698242, + "step": 1898 + }, + { + "epoch": 0.48419173890872, + "grad_norm": 0.5481351017951965, + "learning_rate": 1.134949585923953e-05, + "loss": 0.44794484972953796, + "step": 1899 + }, + { + "epoch": 0.4844467108618052, + "grad_norm": 0.45803800225257874, + "learning_rate": 1.1341138723032426e-05, + "loss": 0.47609731554985046, + "step": 1900 + }, + { + "epoch": 0.48470168281489034, + "grad_norm": 0.4401257038116455, + "learning_rate": 1.1332780632885745e-05, + "loss": 0.46779781579971313, + "step": 1901 + }, + { + "epoch": 0.4849566547679755, + "grad_norm": 0.4817562699317932, + "learning_rate": 1.1324421594744516e-05, + "loss": 0.46378979086875916, + "step": 1902 + }, + { + "epoch": 0.48521162672106066, + "grad_norm": 0.4679580628871918, + "learning_rate": 1.1316061614554452e-05, + "loss": 0.48464304208755493, + "step": 1903 + }, + { + "epoch": 0.4854665986741458, + "grad_norm": 0.42400920391082764, + "learning_rate": 1.130770069826192e-05, + "loss": 0.4615113139152527, + "step": 1904 + }, + { + "epoch": 0.485721570627231, + "grad_norm": 0.4422542452812195, + "learning_rate": 1.1299338851813975e-05, + "loss": 0.47650349140167236, + "step": 1905 + }, + { + "epoch": 0.48597654258031614, + "grad_norm": 0.44117411971092224, + "learning_rate": 1.1290976081158306e-05, + "loss": 0.4791657328605652, + "step": 1906 + }, + { + "epoch": 0.4862315145334013, + "grad_norm": 0.5534968972206116, + "learning_rate": 1.1282612392243286e-05, + "loss": 0.46617746353149414, + "step": 1907 + }, + { + "epoch": 0.4864864864864865, + "grad_norm": 0.44438889622688293, + "learning_rate": 1.127424779101793e-05, + "loss": 0.47839054465293884, + "step": 1908 + }, + { + "epoch": 0.4867414584395717, + "grad_norm": 0.43262767791748047, + "learning_rate": 1.1265882283431892e-05, + "loss": 0.4703233242034912, + "step": 1909 + }, + { + "epoch": 0.48699643039265683, + "grad_norm": 0.4824342131614685, + "learning_rate": 1.1257515875435488e-05, + "loss": 0.4696802794933319, + "step": 1910 + }, + { + "epoch": 0.487251402345742, + "grad_norm": 0.43984854221343994, + "learning_rate": 1.1249148572979664e-05, + "loss": 0.4753193259239197, + "step": 1911 + }, + { + "epoch": 0.48750637429882715, + "grad_norm": 0.561900794506073, + "learning_rate": 1.1240780382016006e-05, + "loss": 0.4705323576927185, + "step": 1912 + }, + { + "epoch": 0.4877613462519123, + "grad_norm": 0.459989070892334, + "learning_rate": 1.1232411308496729e-05, + "loss": 0.4775056540966034, + "step": 1913 + }, + { + "epoch": 0.48801631820499747, + "grad_norm": 0.4523221254348755, + "learning_rate": 1.122404135837468e-05, + "loss": 0.4824284017086029, + "step": 1914 + }, + { + "epoch": 0.4882712901580826, + "grad_norm": 0.4518112540245056, + "learning_rate": 1.1215670537603324e-05, + "loss": 0.4524316191673279, + "step": 1915 + }, + { + "epoch": 0.4885262621111678, + "grad_norm": 0.5053389668464661, + "learning_rate": 1.1207298852136753e-05, + "loss": 0.4648011326789856, + "step": 1916 + }, + { + "epoch": 0.48878123406425295, + "grad_norm": 0.43176308274269104, + "learning_rate": 1.1198926307929665e-05, + "loss": 0.45798033475875854, + "step": 1917 + }, + { + "epoch": 0.4890362060173381, + "grad_norm": 0.46930280327796936, + "learning_rate": 1.1190552910937378e-05, + "loss": 0.47919926047325134, + "step": 1918 + }, + { + "epoch": 0.48929117797042326, + "grad_norm": 0.4317914843559265, + "learning_rate": 1.1182178667115806e-05, + "loss": 0.4498417377471924, + "step": 1919 + }, + { + "epoch": 0.4895461499235084, + "grad_norm": 0.4929078221321106, + "learning_rate": 1.1173803582421477e-05, + "loss": 0.46412748098373413, + "step": 1920 + }, + { + "epoch": 0.4898011218765936, + "grad_norm": 0.4420813024044037, + "learning_rate": 1.116542766281151e-05, + "loss": 0.4748162627220154, + "step": 1921 + }, + { + "epoch": 0.49005609382967874, + "grad_norm": 0.4476282596588135, + "learning_rate": 1.1157050914243614e-05, + "loss": 0.4865252375602722, + "step": 1922 + }, + { + "epoch": 0.4903110657827639, + "grad_norm": 0.46605515480041504, + "learning_rate": 1.11486733426761e-05, + "loss": 0.46148931980133057, + "step": 1923 + }, + { + "epoch": 0.49056603773584906, + "grad_norm": 0.4584254026412964, + "learning_rate": 1.1140294954067853e-05, + "loss": 0.48570913076400757, + "step": 1924 + }, + { + "epoch": 0.4908210096889342, + "grad_norm": 0.4745509624481201, + "learning_rate": 1.1131915754378344e-05, + "loss": 0.4840744733810425, + "step": 1925 + }, + { + "epoch": 0.4910759816420194, + "grad_norm": 0.4451920986175537, + "learning_rate": 1.1123535749567622e-05, + "loss": 0.46334999799728394, + "step": 1926 + }, + { + "epoch": 0.49133095359510454, + "grad_norm": 0.4377116560935974, + "learning_rate": 1.1115154945596306e-05, + "loss": 0.46594923734664917, + "step": 1927 + }, + { + "epoch": 0.4915859255481897, + "grad_norm": 0.4420704245567322, + "learning_rate": 1.1106773348425581e-05, + "loss": 0.4736064672470093, + "step": 1928 + }, + { + "epoch": 0.49184089750127485, + "grad_norm": 0.4611606001853943, + "learning_rate": 1.1098390964017204e-05, + "loss": 0.4691614806652069, + "step": 1929 + }, + { + "epoch": 0.49209586945436, + "grad_norm": 0.4173363745212555, + "learning_rate": 1.1090007798333486e-05, + "loss": 0.4697965979576111, + "step": 1930 + }, + { + "epoch": 0.49235084140744517, + "grad_norm": 0.45429426431655884, + "learning_rate": 1.1081623857337297e-05, + "loss": 0.4782986342906952, + "step": 1931 + }, + { + "epoch": 0.49260581336053033, + "grad_norm": 0.44332918524742126, + "learning_rate": 1.1073239146992054e-05, + "loss": 0.4492153525352478, + "step": 1932 + }, + { + "epoch": 0.4928607853136155, + "grad_norm": 0.44081881642341614, + "learning_rate": 1.1064853673261727e-05, + "loss": 0.4710718095302582, + "step": 1933 + }, + { + "epoch": 0.49311575726670065, + "grad_norm": 0.45330432057380676, + "learning_rate": 1.105646744211082e-05, + "loss": 0.4720541834831238, + "step": 1934 + }, + { + "epoch": 0.4933707292197858, + "grad_norm": 0.45238229632377625, + "learning_rate": 1.1048080459504388e-05, + "loss": 0.46549171209335327, + "step": 1935 + }, + { + "epoch": 0.49362570117287097, + "grad_norm": 0.4571751654148102, + "learning_rate": 1.1039692731408014e-05, + "loss": 0.46449217200279236, + "step": 1936 + }, + { + "epoch": 0.4938806731259561, + "grad_norm": 0.44922831654548645, + "learning_rate": 1.1031304263787811e-05, + "loss": 0.47091251611709595, + "step": 1937 + }, + { + "epoch": 0.4941356450790413, + "grad_norm": 0.516984224319458, + "learning_rate": 1.1022915062610416e-05, + "loss": 0.45369237661361694, + "step": 1938 + }, + { + "epoch": 0.49439061703212644, + "grad_norm": 0.4570682942867279, + "learning_rate": 1.1014525133842986e-05, + "loss": 0.47135066986083984, + "step": 1939 + }, + { + "epoch": 0.4946455889852116, + "grad_norm": 0.4430196285247803, + "learning_rate": 1.100613448345321e-05, + "loss": 0.4640349745750427, + "step": 1940 + }, + { + "epoch": 0.49490056093829676, + "grad_norm": 0.45458775758743286, + "learning_rate": 1.0997743117409277e-05, + "loss": 0.4690319001674652, + "step": 1941 + }, + { + "epoch": 0.4951555328913819, + "grad_norm": 0.47576791048049927, + "learning_rate": 1.098935104167988e-05, + "loss": 0.47333425283432007, + "step": 1942 + }, + { + "epoch": 0.4954105048444671, + "grad_norm": 0.46054837107658386, + "learning_rate": 1.0980958262234235e-05, + "loss": 0.4702942371368408, + "step": 1943 + }, + { + "epoch": 0.4956654767975523, + "grad_norm": 0.4609912931919098, + "learning_rate": 1.0972564785042045e-05, + "loss": 0.47456806898117065, + "step": 1944 + }, + { + "epoch": 0.49592044875063745, + "grad_norm": 0.4505545198917389, + "learning_rate": 1.0964170616073512e-05, + "loss": 0.46712201833724976, + "step": 1945 + }, + { + "epoch": 0.4961754207037226, + "grad_norm": 0.46952521800994873, + "learning_rate": 1.0955775761299333e-05, + "loss": 0.4621439576148987, + "step": 1946 + }, + { + "epoch": 0.4964303926568078, + "grad_norm": 0.4679666757583618, + "learning_rate": 1.0947380226690686e-05, + "loss": 0.4812907576560974, + "step": 1947 + }, + { + "epoch": 0.49668536460989293, + "grad_norm": 0.43158140778541565, + "learning_rate": 1.093898401821924e-05, + "loss": 0.462116003036499, + "step": 1948 + }, + { + "epoch": 0.4969403365629781, + "grad_norm": 0.4336107075214386, + "learning_rate": 1.093058714185714e-05, + "loss": 0.4580172300338745, + "step": 1949 + }, + { + "epoch": 0.49719530851606325, + "grad_norm": 0.5117933750152588, + "learning_rate": 1.0922189603577011e-05, + "loss": 0.46445757150650024, + "step": 1950 + }, + { + "epoch": 0.4974502804691484, + "grad_norm": 0.4405916929244995, + "learning_rate": 1.0913791409351935e-05, + "loss": 0.4670753479003906, + "step": 1951 + }, + { + "epoch": 0.49770525242223357, + "grad_norm": 0.4495008587837219, + "learning_rate": 1.0905392565155477e-05, + "loss": 0.4655703902244568, + "step": 1952 + }, + { + "epoch": 0.4979602243753187, + "grad_norm": 0.45106127858161926, + "learning_rate": 1.089699307696166e-05, + "loss": 0.4770054519176483, + "step": 1953 + }, + { + "epoch": 0.4982151963284039, + "grad_norm": 0.44882631301879883, + "learning_rate": 1.0888592950744955e-05, + "loss": 0.46113085746765137, + "step": 1954 + }, + { + "epoch": 0.49847016828148905, + "grad_norm": 0.46270790696144104, + "learning_rate": 1.0880192192480291e-05, + "loss": 0.4824230670928955, + "step": 1955 + }, + { + "epoch": 0.4987251402345742, + "grad_norm": 0.4500824213027954, + "learning_rate": 1.0871790808143063e-05, + "loss": 0.46038350462913513, + "step": 1956 + }, + { + "epoch": 0.49898011218765936, + "grad_norm": 0.600124180316925, + "learning_rate": 1.086338880370909e-05, + "loss": 0.4829053282737732, + "step": 1957 + }, + { + "epoch": 0.4992350841407445, + "grad_norm": 0.435794472694397, + "learning_rate": 1.0854986185154638e-05, + "loss": 0.45988592505455017, + "step": 1958 + }, + { + "epoch": 0.4994900560938297, + "grad_norm": 0.4310831129550934, + "learning_rate": 1.0846582958456418e-05, + "loss": 0.45987948775291443, + "step": 1959 + }, + { + "epoch": 0.49974502804691484, + "grad_norm": 0.43353962898254395, + "learning_rate": 1.0838179129591562e-05, + "loss": 0.4516587257385254, + "step": 1960 + }, + { + "epoch": 0.5, + "grad_norm": 0.4458468556404114, + "learning_rate": 1.082977470453764e-05, + "loss": 0.469723641872406, + "step": 1961 + }, + { + "epoch": 0.5002549719530852, + "grad_norm": 0.4295446574687958, + "learning_rate": 1.0821369689272639e-05, + "loss": 0.4734836518764496, + "step": 1962 + }, + { + "epoch": 0.5005099439061703, + "grad_norm": 0.42044463753700256, + "learning_rate": 1.0812964089774968e-05, + "loss": 0.4678879678249359, + "step": 1963 + }, + { + "epoch": 0.5007649158592555, + "grad_norm": 0.48239123821258545, + "learning_rate": 1.0804557912023455e-05, + "loss": 0.4636920690536499, + "step": 1964 + }, + { + "epoch": 0.5010198878123406, + "grad_norm": 0.47312313318252563, + "learning_rate": 1.0796151161997334e-05, + "loss": 0.4822813868522644, + "step": 1965 + }, + { + "epoch": 0.5012748597654259, + "grad_norm": 0.5328629016876221, + "learning_rate": 1.0787743845676256e-05, + "loss": 0.45847058296203613, + "step": 1966 + }, + { + "epoch": 0.501529831718511, + "grad_norm": 0.45701542496681213, + "learning_rate": 1.0779335969040252e-05, + "loss": 0.471851110458374, + "step": 1967 + }, + { + "epoch": 0.5017848036715962, + "grad_norm": 0.45358672738075256, + "learning_rate": 1.0770927538069781e-05, + "loss": 0.4805513322353363, + "step": 1968 + }, + { + "epoch": 0.5020397756246813, + "grad_norm": 0.4443429410457611, + "learning_rate": 1.076251855874568e-05, + "loss": 0.4669516682624817, + "step": 1969 + }, + { + "epoch": 0.5022947475777665, + "grad_norm": 0.49044740200042725, + "learning_rate": 1.0754109037049174e-05, + "loss": 0.46486878395080566, + "step": 1970 + }, + { + "epoch": 0.5025497195308516, + "grad_norm": 0.44463393092155457, + "learning_rate": 1.0745698978961875e-05, + "loss": 0.45962488651275635, + "step": 1971 + }, + { + "epoch": 0.5028046914839368, + "grad_norm": 0.47817161679267883, + "learning_rate": 1.0737288390465792e-05, + "loss": 0.47821468114852905, + "step": 1972 + }, + { + "epoch": 0.5030596634370219, + "grad_norm": 0.4611988663673401, + "learning_rate": 1.0728877277543288e-05, + "loss": 0.45836731791496277, + "step": 1973 + }, + { + "epoch": 0.5033146353901071, + "grad_norm": 0.46209344267845154, + "learning_rate": 1.0720465646177111e-05, + "loss": 0.47259795665740967, + "step": 1974 + }, + { + "epoch": 0.5035696073431922, + "grad_norm": 0.4538228213787079, + "learning_rate": 1.0712053502350384e-05, + "loss": 0.47403550148010254, + "step": 1975 + }, + { + "epoch": 0.5038245792962774, + "grad_norm": 0.443917453289032, + "learning_rate": 1.0703640852046577e-05, + "loss": 0.4648395776748657, + "step": 1976 + }, + { + "epoch": 0.5040795512493625, + "grad_norm": 0.45121023058891296, + "learning_rate": 1.0695227701249537e-05, + "loss": 0.4645318388938904, + "step": 1977 + }, + { + "epoch": 0.5043345232024478, + "grad_norm": 0.47773703932762146, + "learning_rate": 1.0686814055943459e-05, + "loss": 0.46701762080192566, + "step": 1978 + }, + { + "epoch": 0.5045894951555329, + "grad_norm": 0.46860113739967346, + "learning_rate": 1.0678399922112892e-05, + "loss": 0.47811728715896606, + "step": 1979 + }, + { + "epoch": 0.5048444671086181, + "grad_norm": 0.4680320620536804, + "learning_rate": 1.066998530574273e-05, + "loss": 0.44749268889427185, + "step": 1980 + }, + { + "epoch": 0.5050994390617032, + "grad_norm": 0.45026010274887085, + "learning_rate": 1.0661570212818211e-05, + "loss": 0.4677862524986267, + "step": 1981 + }, + { + "epoch": 0.5053544110147884, + "grad_norm": 0.4540463387966156, + "learning_rate": 1.0653154649324918e-05, + "loss": 0.47464463114738464, + "step": 1982 + }, + { + "epoch": 0.5056093829678735, + "grad_norm": 0.4496227502822876, + "learning_rate": 1.0644738621248753e-05, + "loss": 0.46616873145103455, + "step": 1983 + }, + { + "epoch": 0.5058643549209587, + "grad_norm": 0.4450114667415619, + "learning_rate": 1.0636322134575972e-05, + "loss": 0.4677341878414154, + "step": 1984 + }, + { + "epoch": 0.5061193268740438, + "grad_norm": 0.4330081343650818, + "learning_rate": 1.0627905195293135e-05, + "loss": 0.47850990295410156, + "step": 1985 + }, + { + "epoch": 0.506374298827129, + "grad_norm": 0.4307883083820343, + "learning_rate": 1.0619487809387137e-05, + "loss": 0.47758373618125916, + "step": 1986 + }, + { + "epoch": 0.5066292707802141, + "grad_norm": 0.4381122887134552, + "learning_rate": 1.0611069982845183e-05, + "loss": 0.4692190885543823, + "step": 1987 + }, + { + "epoch": 0.5068842427332994, + "grad_norm": 0.43644505739212036, + "learning_rate": 1.0602651721654804e-05, + "loss": 0.46923840045928955, + "step": 1988 + }, + { + "epoch": 0.5071392146863845, + "grad_norm": 0.513249397277832, + "learning_rate": 1.0594233031803822e-05, + "loss": 0.4702669382095337, + "step": 1989 + }, + { + "epoch": 0.5073941866394697, + "grad_norm": 0.4524279832839966, + "learning_rate": 1.0585813919280377e-05, + "loss": 0.4500643014907837, + "step": 1990 + }, + { + "epoch": 0.5076491585925548, + "grad_norm": 0.4405861496925354, + "learning_rate": 1.0577394390072904e-05, + "loss": 0.47343379259109497, + "step": 1991 + }, + { + "epoch": 0.50790413054564, + "grad_norm": 0.44109389185905457, + "learning_rate": 1.056897445017014e-05, + "loss": 0.46540567278862, + "step": 1992 + }, + { + "epoch": 0.5081591024987251, + "grad_norm": 0.46960288286209106, + "learning_rate": 1.0560554105561108e-05, + "loss": 0.4597005248069763, + "step": 1993 + }, + { + "epoch": 0.5084140744518103, + "grad_norm": 0.46017128229141235, + "learning_rate": 1.055213336223512e-05, + "loss": 0.4604566693305969, + "step": 1994 + }, + { + "epoch": 0.5086690464048954, + "grad_norm": 0.43463587760925293, + "learning_rate": 1.0543712226181776e-05, + "loss": 0.4535146653652191, + "step": 1995 + }, + { + "epoch": 0.5089240183579806, + "grad_norm": 0.46322280168533325, + "learning_rate": 1.053529070339095e-05, + "loss": 0.47358083724975586, + "step": 1996 + }, + { + "epoch": 0.5091789903110658, + "grad_norm": 0.4796248972415924, + "learning_rate": 1.0526868799852797e-05, + "loss": 0.47287294268608093, + "step": 1997 + }, + { + "epoch": 0.5094339622641509, + "grad_norm": 0.5290987491607666, + "learning_rate": 1.0518446521557735e-05, + "loss": 0.46784111857414246, + "step": 1998 + }, + { + "epoch": 0.5096889342172362, + "grad_norm": 0.4367986023426056, + "learning_rate": 1.0510023874496451e-05, + "loss": 0.4657739996910095, + "step": 1999 + }, + { + "epoch": 0.5099439061703213, + "grad_norm": 0.446732759475708, + "learning_rate": 1.0501600864659902e-05, + "loss": 0.4691154956817627, + "step": 2000 + }, + { + "epoch": 0.5101988781234065, + "grad_norm": 0.4529222846031189, + "learning_rate": 1.0493177498039295e-05, + "loss": 0.46201086044311523, + "step": 2001 + }, + { + "epoch": 0.5104538500764916, + "grad_norm": 0.44185587763786316, + "learning_rate": 1.0484753780626089e-05, + "loss": 0.47200000286102295, + "step": 2002 + }, + { + "epoch": 0.5107088220295768, + "grad_norm": 0.4439525306224823, + "learning_rate": 1.0476329718412001e-05, + "loss": 0.47707054018974304, + "step": 2003 + }, + { + "epoch": 0.5109637939826619, + "grad_norm": 0.4340081512928009, + "learning_rate": 1.0467905317388987e-05, + "loss": 0.45767197012901306, + "step": 2004 + }, + { + "epoch": 0.5112187659357471, + "grad_norm": 0.4600531756877899, + "learning_rate": 1.0459480583549245e-05, + "loss": 0.4482811391353607, + "step": 2005 + }, + { + "epoch": 0.5114737378888322, + "grad_norm": 0.4530130624771118, + "learning_rate": 1.0451055522885208e-05, + "loss": 0.4777918756008148, + "step": 2006 + }, + { + "epoch": 0.5117287098419174, + "grad_norm": 0.4777997136116028, + "learning_rate": 1.044263014138955e-05, + "loss": 0.4598935544490814, + "step": 2007 + }, + { + "epoch": 0.5119836817950025, + "grad_norm": 0.42285043001174927, + "learning_rate": 1.0434204445055161e-05, + "loss": 0.4733307361602783, + "step": 2008 + }, + { + "epoch": 0.5122386537480877, + "grad_norm": 0.4836715757846832, + "learning_rate": 1.0425778439875164e-05, + "loss": 0.47103893756866455, + "step": 2009 + }, + { + "epoch": 0.5124936257011729, + "grad_norm": 0.45524367690086365, + "learning_rate": 1.04173521318429e-05, + "loss": 0.4625380039215088, + "step": 2010 + }, + { + "epoch": 0.5127485976542581, + "grad_norm": 0.44874754548072815, + "learning_rate": 1.0408925526951919e-05, + "loss": 0.46822822093963623, + "step": 2011 + }, + { + "epoch": 0.5130035696073432, + "grad_norm": 0.532166063785553, + "learning_rate": 1.0400498631195992e-05, + "loss": 0.46029651165008545, + "step": 2012 + }, + { + "epoch": 0.5132585415604284, + "grad_norm": 0.4773326814174652, + "learning_rate": 1.039207145056909e-05, + "loss": 0.45712560415267944, + "step": 2013 + }, + { + "epoch": 0.5135135135135135, + "grad_norm": 0.4635961353778839, + "learning_rate": 1.038364399106539e-05, + "loss": 0.4717424809932709, + "step": 2014 + }, + { + "epoch": 0.5137684854665987, + "grad_norm": 0.4427274763584137, + "learning_rate": 1.0375216258679267e-05, + "loss": 0.4571472108364105, + "step": 2015 + }, + { + "epoch": 0.5140234574196838, + "grad_norm": 0.4772450923919678, + "learning_rate": 1.0366788259405281e-05, + "loss": 0.46459245681762695, + "step": 2016 + }, + { + "epoch": 0.514278429372769, + "grad_norm": 0.44661223888397217, + "learning_rate": 1.0358359999238202e-05, + "loss": 0.46645474433898926, + "step": 2017 + }, + { + "epoch": 0.5145334013258541, + "grad_norm": 0.4430338442325592, + "learning_rate": 1.0349931484172964e-05, + "loss": 0.4655427634716034, + "step": 2018 + }, + { + "epoch": 0.5147883732789393, + "grad_norm": 0.4474931061267853, + "learning_rate": 1.0341502720204696e-05, + "loss": 0.4667506217956543, + "step": 2019 + }, + { + "epoch": 0.5150433452320244, + "grad_norm": 0.46259480714797974, + "learning_rate": 1.03330737133287e-05, + "loss": 0.4495689570903778, + "step": 2020 + }, + { + "epoch": 0.5152983171851097, + "grad_norm": 0.44517841935157776, + "learning_rate": 1.032464446954045e-05, + "loss": 0.47217345237731934, + "step": 2021 + }, + { + "epoch": 0.5155532891381948, + "grad_norm": 0.4635341763496399, + "learning_rate": 1.031621499483559e-05, + "loss": 0.466999351978302, + "step": 2022 + }, + { + "epoch": 0.51580826109128, + "grad_norm": 0.4462071657180786, + "learning_rate": 1.0307785295209927e-05, + "loss": 0.47211140394210815, + "step": 2023 + }, + { + "epoch": 0.5160632330443651, + "grad_norm": 0.42271414399147034, + "learning_rate": 1.0299355376659426e-05, + "loss": 0.47100910544395447, + "step": 2024 + }, + { + "epoch": 0.5163182049974503, + "grad_norm": 0.4277551472187042, + "learning_rate": 1.0290925245180214e-05, + "loss": 0.46340689063072205, + "step": 2025 + }, + { + "epoch": 0.5165731769505354, + "grad_norm": 0.4386981725692749, + "learning_rate": 1.0282494906768567e-05, + "loss": 0.46859121322631836, + "step": 2026 + }, + { + "epoch": 0.5168281489036206, + "grad_norm": 0.4549739956855774, + "learning_rate": 1.0274064367420897e-05, + "loss": 0.4723912477493286, + "step": 2027 + }, + { + "epoch": 0.5170831208567057, + "grad_norm": 0.46073293685913086, + "learning_rate": 1.026563363313378e-05, + "loss": 0.46750569343566895, + "step": 2028 + }, + { + "epoch": 0.5173380928097909, + "grad_norm": 0.4419374465942383, + "learning_rate": 1.0257202709903915e-05, + "loss": 0.46308594942092896, + "step": 2029 + }, + { + "epoch": 0.517593064762876, + "grad_norm": 0.45616763830184937, + "learning_rate": 1.024877160372814e-05, + "loss": 0.4581572413444519, + "step": 2030 + }, + { + "epoch": 0.5178480367159612, + "grad_norm": 0.5098860263824463, + "learning_rate": 1.0240340320603421e-05, + "loss": 0.4721102714538574, + "step": 2031 + }, + { + "epoch": 0.5181030086690463, + "grad_norm": 0.42570966482162476, + "learning_rate": 1.0231908866526851e-05, + "loss": 0.46494436264038086, + "step": 2032 + }, + { + "epoch": 0.5183579806221316, + "grad_norm": 0.44891291856765747, + "learning_rate": 1.0223477247495655e-05, + "loss": 0.4593876600265503, + "step": 2033 + }, + { + "epoch": 0.5186129525752168, + "grad_norm": 0.4411010444164276, + "learning_rate": 1.0215045469507152e-05, + "loss": 0.46814531087875366, + "step": 2034 + }, + { + "epoch": 0.5188679245283019, + "grad_norm": 0.4394744634628296, + "learning_rate": 1.0206613538558797e-05, + "loss": 0.4597451090812683, + "step": 2035 + }, + { + "epoch": 0.5191228964813871, + "grad_norm": 0.45414721965789795, + "learning_rate": 1.019818146064814e-05, + "loss": 0.46010684967041016, + "step": 2036 + }, + { + "epoch": 0.5193778684344722, + "grad_norm": 0.4221673905849457, + "learning_rate": 1.0189749241772843e-05, + "loss": 0.4623414874076843, + "step": 2037 + }, + { + "epoch": 0.5196328403875574, + "grad_norm": 0.45032015442848206, + "learning_rate": 1.0181316887930663e-05, + "loss": 0.45287615060806274, + "step": 2038 + }, + { + "epoch": 0.5198878123406425, + "grad_norm": 0.4528999626636505, + "learning_rate": 1.0172884405119457e-05, + "loss": 0.4615684449672699, + "step": 2039 + }, + { + "epoch": 0.5201427842937277, + "grad_norm": 0.45465999841690063, + "learning_rate": 1.016445179933717e-05, + "loss": 0.46677809953689575, + "step": 2040 + }, + { + "epoch": 0.5203977562468128, + "grad_norm": 0.45125019550323486, + "learning_rate": 1.015601907658184e-05, + "loss": 0.45892518758773804, + "step": 2041 + }, + { + "epoch": 0.520652728199898, + "grad_norm": 0.4755760133266449, + "learning_rate": 1.0147586242851586e-05, + "loss": 0.45814356207847595, + "step": 2042 + }, + { + "epoch": 0.5209077001529832, + "grad_norm": 0.44352635741233826, + "learning_rate": 1.0139153304144597e-05, + "loss": 0.46379196643829346, + "step": 2043 + }, + { + "epoch": 0.5211626721060684, + "grad_norm": 0.43896499276161194, + "learning_rate": 1.013072026645915e-05, + "loss": 0.46776318550109863, + "step": 2044 + }, + { + "epoch": 0.5214176440591535, + "grad_norm": 0.47973087430000305, + "learning_rate": 1.0122287135793582e-05, + "loss": 0.46471667289733887, + "step": 2045 + }, + { + "epoch": 0.5216726160122387, + "grad_norm": 0.4453974962234497, + "learning_rate": 1.011385391814631e-05, + "loss": 0.45457661151885986, + "step": 2046 + }, + { + "epoch": 0.5219275879653238, + "grad_norm": 0.45497700572013855, + "learning_rate": 1.0105420619515798e-05, + "loss": 0.4676205813884735, + "step": 2047 + }, + { + "epoch": 0.522182559918409, + "grad_norm": 0.4510899782180786, + "learning_rate": 1.0096987245900569e-05, + "loss": 0.47454676032066345, + "step": 2048 + }, + { + "epoch": 0.5224375318714941, + "grad_norm": 0.4445478916168213, + "learning_rate": 1.0088553803299215e-05, + "loss": 0.4578350782394409, + "step": 2049 + }, + { + "epoch": 0.5226925038245793, + "grad_norm": 0.4726911783218384, + "learning_rate": 1.0080120297710355e-05, + "loss": 0.45180433988571167, + "step": 2050 + }, + { + "epoch": 0.5229474757776644, + "grad_norm": 0.46544188261032104, + "learning_rate": 1.0071686735132669e-05, + "loss": 0.4593530297279358, + "step": 2051 + }, + { + "epoch": 0.5232024477307496, + "grad_norm": 0.44481343030929565, + "learning_rate": 1.0063253121564868e-05, + "loss": 0.45732077956199646, + "step": 2052 + }, + { + "epoch": 0.5234574196838347, + "grad_norm": 0.4890146255493164, + "learning_rate": 1.005481946300571e-05, + "loss": 0.46841156482696533, + "step": 2053 + }, + { + "epoch": 0.52371239163692, + "grad_norm": 0.46075499057769775, + "learning_rate": 1.0046385765453972e-05, + "loss": 0.4659883975982666, + "step": 2054 + }, + { + "epoch": 0.5239673635900051, + "grad_norm": 0.4498838484287262, + "learning_rate": 1.0037952034908472e-05, + "loss": 0.46704480051994324, + "step": 2055 + }, + { + "epoch": 0.5242223355430903, + "grad_norm": 0.4813586473464966, + "learning_rate": 1.0029518277368034e-05, + "loss": 0.45885729789733887, + "step": 2056 + }, + { + "epoch": 0.5244773074961754, + "grad_norm": 0.45792996883392334, + "learning_rate": 1.0021084498831522e-05, + "loss": 0.4637394845485687, + "step": 2057 + }, + { + "epoch": 0.5247322794492606, + "grad_norm": 0.4410227835178375, + "learning_rate": 1.0012650705297802e-05, + "loss": 0.4597325325012207, + "step": 2058 + }, + { + "epoch": 0.5249872514023457, + "grad_norm": 0.4623326063156128, + "learning_rate": 1.000421690276575e-05, + "loss": 0.47189098596572876, + "step": 2059 + }, + { + "epoch": 0.5252422233554309, + "grad_norm": 0.4475923776626587, + "learning_rate": 9.995783097234255e-06, + "loss": 0.46808943152427673, + "step": 2060 + }, + { + "epoch": 0.525497195308516, + "grad_norm": 0.45596396923065186, + "learning_rate": 9.987349294702203e-06, + "loss": 0.4588278532028198, + "step": 2061 + }, + { + "epoch": 0.5257521672616012, + "grad_norm": 0.46377086639404297, + "learning_rate": 9.978915501168481e-06, + "loss": 0.468145489692688, + "step": 2062 + }, + { + "epoch": 0.5260071392146863, + "grad_norm": 0.450214147567749, + "learning_rate": 9.970481722631968e-06, + "loss": 0.4631403386592865, + "step": 2063 + }, + { + "epoch": 0.5262621111677716, + "grad_norm": 0.45720988512039185, + "learning_rate": 9.962047965091531e-06, + "loss": 0.4511702060699463, + "step": 2064 + }, + { + "epoch": 0.5265170831208567, + "grad_norm": 0.4529277980327606, + "learning_rate": 9.95361423454603e-06, + "loss": 0.4667152762413025, + "step": 2065 + }, + { + "epoch": 0.5267720550739419, + "grad_norm": 0.4591916799545288, + "learning_rate": 9.945180536994295e-06, + "loss": 0.46990442276000977, + "step": 2066 + }, + { + "epoch": 0.527027027027027, + "grad_norm": 0.43610069155693054, + "learning_rate": 9.936746878435137e-06, + "loss": 0.44906729459762573, + "step": 2067 + }, + { + "epoch": 0.5272819989801122, + "grad_norm": 0.44433870911598206, + "learning_rate": 9.928313264867335e-06, + "loss": 0.45793232321739197, + "step": 2068 + }, + { + "epoch": 0.5275369709331974, + "grad_norm": 0.4354260563850403, + "learning_rate": 9.919879702289649e-06, + "loss": 0.4704725444316864, + "step": 2069 + }, + { + "epoch": 0.5277919428862825, + "grad_norm": 0.4708549678325653, + "learning_rate": 9.911446196700788e-06, + "loss": 0.4823994040489197, + "step": 2070 + }, + { + "epoch": 0.5280469148393677, + "grad_norm": 0.44031861424446106, + "learning_rate": 9.903012754099431e-06, + "loss": 0.4768982529640198, + "step": 2071 + }, + { + "epoch": 0.5283018867924528, + "grad_norm": 0.4868448078632355, + "learning_rate": 9.894579380484206e-06, + "loss": 0.460830420255661, + "step": 2072 + }, + { + "epoch": 0.528556858745538, + "grad_norm": 0.4490775167942047, + "learning_rate": 9.886146081853694e-06, + "loss": 0.4631894826889038, + "step": 2073 + }, + { + "epoch": 0.5288118306986231, + "grad_norm": 0.4440162181854248, + "learning_rate": 9.87771286420642e-06, + "loss": 0.4668932855129242, + "step": 2074 + }, + { + "epoch": 0.5290668026517084, + "grad_norm": 0.4418855607509613, + "learning_rate": 9.869279733540856e-06, + "loss": 0.46449732780456543, + "step": 2075 + }, + { + "epoch": 0.5293217746047935, + "grad_norm": 0.4457247853279114, + "learning_rate": 9.860846695855406e-06, + "loss": 0.4614427089691162, + "step": 2076 + }, + { + "epoch": 0.5295767465578787, + "grad_norm": 0.4354749619960785, + "learning_rate": 9.852413757148418e-06, + "loss": 0.4676775336265564, + "step": 2077 + }, + { + "epoch": 0.5298317185109638, + "grad_norm": 0.42488738894462585, + "learning_rate": 9.84398092341816e-06, + "loss": 0.4580889940261841, + "step": 2078 + }, + { + "epoch": 0.530086690464049, + "grad_norm": 0.45775771141052246, + "learning_rate": 9.835548200662831e-06, + "loss": 0.46174877882003784, + "step": 2079 + }, + { + "epoch": 0.5303416624171341, + "grad_norm": 0.4479270875453949, + "learning_rate": 9.827115594880545e-06, + "loss": 0.47181570529937744, + "step": 2080 + }, + { + "epoch": 0.5305966343702193, + "grad_norm": 0.4417240023612976, + "learning_rate": 9.81868311206934e-06, + "loss": 0.4601489305496216, + "step": 2081 + }, + { + "epoch": 0.5308516063233044, + "grad_norm": 0.43877771496772766, + "learning_rate": 9.81025075822716e-06, + "loss": 0.4636847972869873, + "step": 2082 + }, + { + "epoch": 0.5311065782763896, + "grad_norm": 0.4579344391822815, + "learning_rate": 9.801818539351862e-06, + "loss": 0.4594728350639343, + "step": 2083 + }, + { + "epoch": 0.5313615502294747, + "grad_norm": 0.49850475788116455, + "learning_rate": 9.793386461441205e-06, + "loss": 0.46444177627563477, + "step": 2084 + }, + { + "epoch": 0.53161652218256, + "grad_norm": 0.4226575493812561, + "learning_rate": 9.784954530492846e-06, + "loss": 0.45225656032562256, + "step": 2085 + }, + { + "epoch": 0.531871494135645, + "grad_norm": 0.4468752145767212, + "learning_rate": 9.77652275250435e-06, + "loss": 0.46698713302612305, + "step": 2086 + }, + { + "epoch": 0.5321264660887303, + "grad_norm": 0.46996641159057617, + "learning_rate": 9.76809113347315e-06, + "loss": 0.4715825915336609, + "step": 2087 + }, + { + "epoch": 0.5323814380418154, + "grad_norm": 0.4626185894012451, + "learning_rate": 9.75965967939658e-06, + "loss": 0.47345566749572754, + "step": 2088 + }, + { + "epoch": 0.5326364099949006, + "grad_norm": 0.46349525451660156, + "learning_rate": 9.751228396271863e-06, + "loss": 0.4519904851913452, + "step": 2089 + }, + { + "epoch": 0.5328913819479857, + "grad_norm": 0.4393472671508789, + "learning_rate": 9.742797290096085e-06, + "loss": 0.47511130571365356, + "step": 2090 + }, + { + "epoch": 0.5331463539010709, + "grad_norm": 0.45189473032951355, + "learning_rate": 9.73436636686622e-06, + "loss": 0.4526618421077728, + "step": 2091 + }, + { + "epoch": 0.533401325854156, + "grad_norm": 0.44831642508506775, + "learning_rate": 9.725935632579105e-06, + "loss": 0.47018808126449585, + "step": 2092 + }, + { + "epoch": 0.5336562978072412, + "grad_norm": 0.4449017345905304, + "learning_rate": 9.71750509323144e-06, + "loss": 0.463678777217865, + "step": 2093 + }, + { + "epoch": 0.5339112697603263, + "grad_norm": 0.4990766942501068, + "learning_rate": 9.709074754819788e-06, + "loss": 0.4615117013454437, + "step": 2094 + }, + { + "epoch": 0.5341662417134115, + "grad_norm": 0.4472992420196533, + "learning_rate": 9.700644623340576e-06, + "loss": 0.4790828227996826, + "step": 2095 + }, + { + "epoch": 0.5344212136664966, + "grad_norm": 0.4707857072353363, + "learning_rate": 9.692214704790078e-06, + "loss": 0.4669959545135498, + "step": 2096 + }, + { + "epoch": 0.5346761856195819, + "grad_norm": 0.47750231623649597, + "learning_rate": 9.683785005164412e-06, + "loss": 0.4698825478553772, + "step": 2097 + }, + { + "epoch": 0.534931157572667, + "grad_norm": 0.44692638516426086, + "learning_rate": 9.675355530459554e-06, + "loss": 0.4576447904109955, + "step": 2098 + }, + { + "epoch": 0.5351861295257522, + "grad_norm": 0.4795735776424408, + "learning_rate": 9.666926286671306e-06, + "loss": 0.45990800857543945, + "step": 2099 + }, + { + "epoch": 0.5354411014788373, + "grad_norm": 0.452642023563385, + "learning_rate": 9.658497279795307e-06, + "loss": 0.45492836833000183, + "step": 2100 + }, + { + "epoch": 0.5356960734319225, + "grad_norm": 0.492758572101593, + "learning_rate": 9.650068515827039e-06, + "loss": 0.4594270884990692, + "step": 2101 + }, + { + "epoch": 0.5359510453850076, + "grad_norm": 0.44014111161231995, + "learning_rate": 9.641640000761803e-06, + "loss": 0.4722544550895691, + "step": 2102 + }, + { + "epoch": 0.5362060173380928, + "grad_norm": 0.4531368911266327, + "learning_rate": 9.63321174059472e-06, + "loss": 0.4664442539215088, + "step": 2103 + }, + { + "epoch": 0.536460989291178, + "grad_norm": 0.4387378692626953, + "learning_rate": 9.624783741320738e-06, + "loss": 0.48242801427841187, + "step": 2104 + }, + { + "epoch": 0.5367159612442631, + "grad_norm": 0.46323493123054504, + "learning_rate": 9.616356008934615e-06, + "loss": 0.4569288194179535, + "step": 2105 + }, + { + "epoch": 0.5369709331973483, + "grad_norm": 0.4467400014400482, + "learning_rate": 9.607928549430913e-06, + "loss": 0.443368136882782, + "step": 2106 + }, + { + "epoch": 0.5372259051504334, + "grad_norm": 0.46812474727630615, + "learning_rate": 9.599501368804011e-06, + "loss": 0.47366127371788025, + "step": 2107 + }, + { + "epoch": 0.5374808771035187, + "grad_norm": 0.45175814628601074, + "learning_rate": 9.591074473048083e-06, + "loss": 0.4567645192146301, + "step": 2108 + }, + { + "epoch": 0.5377358490566038, + "grad_norm": 0.45011746883392334, + "learning_rate": 9.582647868157103e-06, + "loss": 0.4638264775276184, + "step": 2109 + }, + { + "epoch": 0.537990821009689, + "grad_norm": 0.46512457728385925, + "learning_rate": 9.574221560124837e-06, + "loss": 0.4629848897457123, + "step": 2110 + }, + { + "epoch": 0.5382457929627741, + "grad_norm": 0.5198569297790527, + "learning_rate": 9.565795554944844e-06, + "loss": 0.46182310581207275, + "step": 2111 + }, + { + "epoch": 0.5385007649158593, + "grad_norm": 0.41323816776275635, + "learning_rate": 9.557369858610454e-06, + "loss": 0.4664433002471924, + "step": 2112 + }, + { + "epoch": 0.5387557368689444, + "grad_norm": 0.4467845559120178, + "learning_rate": 9.548944477114794e-06, + "loss": 0.46828243136405945, + "step": 2113 + }, + { + "epoch": 0.5390107088220296, + "grad_norm": 0.4535411298274994, + "learning_rate": 9.54051941645076e-06, + "loss": 0.47404253482818604, + "step": 2114 + }, + { + "epoch": 0.5392656807751147, + "grad_norm": 0.4470507800579071, + "learning_rate": 9.532094682611018e-06, + "loss": 0.4727866053581238, + "step": 2115 + }, + { + "epoch": 0.5395206527281999, + "grad_norm": 0.4469457268714905, + "learning_rate": 9.523670281588002e-06, + "loss": 0.45999619364738464, + "step": 2116 + }, + { + "epoch": 0.539775624681285, + "grad_norm": 0.4599631428718567, + "learning_rate": 9.515246219373911e-06, + "loss": 0.465770959854126, + "step": 2117 + }, + { + "epoch": 0.5400305966343703, + "grad_norm": 0.4520121216773987, + "learning_rate": 9.50682250196071e-06, + "loss": 0.46152377128601074, + "step": 2118 + }, + { + "epoch": 0.5402855685874554, + "grad_norm": 0.4410518705844879, + "learning_rate": 9.498399135340101e-06, + "loss": 0.47138556838035583, + "step": 2119 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 0.46009910106658936, + "learning_rate": 9.489976125503552e-06, + "loss": 0.4417486786842346, + "step": 2120 + }, + { + "epoch": 0.5407955124936257, + "grad_norm": 0.4274013638496399, + "learning_rate": 9.481553478442268e-06, + "loss": 0.4546934962272644, + "step": 2121 + }, + { + "epoch": 0.5410504844467109, + "grad_norm": 0.46814054250717163, + "learning_rate": 9.473131200147205e-06, + "loss": 0.4571819305419922, + "step": 2122 + }, + { + "epoch": 0.541305456399796, + "grad_norm": 0.45113396644592285, + "learning_rate": 9.46470929660905e-06, + "loss": 0.4620974659919739, + "step": 2123 + }, + { + "epoch": 0.5415604283528812, + "grad_norm": 0.4376858174800873, + "learning_rate": 9.456287773818227e-06, + "loss": 0.45991358160972595, + "step": 2124 + }, + { + "epoch": 0.5418154003059663, + "grad_norm": 0.44803953170776367, + "learning_rate": 9.447866637764882e-06, + "loss": 0.45416638255119324, + "step": 2125 + }, + { + "epoch": 0.5420703722590515, + "grad_norm": 0.4605104327201843, + "learning_rate": 9.439445894438896e-06, + "loss": 0.4649544656276703, + "step": 2126 + }, + { + "epoch": 0.5423253442121366, + "grad_norm": 0.46057993173599243, + "learning_rate": 9.431025549829864e-06, + "loss": 0.454551637172699, + "step": 2127 + }, + { + "epoch": 0.5425803161652218, + "grad_norm": 0.5471037030220032, + "learning_rate": 9.422605609927098e-06, + "loss": 0.4616963267326355, + "step": 2128 + }, + { + "epoch": 0.542835288118307, + "grad_norm": 0.4633654057979584, + "learning_rate": 9.414186080719625e-06, + "loss": 0.4599413573741913, + "step": 2129 + }, + { + "epoch": 0.5430902600713922, + "grad_norm": 0.4843316972255707, + "learning_rate": 9.405766968196183e-06, + "loss": 0.46958720684051514, + "step": 2130 + }, + { + "epoch": 0.5433452320244773, + "grad_norm": 0.4385245442390442, + "learning_rate": 9.397348278345201e-06, + "loss": 0.4616304337978363, + "step": 2131 + }, + { + "epoch": 0.5436002039775625, + "grad_norm": 0.43418222665786743, + "learning_rate": 9.388930017154819e-06, + "loss": 0.4705386757850647, + "step": 2132 + }, + { + "epoch": 0.5438551759306476, + "grad_norm": 0.5231401920318604, + "learning_rate": 9.380512190612865e-06, + "loss": 0.46048712730407715, + "step": 2133 + }, + { + "epoch": 0.5441101478837328, + "grad_norm": 0.44810429215431213, + "learning_rate": 9.372094804706867e-06, + "loss": 0.45276516675949097, + "step": 2134 + }, + { + "epoch": 0.5443651198368179, + "grad_norm": 0.4766870439052582, + "learning_rate": 9.36367786542403e-06, + "loss": 0.45601996779441833, + "step": 2135 + }, + { + "epoch": 0.5446200917899031, + "grad_norm": 0.4559542238712311, + "learning_rate": 9.355261378751247e-06, + "loss": 0.4878440797328949, + "step": 2136 + }, + { + "epoch": 0.5448750637429882, + "grad_norm": 0.43964841961860657, + "learning_rate": 9.346845350675087e-06, + "loss": 0.4721733033657074, + "step": 2137 + }, + { + "epoch": 0.5451300356960734, + "grad_norm": 0.4341309368610382, + "learning_rate": 9.338429787181792e-06, + "loss": 0.46167072653770447, + "step": 2138 + }, + { + "epoch": 0.5453850076491585, + "grad_norm": 0.4392673075199127, + "learning_rate": 9.330014694257274e-06, + "loss": 0.45932063460350037, + "step": 2139 + }, + { + "epoch": 0.5456399796022438, + "grad_norm": 0.43223118782043457, + "learning_rate": 9.321600077887111e-06, + "loss": 0.4435594081878662, + "step": 2140 + }, + { + "epoch": 0.545894951555329, + "grad_norm": 0.4600526988506317, + "learning_rate": 9.313185944056541e-06, + "loss": 0.4624207019805908, + "step": 2141 + }, + { + "epoch": 0.5461499235084141, + "grad_norm": 0.4657096266746521, + "learning_rate": 9.304772298750463e-06, + "loss": 0.46557554602622986, + "step": 2142 + }, + { + "epoch": 0.5464048954614993, + "grad_norm": 0.43160563707351685, + "learning_rate": 9.296359147953428e-06, + "loss": 0.4483899474143982, + "step": 2143 + }, + { + "epoch": 0.5466598674145844, + "grad_norm": 0.43755489587783813, + "learning_rate": 9.287946497649621e-06, + "loss": 0.46107468008995056, + "step": 2144 + }, + { + "epoch": 0.5469148393676696, + "grad_norm": 0.45328959822654724, + "learning_rate": 9.27953435382289e-06, + "loss": 0.4532812535762787, + "step": 2145 + }, + { + "epoch": 0.5471698113207547, + "grad_norm": 0.4669122099876404, + "learning_rate": 9.271122722456715e-06, + "loss": 0.4514726996421814, + "step": 2146 + }, + { + "epoch": 0.5474247832738399, + "grad_norm": 0.42995432019233704, + "learning_rate": 9.262711609534211e-06, + "loss": 0.4527595639228821, + "step": 2147 + }, + { + "epoch": 0.547679755226925, + "grad_norm": 0.46898776292800903, + "learning_rate": 9.254301021038125e-06, + "loss": 0.4473420977592468, + "step": 2148 + }, + { + "epoch": 0.5479347271800102, + "grad_norm": 0.46627387404441833, + "learning_rate": 9.245890962950832e-06, + "loss": 0.45596832036972046, + "step": 2149 + }, + { + "epoch": 0.5481896991330953, + "grad_norm": 0.4662887752056122, + "learning_rate": 9.237481441254325e-06, + "loss": 0.4651641249656677, + "step": 2150 + }, + { + "epoch": 0.5484446710861806, + "grad_norm": 0.44494640827178955, + "learning_rate": 9.22907246193022e-06, + "loss": 0.45719462633132935, + "step": 2151 + }, + { + "epoch": 0.5486996430392657, + "grad_norm": 0.4548644721508026, + "learning_rate": 9.22066403095975e-06, + "loss": 0.4613267779350281, + "step": 2152 + }, + { + "epoch": 0.5489546149923509, + "grad_norm": 0.4366980493068695, + "learning_rate": 9.21225615432375e-06, + "loss": 0.4484979808330536, + "step": 2153 + }, + { + "epoch": 0.549209586945436, + "grad_norm": 0.4651254117488861, + "learning_rate": 9.203848838002666e-06, + "loss": 0.4629446864128113, + "step": 2154 + }, + { + "epoch": 0.5494645588985212, + "grad_norm": 0.4780799150466919, + "learning_rate": 9.195442087976547e-06, + "loss": 0.4587783217430115, + "step": 2155 + }, + { + "epoch": 0.5497195308516063, + "grad_norm": 0.4534330666065216, + "learning_rate": 9.187035910225035e-06, + "loss": 0.47006481885910034, + "step": 2156 + }, + { + "epoch": 0.5499745028046915, + "grad_norm": 0.4560837149620056, + "learning_rate": 9.178630310727366e-06, + "loss": 0.4496500492095947, + "step": 2157 + }, + { + "epoch": 0.5502294747577766, + "grad_norm": 0.4531233310699463, + "learning_rate": 9.170225295462364e-06, + "loss": 0.4603425860404968, + "step": 2158 + }, + { + "epoch": 0.5504844467108618, + "grad_norm": 0.4529958963394165, + "learning_rate": 9.16182087040844e-06, + "loss": 0.47253990173339844, + "step": 2159 + }, + { + "epoch": 0.5507394186639469, + "grad_norm": 0.4396999478340149, + "learning_rate": 9.153417041543586e-06, + "loss": 0.46572861075401306, + "step": 2160 + }, + { + "epoch": 0.5509943906170321, + "grad_norm": 0.45875558257102966, + "learning_rate": 9.145013814845363e-06, + "loss": 0.453915536403656, + "step": 2161 + }, + { + "epoch": 0.5512493625701173, + "grad_norm": 0.45509132742881775, + "learning_rate": 9.136611196290915e-06, + "loss": 0.4658550024032593, + "step": 2162 + }, + { + "epoch": 0.5515043345232025, + "grad_norm": 0.6631203889846802, + "learning_rate": 9.12820919185694e-06, + "loss": 0.4767513871192932, + "step": 2163 + }, + { + "epoch": 0.5517593064762876, + "grad_norm": 0.4426300823688507, + "learning_rate": 9.11980780751971e-06, + "loss": 0.4608106017112732, + "step": 2164 + }, + { + "epoch": 0.5520142784293728, + "grad_norm": 0.517145037651062, + "learning_rate": 9.11140704925505e-06, + "loss": 0.4579850137233734, + "step": 2165 + }, + { + "epoch": 0.5522692503824579, + "grad_norm": 0.4606268107891083, + "learning_rate": 9.103006923038344e-06, + "loss": 0.4567311704158783, + "step": 2166 + }, + { + "epoch": 0.5525242223355431, + "grad_norm": 0.46356263756752014, + "learning_rate": 9.094607434844523e-06, + "loss": 0.4575386047363281, + "step": 2167 + }, + { + "epoch": 0.5527791942886282, + "grad_norm": 0.4374256134033203, + "learning_rate": 9.086208590648066e-06, + "loss": 0.4539230763912201, + "step": 2168 + }, + { + "epoch": 0.5530341662417134, + "grad_norm": 0.439361572265625, + "learning_rate": 9.077810396422995e-06, + "loss": 0.47697627544403076, + "step": 2169 + }, + { + "epoch": 0.5532891381947985, + "grad_norm": 0.4530436396598816, + "learning_rate": 9.069412858142863e-06, + "loss": 0.4649924039840698, + "step": 2170 + }, + { + "epoch": 0.5535441101478837, + "grad_norm": 0.4412326514720917, + "learning_rate": 9.061015981780764e-06, + "loss": 0.46756356954574585, + "step": 2171 + }, + { + "epoch": 0.5537990821009688, + "grad_norm": 0.440147340297699, + "learning_rate": 9.052619773309318e-06, + "loss": 0.44795387983322144, + "step": 2172 + }, + { + "epoch": 0.5540540540540541, + "grad_norm": 0.44182321429252625, + "learning_rate": 9.044224238700672e-06, + "loss": 0.4552813470363617, + "step": 2173 + }, + { + "epoch": 0.5543090260071392, + "grad_norm": 0.544137179851532, + "learning_rate": 9.03582938392649e-06, + "loss": 0.4627988338470459, + "step": 2174 + }, + { + "epoch": 0.5545639979602244, + "grad_norm": 0.4592606723308563, + "learning_rate": 9.02743521495796e-06, + "loss": 0.4631474018096924, + "step": 2175 + }, + { + "epoch": 0.5548189699133096, + "grad_norm": 0.4471321403980255, + "learning_rate": 9.019041737765768e-06, + "loss": 0.4676770567893982, + "step": 2176 + }, + { + "epoch": 0.5550739418663947, + "grad_norm": 0.48931652307510376, + "learning_rate": 9.010648958320121e-06, + "loss": 0.4544360041618347, + "step": 2177 + }, + { + "epoch": 0.5553289138194799, + "grad_norm": 0.4572511315345764, + "learning_rate": 9.002256882590728e-06, + "loss": 0.46220335364341736, + "step": 2178 + }, + { + "epoch": 0.555583885772565, + "grad_norm": 0.43627431988716125, + "learning_rate": 8.993865516546791e-06, + "loss": 0.45467087626457214, + "step": 2179 + }, + { + "epoch": 0.5558388577256502, + "grad_norm": 0.4847698211669922, + "learning_rate": 8.985474866157014e-06, + "loss": 0.4593415856361389, + "step": 2180 + }, + { + "epoch": 0.5560938296787353, + "grad_norm": 0.5630824565887451, + "learning_rate": 8.977084937389591e-06, + "loss": 0.4610545337200165, + "step": 2181 + }, + { + "epoch": 0.5563488016318205, + "grad_norm": 0.44593602418899536, + "learning_rate": 8.968695736212194e-06, + "loss": 0.46437281370162964, + "step": 2182 + }, + { + "epoch": 0.5566037735849056, + "grad_norm": 0.45797938108444214, + "learning_rate": 8.960307268591988e-06, + "loss": 0.4567159116268158, + "step": 2183 + }, + { + "epoch": 0.5568587455379909, + "grad_norm": 0.4415434002876282, + "learning_rate": 8.951919540495613e-06, + "loss": 0.4509863555431366, + "step": 2184 + }, + { + "epoch": 0.557113717491076, + "grad_norm": 0.4689483344554901, + "learning_rate": 8.943532557889182e-06, + "loss": 0.4577532708644867, + "step": 2185 + }, + { + "epoch": 0.5573686894441612, + "grad_norm": 0.44582420587539673, + "learning_rate": 8.935146326738275e-06, + "loss": 0.44930100440979004, + "step": 2186 + }, + { + "epoch": 0.5576236613972463, + "grad_norm": 0.4341600835323334, + "learning_rate": 8.926760853007946e-06, + "loss": 0.459422767162323, + "step": 2187 + }, + { + "epoch": 0.5578786333503315, + "grad_norm": 0.4593459367752075, + "learning_rate": 8.918376142662709e-06, + "loss": 0.47731637954711914, + "step": 2188 + }, + { + "epoch": 0.5581336053034166, + "grad_norm": 0.49128293991088867, + "learning_rate": 8.909992201666516e-06, + "loss": 0.44464847445487976, + "step": 2189 + }, + { + "epoch": 0.5583885772565018, + "grad_norm": 0.4623543918132782, + "learning_rate": 8.901609035982799e-06, + "loss": 0.4652901589870453, + "step": 2190 + }, + { + "epoch": 0.5586435492095869, + "grad_norm": 0.4317094683647156, + "learning_rate": 8.893226651574422e-06, + "loss": 0.46675562858581543, + "step": 2191 + }, + { + "epoch": 0.5588985211626721, + "grad_norm": 0.43534040451049805, + "learning_rate": 8.8848450544037e-06, + "loss": 0.46179577708244324, + "step": 2192 + }, + { + "epoch": 0.5591534931157572, + "grad_norm": 0.4367939233779907, + "learning_rate": 8.876464250432381e-06, + "loss": 0.4616091251373291, + "step": 2193 + }, + { + "epoch": 0.5594084650688425, + "grad_norm": 0.4712008535861969, + "learning_rate": 8.86808424562166e-06, + "loss": 0.45911508798599243, + "step": 2194 + }, + { + "epoch": 0.5596634370219276, + "grad_norm": 0.4427134096622467, + "learning_rate": 8.859705045932152e-06, + "loss": 0.4717683792114258, + "step": 2195 + }, + { + "epoch": 0.5599184089750128, + "grad_norm": 0.43177637457847595, + "learning_rate": 8.851326657323904e-06, + "loss": 0.4647178053855896, + "step": 2196 + }, + { + "epoch": 0.5601733809280979, + "grad_norm": 0.5126813650131226, + "learning_rate": 8.842949085756389e-06, + "loss": 0.4650019407272339, + "step": 2197 + }, + { + "epoch": 0.5604283528811831, + "grad_norm": 0.4845443069934845, + "learning_rate": 8.834572337188494e-06, + "loss": 0.4632229804992676, + "step": 2198 + }, + { + "epoch": 0.5606833248342682, + "grad_norm": 0.4437451958656311, + "learning_rate": 8.826196417578523e-06, + "loss": 0.46996474266052246, + "step": 2199 + }, + { + "epoch": 0.5609382967873534, + "grad_norm": 0.4660290777683258, + "learning_rate": 8.817821332884194e-06, + "loss": 0.4538179337978363, + "step": 2200 + }, + { + "epoch": 0.5611932687404385, + "grad_norm": 0.48204517364501953, + "learning_rate": 8.809447089062625e-06, + "loss": 0.4647831320762634, + "step": 2201 + }, + { + "epoch": 0.5614482406935237, + "grad_norm": 0.4758976399898529, + "learning_rate": 8.801073692070337e-06, + "loss": 0.45339640974998474, + "step": 2202 + }, + { + "epoch": 0.5617032126466088, + "grad_norm": 0.45573097467422485, + "learning_rate": 8.792701147863249e-06, + "loss": 0.46297213435173035, + "step": 2203 + }, + { + "epoch": 0.561958184599694, + "grad_norm": 0.4524531662464142, + "learning_rate": 8.784329462396678e-06, + "loss": 0.45312342047691345, + "step": 2204 + }, + { + "epoch": 0.5622131565527791, + "grad_norm": 0.47178715467453003, + "learning_rate": 8.775958641625323e-06, + "loss": 0.4536016881465912, + "step": 2205 + }, + { + "epoch": 0.5624681285058644, + "grad_norm": 0.4592845141887665, + "learning_rate": 8.767588691503271e-06, + "loss": 0.46670663356781006, + "step": 2206 + }, + { + "epoch": 0.5627231004589495, + "grad_norm": 0.453191339969635, + "learning_rate": 8.759219617984e-06, + "loss": 0.4639744162559509, + "step": 2207 + }, + { + "epoch": 0.5629780724120347, + "grad_norm": 0.4972878694534302, + "learning_rate": 8.750851427020341e-06, + "loss": 0.46717357635498047, + "step": 2208 + }, + { + "epoch": 0.5632330443651198, + "grad_norm": 0.4795783758163452, + "learning_rate": 8.742484124564514e-06, + "loss": 0.45974206924438477, + "step": 2209 + }, + { + "epoch": 0.563488016318205, + "grad_norm": 0.43076789379119873, + "learning_rate": 8.73411771656811e-06, + "loss": 0.446453332901001, + "step": 2210 + }, + { + "epoch": 0.5637429882712902, + "grad_norm": 0.4527166783809662, + "learning_rate": 8.725752208982074e-06, + "loss": 0.47003960609436035, + "step": 2211 + }, + { + "epoch": 0.5639979602243753, + "grad_norm": 0.4806315004825592, + "learning_rate": 8.717387607756714e-06, + "loss": 0.46837812662124634, + "step": 2212 + }, + { + "epoch": 0.5642529321774605, + "grad_norm": 0.44723406434059143, + "learning_rate": 8.709023918841695e-06, + "loss": 0.4666522443294525, + "step": 2213 + }, + { + "epoch": 0.5645079041305456, + "grad_norm": 0.46346336603164673, + "learning_rate": 8.70066114818603e-06, + "loss": 0.44260507822036743, + "step": 2214 + }, + { + "epoch": 0.5647628760836308, + "grad_norm": 0.46325960755348206, + "learning_rate": 8.692299301738081e-06, + "loss": 0.46868035197257996, + "step": 2215 + }, + { + "epoch": 0.565017848036716, + "grad_norm": 0.4721716046333313, + "learning_rate": 8.683938385445552e-06, + "loss": 0.4612208604812622, + "step": 2216 + }, + { + "epoch": 0.5652728199898012, + "grad_norm": 0.4544331133365631, + "learning_rate": 8.675578405255485e-06, + "loss": 0.47177284955978394, + "step": 2217 + }, + { + "epoch": 0.5655277919428863, + "grad_norm": 0.4448011815547943, + "learning_rate": 8.667219367114257e-06, + "loss": 0.45070117712020874, + "step": 2218 + }, + { + "epoch": 0.5657827638959715, + "grad_norm": 0.43457093834877014, + "learning_rate": 8.658861276967574e-06, + "loss": 0.4596124291419983, + "step": 2219 + }, + { + "epoch": 0.5660377358490566, + "grad_norm": 0.5355086326599121, + "learning_rate": 8.650504140760474e-06, + "loss": 0.4691835343837738, + "step": 2220 + }, + { + "epoch": 0.5662927078021418, + "grad_norm": 0.4600619971752167, + "learning_rate": 8.642147964437303e-06, + "loss": 0.45449429750442505, + "step": 2221 + }, + { + "epoch": 0.5665476797552269, + "grad_norm": 0.48802223801612854, + "learning_rate": 8.633792753941733e-06, + "loss": 0.46611183881759644, + "step": 2222 + }, + { + "epoch": 0.5668026517083121, + "grad_norm": 0.4627108573913574, + "learning_rate": 8.625438515216751e-06, + "loss": 0.47132524847984314, + "step": 2223 + }, + { + "epoch": 0.5670576236613972, + "grad_norm": 0.4547104835510254, + "learning_rate": 8.617085254204648e-06, + "loss": 0.4444102644920349, + "step": 2224 + }, + { + "epoch": 0.5673125956144824, + "grad_norm": 0.4633663594722748, + "learning_rate": 8.608732976847025e-06, + "loss": 0.4674640893936157, + "step": 2225 + }, + { + "epoch": 0.5675675675675675, + "grad_norm": 0.44347018003463745, + "learning_rate": 8.600381689084775e-06, + "loss": 0.46025699377059937, + "step": 2226 + }, + { + "epoch": 0.5678225395206528, + "grad_norm": 0.4436300992965698, + "learning_rate": 8.592031396858094e-06, + "loss": 0.45944809913635254, + "step": 2227 + }, + { + "epoch": 0.5680775114737379, + "grad_norm": 0.45730459690093994, + "learning_rate": 8.583682106106467e-06, + "loss": 0.4482569098472595, + "step": 2228 + }, + { + "epoch": 0.5683324834268231, + "grad_norm": 0.4409254491329193, + "learning_rate": 8.575333822768666e-06, + "loss": 0.47202134132385254, + "step": 2229 + }, + { + "epoch": 0.5685874553799082, + "grad_norm": 0.46967801451683044, + "learning_rate": 8.566986552782747e-06, + "loss": 0.4568798542022705, + "step": 2230 + }, + { + "epoch": 0.5688424273329934, + "grad_norm": 0.4574753940105438, + "learning_rate": 8.558640302086053e-06, + "loss": 0.45431026816368103, + "step": 2231 + }, + { + "epoch": 0.5690973992860785, + "grad_norm": 0.45594164729118347, + "learning_rate": 8.550295076615189e-06, + "loss": 0.4549804627895355, + "step": 2232 + }, + { + "epoch": 0.5693523712391637, + "grad_norm": 0.4559899866580963, + "learning_rate": 8.541950882306039e-06, + "loss": 0.4558350443840027, + "step": 2233 + }, + { + "epoch": 0.5696073431922488, + "grad_norm": 0.46322691440582275, + "learning_rate": 8.533607725093749e-06, + "loss": 0.45330411195755005, + "step": 2234 + }, + { + "epoch": 0.569862315145334, + "grad_norm": 0.519966185092926, + "learning_rate": 8.52526561091273e-06, + "loss": 0.47055038809776306, + "step": 2235 + }, + { + "epoch": 0.5701172870984191, + "grad_norm": 0.4469245970249176, + "learning_rate": 8.516924545696653e-06, + "loss": 0.4623914361000061, + "step": 2236 + }, + { + "epoch": 0.5703722590515043, + "grad_norm": 0.48718273639678955, + "learning_rate": 8.50858453537844e-06, + "loss": 0.4567593038082123, + "step": 2237 + }, + { + "epoch": 0.5706272310045895, + "grad_norm": 0.4596962034702301, + "learning_rate": 8.50024558589026e-06, + "loss": 0.4584554433822632, + "step": 2238 + }, + { + "epoch": 0.5708822029576747, + "grad_norm": 0.4492977261543274, + "learning_rate": 8.491907703163537e-06, + "loss": 0.45537814497947693, + "step": 2239 + }, + { + "epoch": 0.5711371749107598, + "grad_norm": 0.45445308089256287, + "learning_rate": 8.483570893128923e-06, + "loss": 0.4571847915649414, + "step": 2240 + }, + { + "epoch": 0.571392146863845, + "grad_norm": 0.4287809133529663, + "learning_rate": 8.475235161716317e-06, + "loss": 0.4541972577571869, + "step": 2241 + }, + { + "epoch": 0.5716471188169301, + "grad_norm": 0.5193936824798584, + "learning_rate": 8.466900514854847e-06, + "loss": 0.44935959577560425, + "step": 2242 + }, + { + "epoch": 0.5719020907700153, + "grad_norm": 0.4742075204849243, + "learning_rate": 8.458566958472872e-06, + "loss": 0.45262664556503296, + "step": 2243 + }, + { + "epoch": 0.5721570627231004, + "grad_norm": 0.42204076051712036, + "learning_rate": 8.450234498497972e-06, + "loss": 0.46552449464797974, + "step": 2244 + }, + { + "epoch": 0.5724120346761856, + "grad_norm": 0.44148775935173035, + "learning_rate": 8.441903140856954e-06, + "loss": 0.4528549611568451, + "step": 2245 + }, + { + "epoch": 0.5726670066292707, + "grad_norm": 0.4461648166179657, + "learning_rate": 8.433572891475826e-06, + "loss": 0.45218607783317566, + "step": 2246 + }, + { + "epoch": 0.5729219785823559, + "grad_norm": 0.4426140785217285, + "learning_rate": 8.425243756279824e-06, + "loss": 0.46453770995140076, + "step": 2247 + }, + { + "epoch": 0.5731769505354412, + "grad_norm": 0.43527477979660034, + "learning_rate": 8.416915741193384e-06, + "loss": 0.46211183071136475, + "step": 2248 + }, + { + "epoch": 0.5734319224885263, + "grad_norm": 0.4312806725502014, + "learning_rate": 8.408588852140144e-06, + "loss": 0.47130507230758667, + "step": 2249 + }, + { + "epoch": 0.5736868944416115, + "grad_norm": 0.43725165724754333, + "learning_rate": 8.400263095042945e-06, + "loss": 0.4598652720451355, + "step": 2250 + }, + { + "epoch": 0.5739418663946966, + "grad_norm": 0.45928505063056946, + "learning_rate": 8.39193847582382e-06, + "loss": 0.444231241941452, + "step": 2251 + }, + { + "epoch": 0.5741968383477818, + "grad_norm": 0.4414902627468109, + "learning_rate": 8.383615000403999e-06, + "loss": 0.4602000117301941, + "step": 2252 + }, + { + "epoch": 0.5744518103008669, + "grad_norm": 0.4411165118217468, + "learning_rate": 8.375292674703885e-06, + "loss": 0.4520033299922943, + "step": 2253 + }, + { + "epoch": 0.5747067822539521, + "grad_norm": 0.4370625913143158, + "learning_rate": 8.366971504643074e-06, + "loss": 0.45016974210739136, + "step": 2254 + }, + { + "epoch": 0.5749617542070372, + "grad_norm": 0.4558812379837036, + "learning_rate": 8.35865149614034e-06, + "loss": 0.4486446976661682, + "step": 2255 + }, + { + "epoch": 0.5752167261601224, + "grad_norm": 0.4235498607158661, + "learning_rate": 8.350332655113626e-06, + "loss": 0.45743364095687866, + "step": 2256 + }, + { + "epoch": 0.5754716981132075, + "grad_norm": 0.4559319019317627, + "learning_rate": 8.342014987480047e-06, + "loss": 0.45270293951034546, + "step": 2257 + }, + { + "epoch": 0.5757266700662927, + "grad_norm": 0.44990503787994385, + "learning_rate": 8.333698499155886e-06, + "loss": 0.45790648460388184, + "step": 2258 + }, + { + "epoch": 0.5759816420193778, + "grad_norm": 0.4234278202056885, + "learning_rate": 8.32538319605658e-06, + "loss": 0.4859004616737366, + "step": 2259 + }, + { + "epoch": 0.5762366139724631, + "grad_norm": 0.44142118096351624, + "learning_rate": 8.31706908409673e-06, + "loss": 0.46571290493011475, + "step": 2260 + }, + { + "epoch": 0.5764915859255482, + "grad_norm": 0.4531867504119873, + "learning_rate": 8.308756169190083e-06, + "loss": 0.45635876059532166, + "step": 2261 + }, + { + "epoch": 0.5767465578786334, + "grad_norm": 0.4511576294898987, + "learning_rate": 8.300444457249544e-06, + "loss": 0.4581971764564514, + "step": 2262 + }, + { + "epoch": 0.5770015298317185, + "grad_norm": 0.43757081031799316, + "learning_rate": 8.292133954187148e-06, + "loss": 0.46797725558280945, + "step": 2263 + }, + { + "epoch": 0.5772565017848037, + "grad_norm": 0.47690480947494507, + "learning_rate": 8.283824665914097e-06, + "loss": 0.4525216519832611, + "step": 2264 + }, + { + "epoch": 0.5775114737378888, + "grad_norm": 0.44798848032951355, + "learning_rate": 8.275516598340692e-06, + "loss": 0.4591936767101288, + "step": 2265 + }, + { + "epoch": 0.577766445690974, + "grad_norm": 0.45524078607559204, + "learning_rate": 8.267209757376391e-06, + "loss": 0.46707552671432495, + "step": 2266 + }, + { + "epoch": 0.5780214176440591, + "grad_norm": 0.9517451524734497, + "learning_rate": 8.258904148929775e-06, + "loss": 0.4510538578033447, + "step": 2267 + }, + { + "epoch": 0.5782763895971443, + "grad_norm": 0.4590145945549011, + "learning_rate": 8.250599778908546e-06, + "loss": 0.46022161841392517, + "step": 2268 + }, + { + "epoch": 0.5785313615502294, + "grad_norm": 0.49070027470588684, + "learning_rate": 8.242296653219527e-06, + "loss": 0.45525944232940674, + "step": 2269 + }, + { + "epoch": 0.5787863335033147, + "grad_norm": 0.44155219197273254, + "learning_rate": 8.23399477776865e-06, + "loss": 0.45879992842674255, + "step": 2270 + }, + { + "epoch": 0.5790413054563998, + "grad_norm": 0.4478846788406372, + "learning_rate": 8.225694158460969e-06, + "loss": 0.4742863178253174, + "step": 2271 + }, + { + "epoch": 0.579296277409485, + "grad_norm": 0.4330812394618988, + "learning_rate": 8.217394801200632e-06, + "loss": 0.4589769244194031, + "step": 2272 + }, + { + "epoch": 0.5795512493625701, + "grad_norm": 0.44530272483825684, + "learning_rate": 8.209096711890897e-06, + "loss": 0.46048784255981445, + "step": 2273 + }, + { + "epoch": 0.5798062213156553, + "grad_norm": 0.44956183433532715, + "learning_rate": 8.200799896434116e-06, + "loss": 0.47708839178085327, + "step": 2274 + }, + { + "epoch": 0.5800611932687404, + "grad_norm": 0.43646925687789917, + "learning_rate": 8.192504360731741e-06, + "loss": 0.4574781358242035, + "step": 2275 + }, + { + "epoch": 0.5803161652218256, + "grad_norm": 0.4399811625480652, + "learning_rate": 8.18421011068431e-06, + "loss": 0.4625152349472046, + "step": 2276 + }, + { + "epoch": 0.5805711371749107, + "grad_norm": 0.4326459765434265, + "learning_rate": 8.175917152191447e-06, + "loss": 0.4627325236797333, + "step": 2277 + }, + { + "epoch": 0.5808261091279959, + "grad_norm": 0.4582973122596741, + "learning_rate": 8.167625491151849e-06, + "loss": 0.45126891136169434, + "step": 2278 + }, + { + "epoch": 0.581081081081081, + "grad_norm": 0.4950394928455353, + "learning_rate": 8.159335133463306e-06, + "loss": 0.45725926756858826, + "step": 2279 + }, + { + "epoch": 0.5813360530341662, + "grad_norm": 0.43344494700431824, + "learning_rate": 8.151046085022668e-06, + "loss": 0.45229387283325195, + "step": 2280 + }, + { + "epoch": 0.5815910249872513, + "grad_norm": 0.4514404833316803, + "learning_rate": 8.142758351725862e-06, + "loss": 0.4698212146759033, + "step": 2281 + }, + { + "epoch": 0.5818459969403366, + "grad_norm": 0.46291786432266235, + "learning_rate": 8.134471939467874e-06, + "loss": 0.4666634500026703, + "step": 2282 + }, + { + "epoch": 0.5821009688934218, + "grad_norm": 0.4638640582561493, + "learning_rate": 8.126186854142752e-06, + "loss": 0.4487994611263275, + "step": 2283 + }, + { + "epoch": 0.5823559408465069, + "grad_norm": 0.44678041338920593, + "learning_rate": 8.117903101643609e-06, + "loss": 0.4675861597061157, + "step": 2284 + }, + { + "epoch": 0.5826109127995921, + "grad_norm": 0.45098763704299927, + "learning_rate": 8.109620687862587e-06, + "loss": 0.46308618783950806, + "step": 2285 + }, + { + "epoch": 0.5828658847526772, + "grad_norm": 0.43600428104400635, + "learning_rate": 8.1013396186909e-06, + "loss": 0.4676024317741394, + "step": 2286 + }, + { + "epoch": 0.5831208567057624, + "grad_norm": 0.5147649645805359, + "learning_rate": 8.093059900018793e-06, + "loss": 0.4530147314071655, + "step": 2287 + }, + { + "epoch": 0.5833758286588475, + "grad_norm": 0.4465515613555908, + "learning_rate": 8.084781537735554e-06, + "loss": 0.43942782282829285, + "step": 2288 + }, + { + "epoch": 0.5836308006119327, + "grad_norm": 0.478004515171051, + "learning_rate": 8.076504537729506e-06, + "loss": 0.45939934253692627, + "step": 2289 + }, + { + "epoch": 0.5838857725650178, + "grad_norm": 0.4839225113391876, + "learning_rate": 8.068228905888005e-06, + "loss": 0.4545905292034149, + "step": 2290 + }, + { + "epoch": 0.584140744518103, + "grad_norm": 0.45244279503822327, + "learning_rate": 8.059954648097427e-06, + "loss": 0.4498525857925415, + "step": 2291 + }, + { + "epoch": 0.5843957164711882, + "grad_norm": 0.46127891540527344, + "learning_rate": 8.051681770243176e-06, + "loss": 0.4581502676010132, + "step": 2292 + }, + { + "epoch": 0.5846506884242734, + "grad_norm": 0.467270165681839, + "learning_rate": 8.043410278209673e-06, + "loss": 0.470907062292099, + "step": 2293 + }, + { + "epoch": 0.5849056603773585, + "grad_norm": 0.442678302526474, + "learning_rate": 8.035140177880358e-06, + "loss": 0.4593364894390106, + "step": 2294 + }, + { + "epoch": 0.5851606323304437, + "grad_norm": 0.4396481513977051, + "learning_rate": 8.02687147513767e-06, + "loss": 0.45607924461364746, + "step": 2295 + }, + { + "epoch": 0.5854156042835288, + "grad_norm": 0.4411541819572449, + "learning_rate": 8.018604175863072e-06, + "loss": 0.45380541682243347, + "step": 2296 + }, + { + "epoch": 0.585670576236614, + "grad_norm": 0.4444219470024109, + "learning_rate": 8.010338285937006e-06, + "loss": 0.4425385892391205, + "step": 2297 + }, + { + "epoch": 0.5859255481896991, + "grad_norm": 0.47013190388679504, + "learning_rate": 8.002073811238926e-06, + "loss": 0.46726760268211365, + "step": 2298 + }, + { + "epoch": 0.5861805201427843, + "grad_norm": 0.42536959052085876, + "learning_rate": 7.99381075764728e-06, + "loss": 0.4520610272884369, + "step": 2299 + }, + { + "epoch": 0.5864354920958694, + "grad_norm": 0.45570358633995056, + "learning_rate": 7.9855491310395e-06, + "loss": 0.4546683728694916, + "step": 2300 + }, + { + "epoch": 0.5866904640489546, + "grad_norm": 0.44183358550071716, + "learning_rate": 7.977288937292003e-06, + "loss": 0.4454396665096283, + "step": 2301 + }, + { + "epoch": 0.5869454360020397, + "grad_norm": 0.46111831068992615, + "learning_rate": 7.969030182280192e-06, + "loss": 0.4599505364894867, + "step": 2302 + }, + { + "epoch": 0.587200407955125, + "grad_norm": 0.4493691027164459, + "learning_rate": 7.960772871878441e-06, + "loss": 0.444584459066391, + "step": 2303 + }, + { + "epoch": 0.5874553799082101, + "grad_norm": 0.4369162917137146, + "learning_rate": 7.952517011960099e-06, + "loss": 0.4446493089199066, + "step": 2304 + }, + { + "epoch": 0.5877103518612953, + "grad_norm": 0.4527932405471802, + "learning_rate": 7.94426260839748e-06, + "loss": 0.4618287980556488, + "step": 2305 + }, + { + "epoch": 0.5879653238143804, + "grad_norm": 0.47581008076667786, + "learning_rate": 7.936009667061868e-06, + "loss": 0.4562625288963318, + "step": 2306 + }, + { + "epoch": 0.5882202957674656, + "grad_norm": 0.44282785058021545, + "learning_rate": 7.927758193823502e-06, + "loss": 0.46942514181137085, + "step": 2307 + }, + { + "epoch": 0.5884752677205507, + "grad_norm": 0.43578222393989563, + "learning_rate": 7.91950819455158e-06, + "loss": 0.44949501752853394, + "step": 2308 + }, + { + "epoch": 0.5887302396736359, + "grad_norm": 0.4501938819885254, + "learning_rate": 7.911259675114257e-06, + "loss": 0.4654654264450073, + "step": 2309 + }, + { + "epoch": 0.588985211626721, + "grad_norm": 0.4405452609062195, + "learning_rate": 7.903012641378613e-06, + "loss": 0.4545036256313324, + "step": 2310 + }, + { + "epoch": 0.5892401835798062, + "grad_norm": 0.5059425234794617, + "learning_rate": 7.8947670992107e-06, + "loss": 0.46662160754203796, + "step": 2311 + }, + { + "epoch": 0.5894951555328913, + "grad_norm": 0.4511828124523163, + "learning_rate": 7.88652305447549e-06, + "loss": 0.4749602675437927, + "step": 2312 + }, + { + "epoch": 0.5897501274859765, + "grad_norm": 0.4501926600933075, + "learning_rate": 7.878280513036898e-06, + "loss": 0.44702601432800293, + "step": 2313 + }, + { + "epoch": 0.5900050994390617, + "grad_norm": 0.4389598071575165, + "learning_rate": 7.870039480757767e-06, + "loss": 0.4614280164241791, + "step": 2314 + }, + { + "epoch": 0.5902600713921469, + "grad_norm": 0.44306790828704834, + "learning_rate": 7.861799963499866e-06, + "loss": 0.45800089836120605, + "step": 2315 + }, + { + "epoch": 0.590515043345232, + "grad_norm": 0.4772992432117462, + "learning_rate": 7.853561967123892e-06, + "loss": 0.47122466564178467, + "step": 2316 + }, + { + "epoch": 0.5907700152983172, + "grad_norm": 0.46096673607826233, + "learning_rate": 7.84532549748945e-06, + "loss": 0.45394206047058105, + "step": 2317 + }, + { + "epoch": 0.5910249872514024, + "grad_norm": 0.4446107745170593, + "learning_rate": 7.837090560455065e-06, + "loss": 0.4603210389614105, + "step": 2318 + }, + { + "epoch": 0.5912799592044875, + "grad_norm": 0.5345133543014526, + "learning_rate": 7.828857161878172e-06, + "loss": 0.4523944854736328, + "step": 2319 + }, + { + "epoch": 0.5915349311575727, + "grad_norm": 0.4459843635559082, + "learning_rate": 7.820625307615117e-06, + "loss": 0.4598017632961273, + "step": 2320 + }, + { + "epoch": 0.5917899031106578, + "grad_norm": 0.4400486350059509, + "learning_rate": 7.812395003521136e-06, + "loss": 0.4436286389827728, + "step": 2321 + }, + { + "epoch": 0.592044875063743, + "grad_norm": 0.47732704877853394, + "learning_rate": 7.804166255450372e-06, + "loss": 0.44466397166252136, + "step": 2322 + }, + { + "epoch": 0.5922998470168281, + "grad_norm": 0.4303358793258667, + "learning_rate": 7.795939069255853e-06, + "loss": 0.46469664573669434, + "step": 2323 + }, + { + "epoch": 0.5925548189699134, + "grad_norm": 0.4471558928489685, + "learning_rate": 7.787713450789505e-06, + "loss": 0.46260079741477966, + "step": 2324 + }, + { + "epoch": 0.5928097909229985, + "grad_norm": 0.43670353293418884, + "learning_rate": 7.77948940590213e-06, + "loss": 0.44456517696380615, + "step": 2325 + }, + { + "epoch": 0.5930647628760837, + "grad_norm": 0.4920576512813568, + "learning_rate": 7.771266940443419e-06, + "loss": 0.45686396956443787, + "step": 2326 + }, + { + "epoch": 0.5933197348291688, + "grad_norm": 0.5350610017776489, + "learning_rate": 7.763046060261931e-06, + "loss": 0.46714290976524353, + "step": 2327 + }, + { + "epoch": 0.593574706782254, + "grad_norm": 0.428666889667511, + "learning_rate": 7.754826771205111e-06, + "loss": 0.454071044921875, + "step": 2328 + }, + { + "epoch": 0.5938296787353391, + "grad_norm": 0.617946982383728, + "learning_rate": 7.746609079119255e-06, + "loss": 0.4507453143596649, + "step": 2329 + }, + { + "epoch": 0.5940846506884243, + "grad_norm": 0.46272704005241394, + "learning_rate": 7.738392989849531e-06, + "loss": 0.4464974105358124, + "step": 2330 + }, + { + "epoch": 0.5943396226415094, + "grad_norm": 0.4412076771259308, + "learning_rate": 7.73017850923997e-06, + "loss": 0.4614940285682678, + "step": 2331 + }, + { + "epoch": 0.5945945945945946, + "grad_norm": 0.4551214575767517, + "learning_rate": 7.721965643133458e-06, + "loss": 0.45627376437187195, + "step": 2332 + }, + { + "epoch": 0.5948495665476797, + "grad_norm": 0.45512428879737854, + "learning_rate": 7.713754397371729e-06, + "loss": 0.463383287191391, + "step": 2333 + }, + { + "epoch": 0.595104538500765, + "grad_norm": 0.47260189056396484, + "learning_rate": 7.705544777795368e-06, + "loss": 0.45705240964889526, + "step": 2334 + }, + { + "epoch": 0.59535951045385, + "grad_norm": 0.4583861231803894, + "learning_rate": 7.6973367902438e-06, + "loss": 0.47041448950767517, + "step": 2335 + }, + { + "epoch": 0.5956144824069353, + "grad_norm": 0.42344579100608826, + "learning_rate": 7.68913044055529e-06, + "loss": 0.4356708526611328, + "step": 2336 + }, + { + "epoch": 0.5958694543600204, + "grad_norm": 0.47228240966796875, + "learning_rate": 7.680925734566938e-06, + "loss": 0.46566927433013916, + "step": 2337 + }, + { + "epoch": 0.5961244263131056, + "grad_norm": 0.44690245389938354, + "learning_rate": 7.672722678114676e-06, + "loss": 0.46312588453292847, + "step": 2338 + }, + { + "epoch": 0.5963793982661907, + "grad_norm": 0.45382794737815857, + "learning_rate": 7.664521277033264e-06, + "loss": 0.47106119990348816, + "step": 2339 + }, + { + "epoch": 0.5966343702192759, + "grad_norm": 0.47584131360054016, + "learning_rate": 7.656321537156281e-06, + "loss": 0.4431549906730652, + "step": 2340 + }, + { + "epoch": 0.596889342172361, + "grad_norm": 0.4524940252304077, + "learning_rate": 7.648123464316133e-06, + "loss": 0.4706418216228485, + "step": 2341 + }, + { + "epoch": 0.5971443141254462, + "grad_norm": 0.4546431303024292, + "learning_rate": 7.639927064344023e-06, + "loss": 0.4647432565689087, + "step": 2342 + }, + { + "epoch": 0.5973992860785313, + "grad_norm": 0.47987425327301025, + "learning_rate": 7.631732343069976e-06, + "loss": 0.4546689987182617, + "step": 2343 + }, + { + "epoch": 0.5976542580316165, + "grad_norm": 0.45354241132736206, + "learning_rate": 7.623539306322827e-06, + "loss": 0.4678219258785248, + "step": 2344 + }, + { + "epoch": 0.5979092299847016, + "grad_norm": 0.4847342073917389, + "learning_rate": 7.615347959930206e-06, + "loss": 0.4393325746059418, + "step": 2345 + }, + { + "epoch": 0.5981642019377869, + "grad_norm": 0.4791020452976227, + "learning_rate": 7.607158309718539e-06, + "loss": 0.4542630612850189, + "step": 2346 + }, + { + "epoch": 0.598419173890872, + "grad_norm": 0.458964079618454, + "learning_rate": 7.598970361513052e-06, + "loss": 0.4545365273952484, + "step": 2347 + }, + { + "epoch": 0.5986741458439572, + "grad_norm": 0.4871685206890106, + "learning_rate": 7.590784121137755e-06, + "loss": 0.4562395513057709, + "step": 2348 + }, + { + "epoch": 0.5989291177970423, + "grad_norm": 0.46031686663627625, + "learning_rate": 7.582599594415445e-06, + "loss": 0.44557708501815796, + "step": 2349 + }, + { + "epoch": 0.5991840897501275, + "grad_norm": 0.4532771706581116, + "learning_rate": 7.5744167871677e-06, + "loss": 0.4576115608215332, + "step": 2350 + }, + { + "epoch": 0.5994390617032126, + "grad_norm": 0.43669387698173523, + "learning_rate": 7.5662357052148735e-06, + "loss": 0.46957695484161377, + "step": 2351 + }, + { + "epoch": 0.5996940336562978, + "grad_norm": 0.4384315311908722, + "learning_rate": 7.558056354376097e-06, + "loss": 0.4530205726623535, + "step": 2352 + }, + { + "epoch": 0.5999490056093829, + "grad_norm": 0.49745088815689087, + "learning_rate": 7.549878740469267e-06, + "loss": 0.45755571126937866, + "step": 2353 + }, + { + "epoch": 0.6002039775624681, + "grad_norm": 0.4505481719970703, + "learning_rate": 7.541702869311047e-06, + "loss": 0.4640122354030609, + "step": 2354 + }, + { + "epoch": 0.6004589495155533, + "grad_norm": 0.5628259778022766, + "learning_rate": 7.53352874671685e-06, + "loss": 0.4595435857772827, + "step": 2355 + }, + { + "epoch": 0.6007139214686384, + "grad_norm": 0.47558727860450745, + "learning_rate": 7.52535637850086e-06, + "loss": 0.46309083700180054, + "step": 2356 + }, + { + "epoch": 0.6009688934217237, + "grad_norm": 0.44385313987731934, + "learning_rate": 7.517185770476005e-06, + "loss": 0.4619719386100769, + "step": 2357 + }, + { + "epoch": 0.6012238653748088, + "grad_norm": 0.4565849304199219, + "learning_rate": 7.509016928453966e-06, + "loss": 0.46770864725112915, + "step": 2358 + }, + { + "epoch": 0.601478837327894, + "grad_norm": 0.4452267587184906, + "learning_rate": 7.5008498582451615e-06, + "loss": 0.4678848385810852, + "step": 2359 + }, + { + "epoch": 0.6017338092809791, + "grad_norm": 0.4667193293571472, + "learning_rate": 7.492684565658756e-06, + "loss": 0.4526401460170746, + "step": 2360 + }, + { + "epoch": 0.6019887812340643, + "grad_norm": 0.46906158328056335, + "learning_rate": 7.484521056502644e-06, + "loss": 0.4547154903411865, + "step": 2361 + }, + { + "epoch": 0.6022437531871494, + "grad_norm": 0.4575459361076355, + "learning_rate": 7.476359336583454e-06, + "loss": 0.455719530582428, + "step": 2362 + }, + { + "epoch": 0.6024987251402346, + "grad_norm": 0.45700007677078247, + "learning_rate": 7.46819941170654e-06, + "loss": 0.4421968460083008, + "step": 2363 + }, + { + "epoch": 0.6027536970933197, + "grad_norm": 0.4708902835845947, + "learning_rate": 7.4600412876759874e-06, + "loss": 0.45173487067222595, + "step": 2364 + }, + { + "epoch": 0.6030086690464049, + "grad_norm": 0.4684321880340576, + "learning_rate": 7.451884970294591e-06, + "loss": 0.4693717360496521, + "step": 2365 + }, + { + "epoch": 0.60326364099949, + "grad_norm": 0.4727350175380707, + "learning_rate": 7.443730465363865e-06, + "loss": 0.4616132378578186, + "step": 2366 + }, + { + "epoch": 0.6035186129525753, + "grad_norm": 0.43844127655029297, + "learning_rate": 7.435577778684033e-06, + "loss": 0.4775257110595703, + "step": 2367 + }, + { + "epoch": 0.6037735849056604, + "grad_norm": 0.4586241543292999, + "learning_rate": 7.427426916054025e-06, + "loss": 0.4586012363433838, + "step": 2368 + }, + { + "epoch": 0.6040285568587456, + "grad_norm": 0.4408634305000305, + "learning_rate": 7.419277883271473e-06, + "loss": 0.4525206983089447, + "step": 2369 + }, + { + "epoch": 0.6042835288118307, + "grad_norm": 0.4376678168773651, + "learning_rate": 7.41113068613271e-06, + "loss": 0.4567801356315613, + "step": 2370 + }, + { + "epoch": 0.6045385007649159, + "grad_norm": 0.4777446389198303, + "learning_rate": 7.4029853304327636e-06, + "loss": 0.4473497271537781, + "step": 2371 + }, + { + "epoch": 0.604793472718001, + "grad_norm": 0.7311729192733765, + "learning_rate": 7.394841821965345e-06, + "loss": 0.4430292546749115, + "step": 2372 + }, + { + "epoch": 0.6050484446710862, + "grad_norm": 0.4651448726654053, + "learning_rate": 7.3867001665228666e-06, + "loss": 0.45642799139022827, + "step": 2373 + }, + { + "epoch": 0.6053034166241713, + "grad_norm": 0.49951180815696716, + "learning_rate": 7.378560369896403e-06, + "loss": 0.4494733214378357, + "step": 2374 + }, + { + "epoch": 0.6055583885772565, + "grad_norm": 0.431007444858551, + "learning_rate": 7.370422437875716e-06, + "loss": 0.4578118324279785, + "step": 2375 + }, + { + "epoch": 0.6058133605303416, + "grad_norm": 0.44983765482902527, + "learning_rate": 7.362286376249248e-06, + "loss": 0.4628801941871643, + "step": 2376 + }, + { + "epoch": 0.6060683324834268, + "grad_norm": 0.44235482811927795, + "learning_rate": 7.3541521908041005e-06, + "loss": 0.453955739736557, + "step": 2377 + }, + { + "epoch": 0.606323304436512, + "grad_norm": 0.4479529857635498, + "learning_rate": 7.346019887326046e-06, + "loss": 0.4570160508155823, + "step": 2378 + }, + { + "epoch": 0.6065782763895972, + "grad_norm": 0.44753244519233704, + "learning_rate": 7.337889471599517e-06, + "loss": 0.4476889371871948, + "step": 2379 + }, + { + "epoch": 0.6068332483426823, + "grad_norm": 0.4472813904285431, + "learning_rate": 7.3297609494076e-06, + "loss": 0.4456092119216919, + "step": 2380 + }, + { + "epoch": 0.6070882202957675, + "grad_norm": 0.43575942516326904, + "learning_rate": 7.3216343265320414e-06, + "loss": 0.45226651430130005, + "step": 2381 + }, + { + "epoch": 0.6073431922488526, + "grad_norm": 0.4231815040111542, + "learning_rate": 7.313509608753231e-06, + "loss": 0.45405086874961853, + "step": 2382 + }, + { + "epoch": 0.6075981642019378, + "grad_norm": 0.4461728632450104, + "learning_rate": 7.305386801850205e-06, + "loss": 0.45775744318962097, + "step": 2383 + }, + { + "epoch": 0.6078531361550229, + "grad_norm": 0.4281957149505615, + "learning_rate": 7.29726591160064e-06, + "loss": 0.458732932806015, + "step": 2384 + }, + { + "epoch": 0.6081081081081081, + "grad_norm": 0.44112592935562134, + "learning_rate": 7.289146943780853e-06, + "loss": 0.4626714587211609, + "step": 2385 + }, + { + "epoch": 0.6083630800611932, + "grad_norm": 0.4187681972980499, + "learning_rate": 7.281029904165795e-06, + "loss": 0.45753440260887146, + "step": 2386 + }, + { + "epoch": 0.6086180520142784, + "grad_norm": 0.43930917978286743, + "learning_rate": 7.27291479852903e-06, + "loss": 0.4530356824398041, + "step": 2387 + }, + { + "epoch": 0.6088730239673635, + "grad_norm": 0.48235180974006653, + "learning_rate": 7.264801632642764e-06, + "loss": 0.4509512782096863, + "step": 2388 + }, + { + "epoch": 0.6091279959204488, + "grad_norm": 0.4398561418056488, + "learning_rate": 7.256690412277818e-06, + "loss": 0.4341862201690674, + "step": 2389 + }, + { + "epoch": 0.609382967873534, + "grad_norm": 0.429409384727478, + "learning_rate": 7.248581143203626e-06, + "loss": 0.440748393535614, + "step": 2390 + }, + { + "epoch": 0.6096379398266191, + "grad_norm": 0.4427203834056854, + "learning_rate": 7.240473831188237e-06, + "loss": 0.4571680426597595, + "step": 2391 + }, + { + "epoch": 0.6098929117797043, + "grad_norm": 0.46011874079704285, + "learning_rate": 7.232368481998309e-06, + "loss": 0.4616484045982361, + "step": 2392 + }, + { + "epoch": 0.6101478837327894, + "grad_norm": 0.43624424934387207, + "learning_rate": 7.2242651013990965e-06, + "loss": 0.4401955008506775, + "step": 2393 + }, + { + "epoch": 0.6104028556858746, + "grad_norm": 0.44239377975463867, + "learning_rate": 7.216163695154463e-06, + "loss": 0.4554689824581146, + "step": 2394 + }, + { + "epoch": 0.6106578276389597, + "grad_norm": 0.44673725962638855, + "learning_rate": 7.208064269026863e-06, + "loss": 0.43517106771469116, + "step": 2395 + }, + { + "epoch": 0.6109127995920449, + "grad_norm": 0.4484138488769531, + "learning_rate": 7.1999668287773425e-06, + "loss": 0.4602460563182831, + "step": 2396 + }, + { + "epoch": 0.61116777154513, + "grad_norm": 0.44098860025405884, + "learning_rate": 7.191871380165538e-06, + "loss": 0.45212626457214355, + "step": 2397 + }, + { + "epoch": 0.6114227434982152, + "grad_norm": 0.45807570219039917, + "learning_rate": 7.183777928949668e-06, + "loss": 0.44789016246795654, + "step": 2398 + }, + { + "epoch": 0.6116777154513003, + "grad_norm": 0.45253509283065796, + "learning_rate": 7.175686480886529e-06, + "loss": 0.45914798974990845, + "step": 2399 + }, + { + "epoch": 0.6119326874043856, + "grad_norm": 0.4360634982585907, + "learning_rate": 7.167597041731491e-06, + "loss": 0.4590366780757904, + "step": 2400 + }, + { + "epoch": 0.6121876593574707, + "grad_norm": 0.436668336391449, + "learning_rate": 7.1595096172384985e-06, + "loss": 0.4478031098842621, + "step": 2401 + }, + { + "epoch": 0.6124426313105559, + "grad_norm": 0.4420381188392639, + "learning_rate": 7.151424213160061e-06, + "loss": 0.4541397988796234, + "step": 2402 + }, + { + "epoch": 0.612697603263641, + "grad_norm": 0.42137983441352844, + "learning_rate": 7.143340835247252e-06, + "loss": 0.44909077882766724, + "step": 2403 + }, + { + "epoch": 0.6129525752167262, + "grad_norm": 0.44776588678359985, + "learning_rate": 7.135259489249706e-06, + "loss": 0.46198710799217224, + "step": 2404 + }, + { + "epoch": 0.6132075471698113, + "grad_norm": 0.4347139894962311, + "learning_rate": 7.127180180915609e-06, + "loss": 0.4604983329772949, + "step": 2405 + }, + { + "epoch": 0.6134625191228965, + "grad_norm": 0.46223214268684387, + "learning_rate": 7.119102915991693e-06, + "loss": 0.44768351316452026, + "step": 2406 + }, + { + "epoch": 0.6137174910759816, + "grad_norm": 0.4660816192626953, + "learning_rate": 7.111027700223249e-06, + "loss": 0.4439643621444702, + "step": 2407 + }, + { + "epoch": 0.6139724630290668, + "grad_norm": 0.4395486116409302, + "learning_rate": 7.102954539354097e-06, + "loss": 0.4600735306739807, + "step": 2408 + }, + { + "epoch": 0.6142274349821519, + "grad_norm": 0.4583393335342407, + "learning_rate": 7.094883439126607e-06, + "loss": 0.44173431396484375, + "step": 2409 + }, + { + "epoch": 0.6144824069352371, + "grad_norm": 0.43538230657577515, + "learning_rate": 7.086814405281677e-06, + "loss": 0.4389159083366394, + "step": 2410 + }, + { + "epoch": 0.6147373788883223, + "grad_norm": 0.44855111837387085, + "learning_rate": 7.078747443558736e-06, + "loss": 0.4594293534755707, + "step": 2411 + }, + { + "epoch": 0.6149923508414075, + "grad_norm": 0.4412626624107361, + "learning_rate": 7.070682559695737e-06, + "loss": 0.4607057571411133, + "step": 2412 + }, + { + "epoch": 0.6152473227944926, + "grad_norm": 0.4448585510253906, + "learning_rate": 7.062619759429158e-06, + "loss": 0.4644583463668823, + "step": 2413 + }, + { + "epoch": 0.6155022947475778, + "grad_norm": 0.4369284510612488, + "learning_rate": 7.054559048493996e-06, + "loss": 0.450400173664093, + "step": 2414 + }, + { + "epoch": 0.6157572667006629, + "grad_norm": 0.4468177556991577, + "learning_rate": 7.046500432623759e-06, + "loss": 0.4551388919353485, + "step": 2415 + }, + { + "epoch": 0.6160122386537481, + "grad_norm": 0.4235501289367676, + "learning_rate": 7.038443917550464e-06, + "loss": 0.45600685477256775, + "step": 2416 + }, + { + "epoch": 0.6162672106068332, + "grad_norm": 0.4289674460887909, + "learning_rate": 7.0303895090046405e-06, + "loss": 0.459359735250473, + "step": 2417 + }, + { + "epoch": 0.6165221825599184, + "grad_norm": 0.4907114505767822, + "learning_rate": 7.022337212715318e-06, + "loss": 0.4645995795726776, + "step": 2418 + }, + { + "epoch": 0.6167771545130035, + "grad_norm": 0.4563983380794525, + "learning_rate": 7.014287034410009e-06, + "loss": 0.4514213502407074, + "step": 2419 + }, + { + "epoch": 0.6170321264660887, + "grad_norm": 0.44452720880508423, + "learning_rate": 7.006238979814735e-06, + "loss": 0.47157543897628784, + "step": 2420 + }, + { + "epoch": 0.6172870984191738, + "grad_norm": 0.4489119052886963, + "learning_rate": 6.998193054654007e-06, + "loss": 0.4580475687980652, + "step": 2421 + }, + { + "epoch": 0.617542070372259, + "grad_norm": 0.4419918358325958, + "learning_rate": 6.990149264650814e-06, + "loss": 0.43857941031455994, + "step": 2422 + }, + { + "epoch": 0.6177970423253442, + "grad_norm": 0.42903104424476624, + "learning_rate": 6.982107615526631e-06, + "loss": 0.46096014976501465, + "step": 2423 + }, + { + "epoch": 0.6180520142784294, + "grad_norm": 0.4421437978744507, + "learning_rate": 6.97406811300141e-06, + "loss": 0.44633787870407104, + "step": 2424 + }, + { + "epoch": 0.6183069862315146, + "grad_norm": 0.44374901056289673, + "learning_rate": 6.966030762793569e-06, + "loss": 0.46367567777633667, + "step": 2425 + }, + { + "epoch": 0.6185619581845997, + "grad_norm": 0.4307388365268707, + "learning_rate": 6.9579955706200086e-06, + "loss": 0.4521704912185669, + "step": 2426 + }, + { + "epoch": 0.6188169301376849, + "grad_norm": 0.4368211627006531, + "learning_rate": 6.949962542196081e-06, + "loss": 0.4489135146141052, + "step": 2427 + }, + { + "epoch": 0.61907190209077, + "grad_norm": 0.4382425844669342, + "learning_rate": 6.941931683235603e-06, + "loss": 0.45526647567749023, + "step": 2428 + }, + { + "epoch": 0.6193268740438552, + "grad_norm": 0.448438823223114, + "learning_rate": 6.933902999450859e-06, + "loss": 0.45010483264923096, + "step": 2429 + }, + { + "epoch": 0.6195818459969403, + "grad_norm": 0.43156129121780396, + "learning_rate": 6.925876496552572e-06, + "loss": 0.4513978660106659, + "step": 2430 + }, + { + "epoch": 0.6198368179500255, + "grad_norm": 0.4507652223110199, + "learning_rate": 6.917852180249922e-06, + "loss": 0.458511084318161, + "step": 2431 + }, + { + "epoch": 0.6200917899031106, + "grad_norm": 0.43145832419395447, + "learning_rate": 6.909830056250527e-06, + "loss": 0.45626115798950195, + "step": 2432 + }, + { + "epoch": 0.6203467618561959, + "grad_norm": 0.4541116952896118, + "learning_rate": 6.901810130260452e-06, + "loss": 0.45203861594200134, + "step": 2433 + }, + { + "epoch": 0.620601733809281, + "grad_norm": 0.4439984858036041, + "learning_rate": 6.8937924079841964e-06, + "loss": 0.44320011138916016, + "step": 2434 + }, + { + "epoch": 0.6208567057623662, + "grad_norm": 0.4731787443161011, + "learning_rate": 6.885776895124692e-06, + "loss": 0.4608490467071533, + "step": 2435 + }, + { + "epoch": 0.6211116777154513, + "grad_norm": 0.4167715907096863, + "learning_rate": 6.877763597383298e-06, + "loss": 0.46320483088493347, + "step": 2436 + }, + { + "epoch": 0.6213666496685365, + "grad_norm": 0.45172250270843506, + "learning_rate": 6.869752520459803e-06, + "loss": 0.4560050368309021, + "step": 2437 + }, + { + "epoch": 0.6216216216216216, + "grad_norm": 0.4521925747394562, + "learning_rate": 6.861743670052404e-06, + "loss": 0.4454156756401062, + "step": 2438 + }, + { + "epoch": 0.6218765935747068, + "grad_norm": 0.44286608695983887, + "learning_rate": 6.853737051857729e-06, + "loss": 0.4496470093727112, + "step": 2439 + }, + { + "epoch": 0.6221315655277919, + "grad_norm": 0.4602040946483612, + "learning_rate": 6.845732671570806e-06, + "loss": 0.4612855911254883, + "step": 2440 + }, + { + "epoch": 0.6223865374808771, + "grad_norm": 0.45351946353912354, + "learning_rate": 6.837730534885084e-06, + "loss": 0.45776110887527466, + "step": 2441 + }, + { + "epoch": 0.6226415094339622, + "grad_norm": 0.4474753439426422, + "learning_rate": 6.829730647492404e-06, + "loss": 0.45600712299346924, + "step": 2442 + }, + { + "epoch": 0.6228964813870475, + "grad_norm": 0.4500572681427002, + "learning_rate": 6.821733015083016e-06, + "loss": 0.4475374221801758, + "step": 2443 + }, + { + "epoch": 0.6231514533401326, + "grad_norm": 0.43536829948425293, + "learning_rate": 6.813737643345556e-06, + "loss": 0.45218324661254883, + "step": 2444 + }, + { + "epoch": 0.6234064252932178, + "grad_norm": 0.4450026750564575, + "learning_rate": 6.805744537967061e-06, + "loss": 0.4560825824737549, + "step": 2445 + }, + { + "epoch": 0.6236613972463029, + "grad_norm": 0.5039249658584595, + "learning_rate": 6.797753704632953e-06, + "loss": 0.4593825340270996, + "step": 2446 + }, + { + "epoch": 0.6239163691993881, + "grad_norm": 0.4471895098686218, + "learning_rate": 6.789765149027039e-06, + "loss": 0.4518852233886719, + "step": 2447 + }, + { + "epoch": 0.6241713411524732, + "grad_norm": 0.44482189416885376, + "learning_rate": 6.781778876831502e-06, + "loss": 0.4477294087409973, + "step": 2448 + }, + { + "epoch": 0.6244263131055584, + "grad_norm": 0.43780866265296936, + "learning_rate": 6.773794893726904e-06, + "loss": 0.46982261538505554, + "step": 2449 + }, + { + "epoch": 0.6246812850586435, + "grad_norm": 0.46241095662117004, + "learning_rate": 6.7658132053921864e-06, + "loss": 0.4516867399215698, + "step": 2450 + }, + { + "epoch": 0.6249362570117287, + "grad_norm": 0.45196449756622314, + "learning_rate": 6.7578338175046394e-06, + "loss": 0.45734354853630066, + "step": 2451 + }, + { + "epoch": 0.6251912289648138, + "grad_norm": 0.44736403226852417, + "learning_rate": 6.749856735739929e-06, + "loss": 0.45609256625175476, + "step": 2452 + }, + { + "epoch": 0.625446200917899, + "grad_norm": 0.4608820378780365, + "learning_rate": 6.741881965772086e-06, + "loss": 0.45859766006469727, + "step": 2453 + }, + { + "epoch": 0.6257011728709841, + "grad_norm": 0.4258270561695099, + "learning_rate": 6.733909513273487e-06, + "loss": 0.44442683458328247, + "step": 2454 + }, + { + "epoch": 0.6259561448240694, + "grad_norm": 0.4504850208759308, + "learning_rate": 6.725939383914864e-06, + "loss": 0.45771652460098267, + "step": 2455 + }, + { + "epoch": 0.6262111167771545, + "grad_norm": 0.44868308305740356, + "learning_rate": 6.717971583365299e-06, + "loss": 0.4601963460445404, + "step": 2456 + }, + { + "epoch": 0.6264660887302397, + "grad_norm": 0.46132561564445496, + "learning_rate": 6.71000611729221e-06, + "loss": 0.45538172125816345, + "step": 2457 + }, + { + "epoch": 0.6267210606833248, + "grad_norm": 0.4325001835823059, + "learning_rate": 6.702042991361361e-06, + "loss": 0.45820948481559753, + "step": 2458 + }, + { + "epoch": 0.62697603263641, + "grad_norm": 0.4585916996002197, + "learning_rate": 6.694082211236852e-06, + "loss": 0.4583401679992676, + "step": 2459 + }, + { + "epoch": 0.6272310045894951, + "grad_norm": 0.4335605502128601, + "learning_rate": 6.686123782581109e-06, + "loss": 0.44409066438674927, + "step": 2460 + }, + { + "epoch": 0.6274859765425803, + "grad_norm": 0.44804811477661133, + "learning_rate": 6.6781677110548904e-06, + "loss": 0.45734381675720215, + "step": 2461 + }, + { + "epoch": 0.6277409484956655, + "grad_norm": 0.4423656761646271, + "learning_rate": 6.670214002317279e-06, + "loss": 0.44959601759910583, + "step": 2462 + }, + { + "epoch": 0.6279959204487506, + "grad_norm": 0.45429491996765137, + "learning_rate": 6.6622626620256744e-06, + "loss": 0.4591550827026367, + "step": 2463 + }, + { + "epoch": 0.6282508924018358, + "grad_norm": 0.44495442509651184, + "learning_rate": 6.654313695835784e-06, + "loss": 0.4539817273616791, + "step": 2464 + }, + { + "epoch": 0.628505864354921, + "grad_norm": 0.43526822328567505, + "learning_rate": 6.6463671094016414e-06, + "loss": 0.46003812551498413, + "step": 2465 + }, + { + "epoch": 0.6287608363080062, + "grad_norm": 0.45048588514328003, + "learning_rate": 6.638422908375577e-06, + "loss": 0.4498358368873596, + "step": 2466 + }, + { + "epoch": 0.6290158082610913, + "grad_norm": 0.4301253855228424, + "learning_rate": 6.630481098408228e-06, + "loss": 0.46457192301750183, + "step": 2467 + }, + { + "epoch": 0.6292707802141765, + "grad_norm": 0.4593249261379242, + "learning_rate": 6.6225416851485315e-06, + "loss": 0.45766276121139526, + "step": 2468 + }, + { + "epoch": 0.6295257521672616, + "grad_norm": 0.47466710209846497, + "learning_rate": 6.6146046742437184e-06, + "loss": 0.4411028325557709, + "step": 2469 + }, + { + "epoch": 0.6297807241203468, + "grad_norm": 0.46918758749961853, + "learning_rate": 6.6066700713393106e-06, + "loss": 0.449954628944397, + "step": 2470 + }, + { + "epoch": 0.6300356960734319, + "grad_norm": 0.45072171092033386, + "learning_rate": 6.598737882079118e-06, + "loss": 0.4382023811340332, + "step": 2471 + }, + { + "epoch": 0.6302906680265171, + "grad_norm": 0.43589165806770325, + "learning_rate": 6.590808112105232e-06, + "loss": 0.4397895336151123, + "step": 2472 + }, + { + "epoch": 0.6305456399796022, + "grad_norm": 0.44526374340057373, + "learning_rate": 6.5828807670580255e-06, + "loss": 0.4655776023864746, + "step": 2473 + }, + { + "epoch": 0.6308006119326874, + "grad_norm": 0.432576984167099, + "learning_rate": 6.5749558525761485e-06, + "loss": 0.4525606334209442, + "step": 2474 + }, + { + "epoch": 0.6310555838857725, + "grad_norm": 0.44795534014701843, + "learning_rate": 6.567033374296521e-06, + "loss": 0.4457744061946869, + "step": 2475 + }, + { + "epoch": 0.6313105558388578, + "grad_norm": 0.46786946058273315, + "learning_rate": 6.5591133378543174e-06, + "loss": 0.4649132490158081, + "step": 2476 + }, + { + "epoch": 0.6315655277919429, + "grad_norm": 0.43163734674453735, + "learning_rate": 6.551195748882997e-06, + "loss": 0.4570130705833435, + "step": 2477 + }, + { + "epoch": 0.6318204997450281, + "grad_norm": 0.47148701548576355, + "learning_rate": 6.5432806130142644e-06, + "loss": 0.45167526602745056, + "step": 2478 + }, + { + "epoch": 0.6320754716981132, + "grad_norm": 0.44035637378692627, + "learning_rate": 6.535367935878084e-06, + "loss": 0.4530208706855774, + "step": 2479 + }, + { + "epoch": 0.6323304436511984, + "grad_norm": 0.4428682029247284, + "learning_rate": 6.527457723102668e-06, + "loss": 0.44609448313713074, + "step": 2480 + }, + { + "epoch": 0.6325854156042835, + "grad_norm": 0.4493977427482605, + "learning_rate": 6.519549980314477e-06, + "loss": 0.4558190703392029, + "step": 2481 + }, + { + "epoch": 0.6328403875573687, + "grad_norm": 0.4617939889431, + "learning_rate": 6.511644713138221e-06, + "loss": 0.45700210332870483, + "step": 2482 + }, + { + "epoch": 0.6330953595104538, + "grad_norm": 0.42630910873413086, + "learning_rate": 6.503741927196836e-06, + "loss": 0.4472409188747406, + "step": 2483 + }, + { + "epoch": 0.633350331463539, + "grad_norm": 0.4318679869174957, + "learning_rate": 6.495841628111505e-06, + "loss": 0.4415430426597595, + "step": 2484 + }, + { + "epoch": 0.6336053034166241, + "grad_norm": 0.45561352372169495, + "learning_rate": 6.487943821501632e-06, + "loss": 0.46478819847106934, + "step": 2485 + }, + { + "epoch": 0.6338602753697093, + "grad_norm": 0.45272183418273926, + "learning_rate": 6.480048512984863e-06, + "loss": 0.45466798543930054, + "step": 2486 + }, + { + "epoch": 0.6341152473227945, + "grad_norm": 0.4237247407436371, + "learning_rate": 6.472155708177052e-06, + "loss": 0.4517762362957001, + "step": 2487 + }, + { + "epoch": 0.6343702192758797, + "grad_norm": 0.4491550922393799, + "learning_rate": 6.464265412692281e-06, + "loss": 0.4577600955963135, + "step": 2488 + }, + { + "epoch": 0.6346251912289648, + "grad_norm": 0.4343549311161041, + "learning_rate": 6.456377632142842e-06, + "loss": 0.460031121969223, + "step": 2489 + }, + { + "epoch": 0.63488016318205, + "grad_norm": 0.43676286935806274, + "learning_rate": 6.44849237213924e-06, + "loss": 0.4529508352279663, + "step": 2490 + }, + { + "epoch": 0.6351351351351351, + "grad_norm": 0.4376670718193054, + "learning_rate": 6.4406096382901895e-06, + "loss": 0.4492360055446625, + "step": 2491 + }, + { + "epoch": 0.6353901070882203, + "grad_norm": 0.44926509261131287, + "learning_rate": 6.432729436202605e-06, + "loss": 0.45888346433639526, + "step": 2492 + }, + { + "epoch": 0.6356450790413054, + "grad_norm": 0.4386760890483856, + "learning_rate": 6.4248517714816e-06, + "loss": 0.44898495078086853, + "step": 2493 + }, + { + "epoch": 0.6359000509943906, + "grad_norm": 0.4315982758998871, + "learning_rate": 6.4169766497304906e-06, + "loss": 0.4513634443283081, + "step": 2494 + }, + { + "epoch": 0.6361550229474757, + "grad_norm": 0.4623962938785553, + "learning_rate": 6.4091040765507715e-06, + "loss": 0.4590742588043213, + "step": 2495 + }, + { + "epoch": 0.6364099949005609, + "grad_norm": 0.4697008728981018, + "learning_rate": 6.4012340575421316e-06, + "loss": 0.4546396732330322, + "step": 2496 + }, + { + "epoch": 0.6366649668536462, + "grad_norm": 0.4347667098045349, + "learning_rate": 6.3933665983024465e-06, + "loss": 0.45000773668289185, + "step": 2497 + }, + { + "epoch": 0.6369199388067313, + "grad_norm": 0.4588465392589569, + "learning_rate": 6.3855017044277655e-06, + "loss": 0.45052796602249146, + "step": 2498 + }, + { + "epoch": 0.6371749107598165, + "grad_norm": 0.43070754408836365, + "learning_rate": 6.377639381512315e-06, + "loss": 0.45954522490501404, + "step": 2499 + }, + { + "epoch": 0.6374298827129016, + "grad_norm": 0.43200400471687317, + "learning_rate": 6.369779635148492e-06, + "loss": 0.45716235041618347, + "step": 2500 + }, + { + "epoch": 0.6376848546659868, + "grad_norm": 0.44875460863113403, + "learning_rate": 6.3619224709268645e-06, + "loss": 0.45695194602012634, + "step": 2501 + }, + { + "epoch": 0.6379398266190719, + "grad_norm": 0.4918175935745239, + "learning_rate": 6.3540678944361556e-06, + "loss": 0.4504631459712982, + "step": 2502 + }, + { + "epoch": 0.6381947985721571, + "grad_norm": 0.44776660203933716, + "learning_rate": 6.346215911263257e-06, + "loss": 0.4516891837120056, + "step": 2503 + }, + { + "epoch": 0.6384497705252422, + "grad_norm": 0.4455452561378479, + "learning_rate": 6.33836652699321e-06, + "loss": 0.4505302906036377, + "step": 2504 + }, + { + "epoch": 0.6387047424783274, + "grad_norm": 0.4366051256656647, + "learning_rate": 6.3305197472092065e-06, + "loss": 0.4543272852897644, + "step": 2505 + }, + { + "epoch": 0.6389597144314125, + "grad_norm": 0.4307226240634918, + "learning_rate": 6.322675577492594e-06, + "loss": 0.4597504138946533, + "step": 2506 + }, + { + "epoch": 0.6392146863844977, + "grad_norm": 0.45603105425834656, + "learning_rate": 6.314834023422859e-06, + "loss": 0.44111618399620056, + "step": 2507 + }, + { + "epoch": 0.6394696583375828, + "grad_norm": 0.42073628306388855, + "learning_rate": 6.306995090577617e-06, + "loss": 0.4541438817977905, + "step": 2508 + }, + { + "epoch": 0.6397246302906681, + "grad_norm": 0.45000380277633667, + "learning_rate": 6.299158784532635e-06, + "loss": 0.45004022121429443, + "step": 2509 + }, + { + "epoch": 0.6399796022437532, + "grad_norm": 0.42614609003067017, + "learning_rate": 6.291325110861805e-06, + "loss": 0.4429459571838379, + "step": 2510 + }, + { + "epoch": 0.6402345741968384, + "grad_norm": 0.4538845717906952, + "learning_rate": 6.283494075137147e-06, + "loss": 0.46047067642211914, + "step": 2511 + }, + { + "epoch": 0.6404895461499235, + "grad_norm": 0.42154061794281006, + "learning_rate": 6.2756656829288035e-06, + "loss": 0.45054006576538086, + "step": 2512 + }, + { + "epoch": 0.6407445181030087, + "grad_norm": 0.44144386053085327, + "learning_rate": 6.267839939805036e-06, + "loss": 0.45737284421920776, + "step": 2513 + }, + { + "epoch": 0.6409994900560938, + "grad_norm": 0.4459270238876343, + "learning_rate": 6.260016851332227e-06, + "loss": 0.44330281019210815, + "step": 2514 + }, + { + "epoch": 0.641254462009179, + "grad_norm": 0.43176722526550293, + "learning_rate": 6.252196423074864e-06, + "loss": 0.4518471360206604, + "step": 2515 + }, + { + "epoch": 0.6415094339622641, + "grad_norm": 0.4171527922153473, + "learning_rate": 6.244378660595544e-06, + "loss": 0.44375455379486084, + "step": 2516 + }, + { + "epoch": 0.6417644059153493, + "grad_norm": 0.4783954322338104, + "learning_rate": 6.236563569454969e-06, + "loss": 0.45610353350639343, + "step": 2517 + }, + { + "epoch": 0.6420193778684344, + "grad_norm": 0.4581439197063446, + "learning_rate": 6.228751155211946e-06, + "loss": 0.45231878757476807, + "step": 2518 + }, + { + "epoch": 0.6422743498215197, + "grad_norm": 0.4603917598724365, + "learning_rate": 6.22094142342337e-06, + "loss": 0.4513261914253235, + "step": 2519 + }, + { + "epoch": 0.6425293217746048, + "grad_norm": 0.4171881675720215, + "learning_rate": 6.213134379644235e-06, + "loss": 0.45569711923599243, + "step": 2520 + }, + { + "epoch": 0.64278429372769, + "grad_norm": 0.438123881816864, + "learning_rate": 6.2053300294276094e-06, + "loss": 0.44259113073349, + "step": 2521 + }, + { + "epoch": 0.6430392656807751, + "grad_norm": 0.4275990128517151, + "learning_rate": 6.197528378324664e-06, + "loss": 0.4489068388938904, + "step": 2522 + }, + { + "epoch": 0.6432942376338603, + "grad_norm": 0.44643691182136536, + "learning_rate": 6.1897294318846394e-06, + "loss": 0.45738035440444946, + "step": 2523 + }, + { + "epoch": 0.6435492095869454, + "grad_norm": 0.4457843601703644, + "learning_rate": 6.1819331956548525e-06, + "loss": 0.4524329602718353, + "step": 2524 + }, + { + "epoch": 0.6438041815400306, + "grad_norm": 0.44476771354675293, + "learning_rate": 6.174139675180695e-06, + "loss": 0.43885207176208496, + "step": 2525 + }, + { + "epoch": 0.6440591534931157, + "grad_norm": 0.4469187557697296, + "learning_rate": 6.1663488760056256e-06, + "loss": 0.4523865580558777, + "step": 2526 + }, + { + "epoch": 0.6443141254462009, + "grad_norm": 0.4615035951137543, + "learning_rate": 6.158560803671168e-06, + "loss": 0.4440152049064636, + "step": 2527 + }, + { + "epoch": 0.644569097399286, + "grad_norm": 0.43872007727622986, + "learning_rate": 6.150775463716905e-06, + "loss": 0.4510747194290161, + "step": 2528 + }, + { + "epoch": 0.6448240693523712, + "grad_norm": 0.45650118589401245, + "learning_rate": 6.142992861680477e-06, + "loss": 0.4303828477859497, + "step": 2529 + }, + { + "epoch": 0.6450790413054563, + "grad_norm": 0.4566303789615631, + "learning_rate": 6.13521300309758e-06, + "loss": 0.4519597291946411, + "step": 2530 + }, + { + "epoch": 0.6453340132585416, + "grad_norm": 0.47050338983535767, + "learning_rate": 6.127435893501951e-06, + "loss": 0.4555937647819519, + "step": 2531 + }, + { + "epoch": 0.6455889852116268, + "grad_norm": 0.4484924077987671, + "learning_rate": 6.119661538425381e-06, + "loss": 0.44870179891586304, + "step": 2532 + }, + { + "epoch": 0.6458439571647119, + "grad_norm": 0.45548173785209656, + "learning_rate": 6.111889943397695e-06, + "loss": 0.441594660282135, + "step": 2533 + }, + { + "epoch": 0.6460989291177971, + "grad_norm": 0.44344255328178406, + "learning_rate": 6.1041211139467545e-06, + "loss": 0.45279091596603394, + "step": 2534 + }, + { + "epoch": 0.6463539010708822, + "grad_norm": 0.4390721619129181, + "learning_rate": 6.096355055598458e-06, + "loss": 0.45674705505371094, + "step": 2535 + }, + { + "epoch": 0.6466088730239674, + "grad_norm": 0.44609135389328003, + "learning_rate": 6.088591773876731e-06, + "loss": 0.4513790011405945, + "step": 2536 + }, + { + "epoch": 0.6468638449770525, + "grad_norm": 0.44705885648727417, + "learning_rate": 6.080831274303523e-06, + "loss": 0.43511801958084106, + "step": 2537 + }, + { + "epoch": 0.6471188169301377, + "grad_norm": 0.4256707727909088, + "learning_rate": 6.073073562398806e-06, + "loss": 0.4494359493255615, + "step": 2538 + }, + { + "epoch": 0.6473737888832228, + "grad_norm": 0.4406540095806122, + "learning_rate": 6.065318643680573e-06, + "loss": 0.44055265188217163, + "step": 2539 + }, + { + "epoch": 0.647628760836308, + "grad_norm": 0.47909441590309143, + "learning_rate": 6.057566523664819e-06, + "loss": 0.4648555517196655, + "step": 2540 + }, + { + "epoch": 0.6478837327893932, + "grad_norm": 0.44395220279693604, + "learning_rate": 6.049817207865557e-06, + "loss": 0.44198960065841675, + "step": 2541 + }, + { + "epoch": 0.6481387047424784, + "grad_norm": 0.4380699694156647, + "learning_rate": 6.042070701794806e-06, + "loss": 0.44920453429222107, + "step": 2542 + }, + { + "epoch": 0.6483936766955635, + "grad_norm": 0.4582255780696869, + "learning_rate": 6.0343270109625855e-06, + "loss": 0.4538794457912445, + "step": 2543 + }, + { + "epoch": 0.6486486486486487, + "grad_norm": 0.43302080035209656, + "learning_rate": 6.026586140876908e-06, + "loss": 0.4537618160247803, + "step": 2544 + }, + { + "epoch": 0.6489036206017338, + "grad_norm": 0.4683970510959625, + "learning_rate": 6.018848097043783e-06, + "loss": 0.45142412185668945, + "step": 2545 + }, + { + "epoch": 0.649158592554819, + "grad_norm": 0.4430343806743622, + "learning_rate": 6.011112884967214e-06, + "loss": 0.45917999744415283, + "step": 2546 + }, + { + "epoch": 0.6494135645079041, + "grad_norm": 0.4492309093475342, + "learning_rate": 6.003380510149179e-06, + "loss": 0.4289212226867676, + "step": 2547 + }, + { + "epoch": 0.6496685364609893, + "grad_norm": 0.4585253596305847, + "learning_rate": 5.995650978089649e-06, + "loss": 0.45338308811187744, + "step": 2548 + }, + { + "epoch": 0.6499235084140744, + "grad_norm": 0.4318457841873169, + "learning_rate": 5.9879242942865675e-06, + "loss": 0.4537937045097351, + "step": 2549 + }, + { + "epoch": 0.6501784803671596, + "grad_norm": 0.438854455947876, + "learning_rate": 5.980200464235852e-06, + "loss": 0.44037315249443054, + "step": 2550 + }, + { + "epoch": 0.6504334523202447, + "grad_norm": 0.44499143958091736, + "learning_rate": 5.972479493431395e-06, + "loss": 0.4457972049713135, + "step": 2551 + }, + { + "epoch": 0.65068842427333, + "grad_norm": 0.43149423599243164, + "learning_rate": 5.964761387365052e-06, + "loss": 0.46129220724105835, + "step": 2552 + }, + { + "epoch": 0.6509433962264151, + "grad_norm": 0.4269418716430664, + "learning_rate": 5.957046151526634e-06, + "loss": 0.44635066390037537, + "step": 2553 + }, + { + "epoch": 0.6511983681795003, + "grad_norm": 0.44937610626220703, + "learning_rate": 5.949333791403923e-06, + "loss": 0.4526378810405731, + "step": 2554 + }, + { + "epoch": 0.6514533401325854, + "grad_norm": 0.4750503897666931, + "learning_rate": 5.941624312482648e-06, + "loss": 0.4481755495071411, + "step": 2555 + }, + { + "epoch": 0.6517083120856706, + "grad_norm": 0.43597346544265747, + "learning_rate": 5.933917720246488e-06, + "loss": 0.451925128698349, + "step": 2556 + }, + { + "epoch": 0.6519632840387557, + "grad_norm": 0.4327908456325531, + "learning_rate": 5.9262140201770745e-06, + "loss": 0.44594454765319824, + "step": 2557 + }, + { + "epoch": 0.6522182559918409, + "grad_norm": 0.4297172725200653, + "learning_rate": 5.918513217753976e-06, + "loss": 0.4475826621055603, + "step": 2558 + }, + { + "epoch": 0.652473227944926, + "grad_norm": 0.4717065095901489, + "learning_rate": 5.910815318454703e-06, + "loss": 0.45425352454185486, + "step": 2559 + }, + { + "epoch": 0.6527281998980112, + "grad_norm": 0.4428306221961975, + "learning_rate": 5.903120327754697e-06, + "loss": 0.4513399600982666, + "step": 2560 + }, + { + "epoch": 0.6529831718510963, + "grad_norm": 0.43051621317863464, + "learning_rate": 5.8954282511273374e-06, + "loss": 0.4462912678718567, + "step": 2561 + }, + { + "epoch": 0.6532381438041815, + "grad_norm": 0.446906179189682, + "learning_rate": 5.887739094043923e-06, + "loss": 0.436113566160202, + "step": 2562 + }, + { + "epoch": 0.6534931157572667, + "grad_norm": 0.46555471420288086, + "learning_rate": 5.880052861973686e-06, + "loss": 0.4455112814903259, + "step": 2563 + }, + { + "epoch": 0.6537480877103519, + "grad_norm": 0.4588684141635895, + "learning_rate": 5.872369560383771e-06, + "loss": 0.4556528925895691, + "step": 2564 + }, + { + "epoch": 0.654003059663437, + "grad_norm": 0.44467881321907043, + "learning_rate": 5.864689194739237e-06, + "loss": 0.44747036695480347, + "step": 2565 + }, + { + "epoch": 0.6542580316165222, + "grad_norm": 0.43433132767677307, + "learning_rate": 5.857011770503056e-06, + "loss": 0.437305212020874, + "step": 2566 + }, + { + "epoch": 0.6545130035696073, + "grad_norm": 0.44600459933280945, + "learning_rate": 5.849337293136113e-06, + "loss": 0.44404035806655884, + "step": 2567 + }, + { + "epoch": 0.6547679755226925, + "grad_norm": 0.43104448914527893, + "learning_rate": 5.841665768097189e-06, + "loss": 0.44813108444213867, + "step": 2568 + }, + { + "epoch": 0.6550229474757777, + "grad_norm": 0.46145471930503845, + "learning_rate": 5.83399720084297e-06, + "loss": 0.448213666677475, + "step": 2569 + }, + { + "epoch": 0.6552779194288628, + "grad_norm": 0.47107648849487305, + "learning_rate": 5.826331596828037e-06, + "loss": 0.45392078161239624, + "step": 2570 + }, + { + "epoch": 0.655532891381948, + "grad_norm": 0.45138102769851685, + "learning_rate": 5.818668961504867e-06, + "loss": 0.4528809189796448, + "step": 2571 + }, + { + "epoch": 0.6557878633350331, + "grad_norm": 0.43152180314064026, + "learning_rate": 5.8110093003238175e-06, + "loss": 0.45032018423080444, + "step": 2572 + }, + { + "epoch": 0.6560428352881184, + "grad_norm": 0.44748079776763916, + "learning_rate": 5.803352618733133e-06, + "loss": 0.44270965456962585, + "step": 2573 + }, + { + "epoch": 0.6562978072412035, + "grad_norm": 0.44162699580192566, + "learning_rate": 5.795698922178947e-06, + "loss": 0.45946773886680603, + "step": 2574 + }, + { + "epoch": 0.6565527791942887, + "grad_norm": 0.5382289290428162, + "learning_rate": 5.788048216105254e-06, + "loss": 0.44528162479400635, + "step": 2575 + }, + { + "epoch": 0.6568077511473738, + "grad_norm": 0.4421813189983368, + "learning_rate": 5.7804005059539355e-06, + "loss": 0.4661310613155365, + "step": 2576 + }, + { + "epoch": 0.657062723100459, + "grad_norm": 0.4564555585384369, + "learning_rate": 5.772755797164743e-06, + "loss": 0.45135313272476196, + "step": 2577 + }, + { + "epoch": 0.6573176950535441, + "grad_norm": 0.45606404542922974, + "learning_rate": 5.765114095175281e-06, + "loss": 0.44829630851745605, + "step": 2578 + }, + { + "epoch": 0.6575726670066293, + "grad_norm": 0.4391755759716034, + "learning_rate": 5.757475405421017e-06, + "loss": 0.44676074385643005, + "step": 2579 + }, + { + "epoch": 0.6578276389597144, + "grad_norm": 0.44971051812171936, + "learning_rate": 5.74983973333529e-06, + "loss": 0.4497562646865845, + "step": 2580 + }, + { + "epoch": 0.6580826109127996, + "grad_norm": 0.4419224262237549, + "learning_rate": 5.742207084349274e-06, + "loss": 0.4493602514266968, + "step": 2581 + }, + { + "epoch": 0.6583375828658847, + "grad_norm": 0.42870786786079407, + "learning_rate": 5.734577463892008e-06, + "loss": 0.4335828721523285, + "step": 2582 + }, + { + "epoch": 0.6585925548189699, + "grad_norm": 0.5611152052879333, + "learning_rate": 5.726950877390367e-06, + "loss": 0.4474702775478363, + "step": 2583 + }, + { + "epoch": 0.658847526772055, + "grad_norm": 0.4444577693939209, + "learning_rate": 5.7193273302690755e-06, + "loss": 0.453504741191864, + "step": 2584 + }, + { + "epoch": 0.6591024987251403, + "grad_norm": 0.44351455569267273, + "learning_rate": 5.711706827950691e-06, + "loss": 0.4382694661617279, + "step": 2585 + }, + { + "epoch": 0.6593574706782254, + "grad_norm": 0.44918960332870483, + "learning_rate": 5.7040893758556e-06, + "loss": 0.4414241313934326, + "step": 2586 + }, + { + "epoch": 0.6596124426313106, + "grad_norm": 0.42611655592918396, + "learning_rate": 5.696474979402036e-06, + "loss": 0.4577499330043793, + "step": 2587 + }, + { + "epoch": 0.6598674145843957, + "grad_norm": 0.44104957580566406, + "learning_rate": 5.688863644006037e-06, + "loss": 0.4392445981502533, + "step": 2588 + }, + { + "epoch": 0.6601223865374809, + "grad_norm": 0.4344611167907715, + "learning_rate": 5.681255375081485e-06, + "loss": 0.44575804471969604, + "step": 2589 + }, + { + "epoch": 0.660377358490566, + "grad_norm": 0.4342581033706665, + "learning_rate": 5.673650178040073e-06, + "loss": 0.4402153491973877, + "step": 2590 + }, + { + "epoch": 0.6606323304436512, + "grad_norm": 0.4354460537433624, + "learning_rate": 5.666048058291298e-06, + "loss": 0.47594496607780457, + "step": 2591 + }, + { + "epoch": 0.6608873023967363, + "grad_norm": 0.44662290811538696, + "learning_rate": 5.658449021242481e-06, + "loss": 0.4612034559249878, + "step": 2592 + }, + { + "epoch": 0.6611422743498215, + "grad_norm": 0.45198825001716614, + "learning_rate": 5.650853072298752e-06, + "loss": 0.44805312156677246, + "step": 2593 + }, + { + "epoch": 0.6613972463029066, + "grad_norm": 0.44156575202941895, + "learning_rate": 5.643260216863031e-06, + "loss": 0.4416990280151367, + "step": 2594 + }, + { + "epoch": 0.6616522182559919, + "grad_norm": 0.43809452652931213, + "learning_rate": 5.6356704603360545e-06, + "loss": 0.45295262336730957, + "step": 2595 + }, + { + "epoch": 0.661907190209077, + "grad_norm": 0.4427354037761688, + "learning_rate": 5.628083808116338e-06, + "loss": 0.44600507616996765, + "step": 2596 + }, + { + "epoch": 0.6621621621621622, + "grad_norm": 0.43249139189720154, + "learning_rate": 5.620500265600206e-06, + "loss": 0.4660620093345642, + "step": 2597 + }, + { + "epoch": 0.6624171341152473, + "grad_norm": 0.43712878227233887, + "learning_rate": 5.612919838181757e-06, + "loss": 0.43594229221343994, + "step": 2598 + }, + { + "epoch": 0.6626721060683325, + "grad_norm": 0.4529092609882355, + "learning_rate": 5.605342531252873e-06, + "loss": 0.4467432498931885, + "step": 2599 + }, + { + "epoch": 0.6629270780214176, + "grad_norm": 0.44129279255867004, + "learning_rate": 5.597768350203231e-06, + "loss": 0.451646625995636, + "step": 2600 + }, + { + "epoch": 0.6631820499745028, + "grad_norm": 0.4357939660549164, + "learning_rate": 5.590197300420277e-06, + "loss": 0.44854840636253357, + "step": 2601 + }, + { + "epoch": 0.6634370219275879, + "grad_norm": 0.550003170967102, + "learning_rate": 5.58262938728922e-06, + "loss": 0.45516157150268555, + "step": 2602 + }, + { + "epoch": 0.6636919938806731, + "grad_norm": 0.4335564076900482, + "learning_rate": 5.575064616193058e-06, + "loss": 0.45366984605789185, + "step": 2603 + }, + { + "epoch": 0.6639469658337583, + "grad_norm": 0.44977179169654846, + "learning_rate": 5.567502992512536e-06, + "loss": 0.4410482347011566, + "step": 2604 + }, + { + "epoch": 0.6642019377868434, + "grad_norm": 0.43469348549842834, + "learning_rate": 5.559944521626164e-06, + "loss": 0.4456717371940613, + "step": 2605 + }, + { + "epoch": 0.6644569097399287, + "grad_norm": 0.45355284214019775, + "learning_rate": 5.552389208910219e-06, + "loss": 0.45244699716567993, + "step": 2606 + }, + { + "epoch": 0.6647118816930138, + "grad_norm": 0.4623728096485138, + "learning_rate": 5.544837059738719e-06, + "loss": 0.45508691668510437, + "step": 2607 + }, + { + "epoch": 0.664966853646099, + "grad_norm": 0.44880279898643494, + "learning_rate": 5.537288079483445e-06, + "loss": 0.4548211991786957, + "step": 2608 + }, + { + "epoch": 0.6652218255991841, + "grad_norm": 0.4464181959629059, + "learning_rate": 5.529742273513914e-06, + "loss": 0.451995313167572, + "step": 2609 + }, + { + "epoch": 0.6654767975522693, + "grad_norm": 0.437755823135376, + "learning_rate": 5.522199647197384e-06, + "loss": 0.4451538920402527, + "step": 2610 + }, + { + "epoch": 0.6657317695053544, + "grad_norm": 0.44787105917930603, + "learning_rate": 5.514660205898861e-06, + "loss": 0.44024309515953064, + "step": 2611 + }, + { + "epoch": 0.6659867414584396, + "grad_norm": 0.43366125226020813, + "learning_rate": 5.507123954981073e-06, + "loss": 0.45478901267051697, + "step": 2612 + }, + { + "epoch": 0.6662417134115247, + "grad_norm": 0.43287813663482666, + "learning_rate": 5.4995908998044915e-06, + "loss": 0.44715046882629395, + "step": 2613 + }, + { + "epoch": 0.6664966853646099, + "grad_norm": 0.4263473153114319, + "learning_rate": 5.49206104572731e-06, + "loss": 0.4595363736152649, + "step": 2614 + }, + { + "epoch": 0.666751657317695, + "grad_norm": 0.4816651940345764, + "learning_rate": 5.484534398105439e-06, + "loss": 0.45576655864715576, + "step": 2615 + }, + { + "epoch": 0.6670066292707802, + "grad_norm": 0.4245269298553467, + "learning_rate": 5.47701096229252e-06, + "loss": 0.4497915506362915, + "step": 2616 + }, + { + "epoch": 0.6672616012238654, + "grad_norm": 0.43695083260536194, + "learning_rate": 5.4694907436399e-06, + "loss": 0.4417057931423187, + "step": 2617 + }, + { + "epoch": 0.6675165731769506, + "grad_norm": 0.5978035926818848, + "learning_rate": 5.461973747496636e-06, + "loss": 0.4519868791103363, + "step": 2618 + }, + { + "epoch": 0.6677715451300357, + "grad_norm": 0.4561963379383087, + "learning_rate": 5.454459979209506e-06, + "loss": 0.4437265396118164, + "step": 2619 + }, + { + "epoch": 0.6680265170831209, + "grad_norm": 0.43772682547569275, + "learning_rate": 5.446949444122976e-06, + "loss": 0.44676685333251953, + "step": 2620 + }, + { + "epoch": 0.668281489036206, + "grad_norm": 0.44906044006347656, + "learning_rate": 5.439442147579226e-06, + "loss": 0.4478064477443695, + "step": 2621 + }, + { + "epoch": 0.6685364609892912, + "grad_norm": 0.4388953745365143, + "learning_rate": 5.431938094918132e-06, + "loss": 0.4440498352050781, + "step": 2622 + }, + { + "epoch": 0.6687914329423763, + "grad_norm": 0.4350358247756958, + "learning_rate": 5.424437291477243e-06, + "loss": 0.4516732692718506, + "step": 2623 + }, + { + "epoch": 0.6690464048954615, + "grad_norm": 0.4323616623878479, + "learning_rate": 5.416939742591818e-06, + "loss": 0.45428466796875, + "step": 2624 + }, + { + "epoch": 0.6693013768485466, + "grad_norm": 0.4260307550430298, + "learning_rate": 5.409445453594799e-06, + "loss": 0.4449426531791687, + "step": 2625 + }, + { + "epoch": 0.6695563488016318, + "grad_norm": 0.4495390057563782, + "learning_rate": 5.401954429816797e-06, + "loss": 0.44637101888656616, + "step": 2626 + }, + { + "epoch": 0.6698113207547169, + "grad_norm": 0.4546131193637848, + "learning_rate": 5.394466676586114e-06, + "loss": 0.4595838487148285, + "step": 2627 + }, + { + "epoch": 0.6700662927078022, + "grad_norm": 0.43755903840065, + "learning_rate": 5.386982199228714e-06, + "loss": 0.4451454281806946, + "step": 2628 + }, + { + "epoch": 0.6703212646608873, + "grad_norm": 0.4387609660625458, + "learning_rate": 5.379501003068244e-06, + "loss": 0.44180548191070557, + "step": 2629 + }, + { + "epoch": 0.6705762366139725, + "grad_norm": 0.42819881439208984, + "learning_rate": 5.372023093426004e-06, + "loss": 0.4571313261985779, + "step": 2630 + }, + { + "epoch": 0.6708312085670576, + "grad_norm": 0.4296024441719055, + "learning_rate": 5.3645484756209605e-06, + "loss": 0.4568363130092621, + "step": 2631 + }, + { + "epoch": 0.6710861805201428, + "grad_norm": 0.44118306040763855, + "learning_rate": 5.357077154969742e-06, + "loss": 0.4438619017601013, + "step": 2632 + }, + { + "epoch": 0.6713411524732279, + "grad_norm": 0.4651823937892914, + "learning_rate": 5.349609136786636e-06, + "loss": 0.4524320960044861, + "step": 2633 + }, + { + "epoch": 0.6715961244263131, + "grad_norm": 0.45834317803382874, + "learning_rate": 5.342144426383563e-06, + "loss": 0.4385998249053955, + "step": 2634 + }, + { + "epoch": 0.6718510963793982, + "grad_norm": 0.43824106454849243, + "learning_rate": 5.334683029070118e-06, + "loss": 0.43711209297180176, + "step": 2635 + }, + { + "epoch": 0.6721060683324834, + "grad_norm": 0.4382132887840271, + "learning_rate": 5.327224950153507e-06, + "loss": 0.4439789056777954, + "step": 2636 + }, + { + "epoch": 0.6723610402855685, + "grad_norm": 0.4576619565486908, + "learning_rate": 5.3197701949386e-06, + "loss": 0.45007187128067017, + "step": 2637 + }, + { + "epoch": 0.6726160122386537, + "grad_norm": 0.43934521079063416, + "learning_rate": 5.312318768727902e-06, + "loss": 0.4436546266078949, + "step": 2638 + }, + { + "epoch": 0.672870984191739, + "grad_norm": 0.4601533114910126, + "learning_rate": 5.304870676821532e-06, + "loss": 0.4548959732055664, + "step": 2639 + }, + { + "epoch": 0.6731259561448241, + "grad_norm": 0.42206525802612305, + "learning_rate": 5.297425924517259e-06, + "loss": 0.45760273933410645, + "step": 2640 + }, + { + "epoch": 0.6733809280979093, + "grad_norm": 0.4502648115158081, + "learning_rate": 5.2899845171104605e-06, + "loss": 0.4489644467830658, + "step": 2641 + }, + { + "epoch": 0.6736359000509944, + "grad_norm": 0.756621241569519, + "learning_rate": 5.28254645989414e-06, + "loss": 0.46550774574279785, + "step": 2642 + }, + { + "epoch": 0.6738908720040796, + "grad_norm": 0.45248404145240784, + "learning_rate": 5.275111758158925e-06, + "loss": 0.4535596966743469, + "step": 2643 + }, + { + "epoch": 0.6741458439571647, + "grad_norm": 0.474893718957901, + "learning_rate": 5.267680417193042e-06, + "loss": 0.4447336196899414, + "step": 2644 + }, + { + "epoch": 0.6744008159102499, + "grad_norm": 0.43572816252708435, + "learning_rate": 5.26025244228234e-06, + "loss": 0.450375497341156, + "step": 2645 + }, + { + "epoch": 0.674655787863335, + "grad_norm": 0.4469342827796936, + "learning_rate": 5.252827838710271e-06, + "loss": 0.44614309072494507, + "step": 2646 + }, + { + "epoch": 0.6749107598164202, + "grad_norm": 0.4533426761627197, + "learning_rate": 5.245406611757882e-06, + "loss": 0.44949886202812195, + "step": 2647 + }, + { + "epoch": 0.6751657317695053, + "grad_norm": 0.45130079984664917, + "learning_rate": 5.237988766703827e-06, + "loss": 0.44672998785972595, + "step": 2648 + }, + { + "epoch": 0.6754207037225906, + "grad_norm": 0.4586695432662964, + "learning_rate": 5.230574308824352e-06, + "loss": 0.4617176055908203, + "step": 2649 + }, + { + "epoch": 0.6756756756756757, + "grad_norm": 0.44763344526290894, + "learning_rate": 5.223163243393283e-06, + "loss": 0.4565013647079468, + "step": 2650 + }, + { + "epoch": 0.6759306476287609, + "grad_norm": 0.4288623631000519, + "learning_rate": 5.215755575682054e-06, + "loss": 0.45748433470726013, + "step": 2651 + }, + { + "epoch": 0.676185619581846, + "grad_norm": 0.4510361850261688, + "learning_rate": 5.208351310959662e-06, + "loss": 0.4522618055343628, + "step": 2652 + }, + { + "epoch": 0.6764405915349312, + "grad_norm": 0.4306660592556, + "learning_rate": 5.200950454492694e-06, + "loss": 0.44474905729293823, + "step": 2653 + }, + { + "epoch": 0.6766955634880163, + "grad_norm": 0.4786277413368225, + "learning_rate": 5.19355301154532e-06, + "loss": 0.45132479071617126, + "step": 2654 + }, + { + "epoch": 0.6769505354411015, + "grad_norm": 0.48917731642723083, + "learning_rate": 5.186158987379257e-06, + "loss": 0.4453713893890381, + "step": 2655 + }, + { + "epoch": 0.6772055073941866, + "grad_norm": 0.4612053334712982, + "learning_rate": 5.178768387253815e-06, + "loss": 0.4520571231842041, + "step": 2656 + }, + { + "epoch": 0.6774604793472718, + "grad_norm": 0.47949251532554626, + "learning_rate": 5.1713812164258625e-06, + "loss": 0.4475216269493103, + "step": 2657 + }, + { + "epoch": 0.6777154513003569, + "grad_norm": 0.43021127581596375, + "learning_rate": 5.163997480149818e-06, + "loss": 0.45925724506378174, + "step": 2658 + }, + { + "epoch": 0.6779704232534421, + "grad_norm": 0.4294150769710541, + "learning_rate": 5.156617183677672e-06, + "loss": 0.4427727162837982, + "step": 2659 + }, + { + "epoch": 0.6782253952065272, + "grad_norm": 0.4174054265022278, + "learning_rate": 5.149240332258953e-06, + "loss": 0.4476696848869324, + "step": 2660 + }, + { + "epoch": 0.6784803671596125, + "grad_norm": 0.44336920976638794, + "learning_rate": 5.1418669311407585e-06, + "loss": 0.4482848644256592, + "step": 2661 + }, + { + "epoch": 0.6787353391126976, + "grad_norm": 0.4349704086780548, + "learning_rate": 5.134496985567714e-06, + "loss": 0.4412155747413635, + "step": 2662 + }, + { + "epoch": 0.6789903110657828, + "grad_norm": 0.4378722012042999, + "learning_rate": 5.127130500781988e-06, + "loss": 0.45444124937057495, + "step": 2663 + }, + { + "epoch": 0.6792452830188679, + "grad_norm": 0.4330241084098816, + "learning_rate": 5.1197674820233045e-06, + "loss": 0.4420257806777954, + "step": 2664 + }, + { + "epoch": 0.6795002549719531, + "grad_norm": 0.43697431683540344, + "learning_rate": 5.1124079345289e-06, + "loss": 0.44826623797416687, + "step": 2665 + }, + { + "epoch": 0.6797552269250382, + "grad_norm": 0.44842299818992615, + "learning_rate": 5.105051863533559e-06, + "loss": 0.4589853286743164, + "step": 2666 + }, + { + "epoch": 0.6800101988781234, + "grad_norm": 0.42814716696739197, + "learning_rate": 5.097699274269593e-06, + "loss": 0.45576342940330505, + "step": 2667 + }, + { + "epoch": 0.6802651708312085, + "grad_norm": 0.4533100724220276, + "learning_rate": 5.0903501719668155e-06, + "loss": 0.4435397982597351, + "step": 2668 + }, + { + "epoch": 0.6805201427842937, + "grad_norm": 0.4440280497074127, + "learning_rate": 5.083004561852583e-06, + "loss": 0.4435123801231384, + "step": 2669 + }, + { + "epoch": 0.6807751147373788, + "grad_norm": 0.4627237021923065, + "learning_rate": 5.075662449151764e-06, + "loss": 0.44633591175079346, + "step": 2670 + }, + { + "epoch": 0.681030086690464, + "grad_norm": 0.4171501398086548, + "learning_rate": 5.068323839086728e-06, + "loss": 0.4337880313396454, + "step": 2671 + }, + { + "epoch": 0.6812850586435492, + "grad_norm": 0.45280689001083374, + "learning_rate": 5.060988736877366e-06, + "loss": 0.45015066862106323, + "step": 2672 + }, + { + "epoch": 0.6815400305966344, + "grad_norm": 0.4357544481754303, + "learning_rate": 5.053657147741067e-06, + "loss": 0.45653852820396423, + "step": 2673 + }, + { + "epoch": 0.6817950025497195, + "grad_norm": 0.46855801343917847, + "learning_rate": 5.046329076892719e-06, + "loss": 0.44177255034446716, + "step": 2674 + }, + { + "epoch": 0.6820499745028047, + "grad_norm": 0.42504367232322693, + "learning_rate": 5.039004529544718e-06, + "loss": 0.4501117169857025, + "step": 2675 + }, + { + "epoch": 0.6823049464558899, + "grad_norm": 0.4342193007469177, + "learning_rate": 5.031683510906937e-06, + "loss": 0.44261711835861206, + "step": 2676 + }, + { + "epoch": 0.682559918408975, + "grad_norm": 0.44799095392227173, + "learning_rate": 5.0243660261867554e-06, + "loss": 0.4422341585159302, + "step": 2677 + }, + { + "epoch": 0.6828148903620602, + "grad_norm": 0.4549873173236847, + "learning_rate": 5.0170520805890365e-06, + "loss": 0.45025262236595154, + "step": 2678 + }, + { + "epoch": 0.6830698623151453, + "grad_norm": 0.4405505657196045, + "learning_rate": 5.009741679316113e-06, + "loss": 0.458317369222641, + "step": 2679 + }, + { + "epoch": 0.6833248342682305, + "grad_norm": 0.4512025713920593, + "learning_rate": 5.0024348275678144e-06, + "loss": 0.45345792174339294, + "step": 2680 + }, + { + "epoch": 0.6835798062213156, + "grad_norm": 0.457786500453949, + "learning_rate": 4.9951315305414316e-06, + "loss": 0.4395756721496582, + "step": 2681 + }, + { + "epoch": 0.6838347781744009, + "grad_norm": 0.47746995091438293, + "learning_rate": 4.987831793431731e-06, + "loss": 0.45292338728904724, + "step": 2682 + }, + { + "epoch": 0.684089750127486, + "grad_norm": 0.49980446696281433, + "learning_rate": 4.980535621430953e-06, + "loss": 0.4654287099838257, + "step": 2683 + }, + { + "epoch": 0.6843447220805712, + "grad_norm": 0.47383415699005127, + "learning_rate": 4.973243019728792e-06, + "loss": 0.4488328695297241, + "step": 2684 + }, + { + "epoch": 0.6845996940336563, + "grad_norm": 0.44947731494903564, + "learning_rate": 4.965953993512413e-06, + "loss": 0.4427359998226166, + "step": 2685 + }, + { + "epoch": 0.6848546659867415, + "grad_norm": 0.44544997811317444, + "learning_rate": 4.958668547966432e-06, + "loss": 0.4360436797142029, + "step": 2686 + }, + { + "epoch": 0.6851096379398266, + "grad_norm": 0.4315359592437744, + "learning_rate": 4.951386688272915e-06, + "loss": 0.43482017517089844, + "step": 2687 + }, + { + "epoch": 0.6853646098929118, + "grad_norm": 0.4480088949203491, + "learning_rate": 4.944108419611387e-06, + "loss": 0.43367239832878113, + "step": 2688 + }, + { + "epoch": 0.6856195818459969, + "grad_norm": 0.4426881670951843, + "learning_rate": 4.936833747158806e-06, + "loss": 0.441577285528183, + "step": 2689 + }, + { + "epoch": 0.6858745537990821, + "grad_norm": 0.46219876408576965, + "learning_rate": 4.929562676089586e-06, + "loss": 0.4452260136604309, + "step": 2690 + }, + { + "epoch": 0.6861295257521672, + "grad_norm": 0.43867194652557373, + "learning_rate": 4.922295211575574e-06, + "loss": 0.45053476095199585, + "step": 2691 + }, + { + "epoch": 0.6863844977052524, + "grad_norm": 0.4794599711894989, + "learning_rate": 4.915031358786043e-06, + "loss": 0.442020982503891, + "step": 2692 + }, + { + "epoch": 0.6866394696583376, + "grad_norm": 0.47966474294662476, + "learning_rate": 4.9077711228877125e-06, + "loss": 0.4580918252468109, + "step": 2693 + }, + { + "epoch": 0.6868944416114228, + "grad_norm": 0.43187135457992554, + "learning_rate": 4.900514509044716e-06, + "loss": 0.42957156896591187, + "step": 2694 + }, + { + "epoch": 0.6871494135645079, + "grad_norm": 0.4613824784755707, + "learning_rate": 4.893261522418614e-06, + "loss": 0.45658349990844727, + "step": 2695 + }, + { + "epoch": 0.6874043855175931, + "grad_norm": 0.4548393487930298, + "learning_rate": 4.886012168168395e-06, + "loss": 0.45649951696395874, + "step": 2696 + }, + { + "epoch": 0.6876593574706782, + "grad_norm": 0.4860595166683197, + "learning_rate": 4.878766451450451e-06, + "loss": 0.44177111983299255, + "step": 2697 + }, + { + "epoch": 0.6879143294237634, + "grad_norm": 0.45349612832069397, + "learning_rate": 4.871524377418596e-06, + "loss": 0.449371337890625, + "step": 2698 + }, + { + "epoch": 0.6881693013768485, + "grad_norm": 0.4503442049026489, + "learning_rate": 4.864285951224059e-06, + "loss": 0.45505473017692566, + "step": 2699 + }, + { + "epoch": 0.6884242733299337, + "grad_norm": 0.4361976683139801, + "learning_rate": 4.857051178015451e-06, + "loss": 0.4406062066555023, + "step": 2700 + }, + { + "epoch": 0.6886792452830188, + "grad_norm": 0.4606969952583313, + "learning_rate": 4.849820062938805e-06, + "loss": 0.4530186653137207, + "step": 2701 + }, + { + "epoch": 0.688934217236104, + "grad_norm": 0.43665894865989685, + "learning_rate": 4.842592611137551e-06, + "loss": 0.4582091271877289, + "step": 2702 + }, + { + "epoch": 0.6891891891891891, + "grad_norm": 0.4496651589870453, + "learning_rate": 4.8353688277525015e-06, + "loss": 0.4425380229949951, + "step": 2703 + }, + { + "epoch": 0.6894441611422744, + "grad_norm": 0.4403512477874756, + "learning_rate": 4.828148717921874e-06, + "loss": 0.4357679784297943, + "step": 2704 + }, + { + "epoch": 0.6896991330953595, + "grad_norm": 0.4550356864929199, + "learning_rate": 4.820932286781263e-06, + "loss": 0.4607630670070648, + "step": 2705 + }, + { + "epoch": 0.6899541050484447, + "grad_norm": 0.43422931432724, + "learning_rate": 4.813719539463644e-06, + "loss": 0.4445881247520447, + "step": 2706 + }, + { + "epoch": 0.6902090770015298, + "grad_norm": 0.4598325490951538, + "learning_rate": 4.806510481099386e-06, + "loss": 0.43153107166290283, + "step": 2707 + }, + { + "epoch": 0.690464048954615, + "grad_norm": 0.48792895674705505, + "learning_rate": 4.799305116816217e-06, + "loss": 0.45328664779663086, + "step": 2708 + }, + { + "epoch": 0.6907190209077001, + "grad_norm": 0.4260575473308563, + "learning_rate": 4.7921034517392516e-06, + "loss": 0.4630817174911499, + "step": 2709 + }, + { + "epoch": 0.6909739928607853, + "grad_norm": 0.426405131816864, + "learning_rate": 4.784905490990971e-06, + "loss": 0.45140355825424194, + "step": 2710 + }, + { + "epoch": 0.6912289648138705, + "grad_norm": 0.44859254360198975, + "learning_rate": 4.777711239691208e-06, + "loss": 0.45345211029052734, + "step": 2711 + }, + { + "epoch": 0.6914839367669556, + "grad_norm": 0.4419932961463928, + "learning_rate": 4.770520702957182e-06, + "loss": 0.44984006881713867, + "step": 2712 + }, + { + "epoch": 0.6917389087200408, + "grad_norm": 0.483076274394989, + "learning_rate": 4.763333885903438e-06, + "loss": 0.4438518285751343, + "step": 2713 + }, + { + "epoch": 0.691993880673126, + "grad_norm": 0.5435576438903809, + "learning_rate": 4.756150793641902e-06, + "loss": 0.4360465407371521, + "step": 2714 + }, + { + "epoch": 0.6922488526262112, + "grad_norm": 0.44988253712654114, + "learning_rate": 4.748971431281843e-06, + "loss": 0.43878430128097534, + "step": 2715 + }, + { + "epoch": 0.6925038245792963, + "grad_norm": 0.47290316224098206, + "learning_rate": 4.741795803929867e-06, + "loss": 0.4487782418727875, + "step": 2716 + }, + { + "epoch": 0.6927587965323815, + "grad_norm": 0.44757747650146484, + "learning_rate": 4.734623916689941e-06, + "loss": 0.4293305277824402, + "step": 2717 + }, + { + "epoch": 0.6930137684854666, + "grad_norm": 0.44219687581062317, + "learning_rate": 4.727455774663357e-06, + "loss": 0.44206833839416504, + "step": 2718 + }, + { + "epoch": 0.6932687404385518, + "grad_norm": 0.45080289244651794, + "learning_rate": 4.720291382948745e-06, + "loss": 0.44882553815841675, + "step": 2719 + }, + { + "epoch": 0.6935237123916369, + "grad_norm": 0.4788825213909149, + "learning_rate": 4.713130746642078e-06, + "loss": 0.44349539279937744, + "step": 2720 + }, + { + "epoch": 0.6937786843447221, + "grad_norm": 0.4739687442779541, + "learning_rate": 4.705973870836643e-06, + "loss": 0.43993064761161804, + "step": 2721 + }, + { + "epoch": 0.6940336562978072, + "grad_norm": 0.4558063745498657, + "learning_rate": 4.698820760623064e-06, + "loss": 0.4564177393913269, + "step": 2722 + }, + { + "epoch": 0.6942886282508924, + "grad_norm": 0.4424072206020355, + "learning_rate": 4.691671421089285e-06, + "loss": 0.44053369760513306, + "step": 2723 + }, + { + "epoch": 0.6945436002039775, + "grad_norm": 0.4580995738506317, + "learning_rate": 4.684525857320561e-06, + "loss": 0.431607723236084, + "step": 2724 + }, + { + "epoch": 0.6947985721570628, + "grad_norm": 0.43411484360694885, + "learning_rate": 4.677384074399462e-06, + "loss": 0.4485299289226532, + "step": 2725 + }, + { + "epoch": 0.6950535441101479, + "grad_norm": 0.4523801803588867, + "learning_rate": 4.670246077405881e-06, + "loss": 0.4523230493068695, + "step": 2726 + }, + { + "epoch": 0.6953085160632331, + "grad_norm": 0.44680526852607727, + "learning_rate": 4.663111871417e-06, + "loss": 0.43542763590812683, + "step": 2727 + }, + { + "epoch": 0.6955634880163182, + "grad_norm": 0.44599083065986633, + "learning_rate": 4.6559814615073214e-06, + "loss": 0.4428485333919525, + "step": 2728 + }, + { + "epoch": 0.6958184599694034, + "grad_norm": 0.4294494688510895, + "learning_rate": 4.648854852748635e-06, + "loss": 0.44599223136901855, + "step": 2729 + }, + { + "epoch": 0.6960734319224885, + "grad_norm": 0.44038671255111694, + "learning_rate": 4.641732050210032e-06, + "loss": 0.44695937633514404, + "step": 2730 + }, + { + "epoch": 0.6963284038755737, + "grad_norm": 0.4496418535709381, + "learning_rate": 4.6346130589579055e-06, + "loss": 0.4504983127117157, + "step": 2731 + }, + { + "epoch": 0.6965833758286588, + "grad_norm": 0.9035135507583618, + "learning_rate": 4.627497884055912e-06, + "loss": 0.4419633746147156, + "step": 2732 + }, + { + "epoch": 0.696838347781744, + "grad_norm": 0.4334407448768616, + "learning_rate": 4.620386530565018e-06, + "loss": 0.44229233264923096, + "step": 2733 + }, + { + "epoch": 0.6970933197348291, + "grad_norm": 0.4289253354072571, + "learning_rate": 4.613279003543468e-06, + "loss": 0.44758471846580505, + "step": 2734 + }, + { + "epoch": 0.6973482916879143, + "grad_norm": 0.4214285612106323, + "learning_rate": 4.606175308046772e-06, + "loss": 0.43780988454818726, + "step": 2735 + }, + { + "epoch": 0.6976032636409994, + "grad_norm": 0.45487329363822937, + "learning_rate": 4.59907544912773e-06, + "loss": 0.4521713852882385, + "step": 2736 + }, + { + "epoch": 0.6978582355940847, + "grad_norm": 0.4270506203174591, + "learning_rate": 4.591979431836402e-06, + "loss": 0.43326398730278015, + "step": 2737 + }, + { + "epoch": 0.6981132075471698, + "grad_norm": 0.4325590133666992, + "learning_rate": 4.584887261220117e-06, + "loss": 0.42342886328697205, + "step": 2738 + }, + { + "epoch": 0.698368179500255, + "grad_norm": 0.8522976636886597, + "learning_rate": 4.577798942323477e-06, + "loss": 0.43506431579589844, + "step": 2739 + }, + { + "epoch": 0.6986231514533401, + "grad_norm": 0.491022527217865, + "learning_rate": 4.570714480188331e-06, + "loss": 0.4496845602989197, + "step": 2740 + }, + { + "epoch": 0.6988781234064253, + "grad_norm": 0.4635283052921295, + "learning_rate": 4.563633879853797e-06, + "loss": 0.45886242389678955, + "step": 2741 + }, + { + "epoch": 0.6991330953595104, + "grad_norm": 0.4629541039466858, + "learning_rate": 4.5565571463562365e-06, + "loss": 0.462287038564682, + "step": 2742 + }, + { + "epoch": 0.6993880673125956, + "grad_norm": 0.44179099798202515, + "learning_rate": 4.549484284729265e-06, + "loss": 0.44741857051849365, + "step": 2743 + }, + { + "epoch": 0.6996430392656807, + "grad_norm": 0.4561825394630432, + "learning_rate": 4.542415300003753e-06, + "loss": 0.44358640909194946, + "step": 2744 + }, + { + "epoch": 0.6998980112187659, + "grad_norm": 0.49426209926605225, + "learning_rate": 4.5353501972077885e-06, + "loss": 0.4456411600112915, + "step": 2745 + }, + { + "epoch": 0.7001529831718512, + "grad_norm": 0.4641091227531433, + "learning_rate": 4.528288981366722e-06, + "loss": 0.442534863948822, + "step": 2746 + }, + { + "epoch": 0.7004079551249363, + "grad_norm": 0.433246374130249, + "learning_rate": 4.5212316575031325e-06, + "loss": 0.45268380641937256, + "step": 2747 + }, + { + "epoch": 0.7006629270780215, + "grad_norm": 0.44399333000183105, + "learning_rate": 4.514178230636824e-06, + "loss": 0.43584078550338745, + "step": 2748 + }, + { + "epoch": 0.7009178990311066, + "grad_norm": 0.4575839936733246, + "learning_rate": 4.507128705784841e-06, + "loss": 0.4431343078613281, + "step": 2749 + }, + { + "epoch": 0.7011728709841918, + "grad_norm": 0.4551209509372711, + "learning_rate": 4.500083087961442e-06, + "loss": 0.4474720358848572, + "step": 2750 + }, + { + "epoch": 0.7014278429372769, + "grad_norm": 0.48329710960388184, + "learning_rate": 4.493041382178105e-06, + "loss": 0.4326696991920471, + "step": 2751 + }, + { + "epoch": 0.7016828148903621, + "grad_norm": 0.44136470556259155, + "learning_rate": 4.4860035934435375e-06, + "loss": 0.45715269446372986, + "step": 2752 + }, + { + "epoch": 0.7019377868434472, + "grad_norm": 0.425485223531723, + "learning_rate": 4.478969726763648e-06, + "loss": 0.43804869055747986, + "step": 2753 + }, + { + "epoch": 0.7021927587965324, + "grad_norm": 0.4444881081581116, + "learning_rate": 4.471939787141561e-06, + "loss": 0.45472773909568787, + "step": 2754 + }, + { + "epoch": 0.7024477307496175, + "grad_norm": 0.43286269903182983, + "learning_rate": 4.464913779577617e-06, + "loss": 0.4386185109615326, + "step": 2755 + }, + { + "epoch": 0.7027027027027027, + "grad_norm": 0.45265665650367737, + "learning_rate": 4.457891709069341e-06, + "loss": 0.4427916705608368, + "step": 2756 + }, + { + "epoch": 0.7029576746557878, + "grad_norm": 0.6262911558151245, + "learning_rate": 4.450873580611466e-06, + "loss": 0.43673306703567505, + "step": 2757 + }, + { + "epoch": 0.7032126466088731, + "grad_norm": 0.4524223804473877, + "learning_rate": 4.443859399195928e-06, + "loss": 0.4502025842666626, + "step": 2758 + }, + { + "epoch": 0.7034676185619582, + "grad_norm": 0.4436202645301819, + "learning_rate": 4.4368491698118425e-06, + "loss": 0.4552689492702484, + "step": 2759 + }, + { + "epoch": 0.7037225905150434, + "grad_norm": 0.4441589415073395, + "learning_rate": 4.429842897445529e-06, + "loss": 0.4459702968597412, + "step": 2760 + }, + { + "epoch": 0.7039775624681285, + "grad_norm": 0.442243754863739, + "learning_rate": 4.422840587080476e-06, + "loss": 0.45736703276634216, + "step": 2761 + }, + { + "epoch": 0.7042325344212137, + "grad_norm": 0.5277527570724487, + "learning_rate": 4.415842243697369e-06, + "loss": 0.43360424041748047, + "step": 2762 + }, + { + "epoch": 0.7044875063742988, + "grad_norm": 0.47451654076576233, + "learning_rate": 4.4088478722740635e-06, + "loss": 0.4568926692008972, + "step": 2763 + }, + { + "epoch": 0.704742478327384, + "grad_norm": 0.48908933997154236, + "learning_rate": 4.401857477785586e-06, + "loss": 0.4532874524593353, + "step": 2764 + }, + { + "epoch": 0.7049974502804691, + "grad_norm": 0.4372377097606659, + "learning_rate": 4.394871065204146e-06, + "loss": 0.4365415573120117, + "step": 2765 + }, + { + "epoch": 0.7052524222335543, + "grad_norm": 0.43009495735168457, + "learning_rate": 4.387888639499109e-06, + "loss": 0.4571770131587982, + "step": 2766 + }, + { + "epoch": 0.7055073941866394, + "grad_norm": 0.4533731937408447, + "learning_rate": 4.380910205637012e-06, + "loss": 0.44067034125328064, + "step": 2767 + }, + { + "epoch": 0.7057623661397247, + "grad_norm": 0.444358229637146, + "learning_rate": 4.373935768581554e-06, + "loss": 0.4552963376045227, + "step": 2768 + }, + { + "epoch": 0.7060173380928098, + "grad_norm": 0.44427451491355896, + "learning_rate": 4.366965333293586e-06, + "loss": 0.43634259700775146, + "step": 2769 + }, + { + "epoch": 0.706272310045895, + "grad_norm": 0.45108288526535034, + "learning_rate": 4.359998904731107e-06, + "loss": 0.4482075572013855, + "step": 2770 + }, + { + "epoch": 0.7065272819989801, + "grad_norm": 0.42642563581466675, + "learning_rate": 4.353036487849285e-06, + "loss": 0.44058769941329956, + "step": 2771 + }, + { + "epoch": 0.7067822539520653, + "grad_norm": 0.4614061415195465, + "learning_rate": 4.346078087600411e-06, + "loss": 0.43684858083724976, + "step": 2772 + }, + { + "epoch": 0.7070372259051504, + "grad_norm": 0.4506364166736603, + "learning_rate": 4.339123708933941e-06, + "loss": 0.44278430938720703, + "step": 2773 + }, + { + "epoch": 0.7072921978582356, + "grad_norm": 0.44530582427978516, + "learning_rate": 4.332173356796452e-06, + "loss": 0.43861445784568787, + "step": 2774 + }, + { + "epoch": 0.7075471698113207, + "grad_norm": 0.4369005560874939, + "learning_rate": 4.3252270361316695e-06, + "loss": 0.4379958510398865, + "step": 2775 + }, + { + "epoch": 0.7078021417644059, + "grad_norm": 0.43202000856399536, + "learning_rate": 4.3182847518804536e-06, + "loss": 0.4420497417449951, + "step": 2776 + }, + { + "epoch": 0.708057113717491, + "grad_norm": 0.43807902932167053, + "learning_rate": 4.311346508980773e-06, + "loss": 0.4428611993789673, + "step": 2777 + }, + { + "epoch": 0.7083120856705762, + "grad_norm": 0.42809081077575684, + "learning_rate": 4.304412312367743e-06, + "loss": 0.46127551794052124, + "step": 2778 + }, + { + "epoch": 0.7085670576236613, + "grad_norm": 0.45571109652519226, + "learning_rate": 4.297482166973596e-06, + "loss": 0.45550617575645447, + "step": 2779 + }, + { + "epoch": 0.7088220295767466, + "grad_norm": 0.433366060256958, + "learning_rate": 4.290556077727675e-06, + "loss": 0.4468022286891937, + "step": 2780 + }, + { + "epoch": 0.7090770015298317, + "grad_norm": 0.4316498041152954, + "learning_rate": 4.283634049556449e-06, + "loss": 0.4427945017814636, + "step": 2781 + }, + { + "epoch": 0.7093319734829169, + "grad_norm": 0.45922383666038513, + "learning_rate": 4.2767160873834904e-06, + "loss": 0.4336870610713959, + "step": 2782 + }, + { + "epoch": 0.7095869454360021, + "grad_norm": 0.44714367389678955, + "learning_rate": 4.269802196129477e-06, + "loss": 0.44349512457847595, + "step": 2783 + }, + { + "epoch": 0.7098419173890872, + "grad_norm": 0.44446757435798645, + "learning_rate": 4.2628923807122024e-06, + "loss": 0.44376105070114136, + "step": 2784 + }, + { + "epoch": 0.7100968893421724, + "grad_norm": 0.4495147168636322, + "learning_rate": 4.255986646046549e-06, + "loss": 0.4431377649307251, + "step": 2785 + }, + { + "epoch": 0.7103518612952575, + "grad_norm": 0.4551457166671753, + "learning_rate": 4.249084997044505e-06, + "loss": 0.44691866636276245, + "step": 2786 + }, + { + "epoch": 0.7106068332483427, + "grad_norm": 0.44687506556510925, + "learning_rate": 4.242187438615153e-06, + "loss": 0.4500548541545868, + "step": 2787 + }, + { + "epoch": 0.7108618052014278, + "grad_norm": 0.4545448124408722, + "learning_rate": 4.235293975664658e-06, + "loss": 0.43516406416893005, + "step": 2788 + }, + { + "epoch": 0.711116777154513, + "grad_norm": 0.4348348379135132, + "learning_rate": 4.228404613096275e-06, + "loss": 0.4404086768627167, + "step": 2789 + }, + { + "epoch": 0.7113717491075982, + "grad_norm": 0.4600769877433777, + "learning_rate": 4.22151935581035e-06, + "loss": 0.44938015937805176, + "step": 2790 + }, + { + "epoch": 0.7116267210606834, + "grad_norm": 0.45333588123321533, + "learning_rate": 4.214638208704295e-06, + "loss": 0.4313281774520874, + "step": 2791 + }, + { + "epoch": 0.7118816930137685, + "grad_norm": 0.4451024532318115, + "learning_rate": 4.207761176672615e-06, + "loss": 0.44445693492889404, + "step": 2792 + }, + { + "epoch": 0.7121366649668537, + "grad_norm": 0.4356543719768524, + "learning_rate": 4.200888264606869e-06, + "loss": 0.4430459141731262, + "step": 2793 + }, + { + "epoch": 0.7123916369199388, + "grad_norm": 0.43236348032951355, + "learning_rate": 4.194019477395708e-06, + "loss": 0.4271642565727234, + "step": 2794 + }, + { + "epoch": 0.712646608873024, + "grad_norm": 0.4406087398529053, + "learning_rate": 4.187154819924829e-06, + "loss": 0.4488407075405121, + "step": 2795 + }, + { + "epoch": 0.7129015808261091, + "grad_norm": 0.47709518671035767, + "learning_rate": 4.180294297076997e-06, + "loss": 0.44116121530532837, + "step": 2796 + }, + { + "epoch": 0.7131565527791943, + "grad_norm": 0.4376133978366852, + "learning_rate": 4.173437913732048e-06, + "loss": 0.4433579444885254, + "step": 2797 + }, + { + "epoch": 0.7134115247322794, + "grad_norm": 0.4503537118434906, + "learning_rate": 4.166585674766853e-06, + "loss": 0.4418359696865082, + "step": 2798 + }, + { + "epoch": 0.7136664966853646, + "grad_norm": 0.44435012340545654, + "learning_rate": 4.159737585055353e-06, + "loss": 0.453326940536499, + "step": 2799 + }, + { + "epoch": 0.7139214686384497, + "grad_norm": 0.4249648153781891, + "learning_rate": 4.152893649468533e-06, + "loss": 0.4535840153694153, + "step": 2800 + }, + { + "epoch": 0.714176440591535, + "grad_norm": 0.4280673861503601, + "learning_rate": 4.146053872874417e-06, + "loss": 0.4415363073348999, + "step": 2801 + }, + { + "epoch": 0.7144314125446201, + "grad_norm": 0.5632311105728149, + "learning_rate": 4.139218260138074e-06, + "loss": 0.43556785583496094, + "step": 2802 + }, + { + "epoch": 0.7146863844977053, + "grad_norm": 0.435977041721344, + "learning_rate": 4.132386816121616e-06, + "loss": 0.4506576955318451, + "step": 2803 + }, + { + "epoch": 0.7149413564507904, + "grad_norm": 0.44648057222366333, + "learning_rate": 4.12555954568418e-06, + "loss": 0.43422961235046387, + "step": 2804 + }, + { + "epoch": 0.7151963284038756, + "grad_norm": 0.4379127025604248, + "learning_rate": 4.118736453681949e-06, + "loss": 0.44232869148254395, + "step": 2805 + }, + { + "epoch": 0.7154513003569607, + "grad_norm": 0.43217480182647705, + "learning_rate": 4.111917544968116e-06, + "loss": 0.44452977180480957, + "step": 2806 + }, + { + "epoch": 0.7157062723100459, + "grad_norm": 0.4436221420764923, + "learning_rate": 4.105102824392912e-06, + "loss": 0.4404059052467346, + "step": 2807 + }, + { + "epoch": 0.715961244263131, + "grad_norm": 0.449494868516922, + "learning_rate": 4.098292296803593e-06, + "loss": 0.44439762830734253, + "step": 2808 + }, + { + "epoch": 0.7162162162162162, + "grad_norm": 0.43304330110549927, + "learning_rate": 4.091485967044409e-06, + "loss": 0.4660702347755432, + "step": 2809 + }, + { + "epoch": 0.7164711881693013, + "grad_norm": 0.44333916902542114, + "learning_rate": 4.084683839956647e-06, + "loss": 0.4513348937034607, + "step": 2810 + }, + { + "epoch": 0.7167261601223865, + "grad_norm": 0.44252026081085205, + "learning_rate": 4.0778859203786e-06, + "loss": 0.44663819670677185, + "step": 2811 + }, + { + "epoch": 0.7169811320754716, + "grad_norm": 0.46256279945373535, + "learning_rate": 4.0710922131455595e-06, + "loss": 0.4497140645980835, + "step": 2812 + }, + { + "epoch": 0.7172361040285569, + "grad_norm": 0.5449967980384827, + "learning_rate": 4.064302723089832e-06, + "loss": 0.43796730041503906, + "step": 2813 + }, + { + "epoch": 0.717491075981642, + "grad_norm": 0.43101754784584045, + "learning_rate": 4.057517455040717e-06, + "loss": 0.4442185163497925, + "step": 2814 + }, + { + "epoch": 0.7177460479347272, + "grad_norm": 0.43088218569755554, + "learning_rate": 4.050736413824509e-06, + "loss": 0.45426806807518005, + "step": 2815 + }, + { + "epoch": 0.7180010198878123, + "grad_norm": 0.4459136724472046, + "learning_rate": 4.0439596042645065e-06, + "loss": 0.45037540793418884, + "step": 2816 + }, + { + "epoch": 0.7182559918408975, + "grad_norm": 0.4530165195465088, + "learning_rate": 4.037187031180986e-06, + "loss": 0.45242929458618164, + "step": 2817 + }, + { + "epoch": 0.7185109637939827, + "grad_norm": 0.443611204624176, + "learning_rate": 4.0304186993912205e-06, + "loss": 0.4525124430656433, + "step": 2818 + }, + { + "epoch": 0.7187659357470678, + "grad_norm": 0.4550294876098633, + "learning_rate": 4.0236546137094576e-06, + "loss": 0.4362117052078247, + "step": 2819 + }, + { + "epoch": 0.719020907700153, + "grad_norm": 0.4554041922092438, + "learning_rate": 4.016894778946934e-06, + "loss": 0.44021332263946533, + "step": 2820 + }, + { + "epoch": 0.7192758796532381, + "grad_norm": 0.49561822414398193, + "learning_rate": 4.010139199911854e-06, + "loss": 0.4398931860923767, + "step": 2821 + }, + { + "epoch": 0.7195308516063234, + "grad_norm": 0.4929938018321991, + "learning_rate": 4.003387881409397e-06, + "loss": 0.4550277590751648, + "step": 2822 + }, + { + "epoch": 0.7197858235594085, + "grad_norm": 0.4283452033996582, + "learning_rate": 3.9966408282417155e-06, + "loss": 0.44857415556907654, + "step": 2823 + }, + { + "epoch": 0.7200407955124937, + "grad_norm": 0.473547101020813, + "learning_rate": 3.989898045207931e-06, + "loss": 0.4334336221218109, + "step": 2824 + }, + { + "epoch": 0.7202957674655788, + "grad_norm": 0.4281342923641205, + "learning_rate": 3.983159537104113e-06, + "loss": 0.44296973943710327, + "step": 2825 + }, + { + "epoch": 0.720550739418664, + "grad_norm": 0.48224103450775146, + "learning_rate": 3.97642530872331e-06, + "loss": 0.44094353914260864, + "step": 2826 + }, + { + "epoch": 0.7208057113717491, + "grad_norm": 0.43880414962768555, + "learning_rate": 3.969695364855512e-06, + "loss": 0.44613009691238403, + "step": 2827 + }, + { + "epoch": 0.7210606833248343, + "grad_norm": 0.44216209650039673, + "learning_rate": 3.96296971028766e-06, + "loss": 0.4331624209880829, + "step": 2828 + }, + { + "epoch": 0.7213156552779194, + "grad_norm": 0.44664260745048523, + "learning_rate": 3.95624834980366e-06, + "loss": 0.4392186105251312, + "step": 2829 + }, + { + "epoch": 0.7215706272310046, + "grad_norm": 0.4344140589237213, + "learning_rate": 3.949531288184345e-06, + "loss": 0.4357095956802368, + "step": 2830 + }, + { + "epoch": 0.7218255991840897, + "grad_norm": 0.4699135422706604, + "learning_rate": 3.942818530207504e-06, + "loss": 0.43042606115341187, + "step": 2831 + }, + { + "epoch": 0.7220805711371749, + "grad_norm": 0.4375179708003998, + "learning_rate": 3.93611008064786e-06, + "loss": 0.44665050506591797, + "step": 2832 + }, + { + "epoch": 0.72233554309026, + "grad_norm": 0.4416513442993164, + "learning_rate": 3.929405944277072e-06, + "loss": 0.4503394067287445, + "step": 2833 + }, + { + "epoch": 0.7225905150433453, + "grad_norm": 0.44849449396133423, + "learning_rate": 3.9227061258637236e-06, + "loss": 0.4251297116279602, + "step": 2834 + }, + { + "epoch": 0.7228454869964304, + "grad_norm": 0.45465877652168274, + "learning_rate": 3.916010630173341e-06, + "loss": 0.44149231910705566, + "step": 2835 + }, + { + "epoch": 0.7231004589495156, + "grad_norm": 0.45338740944862366, + "learning_rate": 3.909319461968363e-06, + "loss": 0.4441741406917572, + "step": 2836 + }, + { + "epoch": 0.7233554309026007, + "grad_norm": 0.4316529631614685, + "learning_rate": 3.90263262600816e-06, + "loss": 0.44860753417015076, + "step": 2837 + }, + { + "epoch": 0.7236104028556859, + "grad_norm": 0.44786977767944336, + "learning_rate": 3.895950127049013e-06, + "loss": 0.4485016465187073, + "step": 2838 + }, + { + "epoch": 0.723865374808771, + "grad_norm": 0.42609506845474243, + "learning_rate": 3.889271969844123e-06, + "loss": 0.4528493285179138, + "step": 2839 + }, + { + "epoch": 0.7241203467618562, + "grad_norm": 0.44411757588386536, + "learning_rate": 3.882598159143609e-06, + "loss": 0.4639114737510681, + "step": 2840 + }, + { + "epoch": 0.7243753187149413, + "grad_norm": 0.45524635910987854, + "learning_rate": 3.875928699694477e-06, + "loss": 0.44869527220726013, + "step": 2841 + }, + { + "epoch": 0.7246302906680265, + "grad_norm": 0.4178290367126465, + "learning_rate": 3.869263596240661e-06, + "loss": 0.46156296133995056, + "step": 2842 + }, + { + "epoch": 0.7248852626211116, + "grad_norm": 0.43635308742523193, + "learning_rate": 3.862602853522983e-06, + "loss": 0.4542333781719208, + "step": 2843 + }, + { + "epoch": 0.7251402345741969, + "grad_norm": 0.42462316155433655, + "learning_rate": 3.85594647627917e-06, + "loss": 0.44222697615623474, + "step": 2844 + }, + { + "epoch": 0.725395206527282, + "grad_norm": 0.4641839265823364, + "learning_rate": 3.8492944692438446e-06, + "loss": 0.4510359466075897, + "step": 2845 + }, + { + "epoch": 0.7256501784803672, + "grad_norm": 0.44939640164375305, + "learning_rate": 3.842646837148515e-06, + "loss": 0.4434684216976166, + "step": 2846 + }, + { + "epoch": 0.7259051504334523, + "grad_norm": 0.44022607803344727, + "learning_rate": 3.836003584721577e-06, + "loss": 0.43942567706108093, + "step": 2847 + }, + { + "epoch": 0.7261601223865375, + "grad_norm": 0.45273932814598083, + "learning_rate": 3.8293647166883215e-06, + "loss": 0.44515150785446167, + "step": 2848 + }, + { + "epoch": 0.7264150943396226, + "grad_norm": 0.47738298773765564, + "learning_rate": 3.822730237770908e-06, + "loss": 0.4392485022544861, + "step": 2849 + }, + { + "epoch": 0.7266700662927078, + "grad_norm": 0.430848628282547, + "learning_rate": 3.816100152688385e-06, + "loss": 0.44411778450012207, + "step": 2850 + }, + { + "epoch": 0.7269250382457929, + "grad_norm": 0.439805805683136, + "learning_rate": 3.809474466156664e-06, + "loss": 0.4501129984855652, + "step": 2851 + }, + { + "epoch": 0.7271800101988781, + "grad_norm": 0.46396151185035706, + "learning_rate": 3.802853182888543e-06, + "loss": 0.4317866265773773, + "step": 2852 + }, + { + "epoch": 0.7274349821519633, + "grad_norm": 0.5632092952728271, + "learning_rate": 3.796236307593676e-06, + "loss": 0.44473159313201904, + "step": 2853 + }, + { + "epoch": 0.7276899541050484, + "grad_norm": 0.5041026473045349, + "learning_rate": 3.7896238449785803e-06, + "loss": 0.44062238931655884, + "step": 2854 + }, + { + "epoch": 0.7279449260581337, + "grad_norm": 0.4421965777873993, + "learning_rate": 3.783015799746644e-06, + "loss": 0.46264347434043884, + "step": 2855 + }, + { + "epoch": 0.7281998980112188, + "grad_norm": 0.4480365514755249, + "learning_rate": 3.7764121765981133e-06, + "loss": 0.44427475333213806, + "step": 2856 + }, + { + "epoch": 0.728454869964304, + "grad_norm": 0.44947126507759094, + "learning_rate": 3.7698129802300744e-06, + "loss": 0.451511025428772, + "step": 2857 + }, + { + "epoch": 0.7287098419173891, + "grad_norm": 0.5991028547286987, + "learning_rate": 3.7632182153364838e-06, + "loss": 0.4486866891384125, + "step": 2858 + }, + { + "epoch": 0.7289648138704743, + "grad_norm": 0.4347856342792511, + "learning_rate": 3.756627886608133e-06, + "loss": 0.4447828233242035, + "step": 2859 + }, + { + "epoch": 0.7292197858235594, + "grad_norm": 0.42992183566093445, + "learning_rate": 3.7500419987326574e-06, + "loss": 0.44058841466903687, + "step": 2860 + }, + { + "epoch": 0.7294747577766446, + "grad_norm": 0.4386395215988159, + "learning_rate": 3.743460556394547e-06, + "loss": 0.44062167406082153, + "step": 2861 + }, + { + "epoch": 0.7297297297297297, + "grad_norm": 0.4650445878505707, + "learning_rate": 3.7368835642751123e-06, + "loss": 0.4424327313899994, + "step": 2862 + }, + { + "epoch": 0.7299847016828149, + "grad_norm": 0.4388737976551056, + "learning_rate": 3.730311027052511e-06, + "loss": 0.43483245372772217, + "step": 2863 + }, + { + "epoch": 0.7302396736359, + "grad_norm": 0.42335045337677, + "learning_rate": 3.7237429494017306e-06, + "loss": 0.4343976378440857, + "step": 2864 + }, + { + "epoch": 0.7304946455889852, + "grad_norm": 0.4400503933429718, + "learning_rate": 3.717179335994583e-06, + "loss": 0.4404789209365845, + "step": 2865 + }, + { + "epoch": 0.7307496175420704, + "grad_norm": 0.4329128563404083, + "learning_rate": 3.710620191499701e-06, + "loss": 0.4489637315273285, + "step": 2866 + }, + { + "epoch": 0.7310045894951556, + "grad_norm": 0.43637967109680176, + "learning_rate": 3.704065520582549e-06, + "loss": 0.44528207182884216, + "step": 2867 + }, + { + "epoch": 0.7312595614482407, + "grad_norm": 0.42939335107803345, + "learning_rate": 3.697515327905399e-06, + "loss": 0.434497207403183, + "step": 2868 + }, + { + "epoch": 0.7315145334013259, + "grad_norm": 0.5018973350524902, + "learning_rate": 3.690969618127348e-06, + "loss": 0.45246458053588867, + "step": 2869 + }, + { + "epoch": 0.731769505354411, + "grad_norm": 0.43627703189849854, + "learning_rate": 3.6844283959042924e-06, + "loss": 0.42965325713157654, + "step": 2870 + }, + { + "epoch": 0.7320244773074962, + "grad_norm": 0.46314767003059387, + "learning_rate": 3.6778916658889506e-06, + "loss": 0.43974757194519043, + "step": 2871 + }, + { + "epoch": 0.7322794492605813, + "grad_norm": 0.47196269035339355, + "learning_rate": 3.6713594327308343e-06, + "loss": 0.4353893995285034, + "step": 2872 + }, + { + "epoch": 0.7325344212136665, + "grad_norm": 0.45461487770080566, + "learning_rate": 3.664831701076258e-06, + "loss": 0.44562897086143494, + "step": 2873 + }, + { + "epoch": 0.7327893931667516, + "grad_norm": 0.46071672439575195, + "learning_rate": 3.658308475568342e-06, + "loss": 0.43509751558303833, + "step": 2874 + }, + { + "epoch": 0.7330443651198368, + "grad_norm": 0.44231125712394714, + "learning_rate": 3.65178976084699e-06, + "loss": 0.442873477935791, + "step": 2875 + }, + { + "epoch": 0.7332993370729219, + "grad_norm": 0.4249587953090668, + "learning_rate": 3.6452755615489077e-06, + "loss": 0.43926429748535156, + "step": 2876 + }, + { + "epoch": 0.7335543090260072, + "grad_norm": 0.46666327118873596, + "learning_rate": 3.638765882307589e-06, + "loss": 0.4330987334251404, + "step": 2877 + }, + { + "epoch": 0.7338092809790923, + "grad_norm": 0.45984357595443726, + "learning_rate": 3.6322607277533027e-06, + "loss": 0.4495600461959839, + "step": 2878 + }, + { + "epoch": 0.7340642529321775, + "grad_norm": 0.4424566328525543, + "learning_rate": 3.625760102513103e-06, + "loss": 0.4446689486503601, + "step": 2879 + }, + { + "epoch": 0.7343192248852626, + "grad_norm": 0.44157835841178894, + "learning_rate": 3.6192640112108322e-06, + "loss": 0.44923990964889526, + "step": 2880 + }, + { + "epoch": 0.7345741968383478, + "grad_norm": 0.4445481300354004, + "learning_rate": 3.612772458467092e-06, + "loss": 0.44120463728904724, + "step": 2881 + }, + { + "epoch": 0.7348291687914329, + "grad_norm": 0.5572802424430847, + "learning_rate": 3.6062854488992714e-06, + "loss": 0.43533897399902344, + "step": 2882 + }, + { + "epoch": 0.7350841407445181, + "grad_norm": 0.45228099822998047, + "learning_rate": 3.599802987121512e-06, + "loss": 0.4434191584587097, + "step": 2883 + }, + { + "epoch": 0.7353391126976032, + "grad_norm": 0.46297982335090637, + "learning_rate": 3.593325077744739e-06, + "loss": 0.4504498839378357, + "step": 2884 + }, + { + "epoch": 0.7355940846506884, + "grad_norm": 0.44489234685897827, + "learning_rate": 3.5868517253766235e-06, + "loss": 0.43913325667381287, + "step": 2885 + }, + { + "epoch": 0.7358490566037735, + "grad_norm": 0.4312242567539215, + "learning_rate": 3.580382934621598e-06, + "loss": 0.4557768702507019, + "step": 2886 + }, + { + "epoch": 0.7361040285568587, + "grad_norm": 0.44070443511009216, + "learning_rate": 3.573918710080857e-06, + "loss": 0.4463081359863281, + "step": 2887 + }, + { + "epoch": 0.7363590005099439, + "grad_norm": 0.4172808527946472, + "learning_rate": 3.567459056352347e-06, + "loss": 0.44776830077171326, + "step": 2888 + }, + { + "epoch": 0.7366139724630291, + "grad_norm": 0.43707603216171265, + "learning_rate": 3.5610039780307546e-06, + "loss": 0.4435421824455261, + "step": 2889 + }, + { + "epoch": 0.7368689444161143, + "grad_norm": 0.4548320770263672, + "learning_rate": 3.5545534797075234e-06, + "loss": 0.4290686845779419, + "step": 2890 + }, + { + "epoch": 0.7371239163691994, + "grad_norm": 0.42244952917099, + "learning_rate": 3.5481075659708287e-06, + "loss": 0.4392220973968506, + "step": 2891 + }, + { + "epoch": 0.7373788883222846, + "grad_norm": 0.4414602518081665, + "learning_rate": 3.541666241405588e-06, + "loss": 0.45275434851646423, + "step": 2892 + }, + { + "epoch": 0.7376338602753697, + "grad_norm": 0.45304572582244873, + "learning_rate": 3.5352295105934607e-06, + "loss": 0.4434977173805237, + "step": 2893 + }, + { + "epoch": 0.7378888322284549, + "grad_norm": 0.45812326669692993, + "learning_rate": 3.528797378112828e-06, + "loss": 0.4410284459590912, + "step": 2894 + }, + { + "epoch": 0.73814380418154, + "grad_norm": 0.44536668062210083, + "learning_rate": 3.5223698485388136e-06, + "loss": 0.4378358721733093, + "step": 2895 + }, + { + "epoch": 0.7383987761346252, + "grad_norm": 0.43989458680152893, + "learning_rate": 3.515946926443252e-06, + "loss": 0.4467636048793793, + "step": 2896 + }, + { + "epoch": 0.7386537480877103, + "grad_norm": 0.4682613015174866, + "learning_rate": 3.509528616394716e-06, + "loss": 0.4479398727416992, + "step": 2897 + }, + { + "epoch": 0.7389087200407956, + "grad_norm": 0.4957796633243561, + "learning_rate": 3.503114922958486e-06, + "loss": 0.4401901364326477, + "step": 2898 + }, + { + "epoch": 0.7391636919938807, + "grad_norm": 0.4830598831176758, + "learning_rate": 3.496705850696561e-06, + "loss": 0.4409579634666443, + "step": 2899 + }, + { + "epoch": 0.7394186639469659, + "grad_norm": 0.43040627241134644, + "learning_rate": 3.4903014041676565e-06, + "loss": 0.43947097659111023, + "step": 2900 + }, + { + "epoch": 0.739673635900051, + "grad_norm": 0.4375455975532532, + "learning_rate": 3.483901587927201e-06, + "loss": 0.4315758943557739, + "step": 2901 + }, + { + "epoch": 0.7399286078531362, + "grad_norm": 0.44875892996788025, + "learning_rate": 3.477506406527317e-06, + "loss": 0.44477686285972595, + "step": 2902 + }, + { + "epoch": 0.7401835798062213, + "grad_norm": 0.5409514307975769, + "learning_rate": 3.4711158645168443e-06, + "loss": 0.4459382891654968, + "step": 2903 + }, + { + "epoch": 0.7404385517593065, + "grad_norm": 0.44331035017967224, + "learning_rate": 3.4647299664413136e-06, + "loss": 0.4305277466773987, + "step": 2904 + }, + { + "epoch": 0.7406935237123916, + "grad_norm": 0.46023833751678467, + "learning_rate": 3.458348716842952e-06, + "loss": 0.42848941683769226, + "step": 2905 + }, + { + "epoch": 0.7409484956654768, + "grad_norm": 0.453451544046402, + "learning_rate": 3.4519721202606895e-06, + "loss": 0.4502413272857666, + "step": 2906 + }, + { + "epoch": 0.7412034676185619, + "grad_norm": 0.43192124366760254, + "learning_rate": 3.4456001812301344e-06, + "loss": 0.4459025263786316, + "step": 2907 + }, + { + "epoch": 0.7414584395716471, + "grad_norm": 0.4435686767101288, + "learning_rate": 3.4392329042835904e-06, + "loss": 0.44413721561431885, + "step": 2908 + }, + { + "epoch": 0.7417134115247322, + "grad_norm": 0.44830721616744995, + "learning_rate": 3.432870293950047e-06, + "loss": 0.45666563510894775, + "step": 2909 + }, + { + "epoch": 0.7419683834778175, + "grad_norm": 0.42248955368995667, + "learning_rate": 3.426512354755166e-06, + "loss": 0.43796348571777344, + "step": 2910 + }, + { + "epoch": 0.7422233554309026, + "grad_norm": 0.437529057264328, + "learning_rate": 3.420159091221288e-06, + "loss": 0.4518410563468933, + "step": 2911 + }, + { + "epoch": 0.7424783273839878, + "grad_norm": 0.43285441398620605, + "learning_rate": 3.4138105078674366e-06, + "loss": 0.43692296743392944, + "step": 2912 + }, + { + "epoch": 0.7427332993370729, + "grad_norm": 0.45706868171691895, + "learning_rate": 3.4074666092092945e-06, + "loss": 0.4546869397163391, + "step": 2913 + }, + { + "epoch": 0.7429882712901581, + "grad_norm": 0.44124263525009155, + "learning_rate": 3.401127399759225e-06, + "loss": 0.4413590431213379, + "step": 2914 + }, + { + "epoch": 0.7432432432432432, + "grad_norm": 0.4383951723575592, + "learning_rate": 3.3947928840262425e-06, + "loss": 0.4420372247695923, + "step": 2915 + }, + { + "epoch": 0.7434982151963284, + "grad_norm": 0.44535309076309204, + "learning_rate": 3.3884630665160344e-06, + "loss": 0.4349723756313324, + "step": 2916 + }, + { + "epoch": 0.7437531871494135, + "grad_norm": 0.41102197766304016, + "learning_rate": 3.3821379517309406e-06, + "loss": 0.44966214895248413, + "step": 2917 + }, + { + "epoch": 0.7440081591024987, + "grad_norm": 0.44411683082580566, + "learning_rate": 3.3758175441699525e-06, + "loss": 0.4433893859386444, + "step": 2918 + }, + { + "epoch": 0.7442631310555838, + "grad_norm": 0.470836877822876, + "learning_rate": 3.3695018483287212e-06, + "loss": 0.44160282611846924, + "step": 2919 + }, + { + "epoch": 0.744518103008669, + "grad_norm": 0.5225307941436768, + "learning_rate": 3.363190868699544e-06, + "loss": 0.44049859046936035, + "step": 2920 + }, + { + "epoch": 0.7447730749617542, + "grad_norm": 0.4494461119174957, + "learning_rate": 3.3568846097713594e-06, + "loss": 0.4394533038139343, + "step": 2921 + }, + { + "epoch": 0.7450280469148394, + "grad_norm": 0.4861459732055664, + "learning_rate": 3.3505830760297543e-06, + "loss": 0.4236561059951782, + "step": 2922 + }, + { + "epoch": 0.7452830188679245, + "grad_norm": 0.4226544201374054, + "learning_rate": 3.34428627195695e-06, + "loss": 0.4505658745765686, + "step": 2923 + }, + { + "epoch": 0.7455379908210097, + "grad_norm": 0.445040762424469, + "learning_rate": 3.3379942020318014e-06, + "loss": 0.46367499232292175, + "step": 2924 + }, + { + "epoch": 0.7457929627740949, + "grad_norm": 0.43944481015205383, + "learning_rate": 3.331706870729806e-06, + "loss": 0.4512088894844055, + "step": 2925 + }, + { + "epoch": 0.74604793472718, + "grad_norm": 0.47440463304519653, + "learning_rate": 3.3254242825230777e-06, + "loss": 0.4355928301811218, + "step": 2926 + }, + { + "epoch": 0.7463029066802652, + "grad_norm": 0.4428374767303467, + "learning_rate": 3.319146441880371e-06, + "loss": 0.44209808111190796, + "step": 2927 + }, + { + "epoch": 0.7465578786333503, + "grad_norm": 0.4566478729248047, + "learning_rate": 3.3128733532670478e-06, + "loss": 0.4385034441947937, + "step": 2928 + }, + { + "epoch": 0.7468128505864355, + "grad_norm": 0.4758487045764923, + "learning_rate": 3.306605021145106e-06, + "loss": 0.4396563470363617, + "step": 2929 + }, + { + "epoch": 0.7470678225395206, + "grad_norm": 0.440157413482666, + "learning_rate": 3.300341449973148e-06, + "loss": 0.4350048899650574, + "step": 2930 + }, + { + "epoch": 0.7473227944926059, + "grad_norm": 0.4477387070655823, + "learning_rate": 3.2940826442063924e-06, + "loss": 0.43772047758102417, + "step": 2931 + }, + { + "epoch": 0.747577766445691, + "grad_norm": 0.42891183495521545, + "learning_rate": 3.2878286082966704e-06, + "loss": 0.44212839007377625, + "step": 2932 + }, + { + "epoch": 0.7478327383987762, + "grad_norm": 0.4327998459339142, + "learning_rate": 3.281579346692426e-06, + "loss": 0.4553752541542053, + "step": 2933 + }, + { + "epoch": 0.7480877103518613, + "grad_norm": 0.44166067242622375, + "learning_rate": 3.275334863838694e-06, + "loss": 0.41830703616142273, + "step": 2934 + }, + { + "epoch": 0.7483426823049465, + "grad_norm": 0.45415228605270386, + "learning_rate": 3.269095164177123e-06, + "loss": 0.4383961856365204, + "step": 2935 + }, + { + "epoch": 0.7485976542580316, + "grad_norm": 0.4422258138656616, + "learning_rate": 3.2628602521459496e-06, + "loss": 0.4465511739253998, + "step": 2936 + }, + { + "epoch": 0.7488526262111168, + "grad_norm": 0.4468015730381012, + "learning_rate": 3.256630132180009e-06, + "loss": 0.4185349941253662, + "step": 2937 + }, + { + "epoch": 0.7491075981642019, + "grad_norm": 0.4371696710586548, + "learning_rate": 3.250404808710731e-06, + "loss": 0.4337192475795746, + "step": 2938 + }, + { + "epoch": 0.7493625701172871, + "grad_norm": 0.4528083801269531, + "learning_rate": 3.2441842861661266e-06, + "loss": 0.44116291403770447, + "step": 2939 + }, + { + "epoch": 0.7496175420703722, + "grad_norm": 0.42228686809539795, + "learning_rate": 3.2379685689707985e-06, + "loss": 0.45381030440330505, + "step": 2940 + }, + { + "epoch": 0.7498725140234574, + "grad_norm": 0.4403478503227234, + "learning_rate": 3.2317576615459324e-06, + "loss": 0.43453022837638855, + "step": 2941 + }, + { + "epoch": 0.7501274859765426, + "grad_norm": 0.42008721828460693, + "learning_rate": 3.225551568309284e-06, + "loss": 0.4479690492153168, + "step": 2942 + }, + { + "epoch": 0.7503824579296278, + "grad_norm": 0.435314804315567, + "learning_rate": 3.2193502936751895e-06, + "loss": 0.43803542852401733, + "step": 2943 + }, + { + "epoch": 0.7506374298827129, + "grad_norm": 0.43313083052635193, + "learning_rate": 3.213153842054564e-06, + "loss": 0.4430111050605774, + "step": 2944 + }, + { + "epoch": 0.7508924018357981, + "grad_norm": 0.4652822017669678, + "learning_rate": 3.20696221785488e-06, + "loss": 0.4293684959411621, + "step": 2945 + }, + { + "epoch": 0.7511473737888832, + "grad_norm": 0.46115192770957947, + "learning_rate": 3.2007754254801872e-06, + "loss": 0.43593597412109375, + "step": 2946 + }, + { + "epoch": 0.7514023457419684, + "grad_norm": 0.46106842160224915, + "learning_rate": 3.1945934693310897e-06, + "loss": 0.4410976767539978, + "step": 2947 + }, + { + "epoch": 0.7516573176950535, + "grad_norm": 0.45949119329452515, + "learning_rate": 3.1884163538047607e-06, + "loss": 0.44500434398651123, + "step": 2948 + }, + { + "epoch": 0.7519122896481387, + "grad_norm": 0.4491775631904602, + "learning_rate": 3.182244083294923e-06, + "loss": 0.44120240211486816, + "step": 2949 + }, + { + "epoch": 0.7521672616012238, + "grad_norm": 0.42800047993659973, + "learning_rate": 3.1760766621918515e-06, + "loss": 0.43075132369995117, + "step": 2950 + }, + { + "epoch": 0.752422233554309, + "grad_norm": 0.45833832025527954, + "learning_rate": 3.1699140948823835e-06, + "loss": 0.430724561214447, + "step": 2951 + }, + { + "epoch": 0.7526772055073941, + "grad_norm": 0.45019084215164185, + "learning_rate": 3.1637563857498886e-06, + "loss": 0.43860697746276855, + "step": 2952 + }, + { + "epoch": 0.7529321774604794, + "grad_norm": 0.45236337184906006, + "learning_rate": 3.157603539174293e-06, + "loss": 0.44204622507095337, + "step": 2953 + }, + { + "epoch": 0.7531871494135645, + "grad_norm": 0.43661150336265564, + "learning_rate": 3.1514555595320604e-06, + "loss": 0.4306318461894989, + "step": 2954 + }, + { + "epoch": 0.7534421213666497, + "grad_norm": 0.43726611137390137, + "learning_rate": 3.145312451196192e-06, + "loss": 0.42486798763275146, + "step": 2955 + }, + { + "epoch": 0.7536970933197348, + "grad_norm": 0.4403837025165558, + "learning_rate": 3.1391742185362174e-06, + "loss": 0.446133553981781, + "step": 2956 + }, + { + "epoch": 0.75395206527282, + "grad_norm": 0.47683706879615784, + "learning_rate": 3.133040865918213e-06, + "loss": 0.4425346255302429, + "step": 2957 + }, + { + "epoch": 0.7542070372259051, + "grad_norm": 0.44009360671043396, + "learning_rate": 3.1269123977047687e-06, + "loss": 0.4544362425804138, + "step": 2958 + }, + { + "epoch": 0.7544620091789903, + "grad_norm": 0.43296313285827637, + "learning_rate": 3.120788818255015e-06, + "loss": 0.4442460536956787, + "step": 2959 + }, + { + "epoch": 0.7547169811320755, + "grad_norm": 0.42778074741363525, + "learning_rate": 3.1146701319245896e-06, + "loss": 0.4324518144130707, + "step": 2960 + }, + { + "epoch": 0.7549719530851606, + "grad_norm": 0.44995859265327454, + "learning_rate": 3.1085563430656653e-06, + "loss": 0.4532390832901001, + "step": 2961 + }, + { + "epoch": 0.7552269250382458, + "grad_norm": 0.4304356873035431, + "learning_rate": 3.102447456026919e-06, + "loss": 0.4366806745529175, + "step": 2962 + }, + { + "epoch": 0.755481896991331, + "grad_norm": 0.466528981924057, + "learning_rate": 3.0963434751535427e-06, + "loss": 0.45421624183654785, + "step": 2963 + }, + { + "epoch": 0.7557368689444162, + "grad_norm": 0.576439380645752, + "learning_rate": 3.090244404787245e-06, + "loss": 0.43155166506767273, + "step": 2964 + }, + { + "epoch": 0.7559918408975013, + "grad_norm": 0.4535970091819763, + "learning_rate": 3.084150249266241e-06, + "loss": 0.43590113520622253, + "step": 2965 + }, + { + "epoch": 0.7562468128505865, + "grad_norm": 0.45630592107772827, + "learning_rate": 3.078061012925242e-06, + "loss": 0.43900856375694275, + "step": 2966 + }, + { + "epoch": 0.7565017848036716, + "grad_norm": 0.42787259817123413, + "learning_rate": 3.0719767000954714e-06, + "loss": 0.44750213623046875, + "step": 2967 + }, + { + "epoch": 0.7567567567567568, + "grad_norm": 0.4446224570274353, + "learning_rate": 3.065897315104641e-06, + "loss": 0.43496066331863403, + "step": 2968 + }, + { + "epoch": 0.7570117287098419, + "grad_norm": 0.4456896185874939, + "learning_rate": 3.059822862276958e-06, + "loss": 0.42623668909072876, + "step": 2969 + }, + { + "epoch": 0.7572667006629271, + "grad_norm": 0.4380398392677307, + "learning_rate": 3.0537533459331312e-06, + "loss": 0.42342907190322876, + "step": 2970 + }, + { + "epoch": 0.7575216726160122, + "grad_norm": 0.45309311151504517, + "learning_rate": 3.0476887703903456e-06, + "loss": 0.44981467723846436, + "step": 2971 + }, + { + "epoch": 0.7577766445690974, + "grad_norm": 0.49701908230781555, + "learning_rate": 3.0416291399622834e-06, + "loss": 0.4507873058319092, + "step": 2972 + }, + { + "epoch": 0.7580316165221825, + "grad_norm": 0.41651859879493713, + "learning_rate": 3.0355744589590976e-06, + "loss": 0.42973122000694275, + "step": 2973 + }, + { + "epoch": 0.7582865884752678, + "grad_norm": 0.41733089089393616, + "learning_rate": 3.029524731687432e-06, + "loss": 0.4291536211967468, + "step": 2974 + }, + { + "epoch": 0.7585415604283529, + "grad_norm": 0.42825251817703247, + "learning_rate": 3.0234799624504008e-06, + "loss": 0.43868499994277954, + "step": 2975 + }, + { + "epoch": 0.7587965323814381, + "grad_norm": 0.45960578322410583, + "learning_rate": 3.0174401555475884e-06, + "loss": 0.4422037601470947, + "step": 2976 + }, + { + "epoch": 0.7590515043345232, + "grad_norm": 0.45203688740730286, + "learning_rate": 3.0114053152750557e-06, + "loss": 0.4476014971733093, + "step": 2977 + }, + { + "epoch": 0.7593064762876084, + "grad_norm": 0.4353548586368561, + "learning_rate": 3.0053754459253338e-06, + "loss": 0.4427911937236786, + "step": 2978 + }, + { + "epoch": 0.7595614482406935, + "grad_norm": 0.4374661147594452, + "learning_rate": 2.9993505517874043e-06, + "loss": 0.4469697177410126, + "step": 2979 + }, + { + "epoch": 0.7598164201937787, + "grad_norm": 0.42508962750434875, + "learning_rate": 2.9933306371467276e-06, + "loss": 0.44563043117523193, + "step": 2980 + }, + { + "epoch": 0.7600713921468638, + "grad_norm": 0.4326213300228119, + "learning_rate": 2.987315706285209e-06, + "loss": 0.4384555220603943, + "step": 2981 + }, + { + "epoch": 0.760326364099949, + "grad_norm": 0.4297401010990143, + "learning_rate": 2.9813057634812104e-06, + "loss": 0.4502851366996765, + "step": 2982 + }, + { + "epoch": 0.7605813360530341, + "grad_norm": 0.4279881715774536, + "learning_rate": 2.975300813009554e-06, + "loss": 0.4402778148651123, + "step": 2983 + }, + { + "epoch": 0.7608363080061193, + "grad_norm": 0.43452244997024536, + "learning_rate": 2.9693008591414998e-06, + "loss": 0.43243128061294556, + "step": 2984 + }, + { + "epoch": 0.7610912799592044, + "grad_norm": 0.4346815049648285, + "learning_rate": 2.963305906144763e-06, + "loss": 0.4579737186431885, + "step": 2985 + }, + { + "epoch": 0.7613462519122897, + "grad_norm": 0.44520387053489685, + "learning_rate": 2.957315958283504e-06, + "loss": 0.4395790994167328, + "step": 2986 + }, + { + "epoch": 0.7616012238653748, + "grad_norm": 0.42731431126594543, + "learning_rate": 2.9513310198183067e-06, + "loss": 0.446267694234848, + "step": 2987 + }, + { + "epoch": 0.76185619581846, + "grad_norm": 0.449008584022522, + "learning_rate": 2.9453510950062057e-06, + "loss": 0.45317041873931885, + "step": 2988 + }, + { + "epoch": 0.7621111677715451, + "grad_norm": 0.4196610748767853, + "learning_rate": 2.939376188100671e-06, + "loss": 0.4390903413295746, + "step": 2989 + }, + { + "epoch": 0.7623661397246303, + "grad_norm": 0.4226979613304138, + "learning_rate": 2.9334063033515926e-06, + "loss": 0.4473586976528168, + "step": 2990 + }, + { + "epoch": 0.7626211116777154, + "grad_norm": 0.44532281160354614, + "learning_rate": 2.9274414450053003e-06, + "loss": 0.42633652687072754, + "step": 2991 + }, + { + "epoch": 0.7628760836308006, + "grad_norm": 0.4505890905857086, + "learning_rate": 2.921481617304536e-06, + "loss": 0.4464772641658783, + "step": 2992 + }, + { + "epoch": 0.7631310555838857, + "grad_norm": 0.46233832836151123, + "learning_rate": 2.9155268244884773e-06, + "loss": 0.43518686294555664, + "step": 2993 + }, + { + "epoch": 0.7633860275369709, + "grad_norm": 0.4125634729862213, + "learning_rate": 2.9095770707927086e-06, + "loss": 0.4407936930656433, + "step": 2994 + }, + { + "epoch": 0.7636409994900561, + "grad_norm": 0.4293194115161896, + "learning_rate": 2.9036323604492333e-06, + "loss": 0.43980568647384644, + "step": 2995 + }, + { + "epoch": 0.7638959714431413, + "grad_norm": 0.434973806142807, + "learning_rate": 2.897692697686472e-06, + "loss": 0.426477313041687, + "step": 2996 + }, + { + "epoch": 0.7641509433962265, + "grad_norm": 0.4353492259979248, + "learning_rate": 2.891758086729253e-06, + "loss": 0.43505096435546875, + "step": 2997 + }, + { + "epoch": 0.7644059153493116, + "grad_norm": 0.4570540189743042, + "learning_rate": 2.8858285317988065e-06, + "loss": 0.4340648055076599, + "step": 2998 + }, + { + "epoch": 0.7646608873023968, + "grad_norm": 0.43070581555366516, + "learning_rate": 2.8799040371127772e-06, + "loss": 0.4464573264122009, + "step": 2999 + }, + { + "epoch": 0.7649158592554819, + "grad_norm": 0.4650447964668274, + "learning_rate": 2.873984606885193e-06, + "loss": 0.4371274411678314, + "step": 3000 + }, + { + "epoch": 0.7651708312085671, + "grad_norm": 0.4581238031387329, + "learning_rate": 2.8680702453264953e-06, + "loss": 0.44181880354881287, + "step": 3001 + }, + { + "epoch": 0.7654258031616522, + "grad_norm": 0.42868974804878235, + "learning_rate": 2.8621609566435173e-06, + "loss": 0.4534147381782532, + "step": 3002 + }, + { + "epoch": 0.7656807751147374, + "grad_norm": 0.4373212456703186, + "learning_rate": 2.856256745039476e-06, + "loss": 0.459540456533432, + "step": 3003 + }, + { + "epoch": 0.7659357470678225, + "grad_norm": 0.432050496339798, + "learning_rate": 2.8503576147139887e-06, + "loss": 0.4405026435852051, + "step": 3004 + }, + { + "epoch": 0.7661907190209077, + "grad_norm": 0.46295300126075745, + "learning_rate": 2.8444635698630464e-06, + "loss": 0.43494483828544617, + "step": 3005 + }, + { + "epoch": 0.7664456909739928, + "grad_norm": 0.43868863582611084, + "learning_rate": 2.838574614679034e-06, + "loss": 0.4434785544872284, + "step": 3006 + }, + { + "epoch": 0.7667006629270781, + "grad_norm": 0.451294481754303, + "learning_rate": 2.8326907533507074e-06, + "loss": 0.4428268373012543, + "step": 3007 + }, + { + "epoch": 0.7669556348801632, + "grad_norm": 0.43986567854881287, + "learning_rate": 2.826811990063201e-06, + "loss": 0.44829750061035156, + "step": 3008 + }, + { + "epoch": 0.7672106068332484, + "grad_norm": 0.45006152987480164, + "learning_rate": 2.8209383289980272e-06, + "loss": 0.4289799630641937, + "step": 3009 + }, + { + "epoch": 0.7674655787863335, + "grad_norm": 0.45366767048835754, + "learning_rate": 2.8150697743330713e-06, + "loss": 0.42882341146469116, + "step": 3010 + }, + { + "epoch": 0.7677205507394187, + "grad_norm": 0.43628907203674316, + "learning_rate": 2.8092063302425733e-06, + "loss": 0.43587619066238403, + "step": 3011 + }, + { + "epoch": 0.7679755226925038, + "grad_norm": 0.4418754577636719, + "learning_rate": 2.803348000897155e-06, + "loss": 0.44426703453063965, + "step": 3012 + }, + { + "epoch": 0.768230494645589, + "grad_norm": 0.44897258281707764, + "learning_rate": 2.7974947904637872e-06, + "loss": 0.43549928069114685, + "step": 3013 + }, + { + "epoch": 0.7684854665986741, + "grad_norm": 0.4471312463283539, + "learning_rate": 2.7916467031058027e-06, + "loss": 0.4412408769130707, + "step": 3014 + }, + { + "epoch": 0.7687404385517593, + "grad_norm": 0.43710899353027344, + "learning_rate": 2.7858037429828978e-06, + "loss": 0.4489285349845886, + "step": 3015 + }, + { + "epoch": 0.7689954105048444, + "grad_norm": 0.4264802932739258, + "learning_rate": 2.779965914251109e-06, + "loss": 0.43275749683380127, + "step": 3016 + }, + { + "epoch": 0.7692503824579296, + "grad_norm": 0.42677900195121765, + "learning_rate": 2.7741332210628346e-06, + "loss": 0.44439825415611267, + "step": 3017 + }, + { + "epoch": 0.7695053544110148, + "grad_norm": 0.4425060749053955, + "learning_rate": 2.7683056675668206e-06, + "loss": 0.439004123210907, + "step": 3018 + }, + { + "epoch": 0.7697603263641, + "grad_norm": 0.43179410696029663, + "learning_rate": 2.7624832579081396e-06, + "loss": 0.45458805561065674, + "step": 3019 + }, + { + "epoch": 0.7700152983171851, + "grad_norm": 0.4506986737251282, + "learning_rate": 2.756665996228224e-06, + "loss": 0.4397001266479492, + "step": 3020 + }, + { + "epoch": 0.7702702702702703, + "grad_norm": 0.5024133324623108, + "learning_rate": 2.7508538866648417e-06, + "loss": 0.413383811712265, + "step": 3021 + }, + { + "epoch": 0.7705252422233554, + "grad_norm": 0.4732454717159271, + "learning_rate": 2.7450469333520856e-06, + "loss": 0.44492125511169434, + "step": 3022 + }, + { + "epoch": 0.7707802141764406, + "grad_norm": 0.4141547679901123, + "learning_rate": 2.7392451404203945e-06, + "loss": 0.4385744035243988, + "step": 3023 + }, + { + "epoch": 0.7710351861295257, + "grad_norm": 0.4609329402446747, + "learning_rate": 2.7334485119965235e-06, + "loss": 0.4328097999095917, + "step": 3024 + }, + { + "epoch": 0.7712901580826109, + "grad_norm": 0.45164668560028076, + "learning_rate": 2.7276570522035672e-06, + "loss": 0.42820605635643005, + "step": 3025 + }, + { + "epoch": 0.771545130035696, + "grad_norm": 0.4409070611000061, + "learning_rate": 2.7218707651609357e-06, + "loss": 0.4500933289527893, + "step": 3026 + }, + { + "epoch": 0.7718001019887812, + "grad_norm": 0.4577195942401886, + "learning_rate": 2.716089654984356e-06, + "loss": 0.4405101239681244, + "step": 3027 + }, + { + "epoch": 0.7720550739418663, + "grad_norm": 0.4214123487472534, + "learning_rate": 2.7103137257858867e-06, + "loss": 0.4437384009361267, + "step": 3028 + }, + { + "epoch": 0.7723100458949516, + "grad_norm": 0.43069449067115784, + "learning_rate": 2.7045429816738855e-06, + "loss": 0.45080098509788513, + "step": 3029 + }, + { + "epoch": 0.7725650178480367, + "grad_norm": 0.4492934048175812, + "learning_rate": 2.698777426753033e-06, + "loss": 0.4323241710662842, + "step": 3030 + }, + { + "epoch": 0.7728199898011219, + "grad_norm": 0.4298577308654785, + "learning_rate": 2.6930170651243206e-06, + "loss": 0.43868204951286316, + "step": 3031 + }, + { + "epoch": 0.7730749617542071, + "grad_norm": 0.4327729046344757, + "learning_rate": 2.6872619008850274e-06, + "loss": 0.44194912910461426, + "step": 3032 + }, + { + "epoch": 0.7733299337072922, + "grad_norm": 0.44278308749198914, + "learning_rate": 2.681511938128757e-06, + "loss": 0.42782241106033325, + "step": 3033 + }, + { + "epoch": 0.7735849056603774, + "grad_norm": 0.4500921070575714, + "learning_rate": 2.6757671809454055e-06, + "loss": 0.42661377787590027, + "step": 3034 + }, + { + "epoch": 0.7738398776134625, + "grad_norm": 0.43823516368865967, + "learning_rate": 2.6700276334211605e-06, + "loss": 0.4408743381500244, + "step": 3035 + }, + { + "epoch": 0.7740948495665477, + "grad_norm": 0.4439128041267395, + "learning_rate": 2.6642932996385163e-06, + "loss": 0.43246230483055115, + "step": 3036 + }, + { + "epoch": 0.7743498215196328, + "grad_norm": 0.4504877030849457, + "learning_rate": 2.658564183676243e-06, + "loss": 0.4362192153930664, + "step": 3037 + }, + { + "epoch": 0.774604793472718, + "grad_norm": 0.4362858831882477, + "learning_rate": 2.6528402896094154e-06, + "loss": 0.43194517493247986, + "step": 3038 + }, + { + "epoch": 0.7748597654258031, + "grad_norm": 0.43476131558418274, + "learning_rate": 2.647121621509383e-06, + "loss": 0.443441241979599, + "step": 3039 + }, + { + "epoch": 0.7751147373788884, + "grad_norm": 0.440054327249527, + "learning_rate": 2.64140818344378e-06, + "loss": 0.44788044691085815, + "step": 3040 + }, + { + "epoch": 0.7753697093319735, + "grad_norm": 0.4809795320034027, + "learning_rate": 2.6356999794765226e-06, + "loss": 0.45182570815086365, + "step": 3041 + }, + { + "epoch": 0.7756246812850587, + "grad_norm": 0.447130024433136, + "learning_rate": 2.629997013667808e-06, + "loss": 0.44849371910095215, + "step": 3042 + }, + { + "epoch": 0.7758796532381438, + "grad_norm": 0.4554246962070465, + "learning_rate": 2.624299290074097e-06, + "loss": 0.44359302520751953, + "step": 3043 + }, + { + "epoch": 0.776134625191229, + "grad_norm": 0.447831392288208, + "learning_rate": 2.6186068127481333e-06, + "loss": 0.43672287464141846, + "step": 3044 + }, + { + "epoch": 0.7763895971443141, + "grad_norm": 0.4739379286766052, + "learning_rate": 2.61291958573892e-06, + "loss": 0.4372883439064026, + "step": 3045 + }, + { + "epoch": 0.7766445690973993, + "grad_norm": 0.45469939708709717, + "learning_rate": 2.6072376130917275e-06, + "loss": 0.4256613254547119, + "step": 3046 + }, + { + "epoch": 0.7768995410504844, + "grad_norm": 0.43982696533203125, + "learning_rate": 2.6015608988480956e-06, + "loss": 0.44357165694236755, + "step": 3047 + }, + { + "epoch": 0.7771545130035696, + "grad_norm": 0.44001829624176025, + "learning_rate": 2.595889447045813e-06, + "loss": 0.4202490448951721, + "step": 3048 + }, + { + "epoch": 0.7774094849566547, + "grad_norm": 0.4493784010410309, + "learning_rate": 2.590223261718937e-06, + "loss": 0.4398352801799774, + "step": 3049 + }, + { + "epoch": 0.77766445690974, + "grad_norm": 0.44004541635513306, + "learning_rate": 2.5845623468977687e-06, + "loss": 0.43794482946395874, + "step": 3050 + }, + { + "epoch": 0.7779194288628251, + "grad_norm": 0.4580850303173065, + "learning_rate": 2.5789067066088633e-06, + "loss": 0.4361294209957123, + "step": 3051 + }, + { + "epoch": 0.7781744008159103, + "grad_norm": 0.45136088132858276, + "learning_rate": 2.5732563448750304e-06, + "loss": 0.43613120913505554, + "step": 3052 + }, + { + "epoch": 0.7784293727689954, + "grad_norm": 0.46002984046936035, + "learning_rate": 2.567611265715313e-06, + "loss": 0.43921753764152527, + "step": 3053 + }, + { + "epoch": 0.7786843447220806, + "grad_norm": 0.46570706367492676, + "learning_rate": 2.5619714731450086e-06, + "loss": 0.4551700949668884, + "step": 3054 + }, + { + "epoch": 0.7789393166751657, + "grad_norm": 0.4437997341156006, + "learning_rate": 2.5563369711756526e-06, + "loss": 0.45722657442092896, + "step": 3055 + }, + { + "epoch": 0.7791942886282509, + "grad_norm": 0.4493846893310547, + "learning_rate": 2.550707763815007e-06, + "loss": 0.42499202489852905, + "step": 3056 + }, + { + "epoch": 0.779449260581336, + "grad_norm": 0.4641744792461395, + "learning_rate": 2.5450838550670808e-06, + "loss": 0.4430501461029053, + "step": 3057 + }, + { + "epoch": 0.7797042325344212, + "grad_norm": 0.4606056213378906, + "learning_rate": 2.5394652489321057e-06, + "loss": 0.43809938430786133, + "step": 3058 + }, + { + "epoch": 0.7799592044875063, + "grad_norm": 0.44892892241477966, + "learning_rate": 2.533851949406543e-06, + "loss": 0.4352834224700928, + "step": 3059 + }, + { + "epoch": 0.7802141764405915, + "grad_norm": 0.43062809109687805, + "learning_rate": 2.5282439604830845e-06, + "loss": 0.42994245886802673, + "step": 3060 + }, + { + "epoch": 0.7804691483936766, + "grad_norm": 0.448716938495636, + "learning_rate": 2.5226412861506353e-06, + "loss": 0.43783435225486755, + "step": 3061 + }, + { + "epoch": 0.7807241203467619, + "grad_norm": 0.4420609176158905, + "learning_rate": 2.5170439303943295e-06, + "loss": 0.44785523414611816, + "step": 3062 + }, + { + "epoch": 0.780979092299847, + "grad_norm": 0.4563617408275604, + "learning_rate": 2.5114518971955205e-06, + "loss": 0.4503597617149353, + "step": 3063 + }, + { + "epoch": 0.7812340642529322, + "grad_norm": 0.4480021595954895, + "learning_rate": 2.5058651905317577e-06, + "loss": 0.4416431188583374, + "step": 3064 + }, + { + "epoch": 0.7814890362060173, + "grad_norm": 0.46790507435798645, + "learning_rate": 2.5002838143768195e-06, + "loss": 0.44542765617370605, + "step": 3065 + }, + { + "epoch": 0.7817440081591025, + "grad_norm": 0.44628477096557617, + "learning_rate": 2.494707772700691e-06, + "loss": 0.4343700408935547, + "step": 3066 + }, + { + "epoch": 0.7819989801121877, + "grad_norm": 0.4471970796585083, + "learning_rate": 2.4891370694695516e-06, + "loss": 0.4321914315223694, + "step": 3067 + }, + { + "epoch": 0.7822539520652728, + "grad_norm": 0.46172577142715454, + "learning_rate": 2.4835717086457988e-06, + "loss": 0.4497830271720886, + "step": 3068 + }, + { + "epoch": 0.782508924018358, + "grad_norm": 0.489749938249588, + "learning_rate": 2.478011694188015e-06, + "loss": 0.45339810848236084, + "step": 3069 + }, + { + "epoch": 0.7827638959714431, + "grad_norm": 0.4978252649307251, + "learning_rate": 2.472457030050994e-06, + "loss": 0.4448505640029907, + "step": 3070 + }, + { + "epoch": 0.7830188679245284, + "grad_norm": 0.4339428246021271, + "learning_rate": 2.4669077201857138e-06, + "loss": 0.435431569814682, + "step": 3071 + }, + { + "epoch": 0.7832738398776135, + "grad_norm": 0.4410724937915802, + "learning_rate": 2.4613637685393433e-06, + "loss": 0.45155227184295654, + "step": 3072 + }, + { + "epoch": 0.7835288118306987, + "grad_norm": 0.458036333322525, + "learning_rate": 2.455825179055249e-06, + "loss": 0.44565317034721375, + "step": 3073 + }, + { + "epoch": 0.7837837837837838, + "grad_norm": 0.42787349224090576, + "learning_rate": 2.4502919556729798e-06, + "loss": 0.43534278869628906, + "step": 3074 + }, + { + "epoch": 0.784038755736869, + "grad_norm": 0.45477649569511414, + "learning_rate": 2.4447641023282607e-06, + "loss": 0.44423800706863403, + "step": 3075 + }, + { + "epoch": 0.7842937276899541, + "grad_norm": 0.43623751401901245, + "learning_rate": 2.4392416229530093e-06, + "loss": 0.4423667788505554, + "step": 3076 + }, + { + "epoch": 0.7845486996430393, + "grad_norm": 0.4349161684513092, + "learning_rate": 2.4337245214753104e-06, + "loss": 0.44194480776786804, + "step": 3077 + }, + { + "epoch": 0.7848036715961244, + "grad_norm": 0.47349271178245544, + "learning_rate": 2.428212801819424e-06, + "loss": 0.432020366191864, + "step": 3078 + }, + { + "epoch": 0.7850586435492096, + "grad_norm": 0.45997923612594604, + "learning_rate": 2.422706467905792e-06, + "loss": 0.42831337451934814, + "step": 3079 + }, + { + "epoch": 0.7853136155022947, + "grad_norm": 0.4358552396297455, + "learning_rate": 2.4172055236510127e-06, + "loss": 0.44012266397476196, + "step": 3080 + }, + { + "epoch": 0.7855685874553799, + "grad_norm": 0.44077014923095703, + "learning_rate": 2.4117099729678638e-06, + "loss": 0.4489743113517761, + "step": 3081 + }, + { + "epoch": 0.785823559408465, + "grad_norm": 0.447301983833313, + "learning_rate": 2.406219819765275e-06, + "loss": 0.42973965406417847, + "step": 3082 + }, + { + "epoch": 0.7860785313615503, + "grad_norm": 0.4363212287425995, + "learning_rate": 2.4007350679483398e-06, + "loss": 0.4414937496185303, + "step": 3083 + }, + { + "epoch": 0.7863335033146354, + "grad_norm": 0.4348331391811371, + "learning_rate": 2.395255721418317e-06, + "loss": 0.4441762864589691, + "step": 3084 + }, + { + "epoch": 0.7865884752677206, + "grad_norm": 0.4310491383075714, + "learning_rate": 2.389781784072609e-06, + "loss": 0.4399312436580658, + "step": 3085 + }, + { + "epoch": 0.7868434472208057, + "grad_norm": 0.46151554584503174, + "learning_rate": 2.38431325980478e-06, + "loss": 0.42329296469688416, + "step": 3086 + }, + { + "epoch": 0.7870984191738909, + "grad_norm": 0.4397614002227783, + "learning_rate": 2.378850152504544e-06, + "loss": 0.41856569051742554, + "step": 3087 + }, + { + "epoch": 0.787353391126976, + "grad_norm": 0.4383176565170288, + "learning_rate": 2.3733924660577524e-06, + "loss": 0.44408220052719116, + "step": 3088 + }, + { + "epoch": 0.7876083630800612, + "grad_norm": 0.45403745770454407, + "learning_rate": 2.367940204346414e-06, + "loss": 0.43578648567199707, + "step": 3089 + }, + { + "epoch": 0.7878633350331463, + "grad_norm": 0.43478861451148987, + "learning_rate": 2.3624933712486673e-06, + "loss": 0.43671154975891113, + "step": 3090 + }, + { + "epoch": 0.7881183069862315, + "grad_norm": 0.4544563889503479, + "learning_rate": 2.3570519706387928e-06, + "loss": 0.43795761466026306, + "step": 3091 + }, + { + "epoch": 0.7883732789393166, + "grad_norm": 0.4508332908153534, + "learning_rate": 2.351616006387214e-06, + "loss": 0.4364631175994873, + "step": 3092 + }, + { + "epoch": 0.7886282508924018, + "grad_norm": 0.44363948702812195, + "learning_rate": 2.3461854823604766e-06, + "loss": 0.4498422145843506, + "step": 3093 + }, + { + "epoch": 0.788883222845487, + "grad_norm": 0.452188104391098, + "learning_rate": 2.3407604024212636e-06, + "loss": 0.44089120626449585, + "step": 3094 + }, + { + "epoch": 0.7891381947985722, + "grad_norm": 0.45992568135261536, + "learning_rate": 2.3353407704283926e-06, + "loss": 0.43927067518234253, + "step": 3095 + }, + { + "epoch": 0.7893931667516573, + "grad_norm": 0.44867175817489624, + "learning_rate": 2.329926590236784e-06, + "loss": 0.4402957558631897, + "step": 3096 + }, + { + "epoch": 0.7896481387047425, + "grad_norm": 0.4226747751235962, + "learning_rate": 2.324517865697501e-06, + "loss": 0.4406220316886902, + "step": 3097 + }, + { + "epoch": 0.7899031106578276, + "grad_norm": 0.45604899525642395, + "learning_rate": 2.3191146006577202e-06, + "loss": 0.44700348377227783, + "step": 3098 + }, + { + "epoch": 0.7901580826109128, + "grad_norm": 0.42548590898513794, + "learning_rate": 2.3137167989607324e-06, + "loss": 0.4498402774333954, + "step": 3099 + }, + { + "epoch": 0.7904130545639979, + "grad_norm": 1.5527485609054565, + "learning_rate": 2.3083244644459457e-06, + "loss": 0.4142894744873047, + "step": 3100 + }, + { + "epoch": 0.7906680265170831, + "grad_norm": 0.41954970359802246, + "learning_rate": 2.302937600948877e-06, + "loss": 0.4311685562133789, + "step": 3101 + }, + { + "epoch": 0.7909229984701683, + "grad_norm": 0.435149610042572, + "learning_rate": 2.2975562123011497e-06, + "loss": 0.45557522773742676, + "step": 3102 + }, + { + "epoch": 0.7911779704232534, + "grad_norm": 0.43851983547210693, + "learning_rate": 2.2921803023305e-06, + "loss": 0.43911582231521606, + "step": 3103 + }, + { + "epoch": 0.7914329423763387, + "grad_norm": 0.43195638060569763, + "learning_rate": 2.2868098748607594e-06, + "loss": 0.44874662160873413, + "step": 3104 + }, + { + "epoch": 0.7916879143294238, + "grad_norm": 0.42538851499557495, + "learning_rate": 2.281444933711867e-06, + "loss": 0.42983171343803406, + "step": 3105 + }, + { + "epoch": 0.791942886282509, + "grad_norm": 0.45717546343803406, + "learning_rate": 2.2760854826998523e-06, + "loss": 0.4311561584472656, + "step": 3106 + }, + { + "epoch": 0.7921978582355941, + "grad_norm": 0.43942540884017944, + "learning_rate": 2.2707315256368434e-06, + "loss": 0.4412494897842407, + "step": 3107 + }, + { + "epoch": 0.7924528301886793, + "grad_norm": 0.4405248761177063, + "learning_rate": 2.2653830663310693e-06, + "loss": 0.4397597014904022, + "step": 3108 + }, + { + "epoch": 0.7927078021417644, + "grad_norm": 0.4440111517906189, + "learning_rate": 2.2600401085868263e-06, + "loss": 0.4302397668361664, + "step": 3109 + }, + { + "epoch": 0.7929627740948496, + "grad_norm": 0.4543129503726959, + "learning_rate": 2.254702656204516e-06, + "loss": 0.4415404796600342, + "step": 3110 + }, + { + "epoch": 0.7932177460479347, + "grad_norm": 0.4537447988986969, + "learning_rate": 2.249370712980624e-06, + "loss": 0.4310953617095947, + "step": 3111 + }, + { + "epoch": 0.7934727180010199, + "grad_norm": 0.4390498399734497, + "learning_rate": 2.244044282707705e-06, + "loss": 0.4310564696788788, + "step": 3112 + }, + { + "epoch": 0.793727689954105, + "grad_norm": 0.4333949387073517, + "learning_rate": 2.2387233691744047e-06, + "loss": 0.4509139657020569, + "step": 3113 + }, + { + "epoch": 0.7939826619071902, + "grad_norm": 0.4292167127132416, + "learning_rate": 2.233407976165438e-06, + "loss": 0.43772897124290466, + "step": 3114 + }, + { + "epoch": 0.7942376338602753, + "grad_norm": 0.4735633134841919, + "learning_rate": 2.2280981074615926e-06, + "loss": 0.445385217666626, + "step": 3115 + }, + { + "epoch": 0.7944926058133606, + "grad_norm": 0.42386460304260254, + "learning_rate": 2.222793766839734e-06, + "loss": 0.4210456609725952, + "step": 3116 + }, + { + "epoch": 0.7947475777664457, + "grad_norm": 0.4557611346244812, + "learning_rate": 2.2174949580727835e-06, + "loss": 0.4388166666030884, + "step": 3117 + }, + { + "epoch": 0.7950025497195309, + "grad_norm": 0.4518273174762726, + "learning_rate": 2.21220168492974e-06, + "loss": 0.43927669525146484, + "step": 3118 + }, + { + "epoch": 0.795257521672616, + "grad_norm": 0.4459928870201111, + "learning_rate": 2.206913951175661e-06, + "loss": 0.4367228150367737, + "step": 3119 + }, + { + "epoch": 0.7955124936257012, + "grad_norm": 0.444635272026062, + "learning_rate": 2.2016317605716577e-06, + "loss": 0.4311097264289856, + "step": 3120 + }, + { + "epoch": 0.7957674655787863, + "grad_norm": 0.4263254702091217, + "learning_rate": 2.1963551168749097e-06, + "loss": 0.44298744201660156, + "step": 3121 + }, + { + "epoch": 0.7960224375318715, + "grad_norm": 0.4463154077529907, + "learning_rate": 2.19108402383864e-06, + "loss": 0.43678268790245056, + "step": 3122 + }, + { + "epoch": 0.7962774094849566, + "grad_norm": 0.43629640340805054, + "learning_rate": 2.185818485212128e-06, + "loss": 0.4486580491065979, + "step": 3123 + }, + { + "epoch": 0.7965323814380418, + "grad_norm": 0.44561508297920227, + "learning_rate": 2.1805585047407063e-06, + "loss": 0.45028600096702576, + "step": 3124 + }, + { + "epoch": 0.7967873533911269, + "grad_norm": 0.4386787712574005, + "learning_rate": 2.175304086165747e-06, + "loss": 0.43141692876815796, + "step": 3125 + }, + { + "epoch": 0.7970423253442122, + "grad_norm": 0.4359768033027649, + "learning_rate": 2.1700552332246695e-06, + "loss": 0.4307355284690857, + "step": 3126 + }, + { + "epoch": 0.7972972972972973, + "grad_norm": 0.4674769341945648, + "learning_rate": 2.1648119496509423e-06, + "loss": 0.4411862790584564, + "step": 3127 + }, + { + "epoch": 0.7975522692503825, + "grad_norm": 0.436692476272583, + "learning_rate": 2.1595742391740516e-06, + "loss": 0.43730098009109497, + "step": 3128 + }, + { + "epoch": 0.7978072412034676, + "grad_norm": 0.4336155354976654, + "learning_rate": 2.1543421055195425e-06, + "loss": 0.43345558643341064, + "step": 3129 + }, + { + "epoch": 0.7980622131565528, + "grad_norm": 0.43793782591819763, + "learning_rate": 2.1491155524089767e-06, + "loss": 0.43637627363204956, + "step": 3130 + }, + { + "epoch": 0.7983171851096379, + "grad_norm": 0.43755635619163513, + "learning_rate": 2.1438945835599556e-06, + "loss": 0.44143471121788025, + "step": 3131 + }, + { + "epoch": 0.7985721570627231, + "grad_norm": 0.45615267753601074, + "learning_rate": 2.1386792026861103e-06, + "loss": 0.4418499171733856, + "step": 3132 + }, + { + "epoch": 0.7988271290158082, + "grad_norm": 0.45956259965896606, + "learning_rate": 2.1334694134970892e-06, + "loss": 0.43504881858825684, + "step": 3133 + }, + { + "epoch": 0.7990821009688934, + "grad_norm": 0.46168217062950134, + "learning_rate": 2.1282652196985643e-06, + "loss": 0.426724374294281, + "step": 3134 + }, + { + "epoch": 0.7993370729219785, + "grad_norm": 0.4162600338459015, + "learning_rate": 2.123066624992236e-06, + "loss": 0.4324457049369812, + "step": 3135 + }, + { + "epoch": 0.7995920448750637, + "grad_norm": 0.43563950061798096, + "learning_rate": 2.117873633075812e-06, + "loss": 0.4513908326625824, + "step": 3136 + }, + { + "epoch": 0.7998470168281488, + "grad_norm": 0.4455576539039612, + "learning_rate": 2.1126862476430245e-06, + "loss": 0.43949684500694275, + "step": 3137 + }, + { + "epoch": 0.8001019887812341, + "grad_norm": 0.44459694623947144, + "learning_rate": 2.107504472383606e-06, + "loss": 0.4357977509498596, + "step": 3138 + }, + { + "epoch": 0.8003569607343193, + "grad_norm": 0.4605623781681061, + "learning_rate": 2.1023283109833106e-06, + "loss": 0.429220050573349, + "step": 3139 + }, + { + "epoch": 0.8006119326874044, + "grad_norm": 0.43185335397720337, + "learning_rate": 2.0971577671238987e-06, + "loss": 0.43885111808776855, + "step": 3140 + }, + { + "epoch": 0.8008669046404896, + "grad_norm": 0.42108774185180664, + "learning_rate": 2.0919928444831174e-06, + "loss": 0.4211594760417938, + "step": 3141 + }, + { + "epoch": 0.8011218765935747, + "grad_norm": 0.47686824202537537, + "learning_rate": 2.0868335467347367e-06, + "loss": 0.4349004924297333, + "step": 3142 + }, + { + "epoch": 0.8013768485466599, + "grad_norm": 0.45625442266464233, + "learning_rate": 2.081679877548519e-06, + "loss": 0.43549275398254395, + "step": 3143 + }, + { + "epoch": 0.801631820499745, + "grad_norm": 0.43894386291503906, + "learning_rate": 2.0765318405902158e-06, + "loss": 0.4453749656677246, + "step": 3144 + }, + { + "epoch": 0.8018867924528302, + "grad_norm": 0.44987383484840393, + "learning_rate": 2.071389439521584e-06, + "loss": 0.4443401098251343, + "step": 3145 + }, + { + "epoch": 0.8021417644059153, + "grad_norm": 0.42041513323783875, + "learning_rate": 2.066252678000362e-06, + "loss": 0.4356636703014374, + "step": 3146 + }, + { + "epoch": 0.8023967363590006, + "grad_norm": 0.46287089586257935, + "learning_rate": 2.06112155968028e-06, + "loss": 0.44034093618392944, + "step": 3147 + }, + { + "epoch": 0.8026517083120857, + "grad_norm": 0.44690874218940735, + "learning_rate": 2.055996088211061e-06, + "loss": 0.4372246265411377, + "step": 3148 + }, + { + "epoch": 0.8029066802651709, + "grad_norm": 0.4576677978038788, + "learning_rate": 2.0508762672383975e-06, + "loss": 0.43028396368026733, + "step": 3149 + }, + { + "epoch": 0.803161652218256, + "grad_norm": 0.46135053038597107, + "learning_rate": 2.045762100403975e-06, + "loss": 0.4304881691932678, + "step": 3150 + }, + { + "epoch": 0.8034166241713412, + "grad_norm": 0.43473130464553833, + "learning_rate": 2.040653591345455e-06, + "loss": 0.4307914674282074, + "step": 3151 + }, + { + "epoch": 0.8036715961244263, + "grad_norm": 0.42412862181663513, + "learning_rate": 2.0355507436964684e-06, + "loss": 0.4323742389678955, + "step": 3152 + }, + { + "epoch": 0.8039265680775115, + "grad_norm": 0.4608285129070282, + "learning_rate": 2.0304535610866285e-06, + "loss": 0.4228378236293793, + "step": 3153 + }, + { + "epoch": 0.8041815400305966, + "grad_norm": 0.44225743412971497, + "learning_rate": 2.0253620471415115e-06, + "loss": 0.45546650886535645, + "step": 3154 + }, + { + "epoch": 0.8044365119836818, + "grad_norm": 0.4197104573249817, + "learning_rate": 2.0202762054826617e-06, + "loss": 0.4291185736656189, + "step": 3155 + }, + { + "epoch": 0.8046914839367669, + "grad_norm": 0.4479086101055145, + "learning_rate": 2.015196039727597e-06, + "loss": 0.43910154700279236, + "step": 3156 + }, + { + "epoch": 0.8049464558898521, + "grad_norm": 0.4188553988933563, + "learning_rate": 2.0101215534897856e-06, + "loss": 0.4407055377960205, + "step": 3157 + }, + { + "epoch": 0.8052014278429372, + "grad_norm": 0.4461323022842407, + "learning_rate": 2.0050527503786686e-06, + "loss": 0.4346162676811218, + "step": 3158 + }, + { + "epoch": 0.8054563997960225, + "grad_norm": 0.4502837061882019, + "learning_rate": 1.9999896339996373e-06, + "loss": 0.4362154006958008, + "step": 3159 + }, + { + "epoch": 0.8057113717491076, + "grad_norm": 0.45032528042793274, + "learning_rate": 1.9949322079540354e-06, + "loss": 0.4282383322715759, + "step": 3160 + }, + { + "epoch": 0.8059663437021928, + "grad_norm": 0.4429026246070862, + "learning_rate": 1.9898804758391697e-06, + "loss": 0.4364677369594574, + "step": 3161 + }, + { + "epoch": 0.8062213156552779, + "grad_norm": 0.45870086550712585, + "learning_rate": 1.9848344412482856e-06, + "loss": 0.4519317150115967, + "step": 3162 + }, + { + "epoch": 0.8064762876083631, + "grad_norm": 0.4462481141090393, + "learning_rate": 1.979794107770582e-06, + "loss": 0.4192603528499603, + "step": 3163 + }, + { + "epoch": 0.8067312595614482, + "grad_norm": 0.4347079396247864, + "learning_rate": 1.9747594789912038e-06, + "loss": 0.45258253812789917, + "step": 3164 + }, + { + "epoch": 0.8069862315145334, + "grad_norm": 0.42841705679893494, + "learning_rate": 1.969730558491235e-06, + "loss": 0.4304022490978241, + "step": 3165 + }, + { + "epoch": 0.8072412034676185, + "grad_norm": 0.42129963636398315, + "learning_rate": 1.9647073498476977e-06, + "loss": 0.4637308120727539, + "step": 3166 + }, + { + "epoch": 0.8074961754207037, + "grad_norm": 0.4346625506877899, + "learning_rate": 1.9596898566335575e-06, + "loss": 0.43997952342033386, + "step": 3167 + }, + { + "epoch": 0.8077511473737888, + "grad_norm": 0.44551050662994385, + "learning_rate": 1.954678082417707e-06, + "loss": 0.4464991092681885, + "step": 3168 + }, + { + "epoch": 0.808006119326874, + "grad_norm": 0.44049108028411865, + "learning_rate": 1.9496720307649797e-06, + "loss": 0.4449331760406494, + "step": 3169 + }, + { + "epoch": 0.8082610912799592, + "grad_norm": 0.48131170868873596, + "learning_rate": 1.9446717052361286e-06, + "loss": 0.44099175930023193, + "step": 3170 + }, + { + "epoch": 0.8085160632330444, + "grad_norm": 0.44364720582962036, + "learning_rate": 1.939677109387841e-06, + "loss": 0.4477325677871704, + "step": 3171 + }, + { + "epoch": 0.8087710351861295, + "grad_norm": 0.422466903924942, + "learning_rate": 1.9346882467727323e-06, + "loss": 0.43594223260879517, + "step": 3172 + }, + { + "epoch": 0.8090260071392147, + "grad_norm": 0.4314025938510895, + "learning_rate": 1.929705120939326e-06, + "loss": 0.434553325176239, + "step": 3173 + }, + { + "epoch": 0.8092809790922999, + "grad_norm": 0.45429590344429016, + "learning_rate": 1.924727735432076e-06, + "loss": 0.44354602694511414, + "step": 3174 + }, + { + "epoch": 0.809535951045385, + "grad_norm": 0.4488116502761841, + "learning_rate": 1.9197560937913538e-06, + "loss": 0.4514884650707245, + "step": 3175 + }, + { + "epoch": 0.8097909229984702, + "grad_norm": 0.43512481451034546, + "learning_rate": 1.914790199553437e-06, + "loss": 0.43873876333236694, + "step": 3176 + }, + { + "epoch": 0.8100458949515553, + "grad_norm": 0.42961785197257996, + "learning_rate": 1.9098300562505266e-06, + "loss": 0.4447658956050873, + "step": 3177 + }, + { + "epoch": 0.8103008669046405, + "grad_norm": 0.43365493416786194, + "learning_rate": 1.9048756674107228e-06, + "loss": 0.4348711669445038, + "step": 3178 + }, + { + "epoch": 0.8105558388577256, + "grad_norm": 0.4372931718826294, + "learning_rate": 1.8999270365580347e-06, + "loss": 0.453824520111084, + "step": 3179 + }, + { + "epoch": 0.8108108108108109, + "grad_norm": 0.42043957114219666, + "learning_rate": 1.8949841672123826e-06, + "loss": 0.4466125965118408, + "step": 3180 + }, + { + "epoch": 0.811065782763896, + "grad_norm": 0.4353785812854767, + "learning_rate": 1.8900470628895783e-06, + "loss": 0.44272512197494507, + "step": 3181 + }, + { + "epoch": 0.8113207547169812, + "grad_norm": 0.4317415654659271, + "learning_rate": 1.8851157271013443e-06, + "loss": 0.4332931935787201, + "step": 3182 + }, + { + "epoch": 0.8115757266700663, + "grad_norm": 0.4312629997730255, + "learning_rate": 1.8801901633552878e-06, + "loss": 0.43788594007492065, + "step": 3183 + }, + { + "epoch": 0.8118306986231515, + "grad_norm": 0.45187297463417053, + "learning_rate": 1.8752703751549207e-06, + "loss": 0.4394760727882385, + "step": 3184 + }, + { + "epoch": 0.8120856705762366, + "grad_norm": 0.44457483291625977, + "learning_rate": 1.8703563659996483e-06, + "loss": 0.42946380376815796, + "step": 3185 + }, + { + "epoch": 0.8123406425293218, + "grad_norm": 0.47339144349098206, + "learning_rate": 1.865448139384748e-06, + "loss": 0.4360730051994324, + "step": 3186 + }, + { + "epoch": 0.8125956144824069, + "grad_norm": 0.42321452498435974, + "learning_rate": 1.8605456988014014e-06, + "loss": 0.43954116106033325, + "step": 3187 + }, + { + "epoch": 0.8128505864354921, + "grad_norm": 0.4437929689884186, + "learning_rate": 1.8556490477366739e-06, + "loss": 0.42642879486083984, + "step": 3188 + }, + { + "epoch": 0.8131055583885772, + "grad_norm": 0.4351274371147156, + "learning_rate": 1.8507581896735004e-06, + "loss": 0.43298158049583435, + "step": 3189 + }, + { + "epoch": 0.8133605303416624, + "grad_norm": 0.4379459619522095, + "learning_rate": 1.8458731280907093e-06, + "loss": 0.4267469048500061, + "step": 3190 + }, + { + "epoch": 0.8136155022947476, + "grad_norm": 0.43378978967666626, + "learning_rate": 1.8409938664629978e-06, + "loss": 0.4531802535057068, + "step": 3191 + }, + { + "epoch": 0.8138704742478328, + "grad_norm": 0.4308216869831085, + "learning_rate": 1.8361204082609353e-06, + "loss": 0.44718247652053833, + "step": 3192 + }, + { + "epoch": 0.8141254462009179, + "grad_norm": 0.42755618691444397, + "learning_rate": 1.8312527569509741e-06, + "loss": 0.44727540016174316, + "step": 3193 + }, + { + "epoch": 0.8143804181540031, + "grad_norm": 0.4255657196044922, + "learning_rate": 1.8263909159954253e-06, + "loss": 0.4319005012512207, + "step": 3194 + }, + { + "epoch": 0.8146353901070882, + "grad_norm": 0.46537476778030396, + "learning_rate": 1.8215348888524709e-06, + "loss": 0.42908668518066406, + "step": 3195 + }, + { + "epoch": 0.8148903620601734, + "grad_norm": 0.4323773980140686, + "learning_rate": 1.8166846789761638e-06, + "loss": 0.434833288192749, + "step": 3196 + }, + { + "epoch": 0.8151453340132585, + "grad_norm": 0.4433038532733917, + "learning_rate": 1.811840289816409e-06, + "loss": 0.4452885091304779, + "step": 3197 + }, + { + "epoch": 0.8154003059663437, + "grad_norm": 0.43234574794769287, + "learning_rate": 1.8070017248189743e-06, + "loss": 0.4334978461265564, + "step": 3198 + }, + { + "epoch": 0.8156552779194288, + "grad_norm": 0.4315129816532135, + "learning_rate": 1.802168987425491e-06, + "loss": 0.44864726066589355, + "step": 3199 + }, + { + "epoch": 0.815910249872514, + "grad_norm": 0.46172434091567993, + "learning_rate": 1.7973420810734354e-06, + "loss": 0.43277662992477417, + "step": 3200 + }, + { + "epoch": 0.8161652218255991, + "grad_norm": 0.4356657564640045, + "learning_rate": 1.7925210091961464e-06, + "loss": 0.4305696189403534, + "step": 3201 + }, + { + "epoch": 0.8164201937786844, + "grad_norm": 0.4411664307117462, + "learning_rate": 1.7877057752228022e-06, + "loss": 0.42217326164245605, + "step": 3202 + }, + { + "epoch": 0.8166751657317695, + "grad_norm": 0.4304858148097992, + "learning_rate": 1.7828963825784385e-06, + "loss": 0.44613325595855713, + "step": 3203 + }, + { + "epoch": 0.8169301376848547, + "grad_norm": 0.4482412338256836, + "learning_rate": 1.7780928346839355e-06, + "loss": 0.445767879486084, + "step": 3204 + }, + { + "epoch": 0.8171851096379398, + "grad_norm": 0.4683200716972351, + "learning_rate": 1.7732951349560024e-06, + "loss": 0.4428790509700775, + "step": 3205 + }, + { + "epoch": 0.817440081591025, + "grad_norm": 0.4400186538696289, + "learning_rate": 1.768503286807206e-06, + "loss": 0.4351852536201477, + "step": 3206 + }, + { + "epoch": 0.8176950535441101, + "grad_norm": 0.4413929283618927, + "learning_rate": 1.763717293645939e-06, + "loss": 0.4290766417980194, + "step": 3207 + }, + { + "epoch": 0.8179500254971953, + "grad_norm": 0.44811487197875977, + "learning_rate": 1.758937158876437e-06, + "loss": 0.43229806423187256, + "step": 3208 + }, + { + "epoch": 0.8182049974502805, + "grad_norm": 0.44337981939315796, + "learning_rate": 1.754162885898768e-06, + "loss": 0.42842841148376465, + "step": 3209 + }, + { + "epoch": 0.8184599694033656, + "grad_norm": 0.4307434856891632, + "learning_rate": 1.7493944781088245e-06, + "loss": 0.44062185287475586, + "step": 3210 + }, + { + "epoch": 0.8187149413564508, + "grad_norm": 0.45656681060791016, + "learning_rate": 1.7446319388983312e-06, + "loss": 0.4341960549354553, + "step": 3211 + }, + { + "epoch": 0.818969913309536, + "grad_norm": 0.4163688123226166, + "learning_rate": 1.7398752716548395e-06, + "loss": 0.4489062428474426, + "step": 3212 + }, + { + "epoch": 0.8192248852626212, + "grad_norm": 0.4449394941329956, + "learning_rate": 1.7351244797617218e-06, + "loss": 0.44037866592407227, + "step": 3213 + }, + { + "epoch": 0.8194798572157063, + "grad_norm": 0.4564632177352905, + "learning_rate": 1.7303795665981738e-06, + "loss": 0.4470926523208618, + "step": 3214 + }, + { + "epoch": 0.8197348291687915, + "grad_norm": 0.44864529371261597, + "learning_rate": 1.725640535539206e-06, + "loss": 0.4593150317668915, + "step": 3215 + }, + { + "epoch": 0.8199898011218766, + "grad_norm": 0.4310421943664551, + "learning_rate": 1.7209073899556504e-06, + "loss": 0.4326831102371216, + "step": 3216 + }, + { + "epoch": 0.8202447730749618, + "grad_norm": 0.4456369876861572, + "learning_rate": 1.7161801332141492e-06, + "loss": 0.43467840552330017, + "step": 3217 + }, + { + "epoch": 0.8204997450280469, + "grad_norm": 0.45582112669944763, + "learning_rate": 1.7114587686771533e-06, + "loss": 0.4258680045604706, + "step": 3218 + }, + { + "epoch": 0.8207547169811321, + "grad_norm": 0.4281396269798279, + "learning_rate": 1.7067432997029265e-06, + "loss": 0.4291533827781677, + "step": 3219 + }, + { + "epoch": 0.8210096889342172, + "grad_norm": 0.4517004191875458, + "learning_rate": 1.7020337296455435e-06, + "loss": 0.4399279057979584, + "step": 3220 + }, + { + "epoch": 0.8212646608873024, + "grad_norm": 0.4636811912059784, + "learning_rate": 1.6973300618548705e-06, + "loss": 0.44841358065605164, + "step": 3221 + }, + { + "epoch": 0.8215196328403875, + "grad_norm": 0.44340163469314575, + "learning_rate": 1.6926322996765899e-06, + "loss": 0.42988523840904236, + "step": 3222 + }, + { + "epoch": 0.8217746047934728, + "grad_norm": 0.43543392419815063, + "learning_rate": 1.687940446452172e-06, + "loss": 0.4322485327720642, + "step": 3223 + }, + { + "epoch": 0.8220295767465579, + "grad_norm": 0.4247783422470093, + "learning_rate": 1.6832545055188886e-06, + "loss": 0.4405077397823334, + "step": 3224 + }, + { + "epoch": 0.8222845486996431, + "grad_norm": 0.4364262819290161, + "learning_rate": 1.678574480209809e-06, + "loss": 0.4284090995788574, + "step": 3225 + }, + { + "epoch": 0.8225395206527282, + "grad_norm": 0.4634256660938263, + "learning_rate": 1.6739003738537874e-06, + "loss": 0.43978071212768555, + "step": 3226 + }, + { + "epoch": 0.8227944926058134, + "grad_norm": 0.44353532791137695, + "learning_rate": 1.669232189775476e-06, + "loss": 0.43755969405174255, + "step": 3227 + }, + { + "epoch": 0.8230494645588985, + "grad_norm": 0.43164727091789246, + "learning_rate": 1.6645699312953123e-06, + "loss": 0.44416743516921997, + "step": 3228 + }, + { + "epoch": 0.8233044365119837, + "grad_norm": 0.5508279204368591, + "learning_rate": 1.6599136017295159e-06, + "loss": 0.42011135816574097, + "step": 3229 + }, + { + "epoch": 0.8235594084650688, + "grad_norm": 0.4682815670967102, + "learning_rate": 1.6552632043900885e-06, + "loss": 0.45232391357421875, + "step": 3230 + }, + { + "epoch": 0.823814380418154, + "grad_norm": 0.442646324634552, + "learning_rate": 1.6506187425848196e-06, + "loss": 0.4356235861778259, + "step": 3231 + }, + { + "epoch": 0.8240693523712391, + "grad_norm": 0.45952385663986206, + "learning_rate": 1.645980219617267e-06, + "loss": 0.4367219805717468, + "step": 3232 + }, + { + "epoch": 0.8243243243243243, + "grad_norm": 0.44393765926361084, + "learning_rate": 1.6413476387867732e-06, + "loss": 0.44239887595176697, + "step": 3233 + }, + { + "epoch": 0.8245792962774094, + "grad_norm": 0.4498979151248932, + "learning_rate": 1.6367210033884474e-06, + "loss": 0.4374240040779114, + "step": 3234 + }, + { + "epoch": 0.8248342682304947, + "grad_norm": 0.4651869833469391, + "learning_rate": 1.6321003167131743e-06, + "loss": 0.4354780912399292, + "step": 3235 + }, + { + "epoch": 0.8250892401835798, + "grad_norm": 0.42792996764183044, + "learning_rate": 1.6274855820476065e-06, + "loss": 0.44007962942123413, + "step": 3236 + }, + { + "epoch": 0.825344212136665, + "grad_norm": 0.4541287422180176, + "learning_rate": 1.622876802674158e-06, + "loss": 0.43488869071006775, + "step": 3237 + }, + { + "epoch": 0.8255991840897501, + "grad_norm": 0.4381212294101715, + "learning_rate": 1.6182739818710158e-06, + "loss": 0.4432847499847412, + "step": 3238 + }, + { + "epoch": 0.8258541560428353, + "grad_norm": 0.47068390250205994, + "learning_rate": 1.6136771229121195e-06, + "loss": 0.43802642822265625, + "step": 3239 + }, + { + "epoch": 0.8261091279959204, + "grad_norm": 0.4551355242729187, + "learning_rate": 1.6090862290671738e-06, + "loss": 0.43210625648498535, + "step": 3240 + }, + { + "epoch": 0.8263640999490056, + "grad_norm": 0.44001293182373047, + "learning_rate": 1.6045013036016422e-06, + "loss": 0.44448336958885193, + "step": 3241 + }, + { + "epoch": 0.8266190719020907, + "grad_norm": 0.43821027874946594, + "learning_rate": 1.5999223497767379e-06, + "loss": 0.4449557065963745, + "step": 3242 + }, + { + "epoch": 0.8268740438551759, + "grad_norm": 0.4449080526828766, + "learning_rate": 1.5953493708494262e-06, + "loss": 0.4334353804588318, + "step": 3243 + }, + { + "epoch": 0.827129015808261, + "grad_norm": 0.42738792300224304, + "learning_rate": 1.590782370072428e-06, + "loss": 0.43336939811706543, + "step": 3244 + }, + { + "epoch": 0.8273839877613463, + "grad_norm": 0.43144291639328003, + "learning_rate": 1.5862213506942037e-06, + "loss": 0.43666523694992065, + "step": 3245 + }, + { + "epoch": 0.8276389597144315, + "grad_norm": 0.4348338842391968, + "learning_rate": 1.5816663159589719e-06, + "loss": 0.4477895498275757, + "step": 3246 + }, + { + "epoch": 0.8278939316675166, + "grad_norm": 0.4303411543369293, + "learning_rate": 1.5771172691066793e-06, + "loss": 0.4419710338115692, + "step": 3247 + }, + { + "epoch": 0.8281489036206018, + "grad_norm": 0.46197861433029175, + "learning_rate": 1.572574213373027e-06, + "loss": 0.42960870265960693, + "step": 3248 + }, + { + "epoch": 0.8284038755736869, + "grad_norm": 0.4203360974788666, + "learning_rate": 1.5680371519894444e-06, + "loss": 0.443248450756073, + "step": 3249 + }, + { + "epoch": 0.8286588475267721, + "grad_norm": 0.47939595580101013, + "learning_rate": 1.563506088183102e-06, + "loss": 0.4249131381511688, + "step": 3250 + }, + { + "epoch": 0.8289138194798572, + "grad_norm": 0.4603368639945984, + "learning_rate": 1.5589810251769034e-06, + "loss": 0.43305230140686035, + "step": 3251 + }, + { + "epoch": 0.8291687914329424, + "grad_norm": 0.43101152777671814, + "learning_rate": 1.5544619661894866e-06, + "loss": 0.42263245582580566, + "step": 3252 + }, + { + "epoch": 0.8294237633860275, + "grad_norm": 0.46168243885040283, + "learning_rate": 1.5499489144352131e-06, + "loss": 0.41834792494773865, + "step": 3253 + }, + { + "epoch": 0.8296787353391127, + "grad_norm": 0.47389018535614014, + "learning_rate": 1.5454418731241793e-06, + "loss": 0.45298704504966736, + "step": 3254 + }, + { + "epoch": 0.8299337072921978, + "grad_norm": 0.4682997167110443, + "learning_rate": 1.5409408454621987e-06, + "loss": 0.44076794385910034, + "step": 3255 + }, + { + "epoch": 0.8301886792452831, + "grad_norm": 0.45663902163505554, + "learning_rate": 1.5364458346508093e-06, + "loss": 0.44266772270202637, + "step": 3256 + }, + { + "epoch": 0.8304436511983682, + "grad_norm": 0.4399891793727875, + "learning_rate": 1.5319568438872746e-06, + "loss": 0.4307417869567871, + "step": 3257 + }, + { + "epoch": 0.8306986231514534, + "grad_norm": 0.4241107702255249, + "learning_rate": 1.527473876364568e-06, + "loss": 0.43898722529411316, + "step": 3258 + }, + { + "epoch": 0.8309535951045385, + "grad_norm": 0.5689083933830261, + "learning_rate": 1.522996935271388e-06, + "loss": 0.44163841009140015, + "step": 3259 + }, + { + "epoch": 0.8312085670576237, + "grad_norm": 0.4378184974193573, + "learning_rate": 1.5185260237921361e-06, + "loss": 0.43709036707878113, + "step": 3260 + }, + { + "epoch": 0.8314635390107088, + "grad_norm": 0.43571507930755615, + "learning_rate": 1.5140611451069354e-06, + "loss": 0.44041547179222107, + "step": 3261 + }, + { + "epoch": 0.831718510963794, + "grad_norm": 0.44554468989372253, + "learning_rate": 1.5096023023916096e-06, + "loss": 0.435102641582489, + "step": 3262 + }, + { + "epoch": 0.8319734829168791, + "grad_norm": 0.46228763461112976, + "learning_rate": 1.5051494988176917e-06, + "loss": 0.43231040239334106, + "step": 3263 + }, + { + "epoch": 0.8322284548699643, + "grad_norm": 0.45202240347862244, + "learning_rate": 1.5007027375524209e-06, + "loss": 0.4369967579841614, + "step": 3264 + }, + { + "epoch": 0.8324834268230494, + "grad_norm": 0.45653805136680603, + "learning_rate": 1.4962620217587386e-06, + "loss": 0.4365310072898865, + "step": 3265 + }, + { + "epoch": 0.8327383987761346, + "grad_norm": 0.44089001417160034, + "learning_rate": 1.4918273545952833e-06, + "loss": 0.4460240304470062, + "step": 3266 + }, + { + "epoch": 0.8329933707292198, + "grad_norm": 0.43628019094467163, + "learning_rate": 1.4873987392163946e-06, + "loss": 0.4349481463432312, + "step": 3267 + }, + { + "epoch": 0.833248342682305, + "grad_norm": 0.4397881031036377, + "learning_rate": 1.4829761787721042e-06, + "loss": 0.438412070274353, + "step": 3268 + }, + { + "epoch": 0.8335033146353901, + "grad_norm": 0.4743090569972992, + "learning_rate": 1.4785596764081366e-06, + "loss": 0.4575331211090088, + "step": 3269 + }, + { + "epoch": 0.8337582865884753, + "grad_norm": 0.4318879246711731, + "learning_rate": 1.4741492352659137e-06, + "loss": 0.43636226654052734, + "step": 3270 + }, + { + "epoch": 0.8340132585415604, + "grad_norm": 0.4453240931034088, + "learning_rate": 1.4697448584825369e-06, + "loss": 0.4309747815132141, + "step": 3271 + }, + { + "epoch": 0.8342682304946456, + "grad_norm": 0.42908501625061035, + "learning_rate": 1.4653465491908003e-06, + "loss": 0.445771187543869, + "step": 3272 + }, + { + "epoch": 0.8345232024477307, + "grad_norm": 0.4467611610889435, + "learning_rate": 1.4609543105191837e-06, + "loss": 0.45422136783599854, + "step": 3273 + }, + { + "epoch": 0.8347781744008159, + "grad_norm": 0.4240168631076813, + "learning_rate": 1.4565681455918434e-06, + "loss": 0.442562460899353, + "step": 3274 + }, + { + "epoch": 0.835033146353901, + "grad_norm": 0.4435538649559021, + "learning_rate": 1.452188057528615e-06, + "loss": 0.42605406045913696, + "step": 3275 + }, + { + "epoch": 0.8352881183069862, + "grad_norm": 0.46490219235420227, + "learning_rate": 1.4478140494450211e-06, + "loss": 0.4366442561149597, + "step": 3276 + }, + { + "epoch": 0.8355430902600713, + "grad_norm": 0.43472519516944885, + "learning_rate": 1.4434461244522458e-06, + "loss": 0.44394561648368835, + "step": 3277 + }, + { + "epoch": 0.8357980622131566, + "grad_norm": 0.424283504486084, + "learning_rate": 1.4390842856571607e-06, + "loss": 0.4361807703971863, + "step": 3278 + }, + { + "epoch": 0.8360530341662417, + "grad_norm": 0.4511633813381195, + "learning_rate": 1.4347285361622966e-06, + "loss": 0.4422985911369324, + "step": 3279 + }, + { + "epoch": 0.8363080061193269, + "grad_norm": 0.45547357201576233, + "learning_rate": 1.4303788790658613e-06, + "loss": 0.4446526765823364, + "step": 3280 + }, + { + "epoch": 0.8365629780724121, + "grad_norm": 0.44244518876075745, + "learning_rate": 1.4260353174617237e-06, + "loss": 0.4395790994167328, + "step": 3281 + }, + { + "epoch": 0.8368179500254972, + "grad_norm": 0.44289788603782654, + "learning_rate": 1.4216978544394177e-06, + "loss": 0.4399866759777069, + "step": 3282 + }, + { + "epoch": 0.8370729219785824, + "grad_norm": 0.4806705117225647, + "learning_rate": 1.4173664930841414e-06, + "loss": 0.43484628200531006, + "step": 3283 + }, + { + "epoch": 0.8373278939316675, + "grad_norm": 0.444036602973938, + "learning_rate": 1.4130412364767565e-06, + "loss": 0.43715572357177734, + "step": 3284 + }, + { + "epoch": 0.8375828658847527, + "grad_norm": 0.44062647223472595, + "learning_rate": 1.4087220876937714e-06, + "loss": 0.4454675316810608, + "step": 3285 + }, + { + "epoch": 0.8378378378378378, + "grad_norm": 0.44224995374679565, + "learning_rate": 1.4044090498073625e-06, + "loss": 0.44212135672569275, + "step": 3286 + }, + { + "epoch": 0.838092809790923, + "grad_norm": 0.41917848587036133, + "learning_rate": 1.400102125885351e-06, + "loss": 0.427121102809906, + "step": 3287 + }, + { + "epoch": 0.8383477817440081, + "grad_norm": 0.4165647625923157, + "learning_rate": 1.395801318991209e-06, + "loss": 0.4368363320827484, + "step": 3288 + }, + { + "epoch": 0.8386027536970934, + "grad_norm": 0.4660046100616455, + "learning_rate": 1.3915066321840675e-06, + "loss": 0.4286162853240967, + "step": 3289 + }, + { + "epoch": 0.8388577256501785, + "grad_norm": 0.4394930899143219, + "learning_rate": 1.387218068518691e-06, + "loss": 0.44216713309288025, + "step": 3290 + }, + { + "epoch": 0.8391126976032637, + "grad_norm": 0.4506072998046875, + "learning_rate": 1.382935631045501e-06, + "loss": 0.422912061214447, + "step": 3291 + }, + { + "epoch": 0.8393676695563488, + "grad_norm": 0.4367363154888153, + "learning_rate": 1.3786593228105494e-06, + "loss": 0.4353519082069397, + "step": 3292 + }, + { + "epoch": 0.839622641509434, + "grad_norm": 0.441277414560318, + "learning_rate": 1.3743891468555415e-06, + "loss": 0.45142579078674316, + "step": 3293 + }, + { + "epoch": 0.8398776134625191, + "grad_norm": 0.4367586672306061, + "learning_rate": 1.3701251062178101e-06, + "loss": 0.4367350935935974, + "step": 3294 + }, + { + "epoch": 0.8401325854156043, + "grad_norm": 0.4555458426475525, + "learning_rate": 1.3658672039303266e-06, + "loss": 0.45928090810775757, + "step": 3295 + }, + { + "epoch": 0.8403875573686894, + "grad_norm": 0.43432533740997314, + "learning_rate": 1.3616154430216989e-06, + "loss": 0.44207262992858887, + "step": 3296 + }, + { + "epoch": 0.8406425293217746, + "grad_norm": 0.4285244941711426, + "learning_rate": 1.3573698265161683e-06, + "loss": 0.4286430776119232, + "step": 3297 + }, + { + "epoch": 0.8408975012748597, + "grad_norm": 0.43814343214035034, + "learning_rate": 1.3531303574335973e-06, + "loss": 0.4361557960510254, + "step": 3298 + }, + { + "epoch": 0.841152473227945, + "grad_norm": 0.43010541796684265, + "learning_rate": 1.3488970387894874e-06, + "loss": 0.4290144145488739, + "step": 3299 + }, + { + "epoch": 0.8414074451810301, + "grad_norm": 0.5369753837585449, + "learning_rate": 1.3446698735949537e-06, + "loss": 0.43052634596824646, + "step": 3300 + }, + { + "epoch": 0.8416624171341153, + "grad_norm": 0.4521333873271942, + "learning_rate": 1.3404488648567405e-06, + "loss": 0.4416801631450653, + "step": 3301 + }, + { + "epoch": 0.8419173890872004, + "grad_norm": 0.420360803604126, + "learning_rate": 1.3362340155772147e-06, + "loss": 0.4436587691307068, + "step": 3302 + }, + { + "epoch": 0.8421723610402856, + "grad_norm": 0.4203634262084961, + "learning_rate": 1.332025328754356e-06, + "loss": 0.4210076332092285, + "step": 3303 + }, + { + "epoch": 0.8424273329933707, + "grad_norm": 0.448469340801239, + "learning_rate": 1.3278228073817667e-06, + "loss": 0.4314681887626648, + "step": 3304 + }, + { + "epoch": 0.8426823049464559, + "grad_norm": 0.4256279766559601, + "learning_rate": 1.3236264544486631e-06, + "loss": 0.4425191879272461, + "step": 3305 + }, + { + "epoch": 0.842937276899541, + "grad_norm": 0.4700623154640198, + "learning_rate": 1.3194362729398702e-06, + "loss": 0.44431278109550476, + "step": 3306 + }, + { + "epoch": 0.8431922488526262, + "grad_norm": 0.4359423518180847, + "learning_rate": 1.3152522658358247e-06, + "loss": 0.4293837249279022, + "step": 3307 + }, + { + "epoch": 0.8434472208057113, + "grad_norm": 0.4656182825565338, + "learning_rate": 1.311074436112575e-06, + "loss": 0.4318646788597107, + "step": 3308 + }, + { + "epoch": 0.8437021927587965, + "grad_norm": 0.46945562958717346, + "learning_rate": 1.3069027867417682e-06, + "loss": 0.4293580651283264, + "step": 3309 + }, + { + "epoch": 0.8439571647118816, + "grad_norm": 0.4682983458042145, + "learning_rate": 1.3027373206906646e-06, + "loss": 0.4242051839828491, + "step": 3310 + }, + { + "epoch": 0.8442121366649669, + "grad_norm": 0.4534981846809387, + "learning_rate": 1.2985780409221183e-06, + "loss": 0.4334355592727661, + "step": 3311 + }, + { + "epoch": 0.844467108618052, + "grad_norm": 0.4599556028842926, + "learning_rate": 1.2944249503945894e-06, + "loss": 0.4311177134513855, + "step": 3312 + }, + { + "epoch": 0.8447220805711372, + "grad_norm": 0.41913193464279175, + "learning_rate": 1.2902780520621328e-06, + "loss": 0.4342377185821533, + "step": 3313 + }, + { + "epoch": 0.8449770525242223, + "grad_norm": 0.4389280378818512, + "learning_rate": 1.2861373488743934e-06, + "loss": 0.4300832152366638, + "step": 3314 + }, + { + "epoch": 0.8452320244773075, + "grad_norm": 0.45004579424858093, + "learning_rate": 1.282002843776623e-06, + "loss": 0.4427967071533203, + "step": 3315 + }, + { + "epoch": 0.8454869964303927, + "grad_norm": 0.42154547572135925, + "learning_rate": 1.2778745397096503e-06, + "loss": 0.42205655574798584, + "step": 3316 + }, + { + "epoch": 0.8457419683834778, + "grad_norm": 0.43388786911964417, + "learning_rate": 1.2737524396099033e-06, + "loss": 0.43303999304771423, + "step": 3317 + }, + { + "epoch": 0.845996940336563, + "grad_norm": 0.445159375667572, + "learning_rate": 1.269636546409394e-06, + "loss": 0.43945175409317017, + "step": 3318 + }, + { + "epoch": 0.8462519122896481, + "grad_norm": 0.43009161949157715, + "learning_rate": 1.2655268630357197e-06, + "loss": 0.43136143684387207, + "step": 3319 + }, + { + "epoch": 0.8465068842427333, + "grad_norm": 0.43622779846191406, + "learning_rate": 1.2614233924120555e-06, + "loss": 0.4358550012111664, + "step": 3320 + }, + { + "epoch": 0.8467618561958185, + "grad_norm": 0.44786137342453003, + "learning_rate": 1.2573261374571677e-06, + "loss": 0.4262104034423828, + "step": 3321 + }, + { + "epoch": 0.8470168281489037, + "grad_norm": 0.4478050172328949, + "learning_rate": 1.2532351010853916e-06, + "loss": 0.43247491121292114, + "step": 3322 + }, + { + "epoch": 0.8472718001019888, + "grad_norm": 0.46326589584350586, + "learning_rate": 1.2491502862066484e-06, + "loss": 0.42917340993881226, + "step": 3323 + }, + { + "epoch": 0.847526772055074, + "grad_norm": 0.4584805965423584, + "learning_rate": 1.2450716957264232e-06, + "loss": 0.43868932127952576, + "step": 3324 + }, + { + "epoch": 0.8477817440081591, + "grad_norm": 0.4304777979850769, + "learning_rate": 1.2409993325457848e-06, + "loss": 0.4443057179450989, + "step": 3325 + }, + { + "epoch": 0.8480367159612443, + "grad_norm": 0.45579901337623596, + "learning_rate": 1.2369331995613664e-06, + "loss": 0.449260413646698, + "step": 3326 + }, + { + "epoch": 0.8482916879143294, + "grad_norm": 0.43227526545524597, + "learning_rate": 1.232873299665367e-06, + "loss": 0.436143159866333, + "step": 3327 + }, + { + "epoch": 0.8485466598674146, + "grad_norm": 0.45995062589645386, + "learning_rate": 1.2288196357455584e-06, + "loss": 0.455481618642807, + "step": 3328 + }, + { + "epoch": 0.8488016318204997, + "grad_norm": 0.5433409810066223, + "learning_rate": 1.2247722106852777e-06, + "loss": 0.4205161929130554, + "step": 3329 + }, + { + "epoch": 0.8490566037735849, + "grad_norm": 0.44447267055511475, + "learning_rate": 1.2207310273634155e-06, + "loss": 0.44169408082962036, + "step": 3330 + }, + { + "epoch": 0.84931157572667, + "grad_norm": 0.4399317502975464, + "learning_rate": 1.2166960886544322e-06, + "loss": 0.4465487003326416, + "step": 3331 + }, + { + "epoch": 0.8495665476797553, + "grad_norm": 0.4159085750579834, + "learning_rate": 1.212667397428342e-06, + "loss": 0.4228609502315521, + "step": 3332 + }, + { + "epoch": 0.8498215196328404, + "grad_norm": 0.43474212288856506, + "learning_rate": 1.2086449565507107e-06, + "loss": 0.4253726899623871, + "step": 3333 + }, + { + "epoch": 0.8500764915859256, + "grad_norm": 0.44543296098709106, + "learning_rate": 1.2046287688826708e-06, + "loss": 0.4422019422054291, + "step": 3334 + }, + { + "epoch": 0.8503314635390107, + "grad_norm": 0.4402267336845398, + "learning_rate": 1.2006188372808925e-06, + "loss": 0.44364506006240845, + "step": 3335 + }, + { + "epoch": 0.8505864354920959, + "grad_norm": 0.44199803471565247, + "learning_rate": 1.1966151645976087e-06, + "loss": 0.42757415771484375, + "step": 3336 + }, + { + "epoch": 0.850841407445181, + "grad_norm": 0.43642616271972656, + "learning_rate": 1.1926177536805905e-06, + "loss": 0.43112000823020935, + "step": 3337 + }, + { + "epoch": 0.8510963793982662, + "grad_norm": 0.45532628893852234, + "learning_rate": 1.1886266073731633e-06, + "loss": 0.4462944269180298, + "step": 3338 + }, + { + "epoch": 0.8513513513513513, + "grad_norm": 0.4233347475528717, + "learning_rate": 1.184641728514191e-06, + "loss": 0.43310827016830444, + "step": 3339 + }, + { + "epoch": 0.8516063233044365, + "grad_norm": 0.4422180652618408, + "learning_rate": 1.1806631199380781e-06, + "loss": 0.4407688081264496, + "step": 3340 + }, + { + "epoch": 0.8518612952575216, + "grad_norm": 0.4382367730140686, + "learning_rate": 1.1766907844747744e-06, + "loss": 0.4392991065979004, + "step": 3341 + }, + { + "epoch": 0.8521162672106068, + "grad_norm": 0.44065603613853455, + "learning_rate": 1.1727247249497686e-06, + "loss": 0.4309106767177582, + "step": 3342 + }, + { + "epoch": 0.852371239163692, + "grad_norm": 0.43926700949668884, + "learning_rate": 1.1687649441840764e-06, + "loss": 0.436871737241745, + "step": 3343 + }, + { + "epoch": 0.8526262111167772, + "grad_norm": 0.4672529399394989, + "learning_rate": 1.1648114449942583e-06, + "loss": 0.4357472360134125, + "step": 3344 + }, + { + "epoch": 0.8528811830698623, + "grad_norm": 0.4307812452316284, + "learning_rate": 1.160864230192399e-06, + "loss": 0.4353850781917572, + "step": 3345 + }, + { + "epoch": 0.8531361550229475, + "grad_norm": 0.44444510340690613, + "learning_rate": 1.1569233025861147e-06, + "loss": 0.4323183000087738, + "step": 3346 + }, + { + "epoch": 0.8533911269760326, + "grad_norm": 0.42350655794143677, + "learning_rate": 1.152988664978556e-06, + "loss": 0.4409070909023285, + "step": 3347 + }, + { + "epoch": 0.8536460989291178, + "grad_norm": 0.44289278984069824, + "learning_rate": 1.149060320168387e-06, + "loss": 0.44024693965911865, + "step": 3348 + }, + { + "epoch": 0.8539010708822029, + "grad_norm": 0.45462486147880554, + "learning_rate": 1.145138270949807e-06, + "loss": 0.4491046369075775, + "step": 3349 + }, + { + "epoch": 0.8541560428352881, + "grad_norm": 0.42163506150245667, + "learning_rate": 1.1412225201125359e-06, + "loss": 0.4273730218410492, + "step": 3350 + }, + { + "epoch": 0.8544110147883732, + "grad_norm": 0.4252319037914276, + "learning_rate": 1.1373130704418089e-06, + "loss": 0.4343377351760864, + "step": 3351 + }, + { + "epoch": 0.8546659867414584, + "grad_norm": 0.43090590834617615, + "learning_rate": 1.1334099247183782e-06, + "loss": 0.44295868277549744, + "step": 3352 + }, + { + "epoch": 0.8549209586945437, + "grad_norm": 0.43933114409446716, + "learning_rate": 1.12951308571852e-06, + "loss": 0.42852818965911865, + "step": 3353 + }, + { + "epoch": 0.8551759306476288, + "grad_norm": 0.8181011080741882, + "learning_rate": 1.1256225562140155e-06, + "loss": 0.4561828374862671, + "step": 3354 + }, + { + "epoch": 0.855430902600714, + "grad_norm": 0.4378681778907776, + "learning_rate": 1.121738338972167e-06, + "loss": 0.4349752366542816, + "step": 3355 + }, + { + "epoch": 0.8556858745537991, + "grad_norm": 0.42967164516448975, + "learning_rate": 1.1178604367557776e-06, + "loss": 0.437456339597702, + "step": 3356 + }, + { + "epoch": 0.8559408465068843, + "grad_norm": 0.4497753381729126, + "learning_rate": 1.113988852323168e-06, + "loss": 0.42805370688438416, + "step": 3357 + }, + { + "epoch": 0.8561958184599694, + "grad_norm": 0.5577053427696228, + "learning_rate": 1.110123588428159e-06, + "loss": 0.4310586452484131, + "step": 3358 + }, + { + "epoch": 0.8564507904130546, + "grad_norm": 0.44477880001068115, + "learning_rate": 1.1062646478200735e-06, + "loss": 0.43208011984825134, + "step": 3359 + }, + { + "epoch": 0.8567057623661397, + "grad_norm": 0.43552449345588684, + "learning_rate": 1.1024120332437428e-06, + "loss": 0.4297208786010742, + "step": 3360 + }, + { + "epoch": 0.8569607343192249, + "grad_norm": 0.453593909740448, + "learning_rate": 1.0985657474394984e-06, + "loss": 0.4424916207790375, + "step": 3361 + }, + { + "epoch": 0.85721570627231, + "grad_norm": 0.4612596035003662, + "learning_rate": 1.0947257931431644e-06, + "loss": 0.44119787216186523, + "step": 3362 + }, + { + "epoch": 0.8574706782253952, + "grad_norm": 0.44879037141799927, + "learning_rate": 1.09089217308607e-06, + "loss": 0.4400937557220459, + "step": 3363 + }, + { + "epoch": 0.8577256501784803, + "grad_norm": 0.442455530166626, + "learning_rate": 1.0870648899950254e-06, + "loss": 0.4569939076900482, + "step": 3364 + }, + { + "epoch": 0.8579806221315656, + "grad_norm": 0.44891437888145447, + "learning_rate": 1.083243946592346e-06, + "loss": 0.4352353513240814, + "step": 3365 + }, + { + "epoch": 0.8582355940846507, + "grad_norm": 0.4200226962566376, + "learning_rate": 1.079429345595836e-06, + "loss": 0.44492048025131226, + "step": 3366 + }, + { + "epoch": 0.8584905660377359, + "grad_norm": 0.41359463334083557, + "learning_rate": 1.0756210897187812e-06, + "loss": 0.4444793164730072, + "step": 3367 + }, + { + "epoch": 0.858745537990821, + "grad_norm": 0.45372694730758667, + "learning_rate": 1.0718191816699631e-06, + "loss": 0.4389488101005554, + "step": 3368 + }, + { + "epoch": 0.8590005099439062, + "grad_norm": 0.4190084636211395, + "learning_rate": 1.06802362415364e-06, + "loss": 0.45514458417892456, + "step": 3369 + }, + { + "epoch": 0.8592554818969913, + "grad_norm": 0.41038715839385986, + "learning_rate": 1.064234419869561e-06, + "loss": 0.43614017963409424, + "step": 3370 + }, + { + "epoch": 0.8595104538500765, + "grad_norm": 0.42078742384910583, + "learning_rate": 1.0604515715129481e-06, + "loss": 0.43874263763427734, + "step": 3371 + }, + { + "epoch": 0.8597654258031616, + "grad_norm": 0.42877811193466187, + "learning_rate": 1.0566750817745076e-06, + "loss": 0.43939733505249023, + "step": 3372 + }, + { + "epoch": 0.8600203977562468, + "grad_norm": 0.5327430367469788, + "learning_rate": 1.0529049533404223e-06, + "loss": 0.4264140725135803, + "step": 3373 + }, + { + "epoch": 0.8602753697093319, + "grad_norm": 0.45152685046195984, + "learning_rate": 1.0491411888923509e-06, + "loss": 0.430926650762558, + "step": 3374 + }, + { + "epoch": 0.8605303416624172, + "grad_norm": 0.43535324931144714, + "learning_rate": 1.0453837911074204e-06, + "loss": 0.43031561374664307, + "step": 3375 + }, + { + "epoch": 0.8607853136155023, + "grad_norm": 0.43099328875541687, + "learning_rate": 1.041632762658238e-06, + "loss": 0.44012102484703064, + "step": 3376 + }, + { + "epoch": 0.8610402855685875, + "grad_norm": 0.4511074125766754, + "learning_rate": 1.0378881062128732e-06, + "loss": 0.4425766170024872, + "step": 3377 + }, + { + "epoch": 0.8612952575216726, + "grad_norm": 0.4516361951828003, + "learning_rate": 1.0341498244348636e-06, + "loss": 0.43071991205215454, + "step": 3378 + }, + { + "epoch": 0.8615502294747578, + "grad_norm": 0.424569696187973, + "learning_rate": 1.0304179199832187e-06, + "loss": 0.44217395782470703, + "step": 3379 + }, + { + "epoch": 0.8618052014278429, + "grad_norm": 0.735274612903595, + "learning_rate": 1.0266923955124032e-06, + "loss": 0.4275852143764496, + "step": 3380 + }, + { + "epoch": 0.8620601733809281, + "grad_norm": 0.4615504741668701, + "learning_rate": 1.0229732536723502e-06, + "loss": 0.4398762583732605, + "step": 3381 + }, + { + "epoch": 0.8623151453340132, + "grad_norm": 0.43356427550315857, + "learning_rate": 1.019260497108453e-06, + "loss": 0.4225383400917053, + "step": 3382 + }, + { + "epoch": 0.8625701172870984, + "grad_norm": 0.4367641806602478, + "learning_rate": 1.01555412846156e-06, + "loss": 0.4380742013454437, + "step": 3383 + }, + { + "epoch": 0.8628250892401835, + "grad_norm": 0.44011420011520386, + "learning_rate": 1.0118541503679724e-06, + "loss": 0.4346046447753906, + "step": 3384 + }, + { + "epoch": 0.8630800611932687, + "grad_norm": 0.4376404881477356, + "learning_rate": 1.0081605654594573e-06, + "loss": 0.42278382182121277, + "step": 3385 + }, + { + "epoch": 0.8633350331463538, + "grad_norm": 0.46831297874450684, + "learning_rate": 1.004473376363222e-06, + "loss": 0.43210935592651367, + "step": 3386 + }, + { + "epoch": 0.8635900050994391, + "grad_norm": 0.42311087250709534, + "learning_rate": 1.000792585701934e-06, + "loss": 0.4354386329650879, + "step": 3387 + }, + { + "epoch": 0.8638449770525243, + "grad_norm": 0.4499448835849762, + "learning_rate": 9.971181960937027e-07, + "loss": 0.4326968193054199, + "step": 3388 + }, + { + "epoch": 0.8640999490056094, + "grad_norm": 0.45473289489746094, + "learning_rate": 9.934502101520904e-07, + "loss": 0.4448380172252655, + "step": 3389 + }, + { + "epoch": 0.8643549209586946, + "grad_norm": 0.4439501464366913, + "learning_rate": 9.897886304860994e-07, + "loss": 0.41785746812820435, + "step": 3390 + }, + { + "epoch": 0.8646098929117797, + "grad_norm": 0.4552282989025116, + "learning_rate": 9.86133459700177e-07, + "loss": 0.45230287313461304, + "step": 3391 + }, + { + "epoch": 0.8648648648648649, + "grad_norm": 0.43610405921936035, + "learning_rate": 9.824847003942152e-07, + "loss": 0.4380587041378021, + "step": 3392 + }, + { + "epoch": 0.86511983681795, + "grad_norm": 0.43049919605255127, + "learning_rate": 9.788423551635384e-07, + "loss": 0.4224151074886322, + "step": 3393 + }, + { + "epoch": 0.8653748087710352, + "grad_norm": 0.4411639869213104, + "learning_rate": 9.752064265989158e-07, + "loss": 0.43679267168045044, + "step": 3394 + }, + { + "epoch": 0.8656297807241203, + "grad_norm": 0.45369890332221985, + "learning_rate": 9.715769172865542e-07, + "loss": 0.44111520051956177, + "step": 3395 + }, + { + "epoch": 0.8658847526772055, + "grad_norm": 0.44774970412254333, + "learning_rate": 9.679538298080804e-07, + "loss": 0.41965052485466003, + "step": 3396 + }, + { + "epoch": 0.8661397246302907, + "grad_norm": 0.42721036076545715, + "learning_rate": 9.6433716674057e-07, + "loss": 0.4369600713253021, + "step": 3397 + }, + { + "epoch": 0.8663946965833759, + "grad_norm": 0.4301885664463043, + "learning_rate": 9.607269306565202e-07, + "loss": 0.450730562210083, + "step": 3398 + }, + { + "epoch": 0.866649668536461, + "grad_norm": 0.44836947321891785, + "learning_rate": 9.571231241238578e-07, + "loss": 0.4442753791809082, + "step": 3399 + }, + { + "epoch": 0.8669046404895462, + "grad_norm": 0.4398816227912903, + "learning_rate": 9.535257497059391e-07, + "loss": 0.437112420797348, + "step": 3400 + }, + { + "epoch": 0.8671596124426313, + "grad_norm": 0.44665607810020447, + "learning_rate": 9.499348099615413e-07, + "loss": 0.4469532370567322, + "step": 3401 + }, + { + "epoch": 0.8674145843957165, + "grad_norm": 0.45355597138404846, + "learning_rate": 9.463503074448676e-07, + "loss": 0.42864227294921875, + "step": 3402 + }, + { + "epoch": 0.8676695563488016, + "grad_norm": 0.43615153431892395, + "learning_rate": 9.42772244705542e-07, + "loss": 0.4274861216545105, + "step": 3403 + }, + { + "epoch": 0.8679245283018868, + "grad_norm": 0.4431467056274414, + "learning_rate": 9.392006242886054e-07, + "loss": 0.4423532485961914, + "step": 3404 + }, + { + "epoch": 0.8681795002549719, + "grad_norm": 0.44587862491607666, + "learning_rate": 9.356354487345187e-07, + "loss": 0.4221605658531189, + "step": 3405 + }, + { + "epoch": 0.8684344722080571, + "grad_norm": 0.4480460584163666, + "learning_rate": 9.320767205791614e-07, + "loss": 0.4308694303035736, + "step": 3406 + }, + { + "epoch": 0.8686894441611422, + "grad_norm": 0.46006181836128235, + "learning_rate": 9.285244423538198e-07, + "loss": 0.43253546953201294, + "step": 3407 + }, + { + "epoch": 0.8689444161142275, + "grad_norm": 0.43100929260253906, + "learning_rate": 9.249786165852004e-07, + "loss": 0.43706291913986206, + "step": 3408 + }, + { + "epoch": 0.8691993880673126, + "grad_norm": 0.438680499792099, + "learning_rate": 9.214392457954158e-07, + "loss": 0.4269286096096039, + "step": 3409 + }, + { + "epoch": 0.8694543600203978, + "grad_norm": 0.43813207745552063, + "learning_rate": 9.179063325019832e-07, + "loss": 0.4430767893791199, + "step": 3410 + }, + { + "epoch": 0.8697093319734829, + "grad_norm": 0.434760719537735, + "learning_rate": 9.143798792178371e-07, + "loss": 0.42191946506500244, + "step": 3411 + }, + { + "epoch": 0.8699643039265681, + "grad_norm": 0.44063812494277954, + "learning_rate": 9.108598884513053e-07, + "loss": 0.44339871406555176, + "step": 3412 + }, + { + "epoch": 0.8702192758796532, + "grad_norm": 0.4579174816608429, + "learning_rate": 9.073463627061285e-07, + "loss": 0.44739198684692383, + "step": 3413 + }, + { + "epoch": 0.8704742478327384, + "grad_norm": 0.45135340094566345, + "learning_rate": 9.038393044814475e-07, + "loss": 0.4417533278465271, + "step": 3414 + }, + { + "epoch": 0.8707292197858235, + "grad_norm": 0.44124355912208557, + "learning_rate": 9.003387162717959e-07, + "loss": 0.43357163667678833, + "step": 3415 + }, + { + "epoch": 0.8709841917389087, + "grad_norm": 0.44779467582702637, + "learning_rate": 8.968446005671117e-07, + "loss": 0.43666166067123413, + "step": 3416 + }, + { + "epoch": 0.8712391636919938, + "grad_norm": 0.44657012820243835, + "learning_rate": 8.933569598527247e-07, + "loss": 0.43212875723838806, + "step": 3417 + }, + { + "epoch": 0.871494135645079, + "grad_norm": 0.42799729108810425, + "learning_rate": 8.898757966093629e-07, + "loss": 0.4432377219200134, + "step": 3418 + }, + { + "epoch": 0.8717491075981642, + "grad_norm": 0.4438287317752838, + "learning_rate": 8.864011133131489e-07, + "loss": 0.43603086471557617, + "step": 3419 + }, + { + "epoch": 0.8720040795512494, + "grad_norm": 0.44265010952949524, + "learning_rate": 8.829329124355868e-07, + "loss": 0.43077313899993896, + "step": 3420 + }, + { + "epoch": 0.8722590515043345, + "grad_norm": 0.4266294836997986, + "learning_rate": 8.794711964435821e-07, + "loss": 0.44449281692504883, + "step": 3421 + }, + { + "epoch": 0.8725140234574197, + "grad_norm": 0.43002915382385254, + "learning_rate": 8.760159677994174e-07, + "loss": 0.4223582148551941, + "step": 3422 + }, + { + "epoch": 0.8727689954105049, + "grad_norm": 0.4211961030960083, + "learning_rate": 8.725672289607634e-07, + "loss": 0.42744097113609314, + "step": 3423 + }, + { + "epoch": 0.87302396736359, + "grad_norm": 0.45127928256988525, + "learning_rate": 8.691249823806813e-07, + "loss": 0.43572863936424255, + "step": 3424 + }, + { + "epoch": 0.8732789393166752, + "grad_norm": 0.4222966730594635, + "learning_rate": 8.656892305076047e-07, + "loss": 0.44335150718688965, + "step": 3425 + }, + { + "epoch": 0.8735339112697603, + "grad_norm": 0.45571115612983704, + "learning_rate": 8.622599757853533e-07, + "loss": 0.4440256953239441, + "step": 3426 + }, + { + "epoch": 0.8737888832228455, + "grad_norm": 0.45147934556007385, + "learning_rate": 8.588372206531293e-07, + "loss": 0.44460529088974, + "step": 3427 + }, + { + "epoch": 0.8740438551759306, + "grad_norm": 0.4408029317855835, + "learning_rate": 8.554209675454984e-07, + "loss": 0.4386541247367859, + "step": 3428 + }, + { + "epoch": 0.8742988271290159, + "grad_norm": 0.4156317114830017, + "learning_rate": 8.520112188924135e-07, + "loss": 0.4272143840789795, + "step": 3429 + }, + { + "epoch": 0.874553799082101, + "grad_norm": 0.4348088204860687, + "learning_rate": 8.486079771192001e-07, + "loss": 0.4361407458782196, + "step": 3430 + }, + { + "epoch": 0.8748087710351862, + "grad_norm": 0.4363251328468323, + "learning_rate": 8.452112446465477e-07, + "loss": 0.4313136041164398, + "step": 3431 + }, + { + "epoch": 0.8750637429882713, + "grad_norm": 0.4510456621646881, + "learning_rate": 8.418210238905256e-07, + "loss": 0.44090861082077026, + "step": 3432 + }, + { + "epoch": 0.8753187149413565, + "grad_norm": 0.43219730257987976, + "learning_rate": 8.384373172625626e-07, + "loss": 0.43194374442100525, + "step": 3433 + }, + { + "epoch": 0.8755736868944416, + "grad_norm": 0.4394565522670746, + "learning_rate": 8.350601271694614e-07, + "loss": 0.42495352029800415, + "step": 3434 + }, + { + "epoch": 0.8758286588475268, + "grad_norm": 0.6174067854881287, + "learning_rate": 8.316894560133826e-07, + "loss": 0.4395310580730438, + "step": 3435 + }, + { + "epoch": 0.8760836308006119, + "grad_norm": 0.4343336820602417, + "learning_rate": 8.28325306191855e-07, + "loss": 0.4309213161468506, + "step": 3436 + }, + { + "epoch": 0.8763386027536971, + "grad_norm": 0.45802173018455505, + "learning_rate": 8.249676800977657e-07, + "loss": 0.44333964586257935, + "step": 3437 + }, + { + "epoch": 0.8765935747067822, + "grad_norm": 0.4481572210788727, + "learning_rate": 8.216165801193676e-07, + "loss": 0.43676790595054626, + "step": 3438 + }, + { + "epoch": 0.8768485466598674, + "grad_norm": 0.4250171482563019, + "learning_rate": 8.182720086402618e-07, + "loss": 0.4320659041404724, + "step": 3439 + }, + { + "epoch": 0.8771035186129525, + "grad_norm": 0.4334368407726288, + "learning_rate": 8.149339680394164e-07, + "loss": 0.4514850378036499, + "step": 3440 + }, + { + "epoch": 0.8773584905660378, + "grad_norm": 0.41408801078796387, + "learning_rate": 8.116024606911444e-07, + "loss": 0.43668755888938904, + "step": 3441 + }, + { + "epoch": 0.8776134625191229, + "grad_norm": 0.429240345954895, + "learning_rate": 8.082774889651168e-07, + "loss": 0.43987786769866943, + "step": 3442 + }, + { + "epoch": 0.8778684344722081, + "grad_norm": 0.4321937561035156, + "learning_rate": 8.049590552263575e-07, + "loss": 0.43867743015289307, + "step": 3443 + }, + { + "epoch": 0.8781234064252932, + "grad_norm": 0.43646883964538574, + "learning_rate": 8.01647161835235e-07, + "loss": 0.44158801436424255, + "step": 3444 + }, + { + "epoch": 0.8783783783783784, + "grad_norm": 0.4338165521621704, + "learning_rate": 7.983418111474717e-07, + "loss": 0.43622469902038574, + "step": 3445 + }, + { + "epoch": 0.8786333503314635, + "grad_norm": 0.42995837330818176, + "learning_rate": 7.950430055141312e-07, + "loss": 0.4489513337612152, + "step": 3446 + }, + { + "epoch": 0.8788883222845487, + "grad_norm": 0.4226197302341461, + "learning_rate": 7.91750747281621e-07, + "loss": 0.43353432416915894, + "step": 3447 + }, + { + "epoch": 0.8791432942376338, + "grad_norm": 0.48948803544044495, + "learning_rate": 7.884650387916992e-07, + "loss": 0.4460390508174896, + "step": 3448 + }, + { + "epoch": 0.879398266190719, + "grad_norm": 0.42587754130363464, + "learning_rate": 7.85185882381454e-07, + "loss": 0.43023681640625, + "step": 3449 + }, + { + "epoch": 0.8796532381438041, + "grad_norm": 0.44122031331062317, + "learning_rate": 7.819132803833229e-07, + "loss": 0.44698429107666016, + "step": 3450 + }, + { + "epoch": 0.8799082100968894, + "grad_norm": 0.43389633297920227, + "learning_rate": 7.786472351250785e-07, + "loss": 0.4534319341182709, + "step": 3451 + }, + { + "epoch": 0.8801631820499745, + "grad_norm": 0.44629064202308655, + "learning_rate": 7.753877489298245e-07, + "loss": 0.4405984878540039, + "step": 3452 + }, + { + "epoch": 0.8804181540030597, + "grad_norm": 0.44391247630119324, + "learning_rate": 7.721348241160087e-07, + "loss": 0.4256533086299896, + "step": 3453 + }, + { + "epoch": 0.8806731259561448, + "grad_norm": 0.44749441742897034, + "learning_rate": 7.688884629974013e-07, + "loss": 0.42853403091430664, + "step": 3454 + }, + { + "epoch": 0.88092809790923, + "grad_norm": 0.4337758719921112, + "learning_rate": 7.65648667883111e-07, + "loss": 0.43673738837242126, + "step": 3455 + }, + { + "epoch": 0.8811830698623151, + "grad_norm": 0.44894421100616455, + "learning_rate": 7.624154410775753e-07, + "loss": 0.4365650415420532, + "step": 3456 + }, + { + "epoch": 0.8814380418154003, + "grad_norm": 0.4481375813484192, + "learning_rate": 7.591887848805545e-07, + "loss": 0.4377281069755554, + "step": 3457 + }, + { + "epoch": 0.8816930137684854, + "grad_norm": 0.4205091893672943, + "learning_rate": 7.559687015871431e-07, + "loss": 0.4369480311870575, + "step": 3458 + }, + { + "epoch": 0.8819479857215706, + "grad_norm": 0.4384622573852539, + "learning_rate": 7.527551934877597e-07, + "loss": 0.436867892742157, + "step": 3459 + }, + { + "epoch": 0.8822029576746558, + "grad_norm": 0.42390331625938416, + "learning_rate": 7.49548262868135e-07, + "loss": 0.4484369158744812, + "step": 3460 + }, + { + "epoch": 0.882457929627741, + "grad_norm": 0.43532341718673706, + "learning_rate": 7.46347912009332e-07, + "loss": 0.44156980514526367, + "step": 3461 + }, + { + "epoch": 0.8827129015808262, + "grad_norm": 0.44449347257614136, + "learning_rate": 7.431541431877343e-07, + "loss": 0.4474422335624695, + "step": 3462 + }, + { + "epoch": 0.8829678735339113, + "grad_norm": 0.4316299259662628, + "learning_rate": 7.399669586750357e-07, + "loss": 0.4310445189476013, + "step": 3463 + }, + { + "epoch": 0.8832228454869965, + "grad_norm": 0.4550790786743164, + "learning_rate": 7.367863607382542e-07, + "loss": 0.4407844543457031, + "step": 3464 + }, + { + "epoch": 0.8834778174400816, + "grad_norm": 0.435812771320343, + "learning_rate": 7.336123516397176e-07, + "loss": 0.438144326210022, + "step": 3465 + }, + { + "epoch": 0.8837327893931668, + "grad_norm": 0.42210423946380615, + "learning_rate": 7.304449336370711e-07, + "loss": 0.4265088737010956, + "step": 3466 + }, + { + "epoch": 0.8839877613462519, + "grad_norm": 0.4351652264595032, + "learning_rate": 7.272841089832694e-07, + "loss": 0.4282117486000061, + "step": 3467 + }, + { + "epoch": 0.8842427332993371, + "grad_norm": 0.4479033350944519, + "learning_rate": 7.24129879926575e-07, + "loss": 0.4302578866481781, + "step": 3468 + }, + { + "epoch": 0.8844977052524222, + "grad_norm": 0.43092960119247437, + "learning_rate": 7.209822487105655e-07, + "loss": 0.43718647956848145, + "step": 3469 + }, + { + "epoch": 0.8847526772055074, + "grad_norm": 0.44142788648605347, + "learning_rate": 7.17841217574119e-07, + "loss": 0.43912026286125183, + "step": 3470 + }, + { + "epoch": 0.8850076491585925, + "grad_norm": 0.4471288323402405, + "learning_rate": 7.147067887514236e-07, + "loss": 0.43443840742111206, + "step": 3471 + }, + { + "epoch": 0.8852626211116777, + "grad_norm": 0.43807896971702576, + "learning_rate": 7.115789644719728e-07, + "loss": 0.43923306465148926, + "step": 3472 + }, + { + "epoch": 0.8855175930647629, + "grad_norm": 0.4477294385433197, + "learning_rate": 7.084577469605525e-07, + "loss": 0.4377750754356384, + "step": 3473 + }, + { + "epoch": 0.8857725650178481, + "grad_norm": 0.4518868327140808, + "learning_rate": 7.053431384372599e-07, + "loss": 0.43124985694885254, + "step": 3474 + }, + { + "epoch": 0.8860275369709332, + "grad_norm": 0.44317349791526794, + "learning_rate": 7.022351411174866e-07, + "loss": 0.44053560495376587, + "step": 3475 + }, + { + "epoch": 0.8862825089240184, + "grad_norm": 0.4312305748462677, + "learning_rate": 6.991337572119217e-07, + "loss": 0.44413360953330994, + "step": 3476 + }, + { + "epoch": 0.8865374808771035, + "grad_norm": 0.4528648555278778, + "learning_rate": 6.960389889265517e-07, + "loss": 0.4408388137817383, + "step": 3477 + }, + { + "epoch": 0.8867924528301887, + "grad_norm": 0.4547745883464813, + "learning_rate": 6.929508384626571e-07, + "loss": 0.4394215941429138, + "step": 3478 + }, + { + "epoch": 0.8870474247832738, + "grad_norm": 0.4406372010707855, + "learning_rate": 6.898693080168084e-07, + "loss": 0.43522459268569946, + "step": 3479 + }, + { + "epoch": 0.887302396736359, + "grad_norm": 0.4650934934616089, + "learning_rate": 6.867943997808735e-07, + "loss": 0.44631558656692505, + "step": 3480 + }, + { + "epoch": 0.8875573686894441, + "grad_norm": 0.4555158019065857, + "learning_rate": 6.837261159420016e-07, + "loss": 0.43532800674438477, + "step": 3481 + }, + { + "epoch": 0.8878123406425293, + "grad_norm": 0.4111659824848175, + "learning_rate": 6.806644586826383e-07, + "loss": 0.4364461302757263, + "step": 3482 + }, + { + "epoch": 0.8880673125956144, + "grad_norm": 0.4358905553817749, + "learning_rate": 6.776094301805114e-07, + "loss": 0.4406110942363739, + "step": 3483 + }, + { + "epoch": 0.8883222845486997, + "grad_norm": 0.4338662326335907, + "learning_rate": 6.745610326086327e-07, + "loss": 0.41641271114349365, + "step": 3484 + }, + { + "epoch": 0.8885772565017848, + "grad_norm": 0.46959635615348816, + "learning_rate": 6.715192681353022e-07, + "loss": 0.4398638904094696, + "step": 3485 + }, + { + "epoch": 0.88883222845487, + "grad_norm": 0.4260825514793396, + "learning_rate": 6.684841389240971e-07, + "loss": 0.4335266351699829, + "step": 3486 + }, + { + "epoch": 0.8890872004079551, + "grad_norm": 0.4593506455421448, + "learning_rate": 6.654556471338747e-07, + "loss": 0.4404299259185791, + "step": 3487 + }, + { + "epoch": 0.8893421723610403, + "grad_norm": 0.44930601119995117, + "learning_rate": 6.624337949187776e-07, + "loss": 0.43138647079467773, + "step": 3488 + }, + { + "epoch": 0.8895971443141254, + "grad_norm": 0.4422794282436371, + "learning_rate": 6.594185844282175e-07, + "loss": 0.4225475788116455, + "step": 3489 + }, + { + "epoch": 0.8898521162672106, + "grad_norm": 0.42670273780822754, + "learning_rate": 6.564100178068867e-07, + "loss": 0.41914981603622437, + "step": 3490 + }, + { + "epoch": 0.8901070882202957, + "grad_norm": 0.4556596875190735, + "learning_rate": 6.534080971947554e-07, + "loss": 0.4371855854988098, + "step": 3491 + }, + { + "epoch": 0.8903620601733809, + "grad_norm": 0.41994306445121765, + "learning_rate": 6.504128247270547e-07, + "loss": 0.4364043176174164, + "step": 3492 + }, + { + "epoch": 0.890617032126466, + "grad_norm": 0.467311829328537, + "learning_rate": 6.474242025342991e-07, + "loss": 0.4354938864707947, + "step": 3493 + }, + { + "epoch": 0.8908720040795512, + "grad_norm": 0.4528542160987854, + "learning_rate": 6.44442232742265e-07, + "loss": 0.42929911613464355, + "step": 3494 + }, + { + "epoch": 0.8911269760326365, + "grad_norm": 0.45788252353668213, + "learning_rate": 6.414669174720001e-07, + "loss": 0.4449249505996704, + "step": 3495 + }, + { + "epoch": 0.8913819479857216, + "grad_norm": 0.45263516902923584, + "learning_rate": 6.384982588398225e-07, + "loss": 0.4337441325187683, + "step": 3496 + }, + { + "epoch": 0.8916369199388068, + "grad_norm": 0.4540601968765259, + "learning_rate": 6.355362589573078e-07, + "loss": 0.4453548192977905, + "step": 3497 + }, + { + "epoch": 0.8918918918918919, + "grad_norm": 0.4698966443538666, + "learning_rate": 6.325809199313015e-07, + "loss": 0.4402586817741394, + "step": 3498 + }, + { + "epoch": 0.8921468638449771, + "grad_norm": 0.45818012952804565, + "learning_rate": 6.29632243863908e-07, + "loss": 0.41622447967529297, + "step": 3499 + }, + { + "epoch": 0.8924018357980622, + "grad_norm": 0.4561059772968292, + "learning_rate": 6.266902328524915e-07, + "loss": 0.42476189136505127, + "step": 3500 + }, + { + "epoch": 0.8926568077511474, + "grad_norm": 0.45123928785324097, + "learning_rate": 6.237548889896805e-07, + "loss": 0.43091583251953125, + "step": 3501 + }, + { + "epoch": 0.8929117797042325, + "grad_norm": 0.4374764859676361, + "learning_rate": 6.208262143633548e-07, + "loss": 0.427135169506073, + "step": 3502 + }, + { + "epoch": 0.8931667516573177, + "grad_norm": 0.43948739767074585, + "learning_rate": 6.179042110566558e-07, + "loss": 0.43492886424064636, + "step": 3503 + }, + { + "epoch": 0.8934217236104028, + "grad_norm": 0.43435484170913696, + "learning_rate": 6.149888811479787e-07, + "loss": 0.4235824942588806, + "step": 3504 + }, + { + "epoch": 0.893676695563488, + "grad_norm": 0.42728570103645325, + "learning_rate": 6.12080226710966e-07, + "loss": 0.4385955035686493, + "step": 3505 + }, + { + "epoch": 0.8939316675165732, + "grad_norm": 0.4453405737876892, + "learning_rate": 6.091782498145204e-07, + "loss": 0.44239622354507446, + "step": 3506 + }, + { + "epoch": 0.8941866394696584, + "grad_norm": 0.45244428515434265, + "learning_rate": 6.062829525227909e-07, + "loss": 0.4283861517906189, + "step": 3507 + }, + { + "epoch": 0.8944416114227435, + "grad_norm": 0.42372480034828186, + "learning_rate": 6.033943368951745e-07, + "loss": 0.42147597670555115, + "step": 3508 + }, + { + "epoch": 0.8946965833758287, + "grad_norm": 0.4271625876426697, + "learning_rate": 6.005124049863187e-07, + "loss": 0.4466112554073334, + "step": 3509 + }, + { + "epoch": 0.8949515553289138, + "grad_norm": 0.4325474798679352, + "learning_rate": 5.976371588461138e-07, + "loss": 0.4398912787437439, + "step": 3510 + }, + { + "epoch": 0.895206527281999, + "grad_norm": 0.4471154510974884, + "learning_rate": 5.94768600519694e-07, + "loss": 0.43469834327697754, + "step": 3511 + }, + { + "epoch": 0.8954614992350841, + "grad_norm": 0.4322305917739868, + "learning_rate": 5.919067320474425e-07, + "loss": 0.41751420497894287, + "step": 3512 + }, + { + "epoch": 0.8957164711881693, + "grad_norm": 0.44496405124664307, + "learning_rate": 5.890515554649756e-07, + "loss": 0.4205957055091858, + "step": 3513 + }, + { + "epoch": 0.8959714431412544, + "grad_norm": 0.41922953724861145, + "learning_rate": 5.862030728031554e-07, + "loss": 0.4464760422706604, + "step": 3514 + }, + { + "epoch": 0.8962264150943396, + "grad_norm": 0.4451551139354706, + "learning_rate": 5.833612860880844e-07, + "loss": 0.434225857257843, + "step": 3515 + }, + { + "epoch": 0.8964813870474247, + "grad_norm": 0.4299732744693756, + "learning_rate": 5.805261973410936e-07, + "loss": 0.4436805844306946, + "step": 3516 + }, + { + "epoch": 0.89673635900051, + "grad_norm": 0.442019522190094, + "learning_rate": 5.77697808578761e-07, + "loss": 0.4386199116706848, + "step": 3517 + }, + { + "epoch": 0.8969913309535951, + "grad_norm": 0.4500035345554352, + "learning_rate": 5.748761218128918e-07, + "loss": 0.4361865222454071, + "step": 3518 + }, + { + "epoch": 0.8972463029066803, + "grad_norm": 0.42633429169654846, + "learning_rate": 5.720611390505215e-07, + "loss": 0.4213171899318695, + "step": 3519 + }, + { + "epoch": 0.8975012748597654, + "grad_norm": 0.4605890214443207, + "learning_rate": 5.692528622939275e-07, + "loss": 0.42544159293174744, + "step": 3520 + }, + { + "epoch": 0.8977562468128506, + "grad_norm": 0.4514401853084564, + "learning_rate": 5.664512935406063e-07, + "loss": 0.4186820685863495, + "step": 3521 + }, + { + "epoch": 0.8980112187659357, + "grad_norm": 0.43343934416770935, + "learning_rate": 5.636564347832907e-07, + "loss": 0.41555485129356384, + "step": 3522 + }, + { + "epoch": 0.8982661907190209, + "grad_norm": 0.46844685077667236, + "learning_rate": 5.60868288009937e-07, + "loss": 0.4361535906791687, + "step": 3523 + }, + { + "epoch": 0.898521162672106, + "grad_norm": 0.4280957877635956, + "learning_rate": 5.580868552037277e-07, + "loss": 0.4293152987957001, + "step": 3524 + }, + { + "epoch": 0.8987761346251912, + "grad_norm": 0.4266355633735657, + "learning_rate": 5.553121383430715e-07, + "loss": 0.4427189230918884, + "step": 3525 + }, + { + "epoch": 0.8990311065782763, + "grad_norm": 0.425010085105896, + "learning_rate": 5.525441394015973e-07, + "loss": 0.43918511271476746, + "step": 3526 + }, + { + "epoch": 0.8992860785313616, + "grad_norm": 0.42402413487434387, + "learning_rate": 5.49782860348157e-07, + "loss": 0.41261357069015503, + "step": 3527 + }, + { + "epoch": 0.8995410504844467, + "grad_norm": 0.46322718262672424, + "learning_rate": 5.470283031468259e-07, + "loss": 0.4363265037536621, + "step": 3528 + }, + { + "epoch": 0.8997960224375319, + "grad_norm": 0.4325588643550873, + "learning_rate": 5.442804697568915e-07, + "loss": 0.4324786067008972, + "step": 3529 + }, + { + "epoch": 0.9000509943906171, + "grad_norm": 0.4412180781364441, + "learning_rate": 5.415393621328658e-07, + "loss": 0.4369555413722992, + "step": 3530 + }, + { + "epoch": 0.9003059663437022, + "grad_norm": 0.4297512471675873, + "learning_rate": 5.388049822244701e-07, + "loss": 0.43392789363861084, + "step": 3531 + }, + { + "epoch": 0.9005609382967874, + "grad_norm": 0.4450361132621765, + "learning_rate": 5.360773319766443e-07, + "loss": 0.43461984395980835, + "step": 3532 + }, + { + "epoch": 0.9008159102498725, + "grad_norm": 0.44984447956085205, + "learning_rate": 5.333564133295421e-07, + "loss": 0.4311913847923279, + "step": 3533 + }, + { + "epoch": 0.9010708822029577, + "grad_norm": 0.4270000159740448, + "learning_rate": 5.306422282185264e-07, + "loss": 0.4367779493331909, + "step": 3534 + }, + { + "epoch": 0.9013258541560428, + "grad_norm": 0.4270723760128021, + "learning_rate": 5.279347785741718e-07, + "loss": 0.4244728982448578, + "step": 3535 + }, + { + "epoch": 0.901580826109128, + "grad_norm": 0.44941937923431396, + "learning_rate": 5.252340663222666e-07, + "loss": 0.4332941770553589, + "step": 3536 + }, + { + "epoch": 0.9018357980622131, + "grad_norm": 0.43194061517715454, + "learning_rate": 5.225400933837954e-07, + "loss": 0.4334859848022461, + "step": 3537 + }, + { + "epoch": 0.9020907700152984, + "grad_norm": 0.42677220702171326, + "learning_rate": 5.198528616749599e-07, + "loss": 0.4181649088859558, + "step": 3538 + }, + { + "epoch": 0.9023457419683835, + "grad_norm": 0.43355873227119446, + "learning_rate": 5.171723731071643e-07, + "loss": 0.44718942046165466, + "step": 3539 + }, + { + "epoch": 0.9026007139214687, + "grad_norm": 0.42261987924575806, + "learning_rate": 5.144986295870125e-07, + "loss": 0.4279809892177582, + "step": 3540 + }, + { + "epoch": 0.9028556858745538, + "grad_norm": 0.43762263655662537, + "learning_rate": 5.118316330163165e-07, + "loss": 0.43330079317092896, + "step": 3541 + }, + { + "epoch": 0.903110657827639, + "grad_norm": 0.4268043339252472, + "learning_rate": 5.091713852920854e-07, + "loss": 0.4417315423488617, + "step": 3542 + }, + { + "epoch": 0.9033656297807241, + "grad_norm": 0.45512381196022034, + "learning_rate": 5.065178883065258e-07, + "loss": 0.4269631803035736, + "step": 3543 + }, + { + "epoch": 0.9036206017338093, + "grad_norm": 0.42681342363357544, + "learning_rate": 5.038711439470511e-07, + "loss": 0.439994752407074, + "step": 3544 + }, + { + "epoch": 0.9038755736868944, + "grad_norm": 0.4336576759815216, + "learning_rate": 5.0123115409626e-07, + "loss": 0.43351709842681885, + "step": 3545 + }, + { + "epoch": 0.9041305456399796, + "grad_norm": 0.4286787211894989, + "learning_rate": 4.98597920631958e-07, + "loss": 0.43429574370384216, + "step": 3546 + }, + { + "epoch": 0.9043855175930647, + "grad_norm": 0.449375718832016, + "learning_rate": 4.95971445427137e-07, + "loss": 0.45396527647972107, + "step": 3547 + }, + { + "epoch": 0.90464048954615, + "grad_norm": 0.4293923079967499, + "learning_rate": 4.933517303499846e-07, + "loss": 0.43527752161026, + "step": 3548 + }, + { + "epoch": 0.904895461499235, + "grad_norm": 0.4441344738006592, + "learning_rate": 4.907387772638839e-07, + "loss": 0.44533535838127136, + "step": 3549 + }, + { + "epoch": 0.9051504334523203, + "grad_norm": 0.44053396582603455, + "learning_rate": 4.881325880273968e-07, + "loss": 0.44325876235961914, + "step": 3550 + }, + { + "epoch": 0.9054054054054054, + "grad_norm": 0.4394433796405792, + "learning_rate": 4.855331644942873e-07, + "loss": 0.4454990327358246, + "step": 3551 + }, + { + "epoch": 0.9056603773584906, + "grad_norm": 0.4348095953464508, + "learning_rate": 4.829405085134997e-07, + "loss": 0.4354587197303772, + "step": 3552 + }, + { + "epoch": 0.9059153493115757, + "grad_norm": 0.4534893333911896, + "learning_rate": 4.803546219291655e-07, + "loss": 0.4252866506576538, + "step": 3553 + }, + { + "epoch": 0.9061703212646609, + "grad_norm": 0.4208613634109497, + "learning_rate": 4.777755065806044e-07, + "loss": 0.4360646605491638, + "step": 3554 + }, + { + "epoch": 0.906425293217746, + "grad_norm": 0.4523314833641052, + "learning_rate": 4.752031643023158e-07, + "loss": 0.4250752925872803, + "step": 3555 + }, + { + "epoch": 0.9066802651708312, + "grad_norm": 0.4313192665576935, + "learning_rate": 4.726375969239816e-07, + "loss": 0.4444052577018738, + "step": 3556 + }, + { + "epoch": 0.9069352371239163, + "grad_norm": 0.4894581735134125, + "learning_rate": 4.700788062704686e-07, + "loss": 0.43131422996520996, + "step": 3557 + }, + { + "epoch": 0.9071902090770015, + "grad_norm": 0.45645684003829956, + "learning_rate": 4.675267941618178e-07, + "loss": 0.4375073313713074, + "step": 3558 + }, + { + "epoch": 0.9074451810300866, + "grad_norm": 0.43935051560401917, + "learning_rate": 4.6498156241325364e-07, + "loss": 0.43498870730400085, + "step": 3559 + }, + { + "epoch": 0.9077001529831719, + "grad_norm": 0.4343809485435486, + "learning_rate": 4.6244311283517805e-07, + "loss": 0.43834230303764343, + "step": 3560 + }, + { + "epoch": 0.907955124936257, + "grad_norm": 0.4208904802799225, + "learning_rate": 4.599114472331634e-07, + "loss": 0.4369846284389496, + "step": 3561 + }, + { + "epoch": 0.9082100968893422, + "grad_norm": 0.44323861598968506, + "learning_rate": 4.573865674079625e-07, + "loss": 0.4203859567642212, + "step": 3562 + }, + { + "epoch": 0.9084650688424273, + "grad_norm": 0.48349323868751526, + "learning_rate": 4.548684751554988e-07, + "loss": 0.4276666045188904, + "step": 3563 + }, + { + "epoch": 0.9087200407955125, + "grad_norm": 0.4553949236869812, + "learning_rate": 4.5235717226686516e-07, + "loss": 0.43263620138168335, + "step": 3564 + }, + { + "epoch": 0.9089750127485976, + "grad_norm": 0.437443345785141, + "learning_rate": 4.4985266052833043e-07, + "loss": 0.4380514621734619, + "step": 3565 + }, + { + "epoch": 0.9092299847016828, + "grad_norm": 0.41560494899749756, + "learning_rate": 4.4735494172133074e-07, + "loss": 0.4358428418636322, + "step": 3566 + }, + { + "epoch": 0.909484956654768, + "grad_norm": 0.44587910175323486, + "learning_rate": 4.4486401762246947e-07, + "loss": 0.44075244665145874, + "step": 3567 + }, + { + "epoch": 0.9097399286078531, + "grad_norm": 0.4215124547481537, + "learning_rate": 4.423798900035203e-07, + "loss": 0.42927083373069763, + "step": 3568 + }, + { + "epoch": 0.9099949005609383, + "grad_norm": 0.44220197200775146, + "learning_rate": 4.3990256063141667e-07, + "loss": 0.4306790232658386, + "step": 3569 + }, + { + "epoch": 0.9102498725140235, + "grad_norm": 0.44558820128440857, + "learning_rate": 4.374320312682612e-07, + "loss": 0.4551706910133362, + "step": 3570 + }, + { + "epoch": 0.9105048444671087, + "grad_norm": 0.4541255533695221, + "learning_rate": 4.349683036713215e-07, + "loss": 0.43386468291282654, + "step": 3571 + }, + { + "epoch": 0.9107598164201938, + "grad_norm": 0.42653003334999084, + "learning_rate": 4.3251137959302023e-07, + "loss": 0.4319538474082947, + "step": 3572 + }, + { + "epoch": 0.911014788373279, + "grad_norm": 0.436084508895874, + "learning_rate": 4.300612607809473e-07, + "loss": 0.4303576946258545, + "step": 3573 + }, + { + "epoch": 0.9112697603263641, + "grad_norm": 0.452581524848938, + "learning_rate": 4.276179489778487e-07, + "loss": 0.4242931306362152, + "step": 3574 + }, + { + "epoch": 0.9115247322794493, + "grad_norm": 0.42535877227783203, + "learning_rate": 4.251814459216286e-07, + "loss": 0.4394821524620056, + "step": 3575 + }, + { + "epoch": 0.9117797042325344, + "grad_norm": 0.45626556873321533, + "learning_rate": 4.227517533453518e-07, + "loss": 0.45337963104248047, + "step": 3576 + }, + { + "epoch": 0.9120346761856196, + "grad_norm": 0.43536752462387085, + "learning_rate": 4.203288729772326e-07, + "loss": 0.442407488822937, + "step": 3577 + }, + { + "epoch": 0.9122896481387047, + "grad_norm": 0.43327459692955017, + "learning_rate": 4.179128065406468e-07, + "loss": 0.43041032552719116, + "step": 3578 + }, + { + "epoch": 0.9125446200917899, + "grad_norm": 0.4425260126590729, + "learning_rate": 4.155035557541165e-07, + "loss": 0.4451976716518402, + "step": 3579 + }, + { + "epoch": 0.912799592044875, + "grad_norm": 0.4322320818901062, + "learning_rate": 4.1310112233132303e-07, + "loss": 0.4459317922592163, + "step": 3580 + }, + { + "epoch": 0.9130545639979603, + "grad_norm": 0.44417110085487366, + "learning_rate": 4.107055079810951e-07, + "loss": 0.42950934171676636, + "step": 3581 + }, + { + "epoch": 0.9133095359510454, + "grad_norm": 0.44251391291618347, + "learning_rate": 4.083167144074074e-07, + "loss": 0.43427062034606934, + "step": 3582 + }, + { + "epoch": 0.9135645079041306, + "grad_norm": 0.4517573118209839, + "learning_rate": 4.059347433093896e-07, + "loss": 0.43051815032958984, + "step": 3583 + }, + { + "epoch": 0.9138194798572157, + "grad_norm": 0.44148555397987366, + "learning_rate": 4.0355959638131545e-07, + "loss": 0.43934953212738037, + "step": 3584 + }, + { + "epoch": 0.9140744518103009, + "grad_norm": 0.4492388367652893, + "learning_rate": 4.0119127531260347e-07, + "loss": 0.4464166760444641, + "step": 3585 + }, + { + "epoch": 0.914329423763386, + "grad_norm": 0.4425787329673767, + "learning_rate": 3.988297817878217e-07, + "loss": 0.44234633445739746, + "step": 3586 + }, + { + "epoch": 0.9145843957164712, + "grad_norm": 0.43447956442832947, + "learning_rate": 3.9647511748667653e-07, + "loss": 0.444368451833725, + "step": 3587 + }, + { + "epoch": 0.9148393676695563, + "grad_norm": 0.44007408618927, + "learning_rate": 3.9412728408401714e-07, + "loss": 0.43814414739608765, + "step": 3588 + }, + { + "epoch": 0.9150943396226415, + "grad_norm": 0.42095082998275757, + "learning_rate": 3.917862832498387e-07, + "loss": 0.4246536195278168, + "step": 3589 + }, + { + "epoch": 0.9153493115757266, + "grad_norm": 0.42961618304252625, + "learning_rate": 3.8945211664927154e-07, + "loss": 0.42815613746643066, + "step": 3590 + }, + { + "epoch": 0.9156042835288118, + "grad_norm": 0.4285229444503784, + "learning_rate": 3.871247859425886e-07, + "loss": 0.43332967162132263, + "step": 3591 + }, + { + "epoch": 0.915859255481897, + "grad_norm": 0.42834195494651794, + "learning_rate": 3.84804292785198e-07, + "loss": 0.4423152208328247, + "step": 3592 + }, + { + "epoch": 0.9161142274349822, + "grad_norm": 0.5144716501235962, + "learning_rate": 3.82490638827645e-07, + "loss": 0.43932145833969116, + "step": 3593 + }, + { + "epoch": 0.9163691993880673, + "grad_norm": 0.41799429059028625, + "learning_rate": 3.8018382571560876e-07, + "loss": 0.4334896206855774, + "step": 3594 + }, + { + "epoch": 0.9166241713411525, + "grad_norm": 0.427685409784317, + "learning_rate": 3.7788385508990667e-07, + "loss": 0.4288986921310425, + "step": 3595 + }, + { + "epoch": 0.9168791432942376, + "grad_norm": 0.4473314881324768, + "learning_rate": 3.755907285864846e-07, + "loss": 0.44231194257736206, + "step": 3596 + }, + { + "epoch": 0.9171341152473228, + "grad_norm": 0.45096901059150696, + "learning_rate": 3.733044478364234e-07, + "loss": 0.43934544920921326, + "step": 3597 + }, + { + "epoch": 0.9173890872004079, + "grad_norm": 0.43266093730926514, + "learning_rate": 3.710250144659322e-07, + "loss": 0.43362313508987427, + "step": 3598 + }, + { + "epoch": 0.9176440591534931, + "grad_norm": 0.42727604508399963, + "learning_rate": 3.6875243009635406e-07, + "loss": 0.4410891532897949, + "step": 3599 + }, + { + "epoch": 0.9178990311065782, + "grad_norm": 0.4408717751502991, + "learning_rate": 3.664866963441538e-07, + "loss": 0.43474286794662476, + "step": 3600 + }, + { + "epoch": 0.9181540030596634, + "grad_norm": 0.4211311936378479, + "learning_rate": 3.6422781482092664e-07, + "loss": 0.43140873312950134, + "step": 3601 + }, + { + "epoch": 0.9184089750127487, + "grad_norm": 0.4141659736633301, + "learning_rate": 3.619757871333973e-07, + "loss": 0.4358154535293579, + "step": 3602 + }, + { + "epoch": 0.9186639469658338, + "grad_norm": 0.4296232759952545, + "learning_rate": 3.59730614883409e-07, + "loss": 0.43090933561325073, + "step": 3603 + }, + { + "epoch": 0.918918918918919, + "grad_norm": 0.4304314851760864, + "learning_rate": 3.574922996679331e-07, + "loss": 0.44070133566856384, + "step": 3604 + }, + { + "epoch": 0.9191738908720041, + "grad_norm": 0.45988893508911133, + "learning_rate": 3.552608430790649e-07, + "loss": 0.4354715049266815, + "step": 3605 + }, + { + "epoch": 0.9194288628250893, + "grad_norm": 0.43448030948638916, + "learning_rate": 3.53036246704016e-07, + "loss": 0.43007493019104004, + "step": 3606 + }, + { + "epoch": 0.9196838347781744, + "grad_norm": 0.4371713399887085, + "learning_rate": 3.5081851212512176e-07, + "loss": 0.44341692328453064, + "step": 3607 + }, + { + "epoch": 0.9199388067312596, + "grad_norm": 0.44628259539604187, + "learning_rate": 3.486076409198369e-07, + "loss": 0.42946192622184753, + "step": 3608 + }, + { + "epoch": 0.9201937786843447, + "grad_norm": 0.47736960649490356, + "learning_rate": 3.464036346607336e-07, + "loss": 0.45520347356796265, + "step": 3609 + }, + { + "epoch": 0.9204487506374299, + "grad_norm": 0.4316670894622803, + "learning_rate": 3.442064949155011e-07, + "loss": 0.42847108840942383, + "step": 3610 + }, + { + "epoch": 0.920703722590515, + "grad_norm": 0.4310346841812134, + "learning_rate": 3.420162232469437e-07, + "loss": 0.43078848719596863, + "step": 3611 + }, + { + "epoch": 0.9209586945436002, + "grad_norm": 0.4333515167236328, + "learning_rate": 3.398328212129809e-07, + "loss": 0.42883485555648804, + "step": 3612 + }, + { + "epoch": 0.9212136664966853, + "grad_norm": 0.45875313878059387, + "learning_rate": 3.3765629036665135e-07, + "loss": 0.42375749349594116, + "step": 3613 + }, + { + "epoch": 0.9214686384497706, + "grad_norm": 0.43513643741607666, + "learning_rate": 3.354866322560946e-07, + "loss": 0.4320690631866455, + "step": 3614 + }, + { + "epoch": 0.9217236104028557, + "grad_norm": 0.43181368708610535, + "learning_rate": 3.3332384842457156e-07, + "loss": 0.43557125329971313, + "step": 3615 + }, + { + "epoch": 0.9219785823559409, + "grad_norm": 0.4194730818271637, + "learning_rate": 3.3116794041045174e-07, + "loss": 0.43037256598472595, + "step": 3616 + }, + { + "epoch": 0.922233554309026, + "grad_norm": 0.4568606913089752, + "learning_rate": 3.290189097472096e-07, + "loss": 0.43266376852989197, + "step": 3617 + }, + { + "epoch": 0.9224885262621112, + "grad_norm": 0.435941606760025, + "learning_rate": 3.2687675796343244e-07, + "loss": 0.43532881140708923, + "step": 3618 + }, + { + "epoch": 0.9227434982151963, + "grad_norm": 0.47094932198524475, + "learning_rate": 3.247414865828136e-07, + "loss": 0.43585801124572754, + "step": 3619 + }, + { + "epoch": 0.9229984701682815, + "grad_norm": 0.45321404933929443, + "learning_rate": 3.226130971241503e-07, + "loss": 0.4414428472518921, + "step": 3620 + }, + { + "epoch": 0.9232534421213666, + "grad_norm": 0.4174649119377136, + "learning_rate": 3.204915911013484e-07, + "loss": 0.44382810592651367, + "step": 3621 + }, + { + "epoch": 0.9235084140744518, + "grad_norm": 0.4502234160900116, + "learning_rate": 3.1837697002341293e-07, + "loss": 0.41893133521080017, + "step": 3622 + }, + { + "epoch": 0.9237633860275369, + "grad_norm": 0.4951938986778259, + "learning_rate": 3.162692353944563e-07, + "loss": 0.4423009157180786, + "step": 3623 + }, + { + "epoch": 0.9240183579806222, + "grad_norm": 0.471845805644989, + "learning_rate": 3.1416838871368925e-07, + "loss": 0.4448579251766205, + "step": 3624 + }, + { + "epoch": 0.9242733299337073, + "grad_norm": 0.4562813937664032, + "learning_rate": 3.120744314754254e-07, + "loss": 0.4233444333076477, + "step": 3625 + }, + { + "epoch": 0.9245283018867925, + "grad_norm": 0.4315374195575714, + "learning_rate": 3.099873651690788e-07, + "loss": 0.4357251524925232, + "step": 3626 + }, + { + "epoch": 0.9247832738398776, + "grad_norm": 0.4478853642940521, + "learning_rate": 3.0790719127915647e-07, + "loss": 0.45080792903900146, + "step": 3627 + }, + { + "epoch": 0.9250382457929628, + "grad_norm": 0.443620502948761, + "learning_rate": 3.058339112852693e-07, + "loss": 0.4329032897949219, + "step": 3628 + }, + { + "epoch": 0.9252932177460479, + "grad_norm": 0.42865297198295593, + "learning_rate": 3.0376752666212317e-07, + "loss": 0.4263918399810791, + "step": 3629 + }, + { + "epoch": 0.9255481896991331, + "grad_norm": 0.43004220724105835, + "learning_rate": 3.017080388795168e-07, + "loss": 0.4549320936203003, + "step": 3630 + }, + { + "epoch": 0.9258031616522182, + "grad_norm": 0.43752700090408325, + "learning_rate": 2.9965544940234624e-07, + "loss": 0.4366416931152344, + "step": 3631 + }, + { + "epoch": 0.9260581336053034, + "grad_norm": 0.432623028755188, + "learning_rate": 2.976097596905969e-07, + "loss": 0.4313819110393524, + "step": 3632 + }, + { + "epoch": 0.9263131055583885, + "grad_norm": 0.43730250000953674, + "learning_rate": 2.955709711993515e-07, + "loss": 0.43685126304626465, + "step": 3633 + }, + { + "epoch": 0.9265680775114737, + "grad_norm": 0.4221460521221161, + "learning_rate": 2.9353908537878004e-07, + "loss": 0.42311978340148926, + "step": 3634 + }, + { + "epoch": 0.9268230494645588, + "grad_norm": 0.43018046021461487, + "learning_rate": 2.915141036741442e-07, + "loss": 0.43955671787261963, + "step": 3635 + }, + { + "epoch": 0.9270780214176441, + "grad_norm": 0.4328952431678772, + "learning_rate": 2.894960275257941e-07, + "loss": 0.4308651387691498, + "step": 3636 + }, + { + "epoch": 0.9273329933707293, + "grad_norm": 0.44335830211639404, + "learning_rate": 2.874848583691714e-07, + "loss": 0.438312828540802, + "step": 3637 + }, + { + "epoch": 0.9275879653238144, + "grad_norm": 0.4323786497116089, + "learning_rate": 2.854805976348007e-07, + "loss": 0.43586835265159607, + "step": 3638 + }, + { + "epoch": 0.9278429372768996, + "grad_norm": 0.42834949493408203, + "learning_rate": 2.8348324674829174e-07, + "loss": 0.442486047744751, + "step": 3639 + }, + { + "epoch": 0.9280979092299847, + "grad_norm": 0.4232763350009918, + "learning_rate": 2.8149280713034575e-07, + "loss": 0.43259090185165405, + "step": 3640 + }, + { + "epoch": 0.9283528811830699, + "grad_norm": 0.4621821641921997, + "learning_rate": 2.795092801967414e-07, + "loss": 0.4362543225288391, + "step": 3641 + }, + { + "epoch": 0.928607853136155, + "grad_norm": 0.4357510805130005, + "learning_rate": 2.775326673583434e-07, + "loss": 0.45379358530044556, + "step": 3642 + }, + { + "epoch": 0.9288628250892402, + "grad_norm": 0.4349755048751831, + "learning_rate": 2.755629700210982e-07, + "loss": 0.4319487512111664, + "step": 3643 + }, + { + "epoch": 0.9291177970423253, + "grad_norm": 0.4547295868396759, + "learning_rate": 2.7360018958603405e-07, + "loss": 0.43946123123168945, + "step": 3644 + }, + { + "epoch": 0.9293727689954105, + "grad_norm": 0.44599592685699463, + "learning_rate": 2.716443274492597e-07, + "loss": 0.4380370080471039, + "step": 3645 + }, + { + "epoch": 0.9296277409484957, + "grad_norm": 0.44466736912727356, + "learning_rate": 2.696953850019579e-07, + "loss": 0.43402618169784546, + "step": 3646 + }, + { + "epoch": 0.9298827129015809, + "grad_norm": 0.4327200949192047, + "learning_rate": 2.677533636303964e-07, + "loss": 0.44082748889923096, + "step": 3647 + }, + { + "epoch": 0.930137684854666, + "grad_norm": 0.439943790435791, + "learning_rate": 2.658182647159169e-07, + "loss": 0.43326807022094727, + "step": 3648 + }, + { + "epoch": 0.9303926568077512, + "grad_norm": 0.42451268434524536, + "learning_rate": 2.6389008963493613e-07, + "loss": 0.43715566396713257, + "step": 3649 + }, + { + "epoch": 0.9306476287608363, + "grad_norm": 0.42902666330337524, + "learning_rate": 2.6196883975894817e-07, + "loss": 0.42805609107017517, + "step": 3650 + }, + { + "epoch": 0.9309026007139215, + "grad_norm": 0.44421282410621643, + "learning_rate": 2.60054516454521e-07, + "loss": 0.44107586145401, + "step": 3651 + }, + { + "epoch": 0.9311575726670066, + "grad_norm": 0.44369035959243774, + "learning_rate": 2.581471210832931e-07, + "loss": 0.43590283393859863, + "step": 3652 + }, + { + "epoch": 0.9314125446200918, + "grad_norm": 0.4411109983921051, + "learning_rate": 2.5624665500197933e-07, + "loss": 0.4290148615837097, + "step": 3653 + }, + { + "epoch": 0.9316675165731769, + "grad_norm": 0.566591739654541, + "learning_rate": 2.543531195623605e-07, + "loss": 0.4412996172904968, + "step": 3654 + }, + { + "epoch": 0.9319224885262621, + "grad_norm": 0.44682958722114563, + "learning_rate": 2.524665161112949e-07, + "loss": 0.44440752267837524, + "step": 3655 + }, + { + "epoch": 0.9321774604793472, + "grad_norm": 0.43499064445495605, + "learning_rate": 2.505868459907035e-07, + "loss": 0.43677276372909546, + "step": 3656 + }, + { + "epoch": 0.9324324324324325, + "grad_norm": 0.45111894607543945, + "learning_rate": 2.48714110537579e-07, + "loss": 0.43129217624664307, + "step": 3657 + }, + { + "epoch": 0.9326874043855176, + "grad_norm": 0.44929251074790955, + "learning_rate": 2.4684831108398033e-07, + "loss": 0.4395376443862915, + "step": 3658 + }, + { + "epoch": 0.9329423763386028, + "grad_norm": 0.4287400245666504, + "learning_rate": 2.449894489570337e-07, + "loss": 0.4335207939147949, + "step": 3659 + }, + { + "epoch": 0.9331973482916879, + "grad_norm": 0.42606982588768005, + "learning_rate": 2.4313752547893034e-07, + "loss": 0.4391172528266907, + "step": 3660 + }, + { + "epoch": 0.9334523202447731, + "grad_norm": 0.43672558665275574, + "learning_rate": 2.412925419669276e-07, + "loss": 0.4412034749984741, + "step": 3661 + }, + { + "epoch": 0.9337072921978582, + "grad_norm": 0.43024998903274536, + "learning_rate": 2.3945449973334366e-07, + "loss": 0.4417959451675415, + "step": 3662 + }, + { + "epoch": 0.9339622641509434, + "grad_norm": 0.45105281472206116, + "learning_rate": 2.3762340008556261e-07, + "loss": 0.44574111700057983, + "step": 3663 + }, + { + "epoch": 0.9342172361040285, + "grad_norm": 0.42282834649086, + "learning_rate": 2.3579924432602708e-07, + "loss": 0.435298889875412, + "step": 3664 + }, + { + "epoch": 0.9344722080571137, + "grad_norm": 0.4477471709251404, + "learning_rate": 2.339820337522425e-07, + "loss": 0.44897374510765076, + "step": 3665 + }, + { + "epoch": 0.9347271800101988, + "grad_norm": 0.42740267515182495, + "learning_rate": 2.321717696567738e-07, + "loss": 0.43435829877853394, + "step": 3666 + }, + { + "epoch": 0.934982151963284, + "grad_norm": 0.4464569389820099, + "learning_rate": 2.3036845332724545e-07, + "loss": 0.44783616065979004, + "step": 3667 + }, + { + "epoch": 0.9352371239163692, + "grad_norm": 0.4314689636230469, + "learning_rate": 2.2857208604634028e-07, + "loss": 0.43364688754081726, + "step": 3668 + }, + { + "epoch": 0.9354920958694544, + "grad_norm": 0.42942556738853455, + "learning_rate": 2.2678266909179737e-07, + "loss": 0.4412001967430115, + "step": 3669 + }, + { + "epoch": 0.9357470678225395, + "grad_norm": 0.42958909273147583, + "learning_rate": 2.2500020373641297e-07, + "loss": 0.4348294734954834, + "step": 3670 + }, + { + "epoch": 0.9360020397756247, + "grad_norm": 0.4455057978630066, + "learning_rate": 2.2322469124803624e-07, + "loss": 0.43147438764572144, + "step": 3671 + }, + { + "epoch": 0.9362570117287098, + "grad_norm": 0.45338964462280273, + "learning_rate": 2.214561328895748e-07, + "loss": 0.442391961812973, + "step": 3672 + }, + { + "epoch": 0.936511983681795, + "grad_norm": 0.473303884267807, + "learning_rate": 2.1969452991898678e-07, + "loss": 0.4215404689311981, + "step": 3673 + }, + { + "epoch": 0.9367669556348802, + "grad_norm": 0.46086111664772034, + "learning_rate": 2.1793988358928432e-07, + "loss": 0.4291722774505615, + "step": 3674 + }, + { + "epoch": 0.9370219275879653, + "grad_norm": 0.4351825416088104, + "learning_rate": 2.161921951485313e-07, + "loss": 0.4376457929611206, + "step": 3675 + }, + { + "epoch": 0.9372768995410505, + "grad_norm": 0.4450758397579193, + "learning_rate": 2.1445146583984334e-07, + "loss": 0.4214126765727997, + "step": 3676 + }, + { + "epoch": 0.9375318714941356, + "grad_norm": 0.44977065920829773, + "learning_rate": 2.1271769690138334e-07, + "loss": 0.42246899008750916, + "step": 3677 + }, + { + "epoch": 0.9377868434472209, + "grad_norm": 0.43122920393943787, + "learning_rate": 2.1099088956636594e-07, + "loss": 0.43919774889945984, + "step": 3678 + }, + { + "epoch": 0.938041815400306, + "grad_norm": 0.5114609599113464, + "learning_rate": 2.0927104506305308e-07, + "loss": 0.4412056803703308, + "step": 3679 + }, + { + "epoch": 0.9382967873533912, + "grad_norm": 0.4477209746837616, + "learning_rate": 2.0755816461475398e-07, + "loss": 0.4255548417568207, + "step": 3680 + }, + { + "epoch": 0.9385517593064763, + "grad_norm": 0.4294905960559845, + "learning_rate": 2.0585224943982518e-07, + "loss": 0.4375752806663513, + "step": 3681 + }, + { + "epoch": 0.9388067312595615, + "grad_norm": 0.43354830145835876, + "learning_rate": 2.041533007516694e-07, + "loss": 0.4328603446483612, + "step": 3682 + }, + { + "epoch": 0.9390617032126466, + "grad_norm": 0.4309961199760437, + "learning_rate": 2.0246131975873217e-07, + "loss": 0.4423938989639282, + "step": 3683 + }, + { + "epoch": 0.9393166751657318, + "grad_norm": 0.4355313181877136, + "learning_rate": 2.0077630766450195e-07, + "loss": 0.4296422004699707, + "step": 3684 + }, + { + "epoch": 0.9395716471188169, + "grad_norm": 0.4247247874736786, + "learning_rate": 1.9909826566751555e-07, + "loss": 0.44954705238342285, + "step": 3685 + }, + { + "epoch": 0.9398266190719021, + "grad_norm": 0.45959627628326416, + "learning_rate": 1.97427194961346e-07, + "loss": 0.4236677885055542, + "step": 3686 + }, + { + "epoch": 0.9400815910249872, + "grad_norm": 0.4352707862854004, + "learning_rate": 1.9576309673461358e-07, + "loss": 0.42161887884140015, + "step": 3687 + }, + { + "epoch": 0.9403365629780724, + "grad_norm": 0.4265056550502777, + "learning_rate": 1.941059721709715e-07, + "loss": 0.4375278353691101, + "step": 3688 + }, + { + "epoch": 0.9405915349311575, + "grad_norm": 0.43939751386642456, + "learning_rate": 1.9245582244912241e-07, + "loss": 0.4362907409667969, + "step": 3689 + }, + { + "epoch": 0.9408465068842428, + "grad_norm": 0.41473960876464844, + "learning_rate": 1.9081264874279858e-07, + "loss": 0.4331972301006317, + "step": 3690 + }, + { + "epoch": 0.9411014788373279, + "grad_norm": 0.43197643756866455, + "learning_rate": 1.891764522207762e-07, + "loss": 0.4336824417114258, + "step": 3691 + }, + { + "epoch": 0.9413564507904131, + "grad_norm": 0.4420890808105469, + "learning_rate": 1.8754723404686425e-07, + "loss": 0.4414839744567871, + "step": 3692 + }, + { + "epoch": 0.9416114227434982, + "grad_norm": 0.4495333135128021, + "learning_rate": 1.859249953799147e-07, + "loss": 0.42230579257011414, + "step": 3693 + }, + { + "epoch": 0.9418663946965834, + "grad_norm": 0.44855111837387085, + "learning_rate": 1.8430973737380785e-07, + "loss": 0.4318239390850067, + "step": 3694 + }, + { + "epoch": 0.9421213666496685, + "grad_norm": 0.4337852895259857, + "learning_rate": 1.8270146117746245e-07, + "loss": 0.43734127283096313, + "step": 3695 + }, + { + "epoch": 0.9423763386027537, + "grad_norm": 0.431687593460083, + "learning_rate": 1.8110016793483122e-07, + "loss": 0.44457387924194336, + "step": 3696 + }, + { + "epoch": 0.9426313105558388, + "grad_norm": 0.43883758783340454, + "learning_rate": 1.7950585878489856e-07, + "loss": 0.418931245803833, + "step": 3697 + }, + { + "epoch": 0.942886282508924, + "grad_norm": 0.46315500140190125, + "learning_rate": 1.7791853486168188e-07, + "loss": 0.4317883253097534, + "step": 3698 + }, + { + "epoch": 0.9431412544620091, + "grad_norm": 0.43983328342437744, + "learning_rate": 1.7633819729422907e-07, + "loss": 0.4401555061340332, + "step": 3699 + }, + { + "epoch": 0.9433962264150944, + "grad_norm": 0.42895379662513733, + "learning_rate": 1.7476484720662103e-07, + "loss": 0.4379884898662567, + "step": 3700 + }, + { + "epoch": 0.9436511983681795, + "grad_norm": 0.43615207076072693, + "learning_rate": 1.7319848571796805e-07, + "loss": 0.4482054114341736, + "step": 3701 + }, + { + "epoch": 0.9439061703212647, + "grad_norm": 0.4342696964740753, + "learning_rate": 1.7163911394240675e-07, + "loss": 0.42798078060150146, + "step": 3702 + }, + { + "epoch": 0.9441611422743498, + "grad_norm": 0.4399515390396118, + "learning_rate": 1.700867329891043e-07, + "loss": 0.43327248096466064, + "step": 3703 + }, + { + "epoch": 0.944416114227435, + "grad_norm": 0.44510048627853394, + "learning_rate": 1.6854134396225407e-07, + "loss": 0.4332675337791443, + "step": 3704 + }, + { + "epoch": 0.9446710861805201, + "grad_norm": 0.4416811764240265, + "learning_rate": 1.6700294796107907e-07, + "loss": 0.44257673621177673, + "step": 3705 + }, + { + "epoch": 0.9449260581336053, + "grad_norm": 0.44168102741241455, + "learning_rate": 1.6547154607982508e-07, + "loss": 0.4348487854003906, + "step": 3706 + }, + { + "epoch": 0.9451810300866904, + "grad_norm": 0.43375131487846375, + "learning_rate": 1.6394713940776296e-07, + "loss": 0.4283615052700043, + "step": 3707 + }, + { + "epoch": 0.9454360020397756, + "grad_norm": 0.42909738421440125, + "learning_rate": 1.6242972902919095e-07, + "loss": 0.43982595205307007, + "step": 3708 + }, + { + "epoch": 0.9456909739928608, + "grad_norm": 0.45628172159194946, + "learning_rate": 1.6091931602342903e-07, + "loss": 0.42930155992507935, + "step": 3709 + }, + { + "epoch": 0.9459459459459459, + "grad_norm": 0.4570588171482086, + "learning_rate": 1.594159014648178e-07, + "loss": 0.4148539900779724, + "step": 3710 + }, + { + "epoch": 0.9462009178990312, + "grad_norm": 0.4399675130844116, + "learning_rate": 1.5791948642272404e-07, + "loss": 0.4340864419937134, + "step": 3711 + }, + { + "epoch": 0.9464558898521163, + "grad_norm": 0.439888060092926, + "learning_rate": 1.5643007196153303e-07, + "loss": 0.43424567580223083, + "step": 3712 + }, + { + "epoch": 0.9467108618052015, + "grad_norm": 0.42277246713638306, + "learning_rate": 1.5494765914065292e-07, + "loss": 0.4288041591644287, + "step": 3713 + }, + { + "epoch": 0.9469658337582866, + "grad_norm": 0.42644914984703064, + "learning_rate": 1.5347224901450908e-07, + "loss": 0.4338779151439667, + "step": 3714 + }, + { + "epoch": 0.9472208057113718, + "grad_norm": 0.5191669464111328, + "learning_rate": 1.5200384263254763e-07, + "loss": 0.4319589138031006, + "step": 3715 + }, + { + "epoch": 0.9474757776644569, + "grad_norm": 0.443218469619751, + "learning_rate": 1.5054244103923087e-07, + "loss": 0.432138055562973, + "step": 3716 + }, + { + "epoch": 0.9477307496175421, + "grad_norm": 0.44982388615608215, + "learning_rate": 1.4908804527404287e-07, + "loss": 0.4303247928619385, + "step": 3717 + }, + { + "epoch": 0.9479857215706272, + "grad_norm": 0.454996258020401, + "learning_rate": 1.476406563714794e-07, + "loss": 0.4347943663597107, + "step": 3718 + }, + { + "epoch": 0.9482406935237124, + "grad_norm": 0.43886685371398926, + "learning_rate": 1.4620027536105697e-07, + "loss": 0.43539103865623474, + "step": 3719 + }, + { + "epoch": 0.9484956654767975, + "grad_norm": 0.4551427960395813, + "learning_rate": 1.447669032673027e-07, + "loss": 0.4332054853439331, + "step": 3720 + }, + { + "epoch": 0.9487506374298827, + "grad_norm": 0.4315807819366455, + "learning_rate": 1.4334054110976437e-07, + "loss": 0.4265736937522888, + "step": 3721 + }, + { + "epoch": 0.9490056093829679, + "grad_norm": 0.43956583738327026, + "learning_rate": 1.419211899029971e-07, + "loss": 0.42740926146507263, + "step": 3722 + }, + { + "epoch": 0.9492605813360531, + "grad_norm": 0.4354575276374817, + "learning_rate": 1.4050885065657326e-07, + "loss": 0.4265577793121338, + "step": 3723 + }, + { + "epoch": 0.9495155532891382, + "grad_norm": 0.4236654043197632, + "learning_rate": 1.3910352437507711e-07, + "loss": 0.435396671295166, + "step": 3724 + }, + { + "epoch": 0.9497705252422234, + "grad_norm": 0.4544011056423187, + "learning_rate": 1.377052120581046e-07, + "loss": 0.4167858064174652, + "step": 3725 + }, + { + "epoch": 0.9500254971953085, + "grad_norm": 0.43282467126846313, + "learning_rate": 1.3631391470026124e-07, + "loss": 0.41828426718711853, + "step": 3726 + }, + { + "epoch": 0.9502804691483937, + "grad_norm": 0.4354334771633148, + "learning_rate": 1.3492963329116538e-07, + "loss": 0.4377305507659912, + "step": 3727 + }, + { + "epoch": 0.9505354411014788, + "grad_norm": 0.4522036910057068, + "learning_rate": 1.3355236881544276e-07, + "loss": 0.44088101387023926, + "step": 3728 + }, + { + "epoch": 0.950790413054564, + "grad_norm": 0.4242122173309326, + "learning_rate": 1.3218212225272753e-07, + "loss": 0.43219560384750366, + "step": 3729 + }, + { + "epoch": 0.9510453850076491, + "grad_norm": 0.43885210156440735, + "learning_rate": 1.3081889457766672e-07, + "loss": 0.43644246459007263, + "step": 3730 + }, + { + "epoch": 0.9513003569607343, + "grad_norm": 0.42883220314979553, + "learning_rate": 1.2946268675990804e-07, + "loss": 0.43907150626182556, + "step": 3731 + }, + { + "epoch": 0.9515553289138194, + "grad_norm": 0.43720653653144836, + "learning_rate": 1.2811349976411204e-07, + "loss": 0.43129271268844604, + "step": 3732 + }, + { + "epoch": 0.9518103008669047, + "grad_norm": 0.4418365955352783, + "learning_rate": 1.2677133454994105e-07, + "loss": 0.4380974769592285, + "step": 3733 + }, + { + "epoch": 0.9520652728199898, + "grad_norm": 0.4314889907836914, + "learning_rate": 1.2543619207206704e-07, + "loss": 0.4347606599330902, + "step": 3734 + }, + { + "epoch": 0.952320244773075, + "grad_norm": 0.4545467793941498, + "learning_rate": 1.241080732801625e-07, + "loss": 0.43105897307395935, + "step": 3735 + }, + { + "epoch": 0.9525752167261601, + "grad_norm": 0.4504272937774658, + "learning_rate": 1.2278697911890625e-07, + "loss": 0.442145437002182, + "step": 3736 + }, + { + "epoch": 0.9528301886792453, + "grad_norm": 0.431334525346756, + "learning_rate": 1.2147291052798217e-07, + "loss": 0.4376698136329651, + "step": 3737 + }, + { + "epoch": 0.9530851606323304, + "grad_norm": 0.42173293232917786, + "learning_rate": 1.2016586844207368e-07, + "loss": 0.4280625283718109, + "step": 3738 + }, + { + "epoch": 0.9533401325854156, + "grad_norm": 0.4419265687465668, + "learning_rate": 1.1886585379086713e-07, + "loss": 0.4444487988948822, + "step": 3739 + }, + { + "epoch": 0.9535951045385007, + "grad_norm": 0.42979907989501953, + "learning_rate": 1.1757286749905172e-07, + "loss": 0.43843722343444824, + "step": 3740 + }, + { + "epoch": 0.9538500764915859, + "grad_norm": 0.43650469183921814, + "learning_rate": 1.1628691048631735e-07, + "loss": 0.41674113273620605, + "step": 3741 + }, + { + "epoch": 0.954105048444671, + "grad_norm": 0.5075280666351318, + "learning_rate": 1.1500798366735234e-07, + "loss": 0.4313585162162781, + "step": 3742 + }, + { + "epoch": 0.9543600203977562, + "grad_norm": 0.45171964168548584, + "learning_rate": 1.1373608795184566e-07, + "loss": 0.43857893347740173, + "step": 3743 + }, + { + "epoch": 0.9546149923508415, + "grad_norm": 0.4379873275756836, + "learning_rate": 1.1247122424448586e-07, + "loss": 0.4406069219112396, + "step": 3744 + }, + { + "epoch": 0.9548699643039266, + "grad_norm": 0.4228276312351227, + "learning_rate": 1.1121339344495663e-07, + "loss": 0.43608206510543823, + "step": 3745 + }, + { + "epoch": 0.9551249362570118, + "grad_norm": 0.4296625554561615, + "learning_rate": 1.0996259644794449e-07, + "loss": 0.433504581451416, + "step": 3746 + }, + { + "epoch": 0.9553799082100969, + "grad_norm": 0.4238683879375458, + "learning_rate": 1.0871883414312778e-07, + "loss": 0.4223134517669678, + "step": 3747 + }, + { + "epoch": 0.9556348801631821, + "grad_norm": 0.5850834846496582, + "learning_rate": 1.0748210741518217e-07, + "loss": 0.43754324316978455, + "step": 3748 + }, + { + "epoch": 0.9558898521162672, + "grad_norm": 0.4604136049747467, + "learning_rate": 1.0625241714378287e-07, + "loss": 0.41852694749832153, + "step": 3749 + }, + { + "epoch": 0.9561448240693524, + "grad_norm": 0.42672300338745117, + "learning_rate": 1.0502976420359468e-07, + "loss": 0.4334542155265808, + "step": 3750 + }, + { + "epoch": 0.9563997960224375, + "grad_norm": 0.43828144669532776, + "learning_rate": 1.0381414946428081e-07, + "loss": 0.42628201842308044, + "step": 3751 + }, + { + "epoch": 0.9566547679755227, + "grad_norm": 0.4435332119464874, + "learning_rate": 1.0260557379049518e-07, + "loss": 0.42938369512557983, + "step": 3752 + }, + { + "epoch": 0.9569097399286078, + "grad_norm": 0.4323669373989105, + "learning_rate": 1.0140403804188903e-07, + "loss": 0.43224120140075684, + "step": 3753 + }, + { + "epoch": 0.957164711881693, + "grad_norm": 0.46962302923202515, + "learning_rate": 1.0020954307310205e-07, + "loss": 0.44025421142578125, + "step": 3754 + }, + { + "epoch": 0.9574196838347782, + "grad_norm": 0.43256840109825134, + "learning_rate": 9.902208973376682e-08, + "loss": 0.43730872869491577, + "step": 3755 + }, + { + "epoch": 0.9576746557878634, + "grad_norm": 0.4514023959636688, + "learning_rate": 9.784167886850881e-08, + "loss": 0.442075252532959, + "step": 3756 + }, + { + "epoch": 0.9579296277409485, + "grad_norm": 0.4285776615142822, + "learning_rate": 9.66683113169431e-08, + "loss": 0.43864166736602783, + "step": 3757 + }, + { + "epoch": 0.9581845996940337, + "grad_norm": 0.4442579746246338, + "learning_rate": 9.550198791367538e-08, + "loss": 0.440030574798584, + "step": 3758 + }, + { + "epoch": 0.9584395716471188, + "grad_norm": 0.43228745460510254, + "learning_rate": 9.434270948830093e-08, + "loss": 0.44247886538505554, + "step": 3759 + }, + { + "epoch": 0.958694543600204, + "grad_norm": 0.42962801456451416, + "learning_rate": 9.31904768654035e-08, + "loss": 0.4262453019618988, + "step": 3760 + }, + { + "epoch": 0.9589495155532891, + "grad_norm": 0.43366146087646484, + "learning_rate": 9.204529086455527e-08, + "loss": 0.433780312538147, + "step": 3761 + }, + { + "epoch": 0.9592044875063743, + "grad_norm": 0.4430442750453949, + "learning_rate": 9.090715230031688e-08, + "loss": 0.4226398169994354, + "step": 3762 + }, + { + "epoch": 0.9594594594594594, + "grad_norm": 0.4477764964103699, + "learning_rate": 8.977606198223521e-08, + "loss": 0.4350840747356415, + "step": 3763 + }, + { + "epoch": 0.9597144314125446, + "grad_norm": 0.4348631203174591, + "learning_rate": 8.865202071484558e-08, + "loss": 0.43202561140060425, + "step": 3764 + }, + { + "epoch": 0.9599694033656297, + "grad_norm": 0.44474688172340393, + "learning_rate": 8.753502929766622e-08, + "loss": 0.421417236328125, + "step": 3765 + }, + { + "epoch": 0.960224375318715, + "grad_norm": 0.43490901589393616, + "learning_rate": 8.642508852520492e-08, + "loss": 0.42618608474731445, + "step": 3766 + }, + { + "epoch": 0.9604793472718001, + "grad_norm": 0.4435284435749054, + "learning_rate": 8.532219918695128e-08, + "loss": 0.42826855182647705, + "step": 3767 + }, + { + "epoch": 0.9607343192248853, + "grad_norm": 0.4380919635295868, + "learning_rate": 8.422636206737888e-08, + "loss": 0.4380806088447571, + "step": 3768 + }, + { + "epoch": 0.9609892911779704, + "grad_norm": 0.4442375898361206, + "learning_rate": 8.313757794594867e-08, + "loss": 0.44663724303245544, + "step": 3769 + }, + { + "epoch": 0.9612442631310556, + "grad_norm": 0.42946797609329224, + "learning_rate": 8.205584759710228e-08, + "loss": 0.42948731780052185, + "step": 3770 + }, + { + "epoch": 0.9614992350841407, + "grad_norm": 0.45053452253341675, + "learning_rate": 8.09811717902631e-08, + "loss": 0.4317397475242615, + "step": 3771 + }, + { + "epoch": 0.9617542070372259, + "grad_norm": 0.4437471330165863, + "learning_rate": 7.99135512898408e-08, + "loss": 0.4413641393184662, + "step": 3772 + }, + { + "epoch": 0.962009178990311, + "grad_norm": 0.44477081298828125, + "learning_rate": 7.885298685522235e-08, + "loss": 0.44333338737487793, + "step": 3773 + }, + { + "epoch": 0.9622641509433962, + "grad_norm": 0.4432341456413269, + "learning_rate": 7.779947924077658e-08, + "loss": 0.4412536025047302, + "step": 3774 + }, + { + "epoch": 0.9625191228964813, + "grad_norm": 0.4365348517894745, + "learning_rate": 7.675302919585514e-08, + "loss": 0.43959423899650574, + "step": 3775 + }, + { + "epoch": 0.9627740948495666, + "grad_norm": 0.43227964639663696, + "learning_rate": 7.57136374647871e-08, + "loss": 0.4292386472225189, + "step": 3776 + }, + { + "epoch": 0.9630290668026517, + "grad_norm": 0.4467385411262512, + "learning_rate": 7.468130478688218e-08, + "loss": 0.44225990772247314, + "step": 3777 + }, + { + "epoch": 0.9632840387557369, + "grad_norm": 0.4632147252559662, + "learning_rate": 7.365603189643078e-08, + "loss": 0.4331645369529724, + "step": 3778 + }, + { + "epoch": 0.963539010708822, + "grad_norm": 0.4216609001159668, + "learning_rate": 7.263781952269954e-08, + "loss": 0.43343281745910645, + "step": 3779 + }, + { + "epoch": 0.9637939826619072, + "grad_norm": 0.45064857602119446, + "learning_rate": 7.162666838993249e-08, + "loss": 0.4255315065383911, + "step": 3780 + }, + { + "epoch": 0.9640489546149924, + "grad_norm": 0.4347366392612457, + "learning_rate": 7.062257921735205e-08, + "loss": 0.4380302429199219, + "step": 3781 + }, + { + "epoch": 0.9643039265680775, + "grad_norm": 0.4600411057472229, + "learning_rate": 6.962555271915806e-08, + "loss": 0.4289246201515198, + "step": 3782 + }, + { + "epoch": 0.9645588985211627, + "grad_norm": 0.44208458065986633, + "learning_rate": 6.863558960452654e-08, + "loss": 0.4249235987663269, + "step": 3783 + }, + { + "epoch": 0.9648138704742478, + "grad_norm": 0.44760599732398987, + "learning_rate": 6.76526905776087e-08, + "loss": 0.4318837821483612, + "step": 3784 + }, + { + "epoch": 0.965068842427333, + "grad_norm": 0.43746480345726013, + "learning_rate": 6.667685633753196e-08, + "loss": 0.4212871491909027, + "step": 3785 + }, + { + "epoch": 0.9653238143804181, + "grad_norm": 0.4536183178424835, + "learning_rate": 6.570808757839775e-08, + "loss": 0.42356353998184204, + "step": 3786 + }, + { + "epoch": 0.9655787863335034, + "grad_norm": 0.42770835757255554, + "learning_rate": 6.474638498928265e-08, + "loss": 0.4301995038986206, + "step": 3787 + }, + { + "epoch": 0.9658337582865885, + "grad_norm": 0.4357539713382721, + "learning_rate": 6.379174925423614e-08, + "loss": 0.4319273829460144, + "step": 3788 + }, + { + "epoch": 0.9660887302396737, + "grad_norm": 0.46971604228019714, + "learning_rate": 6.284418105228284e-08, + "loss": 0.4219222068786621, + "step": 3789 + }, + { + "epoch": 0.9663437021927588, + "grad_norm": 0.45036160945892334, + "learning_rate": 6.190368105741806e-08, + "loss": 0.41687309741973877, + "step": 3790 + }, + { + "epoch": 0.966598674145844, + "grad_norm": 0.4009671211242676, + "learning_rate": 6.097024993861112e-08, + "loss": 0.43720635771751404, + "step": 3791 + }, + { + "epoch": 0.9668536460989291, + "grad_norm": 0.42961275577545166, + "learning_rate": 6.004388835980424e-08, + "loss": 0.4499396085739136, + "step": 3792 + }, + { + "epoch": 0.9671086180520143, + "grad_norm": 0.45251771807670593, + "learning_rate": 5.9124596979908136e-08, + "loss": 0.45437192916870117, + "step": 3793 + }, + { + "epoch": 0.9673635900050994, + "grad_norm": 0.4467000365257263, + "learning_rate": 5.821237645280753e-08, + "loss": 0.43350905179977417, + "step": 3794 + }, + { + "epoch": 0.9676185619581846, + "grad_norm": 0.44402676820755005, + "learning_rate": 5.730722742735562e-08, + "loss": 0.42394256591796875, + "step": 3795 + }, + { + "epoch": 0.9678735339112697, + "grad_norm": 0.42971062660217285, + "learning_rate": 5.640915054737739e-08, + "loss": 0.43170222640037537, + "step": 3796 + }, + { + "epoch": 0.968128505864355, + "grad_norm": 0.46525314450263977, + "learning_rate": 5.55181464516652e-08, + "loss": 0.4343894422054291, + "step": 3797 + }, + { + "epoch": 0.96838347781744, + "grad_norm": 0.425347238779068, + "learning_rate": 5.46342157739832e-08, + "loss": 0.4369485378265381, + "step": 3798 + }, + { + "epoch": 0.9686384497705253, + "grad_norm": 0.447989284992218, + "learning_rate": 5.375735914306291e-08, + "loss": 0.4052256941795349, + "step": 3799 + }, + { + "epoch": 0.9688934217236104, + "grad_norm": 0.4680423438549042, + "learning_rate": 5.288757718260429e-08, + "loss": 0.4462357759475708, + "step": 3800 + }, + { + "epoch": 0.9691483936766956, + "grad_norm": 0.4516068696975708, + "learning_rate": 5.2024870511274694e-08, + "loss": 0.43032628297805786, + "step": 3801 + }, + { + "epoch": 0.9694033656297807, + "grad_norm": 0.4310958683490753, + "learning_rate": 5.116923974270993e-08, + "loss": 0.41478651762008667, + "step": 3802 + }, + { + "epoch": 0.9696583375828659, + "grad_norm": 0.41717851161956787, + "learning_rate": 5.032068548551205e-08, + "loss": 0.42103320360183716, + "step": 3803 + }, + { + "epoch": 0.969913309535951, + "grad_norm": 0.44686272740364075, + "learning_rate": 4.947920834325048e-08, + "loss": 0.42334866523742676, + "step": 3804 + }, + { + "epoch": 0.9701682814890362, + "grad_norm": 0.43319904804229736, + "learning_rate": 4.8644808914459776e-08, + "loss": 0.44510579109191895, + "step": 3805 + }, + { + "epoch": 0.9704232534421213, + "grad_norm": 0.42840850353240967, + "learning_rate": 4.781748779263962e-08, + "loss": 0.42885157465934753, + "step": 3806 + }, + { + "epoch": 0.9706782253952065, + "grad_norm": 0.41729164123535156, + "learning_rate": 4.6997245566257066e-08, + "loss": 0.43627625703811646, + "step": 3807 + }, + { + "epoch": 0.9709331973482916, + "grad_norm": 0.5475233197212219, + "learning_rate": 4.618408281874209e-08, + "loss": 0.4385756254196167, + "step": 3808 + }, + { + "epoch": 0.9711881693013769, + "grad_norm": 0.4557948708534241, + "learning_rate": 4.537800012849092e-08, + "loss": 0.42454612255096436, + "step": 3809 + }, + { + "epoch": 0.971443141254462, + "grad_norm": 0.42507830262184143, + "learning_rate": 4.4578998068861566e-08, + "loss": 0.4411567151546478, + "step": 3810 + }, + { + "epoch": 0.9716981132075472, + "grad_norm": 0.4329739511013031, + "learning_rate": 4.378707720817721e-08, + "loss": 0.44114264845848083, + "step": 3811 + }, + { + "epoch": 0.9719530851606323, + "grad_norm": 0.4257124960422516, + "learning_rate": 4.3002238109723927e-08, + "loss": 0.43023261427879333, + "step": 3812 + }, + { + "epoch": 0.9722080571137175, + "grad_norm": 0.4460616111755371, + "learning_rate": 4.2224481331750723e-08, + "loss": 0.4382517337799072, + "step": 3813 + }, + { + "epoch": 0.9724630290668026, + "grad_norm": 0.4248466491699219, + "learning_rate": 4.1453807427467297e-08, + "loss": 0.4303026795387268, + "step": 3814 + }, + { + "epoch": 0.9727180010198878, + "grad_norm": 0.42271357774734497, + "learning_rate": 4.06902169450496e-08, + "loss": 0.42649969458580017, + "step": 3815 + }, + { + "epoch": 0.972972972972973, + "grad_norm": 0.42175567150115967, + "learning_rate": 3.9933710427629837e-08, + "loss": 0.44406718015670776, + "step": 3816 + }, + { + "epoch": 0.9732279449260581, + "grad_norm": 0.439832866191864, + "learning_rate": 3.918428841330646e-08, + "loss": 0.4441678524017334, + "step": 3817 + }, + { + "epoch": 0.9734829168791433, + "grad_norm": 0.4424952566623688, + "learning_rate": 3.844195143513418e-08, + "loss": 0.4407542645931244, + "step": 3818 + }, + { + "epoch": 0.9737378888322284, + "grad_norm": 0.4503242075443268, + "learning_rate": 3.770670002113175e-08, + "loss": 0.44105467200279236, + "step": 3819 + }, + { + "epoch": 0.9739928607853137, + "grad_norm": 0.43343716859817505, + "learning_rate": 3.697853469427637e-08, + "loss": 0.4408354163169861, + "step": 3820 + }, + { + "epoch": 0.9742478327383988, + "grad_norm": 0.44179418683052063, + "learning_rate": 3.6257455972505964e-08, + "loss": 0.42275065183639526, + "step": 3821 + }, + { + "epoch": 0.974502804691484, + "grad_norm": 0.4276719391345978, + "learning_rate": 3.554346436871581e-08, + "loss": 0.44853538274765015, + "step": 3822 + }, + { + "epoch": 0.9747577766445691, + "grad_norm": 0.4315888285636902, + "learning_rate": 3.4836560390762995e-08, + "loss": 0.43066248297691345, + "step": 3823 + }, + { + "epoch": 0.9750127485976543, + "grad_norm": 0.4345954358577728, + "learning_rate": 3.4136744541459764e-08, + "loss": 0.4355141818523407, + "step": 3824 + }, + { + "epoch": 0.9752677205507394, + "grad_norm": 0.43663638830184937, + "learning_rate": 3.344401731858127e-08, + "loss": 0.42832696437835693, + "step": 3825 + }, + { + "epoch": 0.9755226925038246, + "grad_norm": 0.4864625632762909, + "learning_rate": 3.2758379214855584e-08, + "loss": 0.436530202627182, + "step": 3826 + }, + { + "epoch": 0.9757776644569097, + "grad_norm": 0.42900747060775757, + "learning_rate": 3.207983071797261e-08, + "loss": 0.44682687520980835, + "step": 3827 + }, + { + "epoch": 0.9760326364099949, + "grad_norm": 0.44420379400253296, + "learning_rate": 3.140837231057625e-08, + "loss": 0.41383880376815796, + "step": 3828 + }, + { + "epoch": 0.97628760836308, + "grad_norm": 0.44567447900772095, + "learning_rate": 3.074400447027004e-08, + "loss": 0.4374455213546753, + "step": 3829 + }, + { + "epoch": 0.9765425803161653, + "grad_norm": 0.4449840486049652, + "learning_rate": 3.008672766961151e-08, + "loss": 0.42075401544570923, + "step": 3830 + }, + { + "epoch": 0.9767975522692504, + "grad_norm": 0.44207072257995605, + "learning_rate": 2.9436542376116706e-08, + "loss": 0.42974042892456055, + "step": 3831 + }, + { + "epoch": 0.9770525242223356, + "grad_norm": 0.41998976469039917, + "learning_rate": 2.8793449052254564e-08, + "loss": 0.4292892813682556, + "step": 3832 + }, + { + "epoch": 0.9773074961754207, + "grad_norm": 0.45919719338417053, + "learning_rate": 2.8157448155452515e-08, + "loss": 0.43220004439353943, + "step": 3833 + }, + { + "epoch": 0.9775624681285059, + "grad_norm": 0.5310207605361938, + "learning_rate": 2.752854013809314e-08, + "loss": 0.4327763020992279, + "step": 3834 + }, + { + "epoch": 0.977817440081591, + "grad_norm": 0.4287780523300171, + "learning_rate": 2.6906725447510828e-08, + "loss": 0.4327196478843689, + "step": 3835 + }, + { + "epoch": 0.9780724120346762, + "grad_norm": 0.4250393509864807, + "learning_rate": 2.629200452599845e-08, + "loss": 0.4387291669845581, + "step": 3836 + }, + { + "epoch": 0.9783273839877613, + "grad_norm": 0.41067278385162354, + "learning_rate": 2.5684377810800687e-08, + "loss": 0.44172751903533936, + "step": 3837 + }, + { + "epoch": 0.9785823559408465, + "grad_norm": 0.46978065371513367, + "learning_rate": 2.5083845734116264e-08, + "loss": 0.4297478199005127, + "step": 3838 + }, + { + "epoch": 0.9788373278939316, + "grad_norm": 0.43518373370170593, + "learning_rate": 2.4490408723097936e-08, + "loss": 0.43526455760002136, + "step": 3839 + }, + { + "epoch": 0.9790922998470168, + "grad_norm": 0.4430595338344574, + "learning_rate": 2.3904067199853607e-08, + "loss": 0.44849714636802673, + "step": 3840 + }, + { + "epoch": 0.979347271800102, + "grad_norm": 0.5210933089256287, + "learning_rate": 2.332482158144078e-08, + "loss": 0.4486989974975586, + "step": 3841 + }, + { + "epoch": 0.9796022437531872, + "grad_norm": 0.45702749490737915, + "learning_rate": 2.2752672279873212e-08, + "loss": 0.4192723035812378, + "step": 3842 + }, + { + "epoch": 0.9798572157062723, + "grad_norm": 0.4321991205215454, + "learning_rate": 2.2187619702113138e-08, + "loss": 0.4389612078666687, + "step": 3843 + }, + { + "epoch": 0.9801121876593575, + "grad_norm": 0.435012549161911, + "learning_rate": 2.1629664250080172e-08, + "loss": 0.43103253841400146, + "step": 3844 + }, + { + "epoch": 0.9803671596124426, + "grad_norm": 0.4384916424751282, + "learning_rate": 2.1078806320640187e-08, + "loss": 0.42653101682662964, + "step": 3845 + }, + { + "epoch": 0.9806221315655278, + "grad_norm": 0.42758908867836, + "learning_rate": 2.0535046305614202e-08, + "loss": 0.42084115743637085, + "step": 3846 + }, + { + "epoch": 0.9808771035186129, + "grad_norm": 0.4485051929950714, + "learning_rate": 1.9998384591773945e-08, + "loss": 0.4244152009487152, + "step": 3847 + }, + { + "epoch": 0.9811320754716981, + "grad_norm": 0.43234965205192566, + "learning_rate": 1.9468821560841845e-08, + "loss": 0.43603652715682983, + "step": 3848 + }, + { + "epoch": 0.9813870474247832, + "grad_norm": 0.4192225933074951, + "learning_rate": 1.8946357589492147e-08, + "loss": 0.42890340089797974, + "step": 3849 + }, + { + "epoch": 0.9816420193778684, + "grad_norm": 0.4449460804462433, + "learning_rate": 1.843099304934759e-08, + "loss": 0.4340965151786804, + "step": 3850 + }, + { + "epoch": 0.9818969913309537, + "grad_norm": 0.46211758255958557, + "learning_rate": 1.792272830698161e-08, + "loss": 0.43783053755760193, + "step": 3851 + }, + { + "epoch": 0.9821519632840388, + "grad_norm": 0.41678836941719055, + "learning_rate": 1.7421563723919453e-08, + "loss": 0.43343478441238403, + "step": 3852 + }, + { + "epoch": 0.982406935237124, + "grad_norm": 0.4770534336566925, + "learning_rate": 1.692749965663376e-08, + "loss": 0.42233383655548096, + "step": 3853 + }, + { + "epoch": 0.9826619071902091, + "grad_norm": 0.44247758388519287, + "learning_rate": 1.6440536456547863e-08, + "loss": 0.4290817975997925, + "step": 3854 + }, + { + "epoch": 0.9829168791432943, + "grad_norm": 0.4584869146347046, + "learning_rate": 1.5960674470035796e-08, + "loss": 0.43462952971458435, + "step": 3855 + }, + { + "epoch": 0.9831718510963794, + "grad_norm": 0.42376816272735596, + "learning_rate": 1.5487914038417873e-08, + "loss": 0.4478336274623871, + "step": 3856 + }, + { + "epoch": 0.9834268230494646, + "grad_norm": 0.5084519386291504, + "learning_rate": 1.502225549796288e-08, + "loss": 0.4331415593624115, + "step": 3857 + }, + { + "epoch": 0.9836817950025497, + "grad_norm": 0.4290451109409332, + "learning_rate": 1.4563699179892532e-08, + "loss": 0.4360179305076599, + "step": 3858 + }, + { + "epoch": 0.9839367669556349, + "grad_norm": 0.44008415937423706, + "learning_rate": 1.4112245410370373e-08, + "loss": 0.4326023459434509, + "step": 3859 + }, + { + "epoch": 0.98419173890872, + "grad_norm": 0.4248690605163574, + "learning_rate": 1.366789451051287e-08, + "loss": 0.4331281781196594, + "step": 3860 + }, + { + "epoch": 0.9844467108618052, + "grad_norm": 0.43502941727638245, + "learning_rate": 1.3230646796383862e-08, + "loss": 0.4257779121398926, + "step": 3861 + }, + { + "epoch": 0.9847016828148903, + "grad_norm": 0.4281660318374634, + "learning_rate": 1.2800502578991236e-08, + "loss": 0.43861016631126404, + "step": 3862 + }, + { + "epoch": 0.9849566547679756, + "grad_norm": 0.4268348813056946, + "learning_rate": 1.2377462164294695e-08, + "loss": 0.4324760138988495, + "step": 3863 + }, + { + "epoch": 0.9852116267210607, + "grad_norm": 0.42239123582839966, + "learning_rate": 1.1961525853196876e-08, + "loss": 0.4312841296195984, + "step": 3864 + }, + { + "epoch": 0.9854665986741459, + "grad_norm": 0.43889138102531433, + "learning_rate": 1.1552693941551118e-08, + "loss": 0.4337885081768036, + "step": 3865 + }, + { + "epoch": 0.985721570627231, + "grad_norm": 0.43608731031417847, + "learning_rate": 1.115096672015481e-08, + "loss": 0.4339744448661804, + "step": 3866 + }, + { + "epoch": 0.9859765425803162, + "grad_norm": 0.41787976026535034, + "learning_rate": 1.0756344474753822e-08, + "loss": 0.4270380735397339, + "step": 3867 + }, + { + "epoch": 0.9862315145334013, + "grad_norm": 0.4472630023956299, + "learning_rate": 1.036882748603918e-08, + "loss": 0.43142664432525635, + "step": 3868 + }, + { + "epoch": 0.9864864864864865, + "grad_norm": 0.42906099557876587, + "learning_rate": 9.988416029647063e-09, + "loss": 0.43397778272628784, + "step": 3869 + }, + { + "epoch": 0.9867414584395716, + "grad_norm": 0.4523125886917114, + "learning_rate": 9.61511037616103e-09, + "loss": 0.4238213896751404, + "step": 3870 + }, + { + "epoch": 0.9869964303926568, + "grad_norm": 0.4824058711528778, + "learning_rate": 9.248910791109789e-09, + "loss": 0.43059441447257996, + "step": 3871 + }, + { + "epoch": 0.9872514023457419, + "grad_norm": 0.4468144178390503, + "learning_rate": 8.889817534969425e-09, + "loss": 0.42374369502067566, + "step": 3872 + }, + { + "epoch": 0.9875063742988271, + "grad_norm": 0.4466455578804016, + "learning_rate": 8.537830863157848e-09, + "loss": 0.43019899725914, + "step": 3873 + }, + { + "epoch": 0.9877613462519123, + "grad_norm": 0.4248717427253723, + "learning_rate": 8.19295102603923e-09, + "loss": 0.4276416301727295, + "step": 3874 + }, + { + "epoch": 0.9880163182049975, + "grad_norm": 0.4355369508266449, + "learning_rate": 7.855178268926233e-09, + "loss": 0.44368404150009155, + "step": 3875 + }, + { + "epoch": 0.9882712901580826, + "grad_norm": 0.4394513964653015, + "learning_rate": 7.524512832071117e-09, + "loss": 0.42616432905197144, + "step": 3876 + }, + { + "epoch": 0.9885262621111678, + "grad_norm": 0.45191922783851624, + "learning_rate": 7.200954950673522e-09, + "loss": 0.4272156357765198, + "step": 3877 + }, + { + "epoch": 0.9887812340642529, + "grad_norm": 0.42931079864501953, + "learning_rate": 6.8845048548782375e-09, + "loss": 0.42737168073654175, + "step": 3878 + }, + { + "epoch": 0.9890362060173381, + "grad_norm": 0.46329110860824585, + "learning_rate": 6.575162769771881e-09, + "loss": 0.4369872510433197, + "step": 3879 + }, + { + "epoch": 0.9892911779704232, + "grad_norm": 0.42689400911331177, + "learning_rate": 6.272928915387333e-09, + "loss": 0.44038841128349304, + "step": 3880 + }, + { + "epoch": 0.9895461499235084, + "grad_norm": 0.4330800771713257, + "learning_rate": 5.977803506701518e-09, + "loss": 0.42824047803878784, + "step": 3881 + }, + { + "epoch": 0.9898011218765935, + "grad_norm": 0.4149211645126343, + "learning_rate": 5.689786753633186e-09, + "loss": 0.4450784921646118, + "step": 3882 + }, + { + "epoch": 0.9900560938296787, + "grad_norm": 0.4341184198856354, + "learning_rate": 5.40887886104624e-09, + "loss": 0.4351019859313965, + "step": 3883 + }, + { + "epoch": 0.9903110657827638, + "grad_norm": 0.46087369322776794, + "learning_rate": 5.135080028748629e-09, + "loss": 0.4396669566631317, + "step": 3884 + }, + { + "epoch": 0.9905660377358491, + "grad_norm": 0.4476439654827118, + "learning_rate": 4.868390451490123e-09, + "loss": 0.4363985061645508, + "step": 3885 + }, + { + "epoch": 0.9908210096889342, + "grad_norm": 0.44064781069755554, + "learning_rate": 4.608810318964541e-09, + "loss": 0.42739638686180115, + "step": 3886 + }, + { + "epoch": 0.9910759816420194, + "grad_norm": 0.4554571211338043, + "learning_rate": 4.35633981580974e-09, + "loss": 0.4383185803890228, + "step": 3887 + }, + { + "epoch": 0.9913309535951046, + "grad_norm": 0.4218502640724182, + "learning_rate": 4.110979121604297e-09, + "loss": 0.4426953196525574, + "step": 3888 + }, + { + "epoch": 0.9915859255481897, + "grad_norm": 0.4349331557750702, + "learning_rate": 3.87272841087194e-09, + "loss": 0.4466049075126648, + "step": 3889 + }, + { + "epoch": 0.9918408975012749, + "grad_norm": 0.42966899275779724, + "learning_rate": 3.641587853077111e-09, + "loss": 0.43008658289909363, + "step": 3890 + }, + { + "epoch": 0.99209586945436, + "grad_norm": 0.4374047815799713, + "learning_rate": 3.4175576126305177e-09, + "loss": 0.4254233241081238, + "step": 3891 + }, + { + "epoch": 0.9923508414074452, + "grad_norm": 0.4157470464706421, + "learning_rate": 3.20063784888025e-09, + "loss": 0.4410827159881592, + "step": 3892 + }, + { + "epoch": 0.9926058133605303, + "grad_norm": 0.42432066798210144, + "learning_rate": 2.9908287161195537e-09, + "loss": 0.4353354871273041, + "step": 3893 + }, + { + "epoch": 0.9928607853136155, + "grad_norm": 0.5254224538803101, + "learning_rate": 2.788130363584607e-09, + "loss": 0.42760753631591797, + "step": 3894 + }, + { + "epoch": 0.9931157572667006, + "grad_norm": 0.4437570869922638, + "learning_rate": 2.5925429354534126e-09, + "loss": 0.4352593421936035, + "step": 3895 + }, + { + "epoch": 0.9933707292197859, + "grad_norm": 0.45760712027549744, + "learning_rate": 2.404066570843577e-09, + "loss": 0.4349639415740967, + "step": 3896 + }, + { + "epoch": 0.993625701172871, + "grad_norm": 0.4340839684009552, + "learning_rate": 2.222701403818972e-09, + "loss": 0.4380471110343933, + "step": 3897 + }, + { + "epoch": 0.9938806731259562, + "grad_norm": 0.4430701434612274, + "learning_rate": 2.048447563380851e-09, + "loss": 0.43017488718032837, + "step": 3898 + }, + { + "epoch": 0.9941356450790413, + "grad_norm": 0.43190470337867737, + "learning_rate": 1.881305173475623e-09, + "loss": 0.41849285364151, + "step": 3899 + }, + { + "epoch": 0.9943906170321265, + "grad_norm": 0.4917444884777069, + "learning_rate": 1.7212743529892996e-09, + "loss": 0.4396299123764038, + "step": 3900 + }, + { + "epoch": 0.9946455889852116, + "grad_norm": 0.4348951280117035, + "learning_rate": 1.5683552157508275e-09, + "loss": 0.4330969452857971, + "step": 3901 + }, + { + "epoch": 0.9949005609382968, + "grad_norm": 0.43384942412376404, + "learning_rate": 1.422547870530977e-09, + "loss": 0.4398225247859955, + "step": 3902 + }, + { + "epoch": 0.9951555328913819, + "grad_norm": 0.4424363374710083, + "learning_rate": 1.2838524210390113e-09, + "loss": 0.42855989933013916, + "step": 3903 + }, + { + "epoch": 0.9954105048444671, + "grad_norm": 0.4463779926300049, + "learning_rate": 1.1522689659293484e-09, + "loss": 0.4319448471069336, + "step": 3904 + }, + { + "epoch": 0.9956654767975522, + "grad_norm": 0.4403187036514282, + "learning_rate": 1.0277975987960098e-09, + "loss": 0.43797823786735535, + "step": 3905 + }, + { + "epoch": 0.9959204487506375, + "grad_norm": 0.43161749839782715, + "learning_rate": 9.104384081737305e-10, + "loss": 0.43103861808776855, + "step": 3906 + }, + { + "epoch": 0.9961754207037226, + "grad_norm": 0.44430285692214966, + "learning_rate": 8.001914775401798e-10, + "loss": 0.44083261489868164, + "step": 3907 + }, + { + "epoch": 0.9964303926568078, + "grad_norm": 0.4711357355117798, + "learning_rate": 6.970568853115201e-10, + "loss": 0.43897342681884766, + "step": 3908 + }, + { + "epoch": 0.9966853646098929, + "grad_norm": 0.4367968440055847, + "learning_rate": 6.010347048468479e-10, + "loss": 0.4301096200942993, + "step": 3909 + }, + { + "epoch": 0.9969403365629781, + "grad_norm": 0.4353834092617035, + "learning_rate": 5.121250044459736e-10, + "loss": 0.42462557554244995, + "step": 3910 + }, + { + "epoch": 0.9971953085160632, + "grad_norm": 0.4247950613498688, + "learning_rate": 4.3032784735053123e-10, + "loss": 0.42845088243484497, + "step": 3911 + }, + { + "epoch": 0.9974502804691484, + "grad_norm": 0.4372820258140564, + "learning_rate": 3.5564329174064826e-10, + "loss": 0.4321706295013428, + "step": 3912 + }, + { + "epoch": 0.9977052524222335, + "grad_norm": 0.46002548933029175, + "learning_rate": 2.8807139073827597e-10, + "loss": 0.43641120195388794, + "step": 3913 + }, + { + "epoch": 0.9979602243753187, + "grad_norm": 0.4415642321109772, + "learning_rate": 2.276121924094099e-10, + "loss": 0.4387682378292084, + "step": 3914 + }, + { + "epoch": 0.9982151963284038, + "grad_norm": 0.43121448159217834, + "learning_rate": 1.7426573975520834e-10, + "loss": 0.4379480481147766, + "step": 3915 + }, + { + "epoch": 0.998470168281489, + "grad_norm": 0.4421471059322357, + "learning_rate": 1.2803207072198398e-10, + "loss": 0.419895738363266, + "step": 3916 + }, + { + "epoch": 0.9987251402345741, + "grad_norm": 0.4430276155471802, + "learning_rate": 8.891121819565307e-11, + "loss": 0.43177294731140137, + "step": 3917 + }, + { + "epoch": 0.9989801121876594, + "grad_norm": 0.429384708404541, + "learning_rate": 5.690321000062504e-11, + "loss": 0.4321375787258148, + "step": 3918 + }, + { + "epoch": 0.9992350841407445, + "grad_norm": 0.41285955905914307, + "learning_rate": 3.200806890646391e-11, + "loss": 0.44204482436180115, + "step": 3919 + }, + { + "epoch": 0.9994900560938297, + "grad_norm": 0.42164909839630127, + "learning_rate": 1.4225812619006462e-11, + "loss": 0.4531802535057068, + "step": 3920 + }, + { + "epoch": 0.9997450280469148, + "grad_norm": 0.4804461896419525, + "learning_rate": 3.556453787023628e-12, + "loss": 0.43864184617996216, + "step": 3921 + }, + { + "epoch": 1.0, + "grad_norm": 0.46723073720932007, + "learning_rate": 0.0, + "loss": 0.42660099267959595, + "step": 3922 + } + ], + "logging_steps": 1, + "max_steps": 3922, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.4774551385835726e+20, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}