| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9991487623003845, |
| "eval_steps": 500, |
| "global_step": 2751, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.010895842555075079, |
| "grad_norm": 2.5097851753234863, |
| "learning_rate": 1.9927299163940386e-05, |
| "loss": 0.51, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.021791685110150158, |
| "grad_norm": 1.4778149127960205, |
| "learning_rate": 1.985459832788077e-05, |
| "loss": 0.0818, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03268752766522524, |
| "grad_norm": 1.0869137048721313, |
| "learning_rate": 1.978189749182116e-05, |
| "loss": 0.1052, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.043583370220300316, |
| "grad_norm": 1.0245683193206787, |
| "learning_rate": 1.970919665576154e-05, |
| "loss": 0.0508, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.054479212775375395, |
| "grad_norm": 3.580275535583496, |
| "learning_rate": 1.963649581970193e-05, |
| "loss": 0.0336, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.06537505533045047, |
| "grad_norm": 13.708664894104004, |
| "learning_rate": 1.9563794983642313e-05, |
| "loss": 0.0247, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07627089788552556, |
| "grad_norm": 2.38472843170166, |
| "learning_rate": 1.9491094147582698e-05, |
| "loss": 0.0474, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.08716674044060063, |
| "grad_norm": 2.3008601665496826, |
| "learning_rate": 1.9418393311523086e-05, |
| "loss": 0.0505, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.09806258299567572, |
| "grad_norm": 0.9727557301521301, |
| "learning_rate": 1.9345692475463468e-05, |
| "loss": 0.0291, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.10895842555075079, |
| "grad_norm": 0.017909426242113113, |
| "learning_rate": 1.9272991639403856e-05, |
| "loss": 0.0232, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.11985426810582588, |
| "grad_norm": 0.13610009849071503, |
| "learning_rate": 1.920029080334424e-05, |
| "loss": 0.0679, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.13075011066090095, |
| "grad_norm": 0.09146017581224442, |
| "learning_rate": 1.9127589967284625e-05, |
| "loss": 0.062, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.14164595321597603, |
| "grad_norm": 3.696361541748047, |
| "learning_rate": 1.9054889131225013e-05, |
| "loss": 0.0515, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.15254179577105112, |
| "grad_norm": 0.07528296858072281, |
| "learning_rate": 1.8982188295165395e-05, |
| "loss": 0.008, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.16343763832612618, |
| "grad_norm": 0.3899368345737457, |
| "learning_rate": 1.8909487459105783e-05, |
| "loss": 0.0309, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.17433348088120126, |
| "grad_norm": 0.5960955619812012, |
| "learning_rate": 1.8836786623046168e-05, |
| "loss": 0.0415, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.18522932343627635, |
| "grad_norm": 0.027237065136432648, |
| "learning_rate": 1.8764085786986552e-05, |
| "loss": 0.0257, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.19612516599135144, |
| "grad_norm": 6.851381778717041, |
| "learning_rate": 1.8691384950926937e-05, |
| "loss": 0.0585, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2070210085464265, |
| "grad_norm": 1.518951416015625, |
| "learning_rate": 1.8618684114867322e-05, |
| "loss": 0.0173, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.21791685110150158, |
| "grad_norm": 16.334980010986328, |
| "learning_rate": 1.854598327880771e-05, |
| "loss": 0.0892, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.22881269365657667, |
| "grad_norm": 0.6327227354049683, |
| "learning_rate": 1.847328244274809e-05, |
| "loss": 0.0391, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.23970853621165175, |
| "grad_norm": 0.026528311893343925, |
| "learning_rate": 1.840058160668848e-05, |
| "loss": 0.0316, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2506043787667268, |
| "grad_norm": 0.15849582850933075, |
| "learning_rate": 1.8327880770628864e-05, |
| "loss": 0.0306, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.2615002213218019, |
| "grad_norm": 8.983612060546875, |
| "learning_rate": 1.825517993456925e-05, |
| "loss": 0.0252, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.272396063876877, |
| "grad_norm": 1.4300966262817383, |
| "learning_rate": 1.8182479098509634e-05, |
| "loss": 0.0307, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.28329190643195207, |
| "grad_norm": 0.19248631596565247, |
| "learning_rate": 1.810977826245002e-05, |
| "loss": 0.034, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.29418774898702715, |
| "grad_norm": 0.0807420164346695, |
| "learning_rate": 1.8037077426390407e-05, |
| "loss": 0.0218, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.30508359154210224, |
| "grad_norm": 0.04030030593276024, |
| "learning_rate": 1.796437659033079e-05, |
| "loss": 0.0164, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.31597943409717727, |
| "grad_norm": 0.03919893503189087, |
| "learning_rate": 1.7891675754271176e-05, |
| "loss": 0.0207, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.32687527665225236, |
| "grad_norm": 0.9118878245353699, |
| "learning_rate": 1.781897491821156e-05, |
| "loss": 0.0254, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.33777111920732744, |
| "grad_norm": 0.09405702352523804, |
| "learning_rate": 1.7746274082151945e-05, |
| "loss": 0.0072, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3486669617624025, |
| "grad_norm": 1.061004638671875, |
| "learning_rate": 1.7673573246092334e-05, |
| "loss": 0.0178, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.3595628043174776, |
| "grad_norm": 0.35136711597442627, |
| "learning_rate": 1.7600872410032715e-05, |
| "loss": 0.0268, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.3704586468725527, |
| "grad_norm": 0.33769288659095764, |
| "learning_rate": 1.7528171573973103e-05, |
| "loss": 0.0383, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.3813544894276278, |
| "grad_norm": 1.448626160621643, |
| "learning_rate": 1.7455470737913488e-05, |
| "loss": 0.0214, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.39225033198270287, |
| "grad_norm": 1.096685767173767, |
| "learning_rate": 1.7382769901853873e-05, |
| "loss": 0.0442, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.4031461745377779, |
| "grad_norm": 0.08582064509391785, |
| "learning_rate": 1.7310069065794257e-05, |
| "loss": 0.041, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.414042017092853, |
| "grad_norm": 0.5726041793823242, |
| "learning_rate": 1.7237368229734642e-05, |
| "loss": 0.02, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.4249378596479281, |
| "grad_norm": 0.27912572026252747, |
| "learning_rate": 1.716466739367503e-05, |
| "loss": 0.033, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.43583370220300316, |
| "grad_norm": 0.40194639563560486, |
| "learning_rate": 1.7091966557615415e-05, |
| "loss": 0.0297, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.44672954475807825, |
| "grad_norm": 0.4923015832901001, |
| "learning_rate": 1.70192657215558e-05, |
| "loss": 0.0473, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.45762538731315333, |
| "grad_norm": 0.4864579439163208, |
| "learning_rate": 1.6946564885496184e-05, |
| "loss": 0.0335, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4685212298682284, |
| "grad_norm": 0.0577218122780323, |
| "learning_rate": 1.687386404943657e-05, |
| "loss": 0.0267, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.4794170724233035, |
| "grad_norm": 0.026588434353470802, |
| "learning_rate": 1.6801163213376954e-05, |
| "loss": 0.0242, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.4903129149783786, |
| "grad_norm": 1.106031060218811, |
| "learning_rate": 1.6728462377317342e-05, |
| "loss": 0.0412, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5012087575334536, |
| "grad_norm": 2.185438394546509, |
| "learning_rate": 1.6655761541257727e-05, |
| "loss": 0.0168, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5121046000885288, |
| "grad_norm": 0.2645202577114105, |
| "learning_rate": 1.658306070519811e-05, |
| "loss": 0.0225, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5230004426436038, |
| "grad_norm": 0.26281026005744934, |
| "learning_rate": 1.6510359869138496e-05, |
| "loss": 0.0225, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5338962851986789, |
| "grad_norm": 0.09611400961875916, |
| "learning_rate": 1.643765903307888e-05, |
| "loss": 0.0204, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.544792127753754, |
| "grad_norm": 0.2964985966682434, |
| "learning_rate": 1.6364958197019266e-05, |
| "loss": 0.0192, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.555687970308829, |
| "grad_norm": 3.2991862297058105, |
| "learning_rate": 1.629225736095965e-05, |
| "loss": 0.0395, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5665838128639041, |
| "grad_norm": 0.9299785494804382, |
| "learning_rate": 1.621955652490004e-05, |
| "loss": 0.0213, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5774796554189792, |
| "grad_norm": 1.7656854391098022, |
| "learning_rate": 1.6146855688840423e-05, |
| "loss": 0.0293, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5883754979740543, |
| "grad_norm": 0.052940454334020615, |
| "learning_rate": 1.6074154852780808e-05, |
| "loss": 0.0349, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5992713405291293, |
| "grad_norm": 0.6700181365013123, |
| "learning_rate": 1.6001454016721193e-05, |
| "loss": 0.0098, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6101671830842045, |
| "grad_norm": 1.4992352724075317, |
| "learning_rate": 1.5928753180661577e-05, |
| "loss": 0.0209, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.6210630256392795, |
| "grad_norm": 0.6882705688476562, |
| "learning_rate": 1.5856052344601966e-05, |
| "loss": 0.0208, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.6319588681943545, |
| "grad_norm": 0.35566991567611694, |
| "learning_rate": 1.578335150854235e-05, |
| "loss": 0.0157, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.6428547107494297, |
| "grad_norm": 0.1365765929222107, |
| "learning_rate": 1.5710650672482735e-05, |
| "loss": 0.0207, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.6537505533045047, |
| "grad_norm": 0.010805984027683735, |
| "learning_rate": 1.563794983642312e-05, |
| "loss": 0.0386, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6646463958595799, |
| "grad_norm": 0.33677366375923157, |
| "learning_rate": 1.5565249000363505e-05, |
| "loss": 0.0178, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6755422384146549, |
| "grad_norm": 0.023768046870827675, |
| "learning_rate": 1.5492548164303893e-05, |
| "loss": 0.0115, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.68643808096973, |
| "grad_norm": 1.271041989326477, |
| "learning_rate": 1.5419847328244274e-05, |
| "loss": 0.0335, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.697333923524805, |
| "grad_norm": 0.39303043484687805, |
| "learning_rate": 1.5347146492184662e-05, |
| "loss": 0.0456, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.7082297660798802, |
| "grad_norm": 1.5450124740600586, |
| "learning_rate": 1.5274445656125047e-05, |
| "loss": 0.0206, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7191256086349552, |
| "grad_norm": 0.12599903345108032, |
| "learning_rate": 1.5201744820065432e-05, |
| "loss": 0.0125, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7300214511900303, |
| "grad_norm": 0.03158240765333176, |
| "learning_rate": 1.5129043984005818e-05, |
| "loss": 0.0019, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7409172937451054, |
| "grad_norm": 1.2820944786071777, |
| "learning_rate": 1.5056343147946201e-05, |
| "loss": 0.0132, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.7518131363001804, |
| "grad_norm": 0.4018807113170624, |
| "learning_rate": 1.4983642311886588e-05, |
| "loss": 0.0274, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.7627089788552556, |
| "grad_norm": 0.7147946953773499, |
| "learning_rate": 1.4910941475826972e-05, |
| "loss": 0.0207, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7736048214103306, |
| "grad_norm": 1.3514039516448975, |
| "learning_rate": 1.4838240639767359e-05, |
| "loss": 0.0088, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.7845006639654057, |
| "grad_norm": 0.10958287864923477, |
| "learning_rate": 1.4765539803707745e-05, |
| "loss": 0.0054, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.7953965065204808, |
| "grad_norm": 0.12291970103979111, |
| "learning_rate": 1.4692838967648128e-05, |
| "loss": 0.0154, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.8062923490755558, |
| "grad_norm": 0.056142911314964294, |
| "learning_rate": 1.4620138131588515e-05, |
| "loss": 0.0214, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.817188191630631, |
| "grad_norm": 0.08367596566677094, |
| "learning_rate": 1.45474372955289e-05, |
| "loss": 0.0074, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.828084034185706, |
| "grad_norm": 0.8847033381462097, |
| "learning_rate": 1.4474736459469286e-05, |
| "loss": 0.052, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.8389798767407811, |
| "grad_norm": 0.23346182703971863, |
| "learning_rate": 1.4402035623409672e-05, |
| "loss": 0.0238, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.8498757192958561, |
| "grad_norm": 0.7445326447486877, |
| "learning_rate": 1.4329334787350055e-05, |
| "loss": 0.0179, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.8607715618509313, |
| "grad_norm": 1.623715877532959, |
| "learning_rate": 1.4256633951290442e-05, |
| "loss": 0.0138, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.8716674044060063, |
| "grad_norm": 0.12205464392900467, |
| "learning_rate": 1.4183933115230826e-05, |
| "loss": 0.0182, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8825632469610815, |
| "grad_norm": 0.015034107491374016, |
| "learning_rate": 1.4111232279171211e-05, |
| "loss": 0.0192, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.8934590895161565, |
| "grad_norm": 1.1116948127746582, |
| "learning_rate": 1.4038531443111596e-05, |
| "loss": 0.0329, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.9043549320712315, |
| "grad_norm": 0.35468608140945435, |
| "learning_rate": 1.3965830607051982e-05, |
| "loss": 0.0299, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.9152507746263067, |
| "grad_norm": 1.3069281578063965, |
| "learning_rate": 1.3893129770992369e-05, |
| "loss": 0.028, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.9261466171813817, |
| "grad_norm": 0.6548961997032166, |
| "learning_rate": 1.3820428934932752e-05, |
| "loss": 0.0125, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9370424597364568, |
| "grad_norm": 0.016538333147764206, |
| "learning_rate": 1.3747728098873138e-05, |
| "loss": 0.0097, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.9479383022915319, |
| "grad_norm": 0.7220777273178101, |
| "learning_rate": 1.3675027262813523e-05, |
| "loss": 0.0281, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.958834144846607, |
| "grad_norm": 7.228305339813232, |
| "learning_rate": 1.360232642675391e-05, |
| "loss": 0.0095, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.969729987401682, |
| "grad_norm": 0.31951704621315, |
| "learning_rate": 1.3529625590694292e-05, |
| "loss": 0.0148, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.9806258299567572, |
| "grad_norm": 0.009546870365738869, |
| "learning_rate": 1.3456924754634679e-05, |
| "loss": 0.0051, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.9915216725118322, |
| "grad_norm": 2.050363063812256, |
| "learning_rate": 1.3384223918575065e-05, |
| "loss": 0.0306, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.0032687527665225, |
| "grad_norm": 1.1950825452804565, |
| "learning_rate": 1.331152308251545e-05, |
| "loss": 0.0061, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.0141645953215976, |
| "grad_norm": 0.02007538639008999, |
| "learning_rate": 1.3238822246455837e-05, |
| "loss": 0.005, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.0250604378766728, |
| "grad_norm": 0.053643591701984406, |
| "learning_rate": 1.316612141039622e-05, |
| "loss": 0.0093, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.0359562804317477, |
| "grad_norm": 0.13197128474712372, |
| "learning_rate": 1.3093420574336606e-05, |
| "loss": 0.0123, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.0468521229868228, |
| "grad_norm": 0.20932506024837494, |
| "learning_rate": 1.3020719738276992e-05, |
| "loss": 0.0267, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.057747965541898, |
| "grad_norm": 0.11939968913793564, |
| "learning_rate": 1.2948018902217377e-05, |
| "loss": 0.0042, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.068643808096973, |
| "grad_norm": 0.08671363443136215, |
| "learning_rate": 1.2875318066157762e-05, |
| "loss": 0.009, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.079539650652048, |
| "grad_norm": 0.025082537904381752, |
| "learning_rate": 1.2802617230098147e-05, |
| "loss": 0.0028, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.0904354932071232, |
| "grad_norm": 0.005358474794775248, |
| "learning_rate": 1.2729916394038533e-05, |
| "loss": 0.0017, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.1013313357621983, |
| "grad_norm": 0.008662994019687176, |
| "learning_rate": 1.2657215557978916e-05, |
| "loss": 0.0013, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.1122271783172732, |
| "grad_norm": 2.0191564559936523, |
| "learning_rate": 1.2584514721919303e-05, |
| "loss": 0.0179, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.1231230208723484, |
| "grad_norm": 0.025384988635778427, |
| "learning_rate": 1.2511813885859689e-05, |
| "loss": 0.02, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.1340188634274235, |
| "grad_norm": 0.011868833564221859, |
| "learning_rate": 1.2439113049800074e-05, |
| "loss": 0.0024, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.1449147059824987, |
| "grad_norm": 0.010154581628739834, |
| "learning_rate": 1.236641221374046e-05, |
| "loss": 0.0053, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.1558105485375736, |
| "grad_norm": 0.09402716159820557, |
| "learning_rate": 1.2293711377680843e-05, |
| "loss": 0.005, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.1667063910926487, |
| "grad_norm": 0.3972262442111969, |
| "learning_rate": 1.222101054162123e-05, |
| "loss": 0.0065, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.1776022336477239, |
| "grad_norm": 0.02627560682594776, |
| "learning_rate": 1.2148309705561614e-05, |
| "loss": 0.0192, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.1884980762027988, |
| "grad_norm": 0.538215160369873, |
| "learning_rate": 1.2075608869502e-05, |
| "loss": 0.0073, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.199393918757874, |
| "grad_norm": 0.48226070404052734, |
| "learning_rate": 1.2002908033442387e-05, |
| "loss": 0.0009, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.210289761312949, |
| "grad_norm": 0.5596455335617065, |
| "learning_rate": 1.193020719738277e-05, |
| "loss": 0.0119, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.2211856038680242, |
| "grad_norm": 0.03299971669912338, |
| "learning_rate": 1.1857506361323157e-05, |
| "loss": 0.0025, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.2320814464230991, |
| "grad_norm": 0.03791365772485733, |
| "learning_rate": 1.1784805525263541e-05, |
| "loss": 0.0147, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.2429772889781743, |
| "grad_norm": 0.6537386178970337, |
| "learning_rate": 1.1712104689203926e-05, |
| "loss": 0.0026, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.2538731315332494, |
| "grad_norm": 0.02327698841691017, |
| "learning_rate": 1.1639403853144313e-05, |
| "loss": 0.0012, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.2647689740883243, |
| "grad_norm": 0.024980690330266953, |
| "learning_rate": 1.1566703017084697e-05, |
| "loss": 0.0053, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.2756648166433995, |
| "grad_norm": 0.01306835189461708, |
| "learning_rate": 1.1494002181025084e-05, |
| "loss": 0.0179, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.2865606591984746, |
| "grad_norm": 0.005500817205756903, |
| "learning_rate": 1.1421301344965467e-05, |
| "loss": 0.0117, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.2974565017535498, |
| "grad_norm": 2.294457197189331, |
| "learning_rate": 1.1348600508905853e-05, |
| "loss": 0.0065, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.3083523443086247, |
| "grad_norm": 3.2596099376678467, |
| "learning_rate": 1.1275899672846238e-05, |
| "loss": 0.0128, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.3192481868636998, |
| "grad_norm": 0.014325232245028019, |
| "learning_rate": 1.1203198836786624e-05, |
| "loss": 0.004, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.330144029418775, |
| "grad_norm": 0.08742561936378479, |
| "learning_rate": 1.1130498000727011e-05, |
| "loss": 0.005, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.3410398719738499, |
| "grad_norm": 0.06310788542032242, |
| "learning_rate": 1.1057797164667394e-05, |
| "loss": 0.0062, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.351935714528925, |
| "grad_norm": 0.02661961503326893, |
| "learning_rate": 1.098509632860778e-05, |
| "loss": 0.001, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.3628315570840002, |
| "grad_norm": 0.008728576824069023, |
| "learning_rate": 1.0912395492548165e-05, |
| "loss": 0.0065, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.3737273996390753, |
| "grad_norm": 0.40287479758262634, |
| "learning_rate": 1.0839694656488552e-05, |
| "loss": 0.0115, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.3846232421941502, |
| "grad_norm": 0.0008290009573101997, |
| "learning_rate": 1.0766993820428935e-05, |
| "loss": 0.0023, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.3955190847492254, |
| "grad_norm": 0.20154079794883728, |
| "learning_rate": 1.0694292984369321e-05, |
| "loss": 0.004, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.4064149273043005, |
| "grad_norm": 0.032378897070884705, |
| "learning_rate": 1.0621592148309707e-05, |
| "loss": 0.0103, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.4173107698593754, |
| "grad_norm": 0.037077393382787704, |
| "learning_rate": 1.0548891312250092e-05, |
| "loss": 0.0048, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4282066124144506, |
| "grad_norm": 0.0009527279180474579, |
| "learning_rate": 1.0476190476190477e-05, |
| "loss": 0.0197, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.4391024549695257, |
| "grad_norm": 0.6460732221603394, |
| "learning_rate": 1.0403489640130862e-05, |
| "loss": 0.0085, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.4499982975246009, |
| "grad_norm": 0.18065184354782104, |
| "learning_rate": 1.0330788804071248e-05, |
| "loss": 0.0021, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.4608941400796758, |
| "grad_norm": 0.08325136452913284, |
| "learning_rate": 1.0258087968011631e-05, |
| "loss": 0.0079, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.471789982634751, |
| "grad_norm": 0.0035695817787200212, |
| "learning_rate": 1.0185387131952018e-05, |
| "loss": 0.0001, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.482685825189826, |
| "grad_norm": 0.00448552705347538, |
| "learning_rate": 1.0112686295892404e-05, |
| "loss": 0.0004, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.493581667744901, |
| "grad_norm": 0.027783585712313652, |
| "learning_rate": 1.0039985459832789e-05, |
| "loss": 0.011, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.5044775102999761, |
| "grad_norm": 2.4403154850006104, |
| "learning_rate": 9.967284623773175e-06, |
| "loss": 0.0162, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.5153733528550513, |
| "grad_norm": 0.031121332198381424, |
| "learning_rate": 9.89458378771356e-06, |
| "loss": 0.0019, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.5262691954101264, |
| "grad_norm": 0.01372817624360323, |
| "learning_rate": 9.821882951653945e-06, |
| "loss": 0.0107, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.5371650379652015, |
| "grad_norm": 0.015296364203095436, |
| "learning_rate": 9.74918211559433e-06, |
| "loss": 0.0107, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.5480608805202765, |
| "grad_norm": 0.022742554545402527, |
| "learning_rate": 9.676481279534716e-06, |
| "loss": 0.0055, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.5589567230753516, |
| "grad_norm": 0.005425534211099148, |
| "learning_rate": 9.6037804434751e-06, |
| "loss": 0.001, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.5698525656304265, |
| "grad_norm": 0.0004977713688276708, |
| "learning_rate": 9.531079607415487e-06, |
| "loss": 0.0015, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.5807484081855017, |
| "grad_norm": 0.016388392075896263, |
| "learning_rate": 9.458378771355872e-06, |
| "loss": 0.0213, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.5916442507405768, |
| "grad_norm": 0.029239172115921974, |
| "learning_rate": 9.385677935296256e-06, |
| "loss": 0.0032, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.602540093295652, |
| "grad_norm": 0.25184109807014465, |
| "learning_rate": 9.312977099236641e-06, |
| "loss": 0.0139, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.613435935850727, |
| "grad_norm": 0.5452978014945984, |
| "learning_rate": 9.240276263177028e-06, |
| "loss": 0.001, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.624331778405802, |
| "grad_norm": 0.00713045010343194, |
| "learning_rate": 9.167575427117412e-06, |
| "loss": 0.0068, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.6352276209608771, |
| "grad_norm": 0.04856117442250252, |
| "learning_rate": 9.094874591057799e-06, |
| "loss": 0.013, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.646123463515952, |
| "grad_norm": 0.6631866693496704, |
| "learning_rate": 9.022173754998184e-06, |
| "loss": 0.0118, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.6570193060710272, |
| "grad_norm": 0.34849047660827637, |
| "learning_rate": 8.949472918938568e-06, |
| "loss": 0.004, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.6679151486261024, |
| "grad_norm": 0.011874212883412838, |
| "learning_rate": 8.876772082878955e-06, |
| "loss": 0.002, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.6788109911811775, |
| "grad_norm": 0.05654163286089897, |
| "learning_rate": 8.80407124681934e-06, |
| "loss": 0.0033, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.6897068337362526, |
| "grad_norm": 0.05505364388227463, |
| "learning_rate": 8.731370410759724e-06, |
| "loss": 0.0016, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.7006026762913276, |
| "grad_norm": 0.8052054047584534, |
| "learning_rate": 8.658669574700109e-06, |
| "loss": 0.0033, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.7114985188464027, |
| "grad_norm": 0.001815033028833568, |
| "learning_rate": 8.585968738640495e-06, |
| "loss": 0.0026, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.7223943614014776, |
| "grad_norm": 0.17480531334877014, |
| "learning_rate": 8.51326790258088e-06, |
| "loss": 0.0064, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.7332902039565528, |
| "grad_norm": 0.005486777517944574, |
| "learning_rate": 8.440567066521266e-06, |
| "loss": 0.0208, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.744186046511628, |
| "grad_norm": 0.10310015082359314, |
| "learning_rate": 8.367866230461651e-06, |
| "loss": 0.0005, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.755081889066703, |
| "grad_norm": 0.008104170672595501, |
| "learning_rate": 8.295165394402036e-06, |
| "loss": 0.0087, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.7659777316217782, |
| "grad_norm": 0.033456411212682724, |
| "learning_rate": 8.22246455834242e-06, |
| "loss": 0.0072, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.776873574176853, |
| "grad_norm": 0.007005383726209402, |
| "learning_rate": 8.149763722282807e-06, |
| "loss": 0.014, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.7877694167319282, |
| "grad_norm": 0.012260228395462036, |
| "learning_rate": 8.077062886223192e-06, |
| "loss": 0.0008, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.7986652592870032, |
| "grad_norm": 0.0009957356378436089, |
| "learning_rate": 8.004362050163578e-06, |
| "loss": 0.0014, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.8095611018420783, |
| "grad_norm": 0.005955096334218979, |
| "learning_rate": 7.931661214103963e-06, |
| "loss": 0.0005, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.8204569443971534, |
| "grad_norm": 0.0004700123390648514, |
| "learning_rate": 7.858960378044348e-06, |
| "loss": 0.0028, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.8313527869522286, |
| "grad_norm": 0.002416003029793501, |
| "learning_rate": 7.786259541984733e-06, |
| "loss": 0.0003, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.8422486295073037, |
| "grad_norm": 0.028112288564443588, |
| "learning_rate": 7.713558705925119e-06, |
| "loss": 0.0318, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.8531444720623786, |
| "grad_norm": 0.03914355859160423, |
| "learning_rate": 7.640857869865504e-06, |
| "loss": 0.0139, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.8640403146174538, |
| "grad_norm": 4.869634628295898, |
| "learning_rate": 7.568157033805889e-06, |
| "loss": 0.0098, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.8749361571725287, |
| "grad_norm": 1.1335488557815552, |
| "learning_rate": 7.495456197746275e-06, |
| "loss": 0.0174, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.8858319997276038, |
| "grad_norm": 0.6747786402702332, |
| "learning_rate": 7.42275536168666e-06, |
| "loss": 0.0044, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.896727842282679, |
| "grad_norm": 0.9970724582672119, |
| "learning_rate": 7.350054525627045e-06, |
| "loss": 0.0087, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.9076236848377541, |
| "grad_norm": 0.16893063485622406, |
| "learning_rate": 7.27735368956743e-06, |
| "loss": 0.0032, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.9185195273928293, |
| "grad_norm": 0.8119887709617615, |
| "learning_rate": 7.204652853507816e-06, |
| "loss": 0.0153, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.9294153699479044, |
| "grad_norm": 0.006383243482559919, |
| "learning_rate": 7.131952017448202e-06, |
| "loss": 0.0034, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.9403112125029793, |
| "grad_norm": 0.03637854754924774, |
| "learning_rate": 7.059251181388587e-06, |
| "loss": 0.0034, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.9512070550580543, |
| "grad_norm": 0.04712774232029915, |
| "learning_rate": 6.9865503453289714e-06, |
| "loss": 0.0234, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.9621028976131294, |
| "grad_norm": 6.268856525421143, |
| "learning_rate": 6.913849509269357e-06, |
| "loss": 0.0265, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.9729987401682045, |
| "grad_norm": 0.6448054313659668, |
| "learning_rate": 6.841148673209742e-06, |
| "loss": 0.0057, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.9838945827232797, |
| "grad_norm": 0.07000619918107986, |
| "learning_rate": 6.768447837150128e-06, |
| "loss": 0.0005, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.9947904252783548, |
| "grad_norm": 0.012424224987626076, |
| "learning_rate": 6.695747001090514e-06, |
| "loss": 0.0039, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.006537505533045, |
| "grad_norm": 0.08453727513551712, |
| "learning_rate": 6.6230461650308985e-06, |
| "loss": 0.0006, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.01743334808812, |
| "grad_norm": 0.0390053391456604, |
| "learning_rate": 6.550345328971284e-06, |
| "loss": 0.0006, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.0283291906431953, |
| "grad_norm": 0.013394408859312534, |
| "learning_rate": 6.477644492911669e-06, |
| "loss": 0.0049, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.0392250331982704, |
| "grad_norm": 0.0027593837585300207, |
| "learning_rate": 6.404943656852054e-06, |
| "loss": 0.0008, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.0501208757533456, |
| "grad_norm": 0.0010020197369158268, |
| "learning_rate": 6.332242820792439e-06, |
| "loss": 0.0023, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.0610167183084203, |
| "grad_norm": 0.0010899041080847383, |
| "learning_rate": 6.259541984732826e-06, |
| "loss": 0.0005, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.0719125608634954, |
| "grad_norm": 0.03333039954304695, |
| "learning_rate": 6.18684114867321e-06, |
| "loss": 0.0011, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.0828084034185705, |
| "grad_norm": 0.002606542780995369, |
| "learning_rate": 6.114140312613596e-06, |
| "loss": 0.0062, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.0937042459736457, |
| "grad_norm": 0.008523502387106419, |
| "learning_rate": 6.041439476553981e-06, |
| "loss": 0.0001, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.104600088528721, |
| "grad_norm": 0.005313311703503132, |
| "learning_rate": 5.968738640494366e-06, |
| "loss": 0.0095, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.115495931083796, |
| "grad_norm": 0.030115563422441483, |
| "learning_rate": 5.896037804434751e-06, |
| "loss": 0.0011, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.126391773638871, |
| "grad_norm": 0.001531143207103014, |
| "learning_rate": 5.823336968375137e-06, |
| "loss": 0.0047, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.137287616193946, |
| "grad_norm": 0.013100974261760712, |
| "learning_rate": 5.750636132315522e-06, |
| "loss": 0.0041, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.148183458749021, |
| "grad_norm": 0.010219580493867397, |
| "learning_rate": 5.677935296255908e-06, |
| "loss": 0.0012, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.159079301304096, |
| "grad_norm": 0.02304321527481079, |
| "learning_rate": 5.6052344601962925e-06, |
| "loss": 0.0006, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.1699751438591712, |
| "grad_norm": 0.32716256380081177, |
| "learning_rate": 5.532533624136678e-06, |
| "loss": 0.0005, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.1808709864142464, |
| "grad_norm": 0.003199178259819746, |
| "learning_rate": 5.459832788077063e-06, |
| "loss": 0.0002, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.1917668289693215, |
| "grad_norm": 0.10407451540231705, |
| "learning_rate": 5.387131952017448e-06, |
| "loss": 0.0026, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.2026626715243967, |
| "grad_norm": 0.0036433066707104445, |
| "learning_rate": 5.314431115957834e-06, |
| "loss": 0.0053, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.2135585140794714, |
| "grad_norm": 0.22139491140842438, |
| "learning_rate": 5.2417302798982195e-06, |
| "loss": 0.0013, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.2244543566345465, |
| "grad_norm": 0.00901265349239111, |
| "learning_rate": 5.169029443838604e-06, |
| "loss": 0.0004, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.2353501991896216, |
| "grad_norm": 0.007596256677061319, |
| "learning_rate": 5.09632860777899e-06, |
| "loss": 0.0002, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.2462460417446968, |
| "grad_norm": 0.05308268591761589, |
| "learning_rate": 5.023627771719375e-06, |
| "loss": 0.0001, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.257141884299772, |
| "grad_norm": 0.005023419391363859, |
| "learning_rate": 4.95092693565976e-06, |
| "loss": 0.0001, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.268037726854847, |
| "grad_norm": 0.09251435101032257, |
| "learning_rate": 4.878226099600146e-06, |
| "loss": 0.0008, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.278933569409922, |
| "grad_norm": 0.0035660325083881617, |
| "learning_rate": 4.8055252635405305e-06, |
| "loss": 0.0029, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.2898294119649973, |
| "grad_norm": 0.00022365724726114422, |
| "learning_rate": 4.732824427480917e-06, |
| "loss": 0.0, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.300725254520072, |
| "grad_norm": 0.28966161608695984, |
| "learning_rate": 4.660123591421302e-06, |
| "loss": 0.0004, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.311621097075147, |
| "grad_norm": 0.000494773150421679, |
| "learning_rate": 4.5874227553616864e-06, |
| "loss": 0.0003, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.3225169396302223, |
| "grad_norm": 0.2110077142715454, |
| "learning_rate": 4.514721919302073e-06, |
| "loss": 0.0007, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.3334127821852975, |
| "grad_norm": 0.0006416022079065442, |
| "learning_rate": 4.442021083242458e-06, |
| "loss": 0.0006, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.3443086247403726, |
| "grad_norm": 0.0005581114673987031, |
| "learning_rate": 4.369320247182842e-06, |
| "loss": 0.0004, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.3552044672954477, |
| "grad_norm": 0.0006430571665987372, |
| "learning_rate": 4.296619411123229e-06, |
| "loss": 0.0013, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.3661003098505224, |
| "grad_norm": 0.0002313524018973112, |
| "learning_rate": 4.2239185750636135e-06, |
| "loss": 0.0011, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.3769961524055976, |
| "grad_norm": 0.01299639604985714, |
| "learning_rate": 4.151217739003999e-06, |
| "loss": 0.0002, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.3878919949606727, |
| "grad_norm": 0.036279868334531784, |
| "learning_rate": 4.078516902944385e-06, |
| "loss": 0.0, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.398787837515748, |
| "grad_norm": 0.0004496763285715133, |
| "learning_rate": 4.005816066884769e-06, |
| "loss": 0.0, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.409683680070823, |
| "grad_norm": 0.010034661740064621, |
| "learning_rate": 3.933115230825155e-06, |
| "loss": 0.0, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.420579522625898, |
| "grad_norm": 0.0027114665135741234, |
| "learning_rate": 3.860414394765541e-06, |
| "loss": 0.0, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.4314753651809733, |
| "grad_norm": 0.00021306249254848808, |
| "learning_rate": 3.7877135587059253e-06, |
| "loss": 0.0, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.4423712077360484, |
| "grad_norm": 0.002327492693439126, |
| "learning_rate": 3.7150127226463105e-06, |
| "loss": 0.0, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.453267050291123, |
| "grad_norm": 0.0042752730660140514, |
| "learning_rate": 3.6423118865866965e-06, |
| "loss": 0.0001, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.4641628928461983, |
| "grad_norm": 0.5819891691207886, |
| "learning_rate": 3.5696110505270817e-06, |
| "loss": 0.0014, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.4750587354012734, |
| "grad_norm": 0.0002232871629530564, |
| "learning_rate": 3.4969102144674664e-06, |
| "loss": 0.0, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.4859545779563486, |
| "grad_norm": 0.0006547856028191745, |
| "learning_rate": 3.4242093784078516e-06, |
| "loss": 0.0, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.4968504205114237, |
| "grad_norm": 0.007096582092344761, |
| "learning_rate": 3.3515085423482376e-06, |
| "loss": 0.0, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.507746263066499, |
| "grad_norm": 0.007319641765207052, |
| "learning_rate": 3.2788077062886227e-06, |
| "loss": 0.0, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.5186421056215735, |
| "grad_norm": 0.00013177268556319177, |
| "learning_rate": 3.206106870229008e-06, |
| "loss": 0.0, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.5295379481766487, |
| "grad_norm": 0.001638653688132763, |
| "learning_rate": 3.1334060341693935e-06, |
| "loss": 0.0002, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.540433790731724, |
| "grad_norm": 0.00048312891158275306, |
| "learning_rate": 3.0607051981097786e-06, |
| "loss": 0.0, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.551329633286799, |
| "grad_norm": 0.001063148258253932, |
| "learning_rate": 2.988004362050164e-06, |
| "loss": 0.0001, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.562225475841874, |
| "grad_norm": 0.005976190324872732, |
| "learning_rate": 2.9153035259905494e-06, |
| "loss": 0.0, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.5731213183969492, |
| "grad_norm": 0.001030449871905148, |
| "learning_rate": 2.8426026899309345e-06, |
| "loss": 0.0001, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.5840171609520244, |
| "grad_norm": 0.000677391595672816, |
| "learning_rate": 2.7699018538713197e-06, |
| "loss": 0.0016, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.5949130035070995, |
| "grad_norm": 1.1224867105484009, |
| "learning_rate": 2.6972010178117053e-06, |
| "loss": 0.0036, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.6058088460621747, |
| "grad_norm": 0.0026874279137700796, |
| "learning_rate": 2.6245001817520905e-06, |
| "loss": 0.0, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.6167046886172494, |
| "grad_norm": 0.003862058976665139, |
| "learning_rate": 2.5517993456924756e-06, |
| "loss": 0.0001, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.6276005311723245, |
| "grad_norm": 0.0830313041806221, |
| "learning_rate": 2.4790985096328608e-06, |
| "loss": 0.0014, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.6384963737273996, |
| "grad_norm": 0.0019621718674898148, |
| "learning_rate": 2.4063976735732464e-06, |
| "loss": 0.0005, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.649392216282475, |
| "grad_norm": 0.28306806087493896, |
| "learning_rate": 2.3336968375136315e-06, |
| "loss": 0.0002, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.66028805883755, |
| "grad_norm": 0.004503046162426472, |
| "learning_rate": 2.260996001454017e-06, |
| "loss": 0.0, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.6711839013926246, |
| "grad_norm": 0.0008729721885174513, |
| "learning_rate": 2.1882951653944023e-06, |
| "loss": 0.0008, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.6820797439476998, |
| "grad_norm": 0.010283468291163445, |
| "learning_rate": 2.1155943293347874e-06, |
| "loss": 0.0, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.692975586502775, |
| "grad_norm": 1.8014414308709092e-05, |
| "learning_rate": 2.042893493275173e-06, |
| "loss": 0.0012, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.70387142905785, |
| "grad_norm": 0.0013227862073108554, |
| "learning_rate": 1.970192657215558e-06, |
| "loss": 0.0001, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.714767271612925, |
| "grad_norm": 9.750492608873174e-05, |
| "learning_rate": 1.8974918211559433e-06, |
| "loss": 0.0012, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.7256631141680003, |
| "grad_norm": 0.009569020941853523, |
| "learning_rate": 1.824790985096329e-06, |
| "loss": 0.0001, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.7365589567230755, |
| "grad_norm": 0.00015347945736721158, |
| "learning_rate": 1.752090149036714e-06, |
| "loss": 0.0001, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.7474547992781506, |
| "grad_norm": 0.0024864268489181995, |
| "learning_rate": 1.6793893129770995e-06, |
| "loss": 0.0002, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.7583506418332258, |
| "grad_norm": 0.0018065335461869836, |
| "learning_rate": 1.6066884769174848e-06, |
| "loss": 0.0, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.7692464843883005, |
| "grad_norm": 0.000252872530836612, |
| "learning_rate": 1.53398764085787e-06, |
| "loss": 0.0002, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.7801423269433756, |
| "grad_norm": 0.0006220173672772944, |
| "learning_rate": 1.4612868047982554e-06, |
| "loss": 0.0, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.7910381694984507, |
| "grad_norm": 0.00021657197794411331, |
| "learning_rate": 1.3885859687386405e-06, |
| "loss": 0.002, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.801934012053526, |
| "grad_norm": 0.062267255038022995, |
| "learning_rate": 1.315885132679026e-06, |
| "loss": 0.0001, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.812829854608601, |
| "grad_norm": 0.00383751024492085, |
| "learning_rate": 1.2431842966194113e-06, |
| "loss": 0.0002, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.8237256971636757, |
| "grad_norm": 9.788275929167867e-05, |
| "learning_rate": 1.1704834605597967e-06, |
| "loss": 0.0006, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.834621539718751, |
| "grad_norm": 0.0013275217497721314, |
| "learning_rate": 1.0977826245001818e-06, |
| "loss": 0.0002, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.845517382273826, |
| "grad_norm": 0.0015028759371489286, |
| "learning_rate": 1.0250817884405672e-06, |
| "loss": 0.0, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.856413224828901, |
| "grad_norm": 0.00014119225670583546, |
| "learning_rate": 9.523809523809525e-07, |
| "loss": 0.0, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.8673090673839763, |
| "grad_norm": 0.007295021787285805, |
| "learning_rate": 8.796801163213378e-07, |
| "loss": 0.0, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.8782049099390514, |
| "grad_norm": 2.5996017939178273e-05, |
| "learning_rate": 8.069792802617231e-07, |
| "loss": 0.0001, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.8891007524941266, |
| "grad_norm": 0.00027592500555329025, |
| "learning_rate": 7.342784442021084e-07, |
| "loss": 0.0001, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.8999965950492017, |
| "grad_norm": 0.0033551298547536135, |
| "learning_rate": 6.615776081424936e-07, |
| "loss": 0.0, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.910892437604277, |
| "grad_norm": 0.0005961539573036134, |
| "learning_rate": 5.88876772082879e-07, |
| "loss": 0.0, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.9217882801593515, |
| "grad_norm": 0.0015423846198245883, |
| "learning_rate": 5.161759360232643e-07, |
| "loss": 0.0003, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.9326841227144267, |
| "grad_norm": 0.000448063132353127, |
| "learning_rate": 4.434750999636496e-07, |
| "loss": 0.0031, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.943579965269502, |
| "grad_norm": 0.003001452423632145, |
| "learning_rate": 3.7077426390403497e-07, |
| "loss": 0.0, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.954475807824577, |
| "grad_norm": 4.6965491492301226e-05, |
| "learning_rate": 2.9807342784442023e-07, |
| "loss": 0.0001, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.965371650379652, |
| "grad_norm": 0.00013006600784137845, |
| "learning_rate": 2.2537259178480555e-07, |
| "loss": 0.001, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.9762674929347273, |
| "grad_norm": 0.006912072654813528, |
| "learning_rate": 1.5267175572519085e-07, |
| "loss": 0.0, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.987163335489802, |
| "grad_norm": 0.0006019837455824018, |
| "learning_rate": 7.997091966557616e-08, |
| "loss": 0.0, |
| "step": 2740 |
| }, |
| { |
| "epoch": 2.998059178044877, |
| "grad_norm": 0.006343195680528879, |
| "learning_rate": 7.2700836059614684e-09, |
| "loss": 0.0005, |
| "step": 2750 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 2751, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1303954889740124e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|