| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9997582789460963, |
| "eval_steps": 500, |
| "global_step": 517, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0019337684312303602, |
| "grad_norm": 18.9990177154541, |
| "learning_rate": 0.0, |
| "loss": 1.0504, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0038675368624607204, |
| "grad_norm": 18.651086807250977, |
| "learning_rate": 1.25e-07, |
| "loss": 1.0819, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.005801305293691081, |
| "grad_norm": 17.61154556274414, |
| "learning_rate": 2.5e-07, |
| "loss": 1.0378, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.007735073724921441, |
| "grad_norm": 18.019412994384766, |
| "learning_rate": 3.75e-07, |
| "loss": 1.0519, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0096688421561518, |
| "grad_norm": 18.217206954956055, |
| "learning_rate": 5e-07, |
| "loss": 1.0353, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.011602610587382161, |
| "grad_norm": 17.47523307800293, |
| "learning_rate": 6.249999999999999e-07, |
| "loss": 1.0829, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01353637901861252, |
| "grad_norm": 18.857463836669922, |
| "learning_rate": 7.5e-07, |
| "loss": 1.0694, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.015470147449842882, |
| "grad_norm": 17.953575134277344, |
| "learning_rate": 8.75e-07, |
| "loss": 1.0873, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01740391588107324, |
| "grad_norm": 16.11162757873535, |
| "learning_rate": 1e-06, |
| "loss": 0.9523, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0193376843123036, |
| "grad_norm": 14.633333206176758, |
| "learning_rate": 1.125e-06, |
| "loss": 1.0021, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.021271452743533963, |
| "grad_norm": 12.60569953918457, |
| "learning_rate": 1.2499999999999999e-06, |
| "loss": 0.9455, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.023205221174764323, |
| "grad_norm": 12.926105499267578, |
| "learning_rate": 1.375e-06, |
| "loss": 0.9779, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.025138989605994682, |
| "grad_norm": 8.501869201660156, |
| "learning_rate": 1.5e-06, |
| "loss": 0.852, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.02707275803722504, |
| "grad_norm": 8.153008460998535, |
| "learning_rate": 1.625e-06, |
| "loss": 0.9161, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.029006526468455404, |
| "grad_norm": 8.377603530883789, |
| "learning_rate": 1.75e-06, |
| "loss": 0.9677, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.030940294899685764, |
| "grad_norm": 8.171313285827637, |
| "learning_rate": 1.8749999999999998e-06, |
| "loss": 0.9456, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03287406333091612, |
| "grad_norm": 7.480187892913818, |
| "learning_rate": 2e-06, |
| "loss": 0.8556, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.03480783176214648, |
| "grad_norm": 6.254351615905762, |
| "learning_rate": 1.9999803395762148e-06, |
| "loss": 0.8907, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.03674160019337684, |
| "grad_norm": 7.525010585784912, |
| "learning_rate": 1.9999213590779243e-06, |
| "loss": 0.8208, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0386753686246072, |
| "grad_norm": 8.508758544921875, |
| "learning_rate": 1.9998230608242915e-06, |
| "loss": 0.8717, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04060913705583756, |
| "grad_norm": 8.445762634277344, |
| "learning_rate": 1.999685448680487e-06, |
| "loss": 0.8676, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.04254290548706793, |
| "grad_norm": 7.846978187561035, |
| "learning_rate": 1.999508528057537e-06, |
| "loss": 0.8169, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.044476673918298286, |
| "grad_norm": 7.429245948791504, |
| "learning_rate": 1.9992923059121105e-06, |
| "loss": 0.7503, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.046410442349528645, |
| "grad_norm": 6.933453559875488, |
| "learning_rate": 1.999036790746245e-06, |
| "loss": 0.7799, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.048344210780759005, |
| "grad_norm": 6.4085774421691895, |
| "learning_rate": 1.9987419926070138e-06, |
| "loss": 0.785, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.050277979211989364, |
| "grad_norm": 5.818970203399658, |
| "learning_rate": 1.9984079230861295e-06, |
| "loss": 0.7994, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.05221174764321972, |
| "grad_norm": 6.150722026824951, |
| "learning_rate": 1.998034595319489e-06, |
| "loss": 0.7996, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.05414551607445008, |
| "grad_norm": 5.317642688751221, |
| "learning_rate": 1.997622023986656e-06, |
| "loss": 0.774, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.05607928450568044, |
| "grad_norm": 4.694331169128418, |
| "learning_rate": 1.9971702253102855e-06, |
| "loss": 0.7098, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.05801305293691081, |
| "grad_norm": 4.669729709625244, |
| "learning_rate": 1.9966792170554842e-06, |
| "loss": 0.7263, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.05994682136814117, |
| "grad_norm": 5.054605960845947, |
| "learning_rate": 1.9961490185291127e-06, |
| "loss": 0.7084, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.06188058979937153, |
| "grad_norm": 4.530153751373291, |
| "learning_rate": 1.9955796505790267e-06, |
| "loss": 0.653, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.06381435823060189, |
| "grad_norm": 4.292491436004639, |
| "learning_rate": 1.9949711355932563e-06, |
| "loss": 0.7146, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.06574812666183225, |
| "grad_norm": 3.924863815307617, |
| "learning_rate": 1.9943234974991266e-06, |
| "loss": 0.6936, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0676818950930626, |
| "grad_norm": 4.0565314292907715, |
| "learning_rate": 1.993636761762317e-06, |
| "loss": 0.6899, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.06961566352429296, |
| "grad_norm": 4.169654846191406, |
| "learning_rate": 1.9929109553858574e-06, |
| "loss": 0.7224, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.07154943195552332, |
| "grad_norm": 4.1086578369140625, |
| "learning_rate": 1.9921461069090715e-06, |
| "loss": 0.7267, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.07348320038675368, |
| "grad_norm": 3.86490535736084, |
| "learning_rate": 1.991342246406448e-06, |
| "loss": 0.7308, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.07541696881798404, |
| "grad_norm": 3.4781484603881836, |
| "learning_rate": 1.990499405486464e-06, |
| "loss": 0.6204, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0773507372492144, |
| "grad_norm": 3.520286798477173, |
| "learning_rate": 1.989617617290338e-06, |
| "loss": 0.6711, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.07928450568044476, |
| "grad_norm": 3.5273187160491943, |
| "learning_rate": 1.98869691649073e-06, |
| "loss": 0.6004, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.08121827411167512, |
| "grad_norm": 3.4485585689544678, |
| "learning_rate": 1.9877373392903765e-06, |
| "loss": 0.7075, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0831520425429055, |
| "grad_norm": 3.598468780517578, |
| "learning_rate": 1.986738923420665e-06, |
| "loss": 0.6459, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.08508581097413585, |
| "grad_norm": 3.1944539546966553, |
| "learning_rate": 1.9857017081401545e-06, |
| "loss": 0.5869, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.08701957940536621, |
| "grad_norm": 3.8986239433288574, |
| "learning_rate": 1.984625734233029e-06, |
| "loss": 0.6738, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.08895334783659657, |
| "grad_norm": 3.00274920463562, |
| "learning_rate": 1.9835110440074937e-06, |
| "loss": 0.6038, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.09088711626782693, |
| "grad_norm": 3.3365049362182617, |
| "learning_rate": 1.9823576812941135e-06, |
| "loss": 0.6489, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.09282088469905729, |
| "grad_norm": 3.1739261150360107, |
| "learning_rate": 1.9811656914440883e-06, |
| "loss": 0.6558, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.09475465313028765, |
| "grad_norm": 3.4007365703582764, |
| "learning_rate": 1.9799351213274686e-06, |
| "loss": 0.6043, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.09668842156151801, |
| "grad_norm": 3.241675853729248, |
| "learning_rate": 1.9786660193313147e-06, |
| "loss": 0.6038, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09862218999274837, |
| "grad_norm": 3.3647406101226807, |
| "learning_rate": 1.9773584353577927e-06, |
| "loss": 0.6712, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.10055595842397873, |
| "grad_norm": 3.1126842498779297, |
| "learning_rate": 1.9760124208222127e-06, |
| "loss": 0.6281, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.10248972685520909, |
| "grad_norm": 3.2202467918395996, |
| "learning_rate": 1.974628028651007e-06, |
| "loss": 0.616, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.10442349528643945, |
| "grad_norm": 3.008822441101074, |
| "learning_rate": 1.9732053132796493e-06, |
| "loss": 0.6192, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.1063572637176698, |
| "grad_norm": 3.3741564750671387, |
| "learning_rate": 1.971744330650514e-06, |
| "loss": 0.6312, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.10829103214890017, |
| "grad_norm": 3.326568603515625, |
| "learning_rate": 1.970245138210676e-06, |
| "loss": 0.6039, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.11022480058013052, |
| "grad_norm": 2.9236772060394287, |
| "learning_rate": 1.968707794909653e-06, |
| "loss": 0.6182, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.11215856901136088, |
| "grad_norm": 2.947068452835083, |
| "learning_rate": 1.967132361197086e-06, |
| "loss": 0.6027, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.11409233744259124, |
| "grad_norm": 3.112914562225342, |
| "learning_rate": 1.9655188990203645e-06, |
| "loss": 0.6059, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.11602610587382162, |
| "grad_norm": 3.3346080780029297, |
| "learning_rate": 1.963867471822189e-06, |
| "loss": 0.602, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.11795987430505198, |
| "grad_norm": 2.8977630138397217, |
| "learning_rate": 1.962178144538076e-06, |
| "loss": 0.5868, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.11989364273628234, |
| "grad_norm": 2.880448818206787, |
| "learning_rate": 1.9604509835938064e-06, |
| "loss": 0.5378, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1218274111675127, |
| "grad_norm": 2.6951026916503906, |
| "learning_rate": 1.9586860569028124e-06, |
| "loss": 0.5642, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.12376117959874305, |
| "grad_norm": 3.1341307163238525, |
| "learning_rate": 1.9568834338635075e-06, |
| "loss": 0.6254, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.1256949480299734, |
| "grad_norm": 2.9430906772613525, |
| "learning_rate": 1.9550431853565574e-06, |
| "loss": 0.5638, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.12762871646120377, |
| "grad_norm": 3.170628309249878, |
| "learning_rate": 1.953165383742093e-06, |
| "loss": 0.6531, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.12956248489243413, |
| "grad_norm": 2.6802241802215576, |
| "learning_rate": 1.9512501028568656e-06, |
| "loss": 0.5603, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.1314962533236645, |
| "grad_norm": 2.698767900466919, |
| "learning_rate": 1.9492974180113425e-06, |
| "loss": 0.5635, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.13343002175489485, |
| "grad_norm": 2.9341378211975098, |
| "learning_rate": 1.9473074059867473e-06, |
| "loss": 0.643, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.1353637901861252, |
| "grad_norm": 3.148897171020508, |
| "learning_rate": 1.9452801450320396e-06, |
| "loss": 0.6171, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.13729755861735557, |
| "grad_norm": 2.996152400970459, |
| "learning_rate": 1.943215714860838e-06, |
| "loss": 0.645, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.13923132704858593, |
| "grad_norm": 2.855325222015381, |
| "learning_rate": 1.941114196648286e-06, |
| "loss": 0.5198, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.1411650954798163, |
| "grad_norm": 2.8352253437042236, |
| "learning_rate": 1.9389756730278625e-06, |
| "loss": 0.6044, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.14309886391104665, |
| "grad_norm": 3.1634292602539062, |
| "learning_rate": 1.9368002280881276e-06, |
| "loss": 0.6285, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.145032632342277, |
| "grad_norm": 2.8662703037261963, |
| "learning_rate": 1.9345879473694204e-06, |
| "loss": 0.539, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.14696640077350737, |
| "grad_norm": 2.915189266204834, |
| "learning_rate": 1.932338917860494e-06, |
| "loss": 0.586, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.14890016920473773, |
| "grad_norm": 2.995396852493286, |
| "learning_rate": 1.9300532279950945e-06, |
| "loss": 0.5723, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.15083393763596809, |
| "grad_norm": 3.11187481880188, |
| "learning_rate": 1.9277309676484854e-06, |
| "loss": 0.6344, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.15276770606719844, |
| "grad_norm": 2.9698314666748047, |
| "learning_rate": 1.9253722281339115e-06, |
| "loss": 0.5782, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.1547014744984288, |
| "grad_norm": 2.8995749950408936, |
| "learning_rate": 1.9229771021990093e-06, |
| "loss": 0.5567, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.15663524292965916, |
| "grad_norm": 3.096269369125366, |
| "learning_rate": 1.920545684022161e-06, |
| "loss": 0.5743, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.15856901136088952, |
| "grad_norm": 2.9514002799987793, |
| "learning_rate": 1.91807806920879e-06, |
| "loss": 0.5647, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.16050277979211988, |
| "grad_norm": 3.0244557857513428, |
| "learning_rate": 1.915574354787602e-06, |
| "loss": 0.5775, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.16243654822335024, |
| "grad_norm": 2.788266181945801, |
| "learning_rate": 1.913034639206771e-06, |
| "loss": 0.5589, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.1643703166545806, |
| "grad_norm": 3.122509241104126, |
| "learning_rate": 1.910459022330065e-06, |
| "loss": 0.5745, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.166304085085811, |
| "grad_norm": 2.8160808086395264, |
| "learning_rate": 1.9078476054329236e-06, |
| "loss": 0.5431, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.16823785351704135, |
| "grad_norm": 2.9235658645629883, |
| "learning_rate": 1.9052004911984719e-06, |
| "loss": 0.5591, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.1701716219482717, |
| "grad_norm": 3.0429940223693848, |
| "learning_rate": 1.9025177837134855e-06, |
| "loss": 0.5745, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.17210539037950207, |
| "grad_norm": 2.805288791656494, |
| "learning_rate": 1.8997995884642967e-06, |
| "loss": 0.579, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.17403915881073242, |
| "grad_norm": 2.8548991680145264, |
| "learning_rate": 1.897046012332646e-06, |
| "loss": 0.5793, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.17597292724196278, |
| "grad_norm": 2.9402284622192383, |
| "learning_rate": 1.8942571635914812e-06, |
| "loss": 0.5541, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.17790669567319314, |
| "grad_norm": 2.88116717338562, |
| "learning_rate": 1.8914331519006983e-06, |
| "loss": 0.5372, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.1798404641044235, |
| "grad_norm": 2.8997554779052734, |
| "learning_rate": 1.8885740883028306e-06, |
| "loss": 0.5603, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.18177423253565386, |
| "grad_norm": 3.0519931316375732, |
| "learning_rate": 1.8856800852186826e-06, |
| "loss": 0.5942, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.18370800096688422, |
| "grad_norm": 3.0380802154541016, |
| "learning_rate": 1.8827512564429074e-06, |
| "loss": 0.5427, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.18564176939811458, |
| "grad_norm": 3.1358187198638916, |
| "learning_rate": 1.8797877171395353e-06, |
| "loss": 0.5735, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.18757553782934494, |
| "grad_norm": 2.9399850368499756, |
| "learning_rate": 1.8767895838374437e-06, |
| "loss": 0.5616, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.1895093062605753, |
| "grad_norm": 3.1031243801116943, |
| "learning_rate": 1.8737569744257752e-06, |
| "loss": 0.5919, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.19144307469180566, |
| "grad_norm": 2.8527615070343018, |
| "learning_rate": 1.8706900081493017e-06, |
| "loss": 0.5165, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.19337684312303602, |
| "grad_norm": 2.9103434085845947, |
| "learning_rate": 1.8675888056037374e-06, |
| "loss": 0.5361, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.19531061155426638, |
| "grad_norm": 2.798142910003662, |
| "learning_rate": 1.8644534887309945e-06, |
| "loss": 0.5274, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.19724437998549674, |
| "grad_norm": 2.727302312850952, |
| "learning_rate": 1.8612841808143894e-06, |
| "loss": 0.5681, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.1991781484167271, |
| "grad_norm": 2.993396759033203, |
| "learning_rate": 1.8580810064737963e-06, |
| "loss": 0.5323, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.20111191684795746, |
| "grad_norm": 2.8348159790039062, |
| "learning_rate": 1.8548440916607452e-06, |
| "loss": 0.5679, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.20304568527918782, |
| "grad_norm": 2.7694900035858154, |
| "learning_rate": 1.8515735636534693e-06, |
| "loss": 0.5343, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.20497945371041817, |
| "grad_norm": 3.0484673976898193, |
| "learning_rate": 1.8482695510519025e-06, |
| "loss": 0.5595, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.20691322214164853, |
| "grad_norm": 3.0022387504577637, |
| "learning_rate": 1.8449321837726205e-06, |
| "loss": 0.5666, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.2088469905728789, |
| "grad_norm": 2.9040303230285645, |
| "learning_rate": 1.8415615930437334e-06, |
| "loss": 0.57, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.21078075900410925, |
| "grad_norm": 2.8118133544921875, |
| "learning_rate": 1.8381579113997252e-06, |
| "loss": 0.5479, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.2127145274353396, |
| "grad_norm": 2.882737159729004, |
| "learning_rate": 1.8347212726762436e-06, |
| "loss": 0.5641, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.21464829586656997, |
| "grad_norm": 2.9052658081054688, |
| "learning_rate": 1.8312518120048353e-06, |
| "loss": 0.4957, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.21658206429780033, |
| "grad_norm": 2.898360013961792, |
| "learning_rate": 1.827749665807635e-06, |
| "loss": 0.5477, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.2185158327290307, |
| "grad_norm": 2.7747766971588135, |
| "learning_rate": 1.8242149717919991e-06, |
| "loss": 0.5579, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.22044960116026105, |
| "grad_norm": 3.0171499252319336, |
| "learning_rate": 1.8206478689450929e-06, |
| "loss": 0.5669, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.2223833695914914, |
| "grad_norm": 2.9386541843414307, |
| "learning_rate": 1.817048497528423e-06, |
| "loss": 0.5213, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.22431713802272177, |
| "grad_norm": 2.888003349304199, |
| "learning_rate": 1.8134169990723246e-06, |
| "loss": 0.5299, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.22625090645395213, |
| "grad_norm": 3.0927133560180664, |
| "learning_rate": 1.8097535163703945e-06, |
| "loss": 0.5599, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.2281846748851825, |
| "grad_norm": 2.846223831176758, |
| "learning_rate": 1.8060581934738781e-06, |
| "loss": 0.5416, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.23011844331641285, |
| "grad_norm": 2.8315625190734863, |
| "learning_rate": 1.8023311756860035e-06, |
| "loss": 0.5483, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.23205221174764323, |
| "grad_norm": 2.946509599685669, |
| "learning_rate": 1.798572609556269e-06, |
| "loss": 0.5825, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2339859801788736, |
| "grad_norm": 3.0473732948303223, |
| "learning_rate": 1.7947826428746806e-06, |
| "loss": 0.5535, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.23591974861010395, |
| "grad_norm": 3.0193090438842773, |
| "learning_rate": 1.7909614246659406e-06, |
| "loss": 0.5517, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.2378535170413343, |
| "grad_norm": 2.919999837875366, |
| "learning_rate": 1.787109105183587e-06, |
| "loss": 0.5755, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.23978728547256467, |
| "grad_norm": 2.945788860321045, |
| "learning_rate": 1.7832258359040882e-06, |
| "loss": 0.5206, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.24172105390379503, |
| "grad_norm": 2.968233108520508, |
| "learning_rate": 1.7793117695208826e-06, |
| "loss": 0.5674, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.2436548223350254, |
| "grad_norm": 2.9779975414276123, |
| "learning_rate": 1.775367059938378e-06, |
| "loss": 0.5777, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.24558859076625575, |
| "grad_norm": 3.014735221862793, |
| "learning_rate": 1.771391862265899e-06, |
| "loss": 0.55, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.2475223591974861, |
| "grad_norm": 3.1135857105255127, |
| "learning_rate": 1.7673863328115867e-06, |
| "loss": 0.5403, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.24945612762871647, |
| "grad_norm": 2.9737236499786377, |
| "learning_rate": 1.763350629076255e-06, |
| "loss": 0.554, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.2513898960599468, |
| "grad_norm": 2.74530291557312, |
| "learning_rate": 1.7592849097471947e-06, |
| "loss": 0.5474, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.25332366449117716, |
| "grad_norm": 3.2034261226654053, |
| "learning_rate": 1.755189334691936e-06, |
| "loss": 0.5603, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.25525743292240755, |
| "grad_norm": 2.845120668411255, |
| "learning_rate": 1.7510640649519614e-06, |
| "loss": 0.536, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2571912013536379, |
| "grad_norm": 2.771730661392212, |
| "learning_rate": 1.7469092627363735e-06, |
| "loss": 0.5016, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.25912496978486826, |
| "grad_norm": 2.9173460006713867, |
| "learning_rate": 1.742725091415517e-06, |
| "loss": 0.5249, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.2610587382160986, |
| "grad_norm": 3.0819239616394043, |
| "learning_rate": 1.7385117155145545e-06, |
| "loss": 0.5169, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.262992506647329, |
| "grad_norm": 3.031985282897949, |
| "learning_rate": 1.7342693007069973e-06, |
| "loss": 0.5824, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.26492627507855937, |
| "grad_norm": 2.9924063682556152, |
| "learning_rate": 1.729998013808192e-06, |
| "loss": 0.5805, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.2668600435097897, |
| "grad_norm": 3.3764071464538574, |
| "learning_rate": 1.7256980227687591e-06, |
| "loss": 0.644, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.2687938119410201, |
| "grad_norm": 2.775237798690796, |
| "learning_rate": 1.7213694966679912e-06, |
| "loss": 0.495, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.2707275803722504, |
| "grad_norm": 3.067394733428955, |
| "learning_rate": 1.717012605707203e-06, |
| "loss": 0.5125, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.2726613488034808, |
| "grad_norm": 3.1399190425872803, |
| "learning_rate": 1.71262752120304e-06, |
| "loss": 0.5661, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.27459511723471114, |
| "grad_norm": 3.03717303276062, |
| "learning_rate": 1.7082144155807416e-06, |
| "loss": 0.6028, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.2765288856659415, |
| "grad_norm": 2.9807446002960205, |
| "learning_rate": 1.7037734623673613e-06, |
| "loss": 0.5505, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.27846265409717186, |
| "grad_norm": 2.834151268005371, |
| "learning_rate": 1.6993048361849437e-06, |
| "loss": 0.5391, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.28039642252840224, |
| "grad_norm": 2.9190399646759033, |
| "learning_rate": 1.694808712743657e-06, |
| "loss": 0.5122, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.2823301909596326, |
| "grad_norm": 3.0294294357299805, |
| "learning_rate": 1.6902852688348864e-06, |
| "loss": 0.5764, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.28426395939086296, |
| "grad_norm": 2.864239454269409, |
| "learning_rate": 1.68573468232428e-06, |
| "loss": 0.542, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.2861977278220933, |
| "grad_norm": 3.1250009536743164, |
| "learning_rate": 1.6811571321447565e-06, |
| "loss": 0.5638, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.2881314962533237, |
| "grad_norm": 2.9388275146484375, |
| "learning_rate": 1.6765527982894687e-06, |
| "loss": 0.5452, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.290065264684554, |
| "grad_norm": 3.0746877193450928, |
| "learning_rate": 1.6719218618047262e-06, |
| "loss": 0.5515, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2919990331157844, |
| "grad_norm": 2.9160993099212646, |
| "learning_rate": 1.6672645047828769e-06, |
| "loss": 0.5759, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.29393280154701473, |
| "grad_norm": 2.942295551300049, |
| "learning_rate": 1.662580910355146e-06, |
| "loss": 0.5791, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.2958665699782451, |
| "grad_norm": 3.1533167362213135, |
| "learning_rate": 1.6578712626844363e-06, |
| "loss": 0.5355, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.29780033840947545, |
| "grad_norm": 2.9345664978027344, |
| "learning_rate": 1.6531357469580857e-06, |
| "loss": 0.54, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.29973410684070584, |
| "grad_norm": 2.933525562286377, |
| "learning_rate": 1.6483745493805868e-06, |
| "loss": 0.5085, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.30166787527193617, |
| "grad_norm": 3.0294647216796875, |
| "learning_rate": 1.643587857166263e-06, |
| "loss": 0.5245, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.30360164370316656, |
| "grad_norm": 2.784876585006714, |
| "learning_rate": 1.6387758585319098e-06, |
| "loss": 0.5279, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.3055354121343969, |
| "grad_norm": 2.812796115875244, |
| "learning_rate": 1.6339387426893917e-06, |
| "loss": 0.4981, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.3074691805656273, |
| "grad_norm": 3.0488643646240234, |
| "learning_rate": 1.629076699838204e-06, |
| "loss": 0.5355, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.3094029489968576, |
| "grad_norm": 2.980043888092041, |
| "learning_rate": 1.624189921157992e-06, |
| "loss": 0.5306, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.311336717428088, |
| "grad_norm": 2.963444709777832, |
| "learning_rate": 1.6192785988010352e-06, |
| "loss": 0.5569, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.3132704858593183, |
| "grad_norm": 2.8613176345825195, |
| "learning_rate": 1.6143429258846915e-06, |
| "loss": 0.5242, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.3152042542905487, |
| "grad_norm": 2.9481241703033447, |
| "learning_rate": 1.6093830964838033e-06, |
| "loss": 0.5459, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.31713802272177904, |
| "grad_norm": 2.9568634033203125, |
| "learning_rate": 1.6043993056230666e-06, |
| "loss": 0.5866, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.31907179115300943, |
| "grad_norm": 2.9647092819213867, |
| "learning_rate": 1.5993917492693623e-06, |
| "loss": 0.5505, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.32100555958423976, |
| "grad_norm": 2.9472572803497314, |
| "learning_rate": 1.59436062432405e-06, |
| "loss": 0.5905, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.32293932801547015, |
| "grad_norm": 2.9131317138671875, |
| "learning_rate": 1.5893061286152273e-06, |
| "loss": 0.5303, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.3248730964467005, |
| "grad_norm": 2.945516586303711, |
| "learning_rate": 1.5842284608899488e-06, |
| "loss": 0.5336, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.32680686487793087, |
| "grad_norm": 2.8020524978637695, |
| "learning_rate": 1.579127820806414e-06, |
| "loss": 0.4651, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.3287406333091612, |
| "grad_norm": 3.0330491065979004, |
| "learning_rate": 1.5740044089261136e-06, |
| "loss": 0.4867, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3306744017403916, |
| "grad_norm": 2.847795009613037, |
| "learning_rate": 1.5688584267059453e-06, |
| "loss": 0.5472, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.332608170171622, |
| "grad_norm": 2.9428656101226807, |
| "learning_rate": 1.5636900764902914e-06, |
| "loss": 0.5629, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.3345419386028523, |
| "grad_norm": 3.1021251678466797, |
| "learning_rate": 1.5584995615030632e-06, |
| "loss": 0.5857, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.3364757070340827, |
| "grad_norm": 3.0706946849823, |
| "learning_rate": 1.5532870858397092e-06, |
| "loss": 0.5518, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.338409475465313, |
| "grad_norm": 3.023850440979004, |
| "learning_rate": 1.5480528544591905e-06, |
| "loss": 0.5307, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.3403432438965434, |
| "grad_norm": 2.7942423820495605, |
| "learning_rate": 1.542797073175921e-06, |
| "loss": 0.528, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.34227701232777374, |
| "grad_norm": 3.0587525367736816, |
| "learning_rate": 1.5375199486516759e-06, |
| "loss": 0.5768, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.34421078075900413, |
| "grad_norm": 3.1753013134002686, |
| "learning_rate": 1.532221688387464e-06, |
| "loss": 0.5658, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.34614454919023446, |
| "grad_norm": 2.8351001739501953, |
| "learning_rate": 1.5269025007153698e-06, |
| "loss": 0.5008, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.34807831762146485, |
| "grad_norm": 2.830599069595337, |
| "learning_rate": 1.5215625947903603e-06, |
| "loss": 0.4855, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.3500120860526952, |
| "grad_norm": 2.871882200241089, |
| "learning_rate": 1.516202180582063e-06, |
| "loss": 0.5144, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.35194585448392557, |
| "grad_norm": 2.947722911834717, |
| "learning_rate": 1.5108214688665077e-06, |
| "loss": 0.5819, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.3538796229151559, |
| "grad_norm": 2.912292003631592, |
| "learning_rate": 1.5054206712178399e-06, |
| "loss": 0.5282, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.3558133913463863, |
| "grad_norm": 2.9988412857055664, |
| "learning_rate": 1.5e-06, |
| "loss": 0.5249, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.3577471597776166, |
| "grad_norm": 2.934715986251831, |
| "learning_rate": 1.4945596683583753e-06, |
| "loss": 0.5424, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.359680928208847, |
| "grad_norm": 2.935274839401245, |
| "learning_rate": 1.4890998902114165e-06, |
| "loss": 0.5147, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.36161469664007734, |
| "grad_norm": 3.1173348426818848, |
| "learning_rate": 1.4836208802422286e-06, |
| "loss": 0.5402, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.3635484650713077, |
| "grad_norm": 2.9309182167053223, |
| "learning_rate": 1.4781228538901265e-06, |
| "loss": 0.4956, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.36548223350253806, |
| "grad_norm": 2.8888142108917236, |
| "learning_rate": 1.4726060273421673e-06, |
| "loss": 0.534, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.36741600193376844, |
| "grad_norm": 2.891580820083618, |
| "learning_rate": 1.4670706175246462e-06, |
| "loss": 0.5188, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.3693497703649988, |
| "grad_norm": 3.278562545776367, |
| "learning_rate": 1.4615168420945687e-06, |
| "loss": 0.572, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.37128353879622916, |
| "grad_norm": 3.0605547428131104, |
| "learning_rate": 1.4559449194310924e-06, |
| "loss": 0.6035, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.3732173072274595, |
| "grad_norm": 2.988506317138672, |
| "learning_rate": 1.4503550686269388e-06, |
| "loss": 0.5552, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.3751510756586899, |
| "grad_norm": 2.906756639480591, |
| "learning_rate": 1.4447475094797793e-06, |
| "loss": 0.507, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.3770848440899202, |
| "grad_norm": 2.8822944164276123, |
| "learning_rate": 1.4391224624835928e-06, |
| "loss": 0.5025, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.3790186125211506, |
| "grad_norm": 3.0325863361358643, |
| "learning_rate": 1.4334801488199939e-06, |
| "loss": 0.5193, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.38095238095238093, |
| "grad_norm": 2.892662763595581, |
| "learning_rate": 1.4278207903495387e-06, |
| "loss": 0.5187, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.3828861493836113, |
| "grad_norm": 2.7827394008636475, |
| "learning_rate": 1.422144609602999e-06, |
| "loss": 0.5296, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.38481991781484165, |
| "grad_norm": 2.9269039630889893, |
| "learning_rate": 1.4164518297726126e-06, |
| "loss": 0.4882, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.38675368624607204, |
| "grad_norm": 3.1903462409973145, |
| "learning_rate": 1.4107426747033072e-06, |
| "loss": 0.5382, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.38868745467730237, |
| "grad_norm": 2.9497694969177246, |
| "learning_rate": 1.4050173688838998e-06, |
| "loss": 0.5187, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.39062122310853276, |
| "grad_norm": 2.956583023071289, |
| "learning_rate": 1.399276137438267e-06, |
| "loss": 0.5251, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.3925549915397631, |
| "grad_norm": 2.959354877471924, |
| "learning_rate": 1.3935192061164955e-06, |
| "loss": 0.6077, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.3944887599709935, |
| "grad_norm": 2.9722297191619873, |
| "learning_rate": 1.3877468012860046e-06, |
| "loss": 0.5582, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.3964225284022238, |
| "grad_norm": 2.9446754455566406, |
| "learning_rate": 1.3819591499226444e-06, |
| "loss": 0.5277, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.3983562968334542, |
| "grad_norm": 2.925280809402466, |
| "learning_rate": 1.376156479601772e-06, |
| "loss": 0.5251, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.4002900652646846, |
| "grad_norm": 2.7258141040802, |
| "learning_rate": 1.3703390184893033e-06, |
| "loss": 0.497, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.4022238336959149, |
| "grad_norm": 2.9930813312530518, |
| "learning_rate": 1.3645069953327388e-06, |
| "loss": 0.575, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.4041576021271453, |
| "grad_norm": 2.80450177192688, |
| "learning_rate": 1.3586606394521727e-06, |
| "loss": 0.4993, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.40609137055837563, |
| "grad_norm": 2.997321605682373, |
| "learning_rate": 1.3528001807312732e-06, |
| "loss": 0.5231, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.408025138989606, |
| "grad_norm": 3.1863949298858643, |
| "learning_rate": 1.3469258496082449e-06, |
| "loss": 0.5803, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.40995890742083635, |
| "grad_norm": 2.7617061138153076, |
| "learning_rate": 1.341037877066766e-06, |
| "loss": 0.5061, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.41189267585206674, |
| "grad_norm": 2.9016385078430176, |
| "learning_rate": 1.335136494626907e-06, |
| "loss": 0.4804, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.41382644428329707, |
| "grad_norm": 3.061267614364624, |
| "learning_rate": 1.3292219343360278e-06, |
| "loss": 0.55, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.41576021271452746, |
| "grad_norm": 3.161426544189453, |
| "learning_rate": 1.323294428759652e-06, |
| "loss": 0.5507, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.4176939811457578, |
| "grad_norm": 3.2181830406188965, |
| "learning_rate": 1.3173542109723227e-06, |
| "loss": 0.5543, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.4196277495769882, |
| "grad_norm": 3.0244412422180176, |
| "learning_rate": 1.3114015145484378e-06, |
| "loss": 0.5716, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.4215615180082185, |
| "grad_norm": 2.8925223350524902, |
| "learning_rate": 1.3054365735530664e-06, |
| "loss": 0.4452, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.4234952864394489, |
| "grad_norm": 3.0561466217041016, |
| "learning_rate": 1.2994596225327442e-06, |
| "loss": 0.4996, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.4254290548706792, |
| "grad_norm": 2.930914878845215, |
| "learning_rate": 1.2934708965062507e-06, |
| "loss": 0.5131, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.4273628233019096, |
| "grad_norm": 2.9865005016326904, |
| "learning_rate": 1.2874706309553697e-06, |
| "loss": 0.5286, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.42929659173313994, |
| "grad_norm": 3.2668237686157227, |
| "learning_rate": 1.2814590618156275e-06, |
| "loss": 0.5774, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.43123036016437033, |
| "grad_norm": 2.931473970413208, |
| "learning_rate": 1.2754364254670192e-06, |
| "loss": 0.5236, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.43316412859560066, |
| "grad_norm": 2.954760789871216, |
| "learning_rate": 1.2694029587247095e-06, |
| "loss": 0.4893, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.43509789702683105, |
| "grad_norm": 2.9701437950134277, |
| "learning_rate": 1.2633588988297247e-06, |
| "loss": 0.5199, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.4370316654580614, |
| "grad_norm": 3.1630728244781494, |
| "learning_rate": 1.2573044834396229e-06, |
| "loss": 0.5607, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.43896543388929177, |
| "grad_norm": 2.9174458980560303, |
| "learning_rate": 1.251239950619149e-06, |
| "loss": 0.4932, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.4408992023205221, |
| "grad_norm": 2.9808075428009033, |
| "learning_rate": 1.245165538830873e-06, |
| "loss": 0.5203, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.4428329707517525, |
| "grad_norm": 2.8363564014434814, |
| "learning_rate": 1.2390814869258154e-06, |
| "loss": 0.54, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.4447667391829828, |
| "grad_norm": 2.8673954010009766, |
| "learning_rate": 1.232988034134053e-06, |
| "loss": 0.5408, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.4467005076142132, |
| "grad_norm": 2.936624526977539, |
| "learning_rate": 1.2268854200553156e-06, |
| "loss": 0.4853, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.44863427604544354, |
| "grad_norm": 3.0259556770324707, |
| "learning_rate": 1.2207738846495599e-06, |
| "loss": 0.5135, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.4505680444766739, |
| "grad_norm": 2.8267462253570557, |
| "learning_rate": 1.2146536682275385e-06, |
| "loss": 0.5327, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.45250181290790426, |
| "grad_norm": 3.01057767868042, |
| "learning_rate": 1.2085250114413484e-06, |
| "loss": 0.5043, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.45443558133913464, |
| "grad_norm": 2.9250426292419434, |
| "learning_rate": 1.202388155274969e-06, |
| "loss": 0.5282, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.456369349770365, |
| "grad_norm": 3.085334539413452, |
| "learning_rate": 1.1962433410347858e-06, |
| "loss": 0.536, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.45830311820159536, |
| "grad_norm": 3.0255112648010254, |
| "learning_rate": 1.1900908103401036e-06, |
| "loss": 0.5392, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.4602368866328257, |
| "grad_norm": 2.8932268619537354, |
| "learning_rate": 1.1839308051136429e-06, |
| "loss": 0.5142, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.4621706550640561, |
| "grad_norm": 3.051480293273926, |
| "learning_rate": 1.1777635675720313e-06, |
| "loss": 0.5509, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.46410442349528647, |
| "grad_norm": 2.9904825687408447, |
| "learning_rate": 1.1715893402162757e-06, |
| "loss": 0.5316, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.4660381919265168, |
| "grad_norm": 2.8626015186309814, |
| "learning_rate": 1.1654083658222287e-06, |
| "loss": 0.4737, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.4679719603577472, |
| "grad_norm": 2.847097158432007, |
| "learning_rate": 1.1592208874310425e-06, |
| "loss": 0.5018, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.4699057287889775, |
| "grad_norm": 3.1590757369995117, |
| "learning_rate": 1.1530271483396115e-06, |
| "loss": 0.5399, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.4718394972202079, |
| "grad_norm": 3.036067247390747, |
| "learning_rate": 1.1468273920910067e-06, |
| "loss": 0.5831, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.47377326565143824, |
| "grad_norm": 3.331260919570923, |
| "learning_rate": 1.1406218624648985e-06, |
| "loss": 0.55, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.4757070340826686, |
| "grad_norm": 2.8437914848327637, |
| "learning_rate": 1.1344108034679709e-06, |
| "loss": 0.4947, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.47764080251389895, |
| "grad_norm": 3.1023781299591064, |
| "learning_rate": 1.1281944593243287e-06, |
| "loss": 0.5565, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.47957457094512934, |
| "grad_norm": 3.0982506275177, |
| "learning_rate": 1.121973074465892e-06, |
| "loss": 0.555, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.4815083393763597, |
| "grad_norm": 3.2432122230529785, |
| "learning_rate": 1.1157468935227865e-06, |
| "loss": 0.5621, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.48344210780759006, |
| "grad_norm": 2.9969475269317627, |
| "learning_rate": 1.109516161313724e-06, |
| "loss": 0.5244, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4853758762388204, |
| "grad_norm": 2.941561222076416, |
| "learning_rate": 1.1032811228363764e-06, |
| "loss": 0.5513, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.4873096446700508, |
| "grad_norm": 2.8772778511047363, |
| "learning_rate": 1.0970420232577406e-06, |
| "loss": 0.4848, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.4892434131012811, |
| "grad_norm": 3.026517391204834, |
| "learning_rate": 1.0907991079045006e-06, |
| "loss": 0.4887, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.4911771815325115, |
| "grad_norm": 2.914242744445801, |
| "learning_rate": 1.084552622253379e-06, |
| "loss": 0.5066, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.49311094996374183, |
| "grad_norm": 3.085822105407715, |
| "learning_rate": 1.0783028119214861e-06, |
| "loss": 0.5018, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.4950447183949722, |
| "grad_norm": 2.869805097579956, |
| "learning_rate": 1.0720499226566616e-06, |
| "loss": 0.5241, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.49697848682620255, |
| "grad_norm": 3.0161194801330566, |
| "learning_rate": 1.0657942003278106e-06, |
| "loss": 0.4984, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.49891225525743294, |
| "grad_norm": 2.8013997077941895, |
| "learning_rate": 1.0595358909152377e-06, |
| "loss": 0.519, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5008460236886633, |
| "grad_norm": 2.783162832260132, |
| "learning_rate": 1.053275240500973e-06, |
| "loss": 0.4913, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.5027797921198937, |
| "grad_norm": 3.2985990047454834, |
| "learning_rate": 1.0470124952590974e-06, |
| "loss": 0.5137, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.504713560551124, |
| "grad_norm": 2.9076266288757324, |
| "learning_rate": 1.040747901446062e-06, |
| "loss": 0.5054, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5066473289823543, |
| "grad_norm": 2.830371141433716, |
| "learning_rate": 1.0344817053910048e-06, |
| "loss": 0.5071, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.5085810974135847, |
| "grad_norm": 2.898216485977173, |
| "learning_rate": 1.028214153486066e-06, |
| "loss": 0.5002, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.5105148658448151, |
| "grad_norm": 2.9580469131469727, |
| "learning_rate": 1.021945492176699e-06, |
| "loss": 0.4903, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.5124486342760455, |
| "grad_norm": 3.2471764087677, |
| "learning_rate": 1.0156759679519789e-06, |
| "loss": 0.5037, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.5143824027072758, |
| "grad_norm": 2.984576940536499, |
| "learning_rate": 1.0094058273349125e-06, |
| "loss": 0.5115, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.5163161711385061, |
| "grad_norm": 2.9899098873138428, |
| "learning_rate": 1.003135316872743e-06, |
| "loss": 0.4761, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.5182499395697365, |
| "grad_norm": 3.1444497108459473, |
| "learning_rate": 9.96864683127257e-07, |
| "loss": 0.5092, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.5201837080009669, |
| "grad_norm": 2.837801456451416, |
| "learning_rate": 9.905941726650879e-07, |
| "loss": 0.4743, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.5221174764321972, |
| "grad_norm": 2.847886085510254, |
| "learning_rate": 9.843240320480212e-07, |
| "loss": 0.4526, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5240512448634276, |
| "grad_norm": 2.8540098667144775, |
| "learning_rate": 9.780545078233012e-07, |
| "loss": 0.4964, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.525985013294658, |
| "grad_norm": 2.9519143104553223, |
| "learning_rate": 9.717858465139341e-07, |
| "loss": 0.4792, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.5279187817258884, |
| "grad_norm": 2.9407591819763184, |
| "learning_rate": 9.655182946089955e-07, |
| "loss": 0.5136, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.5298525501571187, |
| "grad_norm": 2.884398937225342, |
| "learning_rate": 9.592520985539381e-07, |
| "loss": 0.5249, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.531786318588349, |
| "grad_norm": 2.9702608585357666, |
| "learning_rate": 9.529875047409026e-07, |
| "loss": 0.5022, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.5337200870195794, |
| "grad_norm": 2.78511118888855, |
| "learning_rate": 9.467247594990271e-07, |
| "loss": 0.5296, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.5356538554508098, |
| "grad_norm": 3.069162607192993, |
| "learning_rate": 9.404641090847626e-07, |
| "loss": 0.5199, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.5375876238820402, |
| "grad_norm": 3.088228940963745, |
| "learning_rate": 9.342057996721894e-07, |
| "loss": 0.5884, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.5395213923132705, |
| "grad_norm": 2.84122633934021, |
| "learning_rate": 9.279500773433385e-07, |
| "loss": 0.479, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.5414551607445008, |
| "grad_norm": 3.0056533813476562, |
| "learning_rate": 9.216971880785139e-07, |
| "loss": 0.4961, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5433889291757312, |
| "grad_norm": 2.8740074634552, |
| "learning_rate": 9.154473777466209e-07, |
| "loss": 0.4876, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.5453226976069616, |
| "grad_norm": 2.9126172065734863, |
| "learning_rate": 9.092008920954995e-07, |
| "loss": 0.5112, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.5472564660381919, |
| "grad_norm": 3.0587427616119385, |
| "learning_rate": 9.029579767422592e-07, |
| "loss": 0.4757, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.5491902344694223, |
| "grad_norm": 2.970956563949585, |
| "learning_rate": 8.967188771636236e-07, |
| "loss": 0.5116, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.5511240029006527, |
| "grad_norm": 2.7673499584198, |
| "learning_rate": 8.904838386862757e-07, |
| "loss": 0.507, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.553057771331883, |
| "grad_norm": 2.853449821472168, |
| "learning_rate": 8.842531064772136e-07, |
| "loss": 0.4738, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.5549915397631133, |
| "grad_norm": 2.9352164268493652, |
| "learning_rate": 8.78026925534108e-07, |
| "loss": 0.534, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.5569253081943437, |
| "grad_norm": 3.007397413253784, |
| "learning_rate": 8.718055406756713e-07, |
| "loss": 0.513, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.5588590766255741, |
| "grad_norm": 2.853698253631592, |
| "learning_rate": 8.65589196532029e-07, |
| "loss": 0.5263, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.5607928450568045, |
| "grad_norm": 3.211627244949341, |
| "learning_rate": 8.593781375351019e-07, |
| "loss": 0.5094, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5627266134880348, |
| "grad_norm": 2.8546507358551025, |
| "learning_rate": 8.531726079089933e-07, |
| "loss": 0.4698, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.5646603819192652, |
| "grad_norm": 2.8525969982147217, |
| "learning_rate": 8.469728516603886e-07, |
| "loss": 0.5319, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.5665941503504955, |
| "grad_norm": 2.925687313079834, |
| "learning_rate": 8.407791125689576e-07, |
| "loss": 0.5478, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.5685279187817259, |
| "grad_norm": 2.994581460952759, |
| "learning_rate": 8.345916341777714e-07, |
| "loss": 0.5365, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.5704616872129562, |
| "grad_norm": 3.1099655628204346, |
| "learning_rate": 8.284106597837242e-07, |
| "loss": 0.5256, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.5723954556441866, |
| "grad_norm": 2.8576478958129883, |
| "learning_rate": 8.222364324279689e-07, |
| "loss": 0.5182, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.574329224075417, |
| "grad_norm": 2.6711535453796387, |
| "learning_rate": 8.16069194886357e-07, |
| "loss": 0.4789, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.5762629925066474, |
| "grad_norm": 3.22711181640625, |
| "learning_rate": 8.099091896598964e-07, |
| "loss": 0.5506, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.5781967609378776, |
| "grad_norm": 3.0444905757904053, |
| "learning_rate": 8.037566589652141e-07, |
| "loss": 0.5388, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.580130529369108, |
| "grad_norm": 3.0839669704437256, |
| "learning_rate": 7.97611844725031e-07, |
| "loss": 0.507, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5820642978003384, |
| "grad_norm": 3.067812442779541, |
| "learning_rate": 7.914749885586515e-07, |
| "loss": 0.5522, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.5839980662315688, |
| "grad_norm": 2.7645392417907715, |
| "learning_rate": 7.853463317724614e-07, |
| "loss": 0.4622, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.5859318346627991, |
| "grad_norm": 2.6988883018493652, |
| "learning_rate": 7.792261153504401e-07, |
| "loss": 0.4774, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.5878656030940295, |
| "grad_norm": 2.8341736793518066, |
| "learning_rate": 7.731145799446844e-07, |
| "loss": 0.5248, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.5897993715252599, |
| "grad_norm": 2.75606632232666, |
| "learning_rate": 7.670119658659469e-07, |
| "loss": 0.525, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.5917331399564902, |
| "grad_norm": 2.893043279647827, |
| "learning_rate": 7.609185130741846e-07, |
| "loss": 0.5375, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.5936669083877205, |
| "grad_norm": 3.2129299640655518, |
| "learning_rate": 7.548344611691271e-07, |
| "loss": 0.5252, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.5956006768189509, |
| "grad_norm": 2.8687853813171387, |
| "learning_rate": 7.487600493808513e-07, |
| "loss": 0.5052, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.5975344452501813, |
| "grad_norm": 2.8696491718292236, |
| "learning_rate": 7.426955165603772e-07, |
| "loss": 0.4912, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.5994682136814117, |
| "grad_norm": 2.9154770374298096, |
| "learning_rate": 7.366411011702753e-07, |
| "loss": 0.509, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6014019821126421, |
| "grad_norm": 2.958178997039795, |
| "learning_rate": 7.305970412752909e-07, |
| "loss": 0.5168, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.6033357505438723, |
| "grad_norm": 3.04964542388916, |
| "learning_rate": 7.245635745329809e-07, |
| "loss": 0.4598, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.6052695189751027, |
| "grad_norm": 2.9642903804779053, |
| "learning_rate": 7.185409381843725e-07, |
| "loss": 0.4998, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.6072032874063331, |
| "grad_norm": 2.8785696029663086, |
| "learning_rate": 7.125293690446306e-07, |
| "loss": 0.5129, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.6091370558375635, |
| "grad_norm": 3.1034622192382812, |
| "learning_rate": 7.065291034937495e-07, |
| "loss": 0.5073, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.6110708242687938, |
| "grad_norm": 2.8777222633361816, |
| "learning_rate": 7.005403774672559e-07, |
| "loss": 0.5226, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.6130045927000242, |
| "grad_norm": 3.067791223526001, |
| "learning_rate": 6.945634264469337e-07, |
| "loss": 0.4938, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.6149383611312546, |
| "grad_norm": 3.056411027908325, |
| "learning_rate": 6.885984854515623e-07, |
| "loss": 0.5097, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.6168721295624849, |
| "grad_norm": 3.0727953910827637, |
| "learning_rate": 6.826457890276772e-07, |
| "loss": 0.5011, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.6188058979937152, |
| "grad_norm": 2.822244167327881, |
| "learning_rate": 6.76705571240348e-07, |
| "loss": 0.4943, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6207396664249456, |
| "grad_norm": 3.038038969039917, |
| "learning_rate": 6.707780656639721e-07, |
| "loss": 0.5453, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.622673434856176, |
| "grad_norm": 2.872443437576294, |
| "learning_rate": 6.64863505373093e-07, |
| "loss": 0.5338, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.6246072032874064, |
| "grad_norm": 3.021986961364746, |
| "learning_rate": 6.58962122933234e-07, |
| "loss": 0.5245, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.6265409717186367, |
| "grad_norm": 2.882286787033081, |
| "learning_rate": 6.53074150391755e-07, |
| "loss": 0.4942, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.628474740149867, |
| "grad_norm": 2.975586414337158, |
| "learning_rate": 6.471998192687265e-07, |
| "loss": 0.5537, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.6304085085810974, |
| "grad_norm": 2.871222734451294, |
| "learning_rate": 6.413393605478274e-07, |
| "loss": 0.4847, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.6323422770123278, |
| "grad_norm": 2.9107320308685303, |
| "learning_rate": 6.35493004667261e-07, |
| "loss": 0.493, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.6342760454435581, |
| "grad_norm": 2.997209310531616, |
| "learning_rate": 6.29660981510697e-07, |
| "loss": 0.504, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.6362098138747885, |
| "grad_norm": 2.9243879318237305, |
| "learning_rate": 6.238435203982277e-07, |
| "loss": 0.517, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.6381435823060189, |
| "grad_norm": 2.822188377380371, |
| "learning_rate": 6.180408500773557e-07, |
| "loss": 0.4736, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.6400773507372493, |
| "grad_norm": 2.937476873397827, |
| "learning_rate": 6.122531987139954e-07, |
| "loss": 0.4467, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.6420111191684795, |
| "grad_norm": 2.9560484886169434, |
| "learning_rate": 6.064807938835046e-07, |
| "loss": 0.5076, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.6439448875997099, |
| "grad_norm": 2.906756639480591, |
| "learning_rate": 6.007238625617333e-07, |
| "loss": 0.5161, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.6458786560309403, |
| "grad_norm": 2.875453233718872, |
| "learning_rate": 5.949826311161006e-07, |
| "loss": 0.5277, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.6478124244621707, |
| "grad_norm": 3.082592248916626, |
| "learning_rate": 5.892573252966926e-07, |
| "loss": 0.4835, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.649746192893401, |
| "grad_norm": 2.8646011352539062, |
| "learning_rate": 5.835481702273878e-07, |
| "loss": 0.5205, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.6516799613246314, |
| "grad_norm": 2.8505988121032715, |
| "learning_rate": 5.778553903970009e-07, |
| "loss": 0.5463, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.6536137297558617, |
| "grad_norm": 2.866309642791748, |
| "learning_rate": 5.72179209650461e-07, |
| "loss": 0.5178, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.6555474981870921, |
| "grad_norm": 2.6980485916137695, |
| "learning_rate": 5.665198511800063e-07, |
| "loss": 0.4907, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.6574812666183224, |
| "grad_norm": 3.0001935958862305, |
| "learning_rate": 5.608775375164072e-07, |
| "loss": 0.5227, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6594150350495528, |
| "grad_norm": 2.840331554412842, |
| "learning_rate": 5.552524905202207e-07, |
| "loss": 0.4867, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.6613488034807832, |
| "grad_norm": 2.9088988304138184, |
| "learning_rate": 5.496449313730608e-07, |
| "loss": 0.5486, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.6632825719120136, |
| "grad_norm": 2.773866653442383, |
| "learning_rate": 5.440550805689075e-07, |
| "loss": 0.5258, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.665216340343244, |
| "grad_norm": 3.12353777885437, |
| "learning_rate": 5.384831579054312e-07, |
| "loss": 0.5713, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.6671501087744742, |
| "grad_norm": 2.9296865463256836, |
| "learning_rate": 5.329293824753538e-07, |
| "loss": 0.5194, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.6690838772057046, |
| "grad_norm": 3.0712759494781494, |
| "learning_rate": 5.273939726578326e-07, |
| "loss": 0.553, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.671017645636935, |
| "grad_norm": 3.2009084224700928, |
| "learning_rate": 5.218771461098732e-07, |
| "loss": 0.5197, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.6729514140681654, |
| "grad_norm": 2.914090394973755, |
| "learning_rate": 5.163791197577713e-07, |
| "loss": 0.5196, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.6748851824993957, |
| "grad_norm": 2.893239974975586, |
| "learning_rate": 5.109001097885837e-07, |
| "loss": 0.5387, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.676818950930626, |
| "grad_norm": 2.7517759799957275, |
| "learning_rate": 5.054403316416247e-07, |
| "loss": 0.4814, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6787527193618564, |
| "grad_norm": 2.873094081878662, |
| "learning_rate": 5.000000000000002e-07, |
| "loss": 0.5122, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.6806864877930868, |
| "grad_norm": 2.7496497631073, |
| "learning_rate": 4.945793287821604e-07, |
| "loss": 0.5086, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.6826202562243171, |
| "grad_norm": 2.7671315670013428, |
| "learning_rate": 4.891785311334922e-07, |
| "loss": 0.4881, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.6845540246555475, |
| "grad_norm": 2.9487366676330566, |
| "learning_rate": 4.837978194179369e-07, |
| "loss": 0.5329, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.6864877930867779, |
| "grad_norm": 2.971665143966675, |
| "learning_rate": 4.784374052096396e-07, |
| "loss": 0.5509, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.6884215615180083, |
| "grad_norm": 3.0846035480499268, |
| "learning_rate": 4.730974992846303e-07, |
| "loss": 0.4538, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.6903553299492385, |
| "grad_norm": 2.890350103378296, |
| "learning_rate": 4.677783116125361e-07, |
| "loss": 0.5101, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.6922890983804689, |
| "grad_norm": 2.8605659008026123, |
| "learning_rate": 4.624800513483239e-07, |
| "loss": 0.5224, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.6942228668116993, |
| "grad_norm": 2.946995735168457, |
| "learning_rate": 4.572029268240787e-07, |
| "loss": 0.505, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.6961566352429297, |
| "grad_norm": 2.8882555961608887, |
| "learning_rate": 4.519471455408098e-07, |
| "loss": 0.5041, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.69809040367416, |
| "grad_norm": 2.8211588859558105, |
| "learning_rate": 4.4671291416029055e-07, |
| "loss": 0.5471, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.7000241721053904, |
| "grad_norm": 3.0928735733032227, |
| "learning_rate": 4.4150043849693695e-07, |
| "loss": 0.4627, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.7019579405366208, |
| "grad_norm": 2.9646997451782227, |
| "learning_rate": 4.3630992350970865e-07, |
| "loss": 0.5327, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.7038917089678511, |
| "grad_norm": 3.0351743698120117, |
| "learning_rate": 4.3114157329405487e-07, |
| "loss": 0.5292, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.7058254773990814, |
| "grad_norm": 2.928863525390625, |
| "learning_rate": 4.259955910738864e-07, |
| "loss": 0.5245, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.7077592458303118, |
| "grad_norm": 2.761915445327759, |
| "learning_rate": 4.20872179193586e-07, |
| "loss": 0.4653, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.7096930142615422, |
| "grad_norm": 2.9401397705078125, |
| "learning_rate": 4.157715391100509e-07, |
| "loss": 0.518, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.7116267826927726, |
| "grad_norm": 2.8561716079711914, |
| "learning_rate": 4.1069387138477297e-07, |
| "loss": 0.4474, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.7135605511240029, |
| "grad_norm": 3.0250120162963867, |
| "learning_rate": 4.0563937567594974e-07, |
| "loss": 0.4947, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.7154943195552332, |
| "grad_norm": 3.0572850704193115, |
| "learning_rate": 4.0060825073063785e-07, |
| "loss": 0.4763, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.7174280879864636, |
| "grad_norm": 3.2508955001831055, |
| "learning_rate": 3.95600694376933e-07, |
| "loss": 0.5261, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.719361856417694, |
| "grad_norm": 2.9887571334838867, |
| "learning_rate": 3.906169035161967e-07, |
| "loss": 0.5028, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.7212956248489243, |
| "grad_norm": 3.0212152004241943, |
| "learning_rate": 3.8565707411530866e-07, |
| "loss": 0.5344, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.7232293932801547, |
| "grad_norm": 2.8374485969543457, |
| "learning_rate": 3.80721401198965e-07, |
| "loss": 0.457, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.7251631617113851, |
| "grad_norm": 2.9122722148895264, |
| "learning_rate": 3.7581007884200824e-07, |
| "loss": 0.496, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.7270969301426154, |
| "grad_norm": 3.086984157562256, |
| "learning_rate": 3.7092330016179605e-07, |
| "loss": 0.5244, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.7290306985738458, |
| "grad_norm": 2.797271490097046, |
| "learning_rate": 3.660612573106081e-07, |
| "loss": 0.4423, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.7309644670050761, |
| "grad_norm": 3.072042942047119, |
| "learning_rate": 3.612241414680901e-07, |
| "loss": 0.5059, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.7328982354363065, |
| "grad_norm": 2.745339870452881, |
| "learning_rate": 3.564121428337369e-07, |
| "loss": 0.4813, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.7348320038675369, |
| "grad_norm": 3.1032216548919678, |
| "learning_rate": 3.5162545061941327e-07, |
| "loss": 0.5213, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.7367657722987673, |
| "grad_norm": 2.9812653064727783, |
| "learning_rate": 3.4686425304191436e-07, |
| "loss": 0.4848, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.7386995407299976, |
| "grad_norm": 3.1721439361572266, |
| "learning_rate": 3.421287373155636e-07, |
| "loss": 0.4748, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.7406333091612279, |
| "grad_norm": 2.7179818153381348, |
| "learning_rate": 3.374190896448541e-07, |
| "loss": 0.4638, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.7425670775924583, |
| "grad_norm": 2.983214855194092, |
| "learning_rate": 3.327354952171232e-07, |
| "loss": 0.4854, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.7445008460236887, |
| "grad_norm": 2.9129269123077393, |
| "learning_rate": 3.2807813819527374e-07, |
| "loss": 0.5304, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.746434614454919, |
| "grad_norm": 2.8850767612457275, |
| "learning_rate": 3.234472017105313e-07, |
| "loss": 0.5264, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.7483683828861494, |
| "grad_norm": 2.805777072906494, |
| "learning_rate": 3.188428678552435e-07, |
| "loss": 0.5166, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.7503021513173798, |
| "grad_norm": 2.8849081993103027, |
| "learning_rate": 3.1426531767572e-07, |
| "loss": 0.4284, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.7522359197486101, |
| "grad_norm": 3.0169715881347656, |
| "learning_rate": 3.0971473116511393e-07, |
| "loss": 0.522, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.7541696881798404, |
| "grad_norm": 3.2479677200317383, |
| "learning_rate": 3.0519128725634293e-07, |
| "loss": 0.5187, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.7561034566110708, |
| "grad_norm": 3.006490468978882, |
| "learning_rate": 3.006951638150567e-07, |
| "loss": 0.5139, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.7580372250423012, |
| "grad_norm": 3.2848265171051025, |
| "learning_rate": 2.9622653763263873e-07, |
| "loss": 0.5388, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.7599709934735316, |
| "grad_norm": 3.011970281600952, |
| "learning_rate": 2.917855844192584e-07, |
| "loss": 0.5163, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.7619047619047619, |
| "grad_norm": 3.037081480026245, |
| "learning_rate": 2.8737247879696e-07, |
| "loss": 0.5292, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.7638385303359922, |
| "grad_norm": 3.168532133102417, |
| "learning_rate": 2.8298739429279705e-07, |
| "loss": 0.5267, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.7657722987672226, |
| "grad_norm": 3.2073962688446045, |
| "learning_rate": 2.786305033320089e-07, |
| "loss": 0.5159, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.767706067198453, |
| "grad_norm": 3.0780181884765625, |
| "learning_rate": 2.7430197723124115e-07, |
| "loss": 0.5512, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.7696398356296833, |
| "grad_norm": 2.9060723781585693, |
| "learning_rate": 2.700019861918079e-07, |
| "loss": 0.5404, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.7715736040609137, |
| "grad_norm": 2.9640395641326904, |
| "learning_rate": 2.657306992930024e-07, |
| "loss": 0.496, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.7735073724921441, |
| "grad_norm": 3.119464159011841, |
| "learning_rate": 2.6148828448544566e-07, |
| "loss": 0.5233, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7754411409233745, |
| "grad_norm": 3.170423746109009, |
| "learning_rate": 2.5727490858448285e-07, |
| "loss": 0.5412, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.7773749093546047, |
| "grad_norm": 3.0757274627685547, |
| "learning_rate": 2.530907372636265e-07, |
| "loss": 0.4974, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.7793086777858351, |
| "grad_norm": 3.033518075942993, |
| "learning_rate": 2.4893593504803823e-07, |
| "loss": 0.5243, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.7812424462170655, |
| "grad_norm": 2.8962435722351074, |
| "learning_rate": 2.4481066530806393e-07, |
| "loss": 0.4652, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.7831762146482959, |
| "grad_norm": 2.9113543033599854, |
| "learning_rate": 2.407150902528052e-07, |
| "loss": 0.4438, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.7851099830795262, |
| "grad_norm": 3.034170627593994, |
| "learning_rate": 2.3664937092374492e-07, |
| "loss": 0.5339, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.7870437515107566, |
| "grad_norm": 2.8349618911743164, |
| "learning_rate": 2.3261366718841303e-07, |
| "loss": 0.5154, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.788977519941987, |
| "grad_norm": 2.9790327548980713, |
| "learning_rate": 2.2860813773410103e-07, |
| "loss": 0.5295, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.7909112883732173, |
| "grad_norm": 2.9119043350219727, |
| "learning_rate": 2.2463294006162182e-07, |
| "loss": 0.5236, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.7928450568044476, |
| "grad_norm": 3.071845531463623, |
| "learning_rate": 2.2068823047911755e-07, |
| "loss": 0.4924, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.794778825235678, |
| "grad_norm": 2.9019951820373535, |
| "learning_rate": 2.1677416409591165e-07, |
| "loss": 0.5215, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.7967125936669084, |
| "grad_norm": 2.825124740600586, |
| "learning_rate": 2.1289089481641277e-07, |
| "loss": 0.4671, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.7986463620981388, |
| "grad_norm": 2.964590311050415, |
| "learning_rate": 2.0903857533405954e-07, |
| "loss": 0.5312, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.8005801305293692, |
| "grad_norm": 3.303433656692505, |
| "learning_rate": 2.052173571253193e-07, |
| "loss": 0.5184, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.8025138989605994, |
| "grad_norm": 3.0383520126342773, |
| "learning_rate": 2.0142739044373092e-07, |
| "loss": 0.509, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.8044476673918298, |
| "grad_norm": 2.9449782371520996, |
| "learning_rate": 1.9766882431399646e-07, |
| "loss": 0.507, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.8063814358230602, |
| "grad_norm": 2.8558340072631836, |
| "learning_rate": 1.939418065261219e-07, |
| "loss": 0.4788, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.8083152042542906, |
| "grad_norm": 2.813002109527588, |
| "learning_rate": 1.9024648362960539e-07, |
| "loss": 0.5176, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.8102489726855209, |
| "grad_norm": 3.0548245906829834, |
| "learning_rate": 1.8658300092767543e-07, |
| "loss": 0.5363, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.8121827411167513, |
| "grad_norm": 2.9029524326324463, |
| "learning_rate": 1.8295150247157686e-07, |
| "loss": 0.5361, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.8141165095479816, |
| "grad_norm": 3.0287888050079346, |
| "learning_rate": 1.7935213105490722e-07, |
| "loss": 0.5176, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.816050277979212, |
| "grad_norm": 2.9426074028015137, |
| "learning_rate": 1.7578502820800046e-07, |
| "loss": 0.4806, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.8179840464104423, |
| "grad_norm": 3.0299363136291504, |
| "learning_rate": 1.72250334192365e-07, |
| "loss": 0.4972, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.8199178148416727, |
| "grad_norm": 2.845245838165283, |
| "learning_rate": 1.6874818799516465e-07, |
| "loss": 0.5033, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.8218515832729031, |
| "grad_norm": 2.8781020641326904, |
| "learning_rate": 1.6527872732375648e-07, |
| "loss": 0.508, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.8237853517041335, |
| "grad_norm": 2.969726324081421, |
| "learning_rate": 1.618420886002747e-07, |
| "loss": 0.5408, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.8257191201353637, |
| "grad_norm": 2.890504837036133, |
| "learning_rate": 1.5843840695626666e-07, |
| "loss": 0.5224, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.8276528885665941, |
| "grad_norm": 2.945122241973877, |
| "learning_rate": 1.5506781622737942e-07, |
| "loss": 0.4949, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.8295866569978245, |
| "grad_norm": 2.883887767791748, |
| "learning_rate": 1.5173044894809762e-07, |
| "loss": 0.492, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.8315204254290549, |
| "grad_norm": 2.9001541137695312, |
| "learning_rate": 1.4842643634653052e-07, |
| "loss": 0.5115, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.8334541938602852, |
| "grad_norm": 2.7559828758239746, |
| "learning_rate": 1.4515590833925506e-07, |
| "loss": 0.4998, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.8353879622915156, |
| "grad_norm": 2.8526365756988525, |
| "learning_rate": 1.419189935262034e-07, |
| "loss": 0.5091, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.837321730722746, |
| "grad_norm": 2.8609507083892822, |
| "learning_rate": 1.3871581918561048e-07, |
| "loss": 0.4773, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.8392554991539763, |
| "grad_norm": 3.009993314743042, |
| "learning_rate": 1.3554651126900564e-07, |
| "loss": 0.4892, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.8411892675852066, |
| "grad_norm": 3.0874485969543457, |
| "learning_rate": 1.3241119439626258e-07, |
| "loss": 0.4492, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.843123036016437, |
| "grad_norm": 2.902095079421997, |
| "learning_rate": 1.29309991850698e-07, |
| "loss": 0.4865, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.8450568044476674, |
| "grad_norm": 2.789257764816284, |
| "learning_rate": 1.2624302557422472e-07, |
| "loss": 0.5034, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.8469905728788978, |
| "grad_norm": 2.9948341846466064, |
| "learning_rate": 1.232104161625561e-07, |
| "loss": 0.5115, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.8489243413101281, |
| "grad_norm": 3.0060694217681885, |
| "learning_rate": 1.202122828604646e-07, |
| "loss": 0.4999, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.8508581097413584, |
| "grad_norm": 3.115152597427368, |
| "learning_rate": 1.1724874355709258e-07, |
| "loss": 0.4844, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.8527918781725888, |
| "grad_norm": 2.870953321456909, |
| "learning_rate": 1.1431991478131752e-07, |
| "loss": 0.5252, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.8547256466038192, |
| "grad_norm": 2.8833727836608887, |
| "learning_rate": 1.114259116971693e-07, |
| "loss": 0.481, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.8566594150350495, |
| "grad_norm": 3.122464179992676, |
| "learning_rate": 1.085668480993015e-07, |
| "loss": 0.5057, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.8585931834662799, |
| "grad_norm": 2.969245672225952, |
| "learning_rate": 1.0574283640851889e-07, |
| "loss": 0.5017, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.8605269518975103, |
| "grad_norm": 2.9073445796966553, |
| "learning_rate": 1.0295398766735409e-07, |
| "loss": 0.525, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.8624607203287407, |
| "grad_norm": 2.967390775680542, |
| "learning_rate": 1.0020041153570347e-07, |
| "loss": 0.5246, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.864394488759971, |
| "grad_norm": 2.8257503509521484, |
| "learning_rate": 9.748221628651443e-08, |
| "loss": 0.4992, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.8663282571912013, |
| "grad_norm": 2.900615930557251, |
| "learning_rate": 9.479950880152809e-08, |
| "loss": 0.5078, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.8682620256224317, |
| "grad_norm": 2.8263025283813477, |
| "learning_rate": 9.215239456707636e-08, |
| "loss": 0.4673, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.8701957940536621, |
| "grad_norm": 2.6599619388580322, |
| "learning_rate": 8.954097766993496e-08, |
| "loss": 0.4837, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.8721295624848925, |
| "grad_norm": 2.919358491897583, |
| "learning_rate": 8.696536079322902e-08, |
| "loss": 0.4759, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.8740633309161228, |
| "grad_norm": 3.0110116004943848, |
| "learning_rate": 8.442564521239781e-08, |
| "loss": 0.509, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.8759970993473531, |
| "grad_norm": 2.8107213973999023, |
| "learning_rate": 8.192193079121002e-08, |
| "loss": 0.4791, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.8779308677785835, |
| "grad_norm": 2.792571783065796, |
| "learning_rate": 7.945431597783902e-08, |
| "loss": 0.4883, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.8798646362098139, |
| "grad_norm": 3.048387289047241, |
| "learning_rate": 7.70228978009907e-08, |
| "loss": 0.5173, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.8817984046410442, |
| "grad_norm": 2.8160572052001953, |
| "learning_rate": 7.462777186608849e-08, |
| "loss": 0.4777, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.8837321730722746, |
| "grad_norm": 2.8880162239074707, |
| "learning_rate": 7.226903235151438e-08, |
| "loss": 0.4429, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.885665941503505, |
| "grad_norm": 3.1718862056732178, |
| "learning_rate": 6.994677200490507e-08, |
| "loss": 0.5493, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.8875997099347354, |
| "grad_norm": 3.0768024921417236, |
| "learning_rate": 6.766108213950583e-08, |
| "loss": 0.5329, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.8895334783659656, |
| "grad_norm": 3.097865581512451, |
| "learning_rate": 6.541205263057936e-08, |
| "loss": 0.5059, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.891467246797196, |
| "grad_norm": 2.9065606594085693, |
| "learning_rate": 6.319977191187231e-08, |
| "loss": 0.5083, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.8934010152284264, |
| "grad_norm": 2.651416540145874, |
| "learning_rate": 6.102432697213733e-08, |
| "loss": 0.4076, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.8953347836596568, |
| "grad_norm": 3.1337976455688477, |
| "learning_rate": 5.888580335171367e-08, |
| "loss": 0.5136, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.8972685520908871, |
| "grad_norm": 2.9478445053100586, |
| "learning_rate": 5.6784285139162116e-08, |
| "loss": 0.5605, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.8992023205221175, |
| "grad_norm": 2.8331518173217773, |
| "learning_rate": 5.47198549679605e-08, |
| "loss": 0.4823, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.9011360889533478, |
| "grad_norm": 2.9675228595733643, |
| "learning_rate": 5.269259401325254e-08, |
| "loss": 0.5216, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.9030698573845782, |
| "grad_norm": 3.129606246948242, |
| "learning_rate": 5.07025819886574e-08, |
| "loss": 0.5425, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.9050036258158085, |
| "grad_norm": 2.961496353149414, |
| "learning_rate": 4.8749897143134486e-08, |
| "loss": 0.5506, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.9069373942470389, |
| "grad_norm": 2.621377944946289, |
| "learning_rate": 4.6834616257906966e-08, |
| "loss": 0.5214, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.9088711626782693, |
| "grad_norm": 2.75648832321167, |
| "learning_rate": 4.495681464344259e-08, |
| "loss": 0.4796, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.9108049311094997, |
| "grad_norm": 2.8078525066375732, |
| "learning_rate": 4.3116566136492506e-08, |
| "loss": 0.4867, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.91273869954073, |
| "grad_norm": 2.8183205127716064, |
| "learning_rate": 4.1313943097187495e-08, |
| "loss": 0.4663, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.9146724679719603, |
| "grad_norm": 2.8927807807922363, |
| "learning_rate": 3.954901640619368e-08, |
| "loss": 0.4743, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.9166062364031907, |
| "grad_norm": 2.9635002613067627, |
| "learning_rate": 3.782185546192407e-08, |
| "loss": 0.4839, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.9185400048344211, |
| "grad_norm": 2.9816319942474365, |
| "learning_rate": 3.6132528177811094e-08, |
| "loss": 0.5449, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.9204737732656514, |
| "grad_norm": 3.050773859024048, |
| "learning_rate": 3.44811009796353e-08, |
| "loss": 0.4857, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.9224075416968818, |
| "grad_norm": 2.933197259902954, |
| "learning_rate": 3.286763880291399e-08, |
| "loss": 0.4848, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.9243413101281122, |
| "grad_norm": 2.923037528991699, |
| "learning_rate": 3.129220509034724e-08, |
| "loss": 0.5111, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.9262750785593425, |
| "grad_norm": 2.874183177947998, |
| "learning_rate": 2.975486178932407e-08, |
| "loss": 0.4919, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.9282088469905729, |
| "grad_norm": 2.9910435676574707, |
| "learning_rate": 2.8255669349485978e-08, |
| "loss": 0.4473, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.9301426154218032, |
| "grad_norm": 2.9716756343841553, |
| "learning_rate": 2.679468672035057e-08, |
| "loss": 0.4508, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.9320763838530336, |
| "grad_norm": 2.8846945762634277, |
| "learning_rate": 2.5371971348992938e-08, |
| "loss": 0.463, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.934010152284264, |
| "grad_norm": 2.8598673343658447, |
| "learning_rate": 2.3987579177787263e-08, |
| "loss": 0.5179, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.9359439207154944, |
| "grad_norm": 2.910398006439209, |
| "learning_rate": 2.2641564642207346e-08, |
| "loss": 0.5172, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.9378776891467246, |
| "grad_norm": 3.295294761657715, |
| "learning_rate": 2.1333980668685413e-08, |
| "loss": 0.5162, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.939811457577955, |
| "grad_norm": 3.001046895980835, |
| "learning_rate": 2.0064878672531483e-08, |
| "loss": 0.5116, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.9417452260091854, |
| "grad_norm": 2.902860641479492, |
| "learning_rate": 1.8834308555911728e-08, |
| "loss": 0.5198, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.9436789944404158, |
| "grad_norm": 2.8829078674316406, |
| "learning_rate": 1.7642318705886284e-08, |
| "loss": 0.5099, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.9456127628716461, |
| "grad_norm": 2.825139045715332, |
| "learning_rate": 1.6488955992506216e-08, |
| "loss": 0.5034, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.9475465313028765, |
| "grad_norm": 2.812983274459839, |
| "learning_rate": 1.5374265766971096e-08, |
| "loss": 0.4804, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.9494802997341069, |
| "grad_norm": 3.0010793209075928, |
| "learning_rate": 1.4298291859845212e-08, |
| "loss": 0.4755, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.9514140681653372, |
| "grad_norm": 2.9898478984832764, |
| "learning_rate": 1.3261076579334729e-08, |
| "loss": 0.5148, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.9533478365965675, |
| "grad_norm": 2.897951364517212, |
| "learning_rate": 1.2262660709623229e-08, |
| "loss": 0.5036, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.9552816050277979, |
| "grad_norm": 2.879439115524292, |
| "learning_rate": 1.1303083509269451e-08, |
| "loss": 0.5003, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.9572153734590283, |
| "grad_norm": 3.1193387508392334, |
| "learning_rate": 1.0382382709661853e-08, |
| "loss": 0.4836, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.9591491418902587, |
| "grad_norm": 2.995630979537964, |
| "learning_rate": 9.500594513536352e-09, |
| "loss": 0.5322, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.961082910321489, |
| "grad_norm": 2.8320207595825195, |
| "learning_rate": 8.657753593552142e-09, |
| "loss": 0.5023, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.9630166787527193, |
| "grad_norm": 2.8688549995422363, |
| "learning_rate": 7.853893090928654e-09, |
| "loss": 0.5112, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.9649504471839497, |
| "grad_norm": 2.9314334392547607, |
| "learning_rate": 7.0890446141421704e-09, |
| "loss": 0.4959, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.9668842156151801, |
| "grad_norm": 3.1379406452178955, |
| "learning_rate": 6.363238237683033e-09, |
| "loss": 0.5223, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.9688179840464104, |
| "grad_norm": 3.0119411945343018, |
| "learning_rate": 5.676502500873037e-09, |
| "loss": 0.4634, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.9707517524776408, |
| "grad_norm": 2.8588473796844482, |
| "learning_rate": 5.028864406743549e-09, |
| "loss": 0.5054, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.9726855209088712, |
| "grad_norm": 3.05617618560791, |
| "learning_rate": 4.420349420973357e-09, |
| "loss": 0.56, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.9746192893401016, |
| "grad_norm": 2.8269994258880615, |
| "learning_rate": 3.85098147088736e-09, |
| "loss": 0.4795, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.9765530577713318, |
| "grad_norm": 3.081051826477051, |
| "learning_rate": 3.3207829445159872e-09, |
| "loss": 0.532, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.9784868262025622, |
| "grad_norm": 2.9081430435180664, |
| "learning_rate": 2.8297746897146813e-09, |
| "loss": 0.4936, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.9804205946337926, |
| "grad_norm": 2.7562308311462402, |
| "learning_rate": 2.3779760133441075e-09, |
| "loss": 0.4376, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.982354363065023, |
| "grad_norm": 3.053764820098877, |
| "learning_rate": 1.9654046805112067e-09, |
| "loss": 0.5243, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.9842881314962533, |
| "grad_norm": 3.0876071453094482, |
| "learning_rate": 1.5920769138706436e-09, |
| "loss": 0.4791, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.9862218999274837, |
| "grad_norm": 2.8327271938323975, |
| "learning_rate": 1.2580073929863156e-09, |
| "loss": 0.4757, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.988155668358714, |
| "grad_norm": 2.7686331272125244, |
| "learning_rate": 9.632092537551483e-10, |
| "loss": 0.5029, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.9900894367899444, |
| "grad_norm": 2.8857767581939697, |
| "learning_rate": 7.076940878896209e-10, |
| "loss": 0.4867, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.9920232052211748, |
| "grad_norm": 2.8375306129455566, |
| "learning_rate": 4.914719424629066e-10, |
| "loss": 0.5285, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.9939569736524051, |
| "grad_norm": 2.8377671241760254, |
| "learning_rate": 3.1455131951285685e-10, |
| "loss": 0.5036, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.9958907420836355, |
| "grad_norm": 3.5421719551086426, |
| "learning_rate": 1.7693917570837936e-10, |
| "loss": 0.5094, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.9978245105148659, |
| "grad_norm": 3.072711229324341, |
| "learning_rate": 7.864092207554573e-11, |
| "loss": 0.5009, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.9997582789460963, |
| "grad_norm": 2.859100341796875, |
| "learning_rate": 1.9660423784984005e-11, |
| "loss": 0.5103, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.9997582789460963, |
| "step": 517, |
| "total_flos": 1.8974566875817574e+18, |
| "train_loss": 0.5522731098722904, |
| "train_runtime": 18088.4019, |
| "train_samples_per_second": 1.83, |
| "train_steps_per_second": 0.029 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 517, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.8974566875817574e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |