| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1680, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0035714285714285713, |
| "grad_norm": 0.35824885964393616, |
| "learning_rate": 1.1904761904761904e-12, |
| "loss": 1.8791723251342773, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.007142857142857143, |
| "grad_norm": 1.133953332901001, |
| "learning_rate": 3.5714285714285712e-12, |
| "loss": 1.8177907466888428, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.010714285714285714, |
| "grad_norm": 0.31963008642196655, |
| "learning_rate": 5.952380952380952e-12, |
| "loss": 1.8180923461914062, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.014285714285714285, |
| "grad_norm": 0.38317519426345825, |
| "learning_rate": 8.333333333333334e-12, |
| "loss": 1.890156626701355, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.017857142857142856, |
| "grad_norm": 0.22351811826229095, |
| "learning_rate": 1.0714285714285714e-11, |
| "loss": 1.8039294481277466, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02142857142857143, |
| "grad_norm": 0.2431482970714569, |
| "learning_rate": 1.3095238095238097e-11, |
| "loss": 1.7611536979675293, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.025, |
| "grad_norm": 0.6136751770973206, |
| "learning_rate": 1.5476190476190478e-11, |
| "loss": 1.803197979927063, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.02857142857142857, |
| "grad_norm": 0.2537895441055298, |
| "learning_rate": 1.7857142857142857e-11, |
| "loss": 1.7924619913101196, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03214285714285714, |
| "grad_norm": 0.7044904232025146, |
| "learning_rate": 2.023809523809524e-11, |
| "loss": 1.8655799627304077, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.03571428571428571, |
| "grad_norm": 0.25141608715057373, |
| "learning_rate": 2.261904761904762e-11, |
| "loss": 1.7573883533477783, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.039285714285714285, |
| "grad_norm": 0.9878725409507751, |
| "learning_rate": 2.5e-11, |
| "loss": 1.7613331079483032, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.04285714285714286, |
| "grad_norm": 0.6218051910400391, |
| "learning_rate": 2.7380952380952383e-11, |
| "loss": 2.009953498840332, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.04642857142857143, |
| "grad_norm": 0.3609226644039154, |
| "learning_rate": 2.976190476190476e-11, |
| "loss": 1.8607038259506226, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.24035774171352386, |
| "learning_rate": 3.214285714285715e-11, |
| "loss": 1.5517598390579224, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.05357142857142857, |
| "grad_norm": 0.2459728866815567, |
| "learning_rate": 3.452380952380953e-11, |
| "loss": 1.8139374256134033, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.05714285714285714, |
| "grad_norm": 0.25730156898498535, |
| "learning_rate": 3.6904761904761907e-11, |
| "loss": 1.8025331497192383, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.060714285714285714, |
| "grad_norm": 0.353179395198822, |
| "learning_rate": 3.9285714285714286e-11, |
| "loss": 1.8871498107910156, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.06428571428571428, |
| "grad_norm": 0.3843620717525482, |
| "learning_rate": 4.166666666666667e-11, |
| "loss": 1.9136126041412354, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.06785714285714285, |
| "grad_norm": 0.62681645154953, |
| "learning_rate": 4.404761904761905e-11, |
| "loss": 2.016162157058716, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 1.0013960599899292, |
| "learning_rate": 4.642857142857143e-11, |
| "loss": 1.8622362613677979, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.075, |
| "grad_norm": 0.3581591248512268, |
| "learning_rate": 4.880952380952381e-11, |
| "loss": 1.8666965961456299, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.07857142857142857, |
| "grad_norm": 0.5053503513336182, |
| "learning_rate": 5.119047619047619e-11, |
| "loss": 1.8547173738479614, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.08214285714285714, |
| "grad_norm": 0.3650856614112854, |
| "learning_rate": 5.3571428571428574e-11, |
| "loss": 1.8626824617385864, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.08571428571428572, |
| "grad_norm": 0.26654282212257385, |
| "learning_rate": 5.595238095238095e-11, |
| "loss": 1.6219651699066162, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.08928571428571429, |
| "grad_norm": 0.3208557963371277, |
| "learning_rate": 5.833333333333334e-11, |
| "loss": 1.762930154800415, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09285714285714286, |
| "grad_norm": 0.2823070287704468, |
| "learning_rate": 6.071428571428571e-11, |
| "loss": 1.720660924911499, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.09642857142857143, |
| "grad_norm": 0.3506131172180176, |
| "learning_rate": 6.30952380952381e-11, |
| "loss": 1.836098074913025, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.41053706407546997, |
| "learning_rate": 6.547619047619048e-11, |
| "loss": 1.6985701322555542, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.10357142857142858, |
| "grad_norm": 0.45289528369903564, |
| "learning_rate": 6.785714285714287e-11, |
| "loss": 1.855586051940918, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.10714285714285714, |
| "grad_norm": 2.517709732055664, |
| "learning_rate": 7.023809523809524e-11, |
| "loss": 1.9189352989196777, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.11071428571428571, |
| "grad_norm": 0.3041253387928009, |
| "learning_rate": 7.261904761904761e-11, |
| "loss": 1.7433679103851318, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.11428571428571428, |
| "grad_norm": 0.36612775921821594, |
| "learning_rate": 7.5e-11, |
| "loss": 1.9802634716033936, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.11785714285714285, |
| "grad_norm": 0.5854454040527344, |
| "learning_rate": 7.738095238095239e-11, |
| "loss": 1.9604766368865967, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.12142857142857143, |
| "grad_norm": 0.43273574113845825, |
| "learning_rate": 7.976190476190477e-11, |
| "loss": 1.8710302114486694, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 0.7926504015922546, |
| "learning_rate": 8.214285714285714e-11, |
| "loss": 1.9847257137298584, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.12857142857142856, |
| "grad_norm": 1.5940577983856201, |
| "learning_rate": 8.452380952380953e-11, |
| "loss": 2.3266658782958984, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.13214285714285715, |
| "grad_norm": 0.16762828826904297, |
| "learning_rate": 8.690476190476191e-11, |
| "loss": 1.4378584623336792, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.1357142857142857, |
| "grad_norm": 0.29286450147628784, |
| "learning_rate": 8.928571428571429e-11, |
| "loss": 1.778782606124878, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.1392857142857143, |
| "grad_norm": 0.5462100505828857, |
| "learning_rate": 9.166666666666666e-11, |
| "loss": 1.8584684133529663, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 0.24206940829753876, |
| "learning_rate": 9.404761904761905e-11, |
| "loss": 1.8260565996170044, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.14642857142857144, |
| "grad_norm": 0.4532042145729065, |
| "learning_rate": 9.642857142857143e-11, |
| "loss": 1.7921696901321411, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.3084694743156433, |
| "learning_rate": 9.880952380952382e-11, |
| "loss": 1.8815988302230835, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.15357142857142858, |
| "grad_norm": 0.31026917695999146, |
| "learning_rate": 9.999991282010347e-11, |
| "loss": 1.934004783630371, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.15714285714285714, |
| "grad_norm": 0.22002796828746796, |
| "learning_rate": 9.999921538295798e-11, |
| "loss": 1.8987516164779663, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.16071428571428573, |
| "grad_norm": 0.2701459228992462, |
| "learning_rate": 9.999782051947631e-11, |
| "loss": 1.768871545791626, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.16428571428571428, |
| "grad_norm": 0.28832632303237915, |
| "learning_rate": 9.999572825127696e-11, |
| "loss": 1.8338327407836914, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.16785714285714284, |
| "grad_norm": 0.42094674706459045, |
| "learning_rate": 9.99929386107872e-11, |
| "loss": 1.8449537754058838, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.17142857142857143, |
| "grad_norm": 0.31477025151252747, |
| "learning_rate": 9.998945164124268e-11, |
| "loss": 1.8257495164871216, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.175, |
| "grad_norm": 0.2591283321380615, |
| "learning_rate": 9.998526739668663e-11, |
| "loss": 1.876816987991333, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.17857142857142858, |
| "grad_norm": 0.32023900747299194, |
| "learning_rate": 9.998038594196913e-11, |
| "loss": 1.8425483703613281, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.18214285714285713, |
| "grad_norm": 0.3183627128601074, |
| "learning_rate": 9.997480735274607e-11, |
| "loss": 1.7012989521026611, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.18571428571428572, |
| "grad_norm": 1.6413437128067017, |
| "learning_rate": 9.996853171547794e-11, |
| "loss": 1.7975858449935913, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.18928571428571428, |
| "grad_norm": 0.48182412981987, |
| "learning_rate": 9.996155912742855e-11, |
| "loss": 1.849790334701538, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.19285714285714287, |
| "grad_norm": 0.2553955614566803, |
| "learning_rate": 9.995388969666348e-11, |
| "loss": 1.730877161026001, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.19642857142857142, |
| "grad_norm": 0.22714966535568237, |
| "learning_rate": 9.994552354204844e-11, |
| "loss": 1.5956761837005615, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.41010916233062744, |
| "learning_rate": 9.993646079324738e-11, |
| "loss": 1.7001782655715942, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.20357142857142857, |
| "grad_norm": 0.43712326884269714, |
| "learning_rate": 9.992670159072051e-11, |
| "loss": 1.7033194303512573, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.20714285714285716, |
| "grad_norm": 0.30069440603256226, |
| "learning_rate": 9.991624608572216e-11, |
| "loss": 1.6037108898162842, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.21071428571428572, |
| "grad_norm": 0.359576016664505, |
| "learning_rate": 9.990509444029832e-11, |
| "loss": 1.6560925245285034, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 0.22528955340385437, |
| "learning_rate": 9.98932468272843e-11, |
| "loss": 1.660873532295227, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.21785714285714286, |
| "grad_norm": 0.26792994141578674, |
| "learning_rate": 9.98807034303019e-11, |
| "loss": 1.685825228691101, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.22142857142857142, |
| "grad_norm": 0.5145900249481201, |
| "learning_rate": 9.986746444375661e-11, |
| "loss": 1.6325483322143555, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.225, |
| "grad_norm": 0.29799070954322815, |
| "learning_rate": 9.985353007283463e-11, |
| "loss": 1.7098278999328613, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.22857142857142856, |
| "grad_norm": 0.2814246118068695, |
| "learning_rate": 9.983890053349969e-11, |
| "loss": 1.6903581619262695, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.23214285714285715, |
| "grad_norm": 0.2916901707649231, |
| "learning_rate": 9.982357605248963e-11, |
| "loss": 1.7061477899551392, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2357142857142857, |
| "grad_norm": 0.43531540036201477, |
| "learning_rate": 9.980755686731296e-11, |
| "loss": 1.751139760017395, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2392857142857143, |
| "grad_norm": 0.35889768600463867, |
| "learning_rate": 9.979084322624518e-11, |
| "loss": 1.6222317218780518, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.24285714285714285, |
| "grad_norm": 0.604837954044342, |
| "learning_rate": 9.977343538832486e-11, |
| "loss": 1.6161425113677979, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.24642857142857144, |
| "grad_norm": 0.2969377338886261, |
| "learning_rate": 9.97553336233497e-11, |
| "loss": 1.5216706991195679, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.5487335920333862, |
| "learning_rate": 9.973653821187232e-11, |
| "loss": 1.6033167839050293, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.25357142857142856, |
| "grad_norm": 0.615515947341919, |
| "learning_rate": 9.971704944519594e-11, |
| "loss": 1.6344680786132812, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.2571428571428571, |
| "grad_norm": 0.8488220572471619, |
| "learning_rate": 9.969686762536972e-11, |
| "loss": 1.8572226762771606, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.26071428571428573, |
| "grad_norm": 2.822991371154785, |
| "learning_rate": 9.967599306518438e-11, |
| "loss": 1.5748167037963867, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.2642857142857143, |
| "grad_norm": 0.5391015410423279, |
| "learning_rate": 9.965442608816703e-11, |
| "loss": 1.7799961566925049, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.26785714285714285, |
| "grad_norm": 0.421622633934021, |
| "learning_rate": 9.963216702857634e-11, |
| "loss": 1.6995267868041992, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2714285714285714, |
| "grad_norm": 0.46860265731811523, |
| "learning_rate": 9.96092162313973e-11, |
| "loss": 1.7670485973358154, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.275, |
| "grad_norm": 0.47773948311805725, |
| "learning_rate": 9.958557405233593e-11, |
| "loss": 1.7523893117904663, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.2785714285714286, |
| "grad_norm": 0.6034820675849915, |
| "learning_rate": 9.956124085781366e-11, |
| "loss": 1.5787875652313232, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.28214285714285714, |
| "grad_norm": 0.8053703308105469, |
| "learning_rate": 9.953621702496178e-11, |
| "loss": 1.1423616409301758, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.34408634901046753, |
| "learning_rate": 9.951050294161547e-11, |
| "loss": 1.5774961709976196, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.2892857142857143, |
| "grad_norm": 0.4361685812473297, |
| "learning_rate": 9.948409900630787e-11, |
| "loss": 1.5200037956237793, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.29285714285714287, |
| "grad_norm": 0.34992775321006775, |
| "learning_rate": 9.945700562826394e-11, |
| "loss": 1.8257197141647339, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.29642857142857143, |
| "grad_norm": 0.5045400857925415, |
| "learning_rate": 9.942922322739395e-11, |
| "loss": 1.4601728916168213, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.2626727223396301, |
| "learning_rate": 9.940075223428717e-11, |
| "loss": 1.3292102813720703, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.30357142857142855, |
| "grad_norm": 0.40398019552230835, |
| "learning_rate": 9.937159309020509e-11, |
| "loss": 1.44813072681427, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.30714285714285716, |
| "grad_norm": 0.3662099242210388, |
| "learning_rate": 9.934174624707459e-11, |
| "loss": 1.727007508277893, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.3107142857142857, |
| "grad_norm": 0.24835538864135742, |
| "learning_rate": 9.931121216748092e-11, |
| "loss": 1.059862494468689, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.3142857142857143, |
| "grad_norm": 0.3025449514389038, |
| "learning_rate": 9.927999132466059e-11, |
| "loss": 1.7108771800994873, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.31785714285714284, |
| "grad_norm": 0.3157467842102051, |
| "learning_rate": 9.924808420249403e-11, |
| "loss": 1.5672202110290527, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.32142857142857145, |
| "grad_norm": 0.699787437915802, |
| "learning_rate": 9.921549129549799e-11, |
| "loss": 1.339919924736023, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.325, |
| "grad_norm": 0.48888230323791504, |
| "learning_rate": 9.918221310881795e-11, |
| "loss": 2.124028205871582, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.32857142857142857, |
| "grad_norm": 0.314627081155777, |
| "learning_rate": 9.91482501582204e-11, |
| "loss": 1.5376380681991577, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.33214285714285713, |
| "grad_norm": 0.21677131950855255, |
| "learning_rate": 9.91136029700846e-11, |
| "loss": 1.4906896352767944, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.3357142857142857, |
| "grad_norm": 0.8940775394439697, |
| "learning_rate": 9.907827208139462e-11, |
| "loss": 1.7292166948318481, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.3392857142857143, |
| "grad_norm": 0.32917287945747375, |
| "learning_rate": 9.904225803973094e-11, |
| "loss": 1.7098058462142944, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.34285714285714286, |
| "grad_norm": 0.32240667939186096, |
| "learning_rate": 9.900556140326203e-11, |
| "loss": 1.2109023332595825, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.3464285714285714, |
| "grad_norm": 0.3542839288711548, |
| "learning_rate": 9.896818274073555e-11, |
| "loss": 1.0306252241134644, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.5823745727539062, |
| "learning_rate": 9.893012263146971e-11, |
| "loss": 1.6056597232818604, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.3535714285714286, |
| "grad_norm": 0.8560162782669067, |
| "learning_rate": 9.889138166534416e-11, |
| "loss": 1.6744499206542969, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 0.348728746175766, |
| "learning_rate": 9.885196044279095e-11, |
| "loss": 1.5353935956954956, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3607142857142857, |
| "grad_norm": 5.2847466468811035, |
| "learning_rate": 9.881185957478514e-11, |
| "loss": 1.2694430351257324, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.36428571428571427, |
| "grad_norm": 0.2536151111125946, |
| "learning_rate": 9.877107968283538e-11, |
| "loss": 1.559706687927246, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.3678571428571429, |
| "grad_norm": 0.23312772810459137, |
| "learning_rate": 9.872962139897426e-11, |
| "loss": 1.6206153631210327, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.37142857142857144, |
| "grad_norm": 0.2879284620285034, |
| "learning_rate": 9.86874853657485e-11, |
| "loss": 1.5968157052993774, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 0.279491126537323, |
| "learning_rate": 9.864467223620907e-11, |
| "loss": 1.3809610605239868, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.37857142857142856, |
| "grad_norm": 0.31574228405952454, |
| "learning_rate": 9.860118267390092e-11, |
| "loss": 1.8226162195205688, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.3821428571428571, |
| "grad_norm": 0.2886030972003937, |
| "learning_rate": 9.855701735285285e-11, |
| "loss": 1.6120859384536743, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.38571428571428573, |
| "grad_norm": 0.22635193169116974, |
| "learning_rate": 9.851217695756695e-11, |
| "loss": 1.4717481136322021, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3892857142857143, |
| "grad_norm": 0.19679969549179077, |
| "learning_rate": 9.846666218300807e-11, |
| "loss": 1.4480830430984497, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.39285714285714285, |
| "grad_norm": 0.42078128457069397, |
| "learning_rate": 9.842047373459305e-11, |
| "loss": 1.6225476264953613, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.3964285714285714, |
| "grad_norm": 0.2914668023586273, |
| "learning_rate": 9.837361232817964e-11, |
| "loss": 1.455915093421936, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.292388379573822, |
| "learning_rate": 9.832607869005566e-11, |
| "loss": 1.6276307106018066, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.4035714285714286, |
| "grad_norm": 0.5007878541946411, |
| "learning_rate": 9.827787355692747e-11, |
| "loss": 1.6865593194961548, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.40714285714285714, |
| "grad_norm": 0.6924875974655151, |
| "learning_rate": 9.822899767590882e-11, |
| "loss": 1.6561429500579834, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.4107142857142857, |
| "grad_norm": 0.3577944040298462, |
| "learning_rate": 9.817945180450903e-11, |
| "loss": 1.7861510515213013, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.4142857142857143, |
| "grad_norm": 0.245406836271286, |
| "learning_rate": 9.812923671062139e-11, |
| "loss": 1.6835694313049316, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.41785714285714287, |
| "grad_norm": 0.32185444235801697, |
| "learning_rate": 9.807835317251119e-11, |
| "loss": 1.4940822124481201, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.42142857142857143, |
| "grad_norm": 0.3216511011123657, |
| "learning_rate": 9.802680197880379e-11, |
| "loss": 1.3896503448486328, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.425, |
| "grad_norm": 0.29104116559028625, |
| "learning_rate": 9.79745839284722e-11, |
| "loss": 1.386911392211914, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 0.1894490122795105, |
| "learning_rate": 9.792169983082484e-11, |
| "loss": 1.5068607330322266, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.43214285714285716, |
| "grad_norm": 0.554080069065094, |
| "learning_rate": 9.786815050549295e-11, |
| "loss": 1.5528550148010254, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.4357142857142857, |
| "grad_norm": 0.41308215260505676, |
| "learning_rate": 9.781393678241786e-11, |
| "loss": 1.6799581050872803, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.4392857142857143, |
| "grad_norm": 0.5376290082931519, |
| "learning_rate": 9.775905950183821e-11, |
| "loss": 1.7018500566482544, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.44285714285714284, |
| "grad_norm": 0.567138135433197, |
| "learning_rate": 9.770351951427684e-11, |
| "loss": 1.5787659883499146, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.44642857142857145, |
| "grad_norm": 0.4916873574256897, |
| "learning_rate": 9.764731768052763e-11, |
| "loss": 1.8073500394821167, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.941686987876892, |
| "learning_rate": 9.75904548716422e-11, |
| "loss": 1.7315703630447388, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.45357142857142857, |
| "grad_norm": 0.3704272210597992, |
| "learning_rate": 9.753293196891639e-11, |
| "loss": 1.3973535299301147, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.45714285714285713, |
| "grad_norm": 0.5139915943145752, |
| "learning_rate": 9.747474986387655e-11, |
| "loss": 1.6836737394332886, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.4607142857142857, |
| "grad_norm": 0.28661784529685974, |
| "learning_rate": 9.74159094582658e-11, |
| "loss": 1.4698333740234375, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.4642857142857143, |
| "grad_norm": 0.32768112421035767, |
| "learning_rate": 9.735641166402998e-11, |
| "loss": 1.6010611057281494, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.46785714285714286, |
| "grad_norm": 1.0357842445373535, |
| "learning_rate": 9.729625740330362e-11, |
| "loss": 1.5772435665130615, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.4714285714285714, |
| "grad_norm": 0.727933943271637, |
| "learning_rate": 9.723544760839555e-11, |
| "loss": 1.7117342948913574, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.475, |
| "grad_norm": 0.426328182220459, |
| "learning_rate": 9.717398322177442e-11, |
| "loss": 1.5788122415542603, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.4785714285714286, |
| "grad_norm": 0.7015219926834106, |
| "learning_rate": 9.71118651960543e-11, |
| "loss": 1.7658413648605347, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.48214285714285715, |
| "grad_norm": 0.28371384739875793, |
| "learning_rate": 9.704909449397962e-11, |
| "loss": 1.4088166952133179, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.4857142857142857, |
| "grad_norm": 0.3040277063846588, |
| "learning_rate": 9.69856720884105e-11, |
| "loss": 1.338411808013916, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.48928571428571427, |
| "grad_norm": 0.30010318756103516, |
| "learning_rate": 9.692159896230756e-11, |
| "loss": 1.7016575336456299, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.4928571428571429, |
| "grad_norm": 0.4967346489429474, |
| "learning_rate": 9.685687610871665e-11, |
| "loss": 1.7265392541885376, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.49642857142857144, |
| "grad_norm": 0.32157382369041443, |
| "learning_rate": 9.679150453075356e-11, |
| "loss": 1.5933375358581543, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.26096051931381226, |
| "learning_rate": 9.67254852415884e-11, |
| "loss": 1.591860294342041, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5035714285714286, |
| "grad_norm": 0.23185808956623077, |
| "learning_rate": 9.665881926442993e-11, |
| "loss": 1.589665174484253, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.5071428571428571, |
| "grad_norm": 0.32329416275024414, |
| "learning_rate": 9.659150763250966e-11, |
| "loss": 1.6434839963912964, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.5107142857142857, |
| "grad_norm": 0.29206129908561707, |
| "learning_rate": 9.65235513890659e-11, |
| "loss": 1.605237364768982, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.5142857142857142, |
| "grad_norm": 0.4287250339984894, |
| "learning_rate": 9.645495158732754e-11, |
| "loss": 1.464892864227295, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.5178571428571429, |
| "grad_norm": 0.3669995963573456, |
| "learning_rate": 9.638570929049775e-11, |
| "loss": 1.1133761405944824, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5214285714285715, |
| "grad_norm": 1.1597135066986084, |
| "learning_rate": 9.631582557173752e-11, |
| "loss": 1.183565616607666, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.525, |
| "grad_norm": 0.29547828435897827, |
| "learning_rate": 9.624530151414893e-11, |
| "loss": 1.507879614830017, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.5285714285714286, |
| "grad_norm": 0.6196130514144897, |
| "learning_rate": 9.617413821075851e-11, |
| "loss": 1.291815161705017, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.5321428571428571, |
| "grad_norm": 0.23788149654865265, |
| "learning_rate": 9.610233676450021e-11, |
| "loss": 1.4130339622497559, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.5357142857142857, |
| "grad_norm": 0.4616411328315735, |
| "learning_rate": 9.602989828819829e-11, |
| "loss": 1.3440712690353394, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5392857142857143, |
| "grad_norm": 0.2647687494754791, |
| "learning_rate": 9.595682390455016e-11, |
| "loss": 1.6042909622192383, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.5428571428571428, |
| "grad_norm": 0.16753101348876953, |
| "learning_rate": 9.588311474610887e-11, |
| "loss": 1.2125191688537598, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.5464285714285714, |
| "grad_norm": 0.5108986496925354, |
| "learning_rate": 9.580877195526563e-11, |
| "loss": 1.5605721473693848, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.15435348451137543, |
| "learning_rate": 9.573379668423209e-11, |
| "loss": 1.5854820013046265, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.5535714285714286, |
| "grad_norm": 0.30770283937454224, |
| "learning_rate": 9.56581900950225e-11, |
| "loss": 1.3195915222167969, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5571428571428572, |
| "grad_norm": 0.3878917694091797, |
| "learning_rate": 9.558195335943565e-11, |
| "loss": 1.6782605648040771, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.5607142857142857, |
| "grad_norm": 0.43914857506752014, |
| "learning_rate": 9.550508765903672e-11, |
| "loss": 1.3400123119354248, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.5642857142857143, |
| "grad_norm": 0.7770475745201111, |
| "learning_rate": 9.542759418513905e-11, |
| "loss": 1.4410650730133057, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.5678571428571428, |
| "grad_norm": 0.4235874116420746, |
| "learning_rate": 9.534947413878556e-11, |
| "loss": 1.5005742311477661, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.4761328101158142, |
| "learning_rate": 9.527072873073018e-11, |
| "loss": 1.5790057182312012, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.575, |
| "grad_norm": 0.30607500672340393, |
| "learning_rate": 9.519135918141913e-11, |
| "loss": 1.4029256105422974, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.5785714285714286, |
| "grad_norm": 0.199839249253273, |
| "learning_rate": 9.511136672097194e-11, |
| "loss": 1.2247833013534546, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.5821428571428572, |
| "grad_norm": 0.4178214371204376, |
| "learning_rate": 9.50307525891624e-11, |
| "loss": 1.5843498706817627, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.5857142857142857, |
| "grad_norm": 0.5691080093383789, |
| "learning_rate": 9.494951803539943e-11, |
| "loss": 1.432054042816162, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.5892857142857143, |
| "grad_norm": 0.2235386073589325, |
| "learning_rate": 9.486766431870752e-11, |
| "loss": 1.3727772235870361, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5928571428571429, |
| "grad_norm": 0.294951468706131, |
| "learning_rate": 9.478519270770745e-11, |
| "loss": 1.2510879039764404, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.5964285714285714, |
| "grad_norm": 0.5128263831138611, |
| "learning_rate": 9.470210448059644e-11, |
| "loss": 1.4979231357574463, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.4786055088043213, |
| "learning_rate": 9.461840092512849e-11, |
| "loss": 1.4333927631378174, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.6035714285714285, |
| "grad_norm": 0.34201353788375854, |
| "learning_rate": 9.453408333859427e-11, |
| "loss": 1.490931510925293, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.6071428571428571, |
| "grad_norm": 0.5112800002098083, |
| "learning_rate": 9.444915302780116e-11, |
| "loss": 1.5842055082321167, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6107142857142858, |
| "grad_norm": 0.25259843468666077, |
| "learning_rate": 9.436361130905288e-11, |
| "loss": 1.5695821046829224, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.6142857142857143, |
| "grad_norm": 0.6922969222068787, |
| "learning_rate": 9.427745950812918e-11, |
| "loss": 1.4329246282577515, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.6178571428571429, |
| "grad_norm": 0.5728318691253662, |
| "learning_rate": 9.41906989602652e-11, |
| "loss": 1.4237987995147705, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.6214285714285714, |
| "grad_norm": 0.20611967146396637, |
| "learning_rate": 9.410333101013086e-11, |
| "loss": 1.5165703296661377, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 0.2743736207485199, |
| "learning_rate": 9.401535701180998e-11, |
| "loss": 1.3337576389312744, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6285714285714286, |
| "grad_norm": 0.5341992974281311, |
| "learning_rate": 9.39267783287793e-11, |
| "loss": 1.6692336797714233, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.6321428571428571, |
| "grad_norm": 0.6746388673782349, |
| "learning_rate": 9.383759633388736e-11, |
| "loss": 1.6261032819747925, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.6357142857142857, |
| "grad_norm": 0.4282362163066864, |
| "learning_rate": 9.374781240933316e-11, |
| "loss": 1.385348916053772, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.6392857142857142, |
| "grad_norm": 0.17694340646266937, |
| "learning_rate": 9.365742794664484e-11, |
| "loss": 1.1620020866394043, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.6428571428571429, |
| "grad_norm": 0.31339991092681885, |
| "learning_rate": 9.356644434665803e-11, |
| "loss": 1.6486270427703857, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.6464285714285715, |
| "grad_norm": 0.23338527977466583, |
| "learning_rate": 9.347486301949418e-11, |
| "loss": 1.5743000507354736, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.35687318444252014, |
| "learning_rate": 9.338268538453869e-11, |
| "loss": 1.6195820569992065, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.6535714285714286, |
| "grad_norm": 0.22189630568027496, |
| "learning_rate": 9.328991287041892e-11, |
| "loss": 1.555490255355835, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.6571428571428571, |
| "grad_norm": 0.24453404545783997, |
| "learning_rate": 9.319654691498206e-11, |
| "loss": 1.606858730316162, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.6607142857142857, |
| "grad_norm": 0.29564130306243896, |
| "learning_rate": 9.310258896527278e-11, |
| "loss": 1.5415475368499756, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.6642857142857143, |
| "grad_norm": 0.24590720236301422, |
| "learning_rate": 9.300804047751092e-11, |
| "loss": 1.5487055778503418, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.6678571428571428, |
| "grad_norm": 0.21976569294929504, |
| "learning_rate": 9.29129029170688e-11, |
| "loss": 1.5091149806976318, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.6714285714285714, |
| "grad_norm": 0.36947011947631836, |
| "learning_rate": 9.281717775844857e-11, |
| "loss": 1.5175336599349976, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.675, |
| "grad_norm": 0.31734710931777954, |
| "learning_rate": 9.272086648525937e-11, |
| "loss": 1.5560580492019653, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.6785714285714286, |
| "grad_norm": 0.3260459005832672, |
| "learning_rate": 9.26239705901943e-11, |
| "loss": 1.5495206117630005, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6821428571428572, |
| "grad_norm": 0.2940950393676758, |
| "learning_rate": 9.25264915750073e-11, |
| "loss": 1.7193139791488647, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.6857142857142857, |
| "grad_norm": 0.5894531607627869, |
| "learning_rate": 9.242843095048987e-11, |
| "loss": 1.5194193124771118, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.6892857142857143, |
| "grad_norm": 0.24562247097492218, |
| "learning_rate": 9.232979023644767e-11, |
| "loss": 1.3620585203170776, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.6928571428571428, |
| "grad_norm": 0.25872641801834106, |
| "learning_rate": 9.223057096167696e-11, |
| "loss": 1.5675086975097656, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.6964285714285714, |
| "grad_norm": 0.34952595829963684, |
| "learning_rate": 9.213077466394088e-11, |
| "loss": 1.556618094444275, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.3161361813545227, |
| "learning_rate": 9.203040288994566e-11, |
| "loss": 1.566756248474121, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.7035714285714286, |
| "grad_norm": 0.2212635725736618, |
| "learning_rate": 9.192945719531662e-11, |
| "loss": 1.4415736198425293, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.7071428571428572, |
| "grad_norm": 0.24640850722789764, |
| "learning_rate": 9.182793914457402e-11, |
| "loss": 1.5302636623382568, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.7107142857142857, |
| "grad_norm": 0.32642388343811035, |
| "learning_rate": 9.172585031110895e-11, |
| "loss": 1.5108647346496582, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 0.44278931617736816, |
| "learning_rate": 9.162319227715878e-11, |
| "loss": 1.4763598442077637, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7178571428571429, |
| "grad_norm": 0.6782664060592651, |
| "learning_rate": 9.151996663378271e-11, |
| "loss": 1.5651448965072632, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.7214285714285714, |
| "grad_norm": 0.3046627640724182, |
| "learning_rate": 9.141617498083716e-11, |
| "loss": 1.54989755153656, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.725, |
| "grad_norm": 0.2901434302330017, |
| "learning_rate": 9.131181892695088e-11, |
| "loss": 1.5768945217132568, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.7285714285714285, |
| "grad_norm": 0.33921149373054504, |
| "learning_rate": 9.120690008950008e-11, |
| "loss": 1.5979000329971313, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.7321428571428571, |
| "grad_norm": 0.3361220359802246, |
| "learning_rate": 9.110142009458333e-11, |
| "loss": 1.497594952583313, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.7357142857142858, |
| "grad_norm": 0.5470672249794006, |
| "learning_rate": 9.099538057699643e-11, |
| "loss": 1.574213981628418, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.7392857142857143, |
| "grad_norm": 0.2838931381702423, |
| "learning_rate": 9.08887831802069e-11, |
| "loss": 1.5098413228988647, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.7428571428571429, |
| "grad_norm": 0.20152640342712402, |
| "learning_rate": 9.078162955632877e-11, |
| "loss": 1.4644570350646973, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.7464285714285714, |
| "grad_norm": 0.4462726414203644, |
| "learning_rate": 9.067392136609671e-11, |
| "loss": 1.4528954029083252, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.3298100233078003, |
| "learning_rate": 9.05656602788405e-11, |
| "loss": 1.7332755327224731, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.7535714285714286, |
| "grad_norm": 0.4603538513183594, |
| "learning_rate": 9.045684797245901e-11, |
| "loss": 1.278205156326294, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.7571428571428571, |
| "grad_norm": 0.24970531463623047, |
| "learning_rate": 9.034748613339426e-11, |
| "loss": 1.462319254875183, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.7607142857142857, |
| "grad_norm": 0.26904845237731934, |
| "learning_rate": 9.02375764566053e-11, |
| "loss": 1.5326104164123535, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.7642857142857142, |
| "grad_norm": 0.2414090633392334, |
| "learning_rate": 9.012712064554189e-11, |
| "loss": 1.429811716079712, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.7678571428571429, |
| "grad_norm": 0.1693522036075592, |
| "learning_rate": 9.001612041211817e-11, |
| "loss": 1.4766952991485596, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.7714285714285715, |
| "grad_norm": 0.1426696926355362, |
| "learning_rate": 8.9904577476686e-11, |
| "loss": 1.4853742122650146, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.775, |
| "grad_norm": 0.2107386738061905, |
| "learning_rate": 8.979249356800845e-11, |
| "loss": 1.3115483522415161, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.7785714285714286, |
| "grad_norm": 0.18600672483444214, |
| "learning_rate": 8.967987042323293e-11, |
| "loss": 1.4503666162490845, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.7821428571428571, |
| "grad_norm": 0.13341867923736572, |
| "learning_rate": 8.956670978786423e-11, |
| "loss": 1.3890115022659302, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.7857142857142857, |
| "grad_norm": 0.16304397583007812, |
| "learning_rate": 8.945301341573758e-11, |
| "loss": 1.427004098892212, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.7892857142857143, |
| "grad_norm": 0.15247249603271484, |
| "learning_rate": 8.93387830689913e-11, |
| "loss": 1.3906886577606201, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.7928571428571428, |
| "grad_norm": 0.23002289235591888, |
| "learning_rate": 8.922402051803968e-11, |
| "loss": 1.4104899168014526, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.7964285714285714, |
| "grad_norm": 0.17135556042194366, |
| "learning_rate": 8.910872754154539e-11, |
| "loss": 1.4079930782318115, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.16783295571804047, |
| "learning_rate": 8.8992905926392e-11, |
| "loss": 1.3688652515411377, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.8035714285714286, |
| "grad_norm": 0.13924425840377808, |
| "learning_rate": 8.887655746765626e-11, |
| "loss": 1.4514307975769043, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.8071428571428572, |
| "grad_norm": 0.19156193733215332, |
| "learning_rate": 8.875968396858021e-11, |
| "loss": 1.43871009349823, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.8107142857142857, |
| "grad_norm": 0.13427551090717316, |
| "learning_rate": 8.864228724054341e-11, |
| "loss": 1.3157857656478882, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.8142857142857143, |
| "grad_norm": 0.3618568778038025, |
| "learning_rate": 8.852436910303467e-11, |
| "loss": 1.3874411582946777, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.8178571428571428, |
| "grad_norm": 0.15196223556995392, |
| "learning_rate": 8.840593138362395e-11, |
| "loss": 1.3326449394226074, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.8214285714285714, |
| "grad_norm": 0.1430208683013916, |
| "learning_rate": 8.828697591793404e-11, |
| "loss": 1.3794419765472412, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.825, |
| "grad_norm": 0.15054234862327576, |
| "learning_rate": 8.816750454961207e-11, |
| "loss": 1.3381770849227905, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.8285714285714286, |
| "grad_norm": 0.1533413976430893, |
| "learning_rate": 8.804751913030096e-11, |
| "loss": 1.3633875846862793, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.8321428571428572, |
| "grad_norm": 0.23823502659797668, |
| "learning_rate": 8.792702151961074e-11, |
| "loss": 1.3637359142303467, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.8357142857142857, |
| "grad_norm": 0.3262959122657776, |
| "learning_rate": 8.780601358508965e-11, |
| "loss": 1.358881950378418, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.8392857142857143, |
| "grad_norm": 0.21195098757743835, |
| "learning_rate": 8.768449720219532e-11, |
| "loss": 1.3551591634750366, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.8428571428571429, |
| "grad_norm": 0.15915225446224213, |
| "learning_rate": 8.756247425426559e-11, |
| "loss": 1.40420401096344, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.8464285714285714, |
| "grad_norm": 0.18268409371376038, |
| "learning_rate": 8.743994663248939e-11, |
| "loss": 1.3795422315597534, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.3486330211162567, |
| "learning_rate": 8.731691623587738e-11, |
| "loss": 1.3883861303329468, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.8535714285714285, |
| "grad_norm": 0.20148774981498718, |
| "learning_rate": 8.719338497123257e-11, |
| "loss": 1.4160270690917969, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.17149412631988525, |
| "learning_rate": 8.706935475312072e-11, |
| "loss": 1.4068703651428223, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.8607142857142858, |
| "grad_norm": 0.20048604905605316, |
| "learning_rate": 8.694482750384069e-11, |
| "loss": 1.3851019144058228, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.8642857142857143, |
| "grad_norm": 0.18711215257644653, |
| "learning_rate": 8.681980515339464e-11, |
| "loss": 1.4040751457214355, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.8678571428571429, |
| "grad_norm": 0.1565803438425064, |
| "learning_rate": 8.669428963945815e-11, |
| "loss": 1.3519985675811768, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.8714285714285714, |
| "grad_norm": 0.3789925277233124, |
| "learning_rate": 8.656828290735013e-11, |
| "loss": 1.3698389530181885, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.875, |
| "grad_norm": 0.4136276841163635, |
| "learning_rate": 8.64417869100027e-11, |
| "loss": 1.375820279121399, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.8785714285714286, |
| "grad_norm": 0.32030630111694336, |
| "learning_rate": 8.631480360793095e-11, |
| "loss": 1.4090447425842285, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.8821428571428571, |
| "grad_norm": 0.19467246532440186, |
| "learning_rate": 8.61873349692025e-11, |
| "loss": 1.3862667083740234, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.8857142857142857, |
| "grad_norm": 0.18389186263084412, |
| "learning_rate": 8.605938296940702e-11, |
| "loss": 1.3221886157989502, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.8892857142857142, |
| "grad_norm": 0.1759600192308426, |
| "learning_rate": 8.593094959162564e-11, |
| "loss": 1.360346794128418, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.8928571428571429, |
| "grad_norm": 0.18795090913772583, |
| "learning_rate": 8.580203682640018e-11, |
| "loss": 1.378947377204895, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8964285714285715, |
| "grad_norm": 0.2354908436536789, |
| "learning_rate": 8.567264667170233e-11, |
| "loss": 1.3998974561691284, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.16188935935497284, |
| "learning_rate": 8.554278113290261e-11, |
| "loss": 1.3859062194824219, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.9035714285714286, |
| "grad_norm": 0.1890728920698166, |
| "learning_rate": 8.54124422227394e-11, |
| "loss": 1.3670257329940796, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.9071428571428571, |
| "grad_norm": 0.18385370075702667, |
| "learning_rate": 8.528163196128768e-11, |
| "loss": 1.3132009506225586, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.9107142857142857, |
| "grad_norm": 0.1908484250307083, |
| "learning_rate": 8.51503523759277e-11, |
| "loss": 1.369741678237915, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.9142857142857143, |
| "grad_norm": 0.1750551164150238, |
| "learning_rate": 8.501860550131361e-11, |
| "loss": 1.390952706336975, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.9178571428571428, |
| "grad_norm": 0.17115701735019684, |
| "learning_rate": 8.488639337934187e-11, |
| "loss": 1.3319488763809204, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.9214285714285714, |
| "grad_norm": 0.17177698016166687, |
| "learning_rate": 8.475371805911974e-11, |
| "loss": 1.3774604797363281, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.925, |
| "grad_norm": 0.26216426491737366, |
| "learning_rate": 8.46205815969333e-11, |
| "loss": 1.355608582496643, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.9285714285714286, |
| "grad_norm": 0.1776549220085144, |
| "learning_rate": 8.448698605621579e-11, |
| "loss": 1.4020413160324097, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.9321428571428572, |
| "grad_norm": 0.1494104564189911, |
| "learning_rate": 8.435293350751545e-11, |
| "loss": 1.4186415672302246, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.9357142857142857, |
| "grad_norm": 0.15738944709300995, |
| "learning_rate": 8.421842602846361e-11, |
| "loss": 1.3921843767166138, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.9392857142857143, |
| "grad_norm": 0.19063201546669006, |
| "learning_rate": 8.408346570374234e-11, |
| "loss": 1.3952494859695435, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.9428571428571428, |
| "grad_norm": 0.17228396236896515, |
| "learning_rate": 8.394805462505222e-11, |
| "loss": 1.3549234867095947, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.9464285714285714, |
| "grad_norm": 0.18750858306884766, |
| "learning_rate": 8.381219489107991e-11, |
| "loss": 1.3113043308258057, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.3387869894504547, |
| "learning_rate": 8.367588860746559e-11, |
| "loss": 1.3371450901031494, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.9535714285714286, |
| "grad_norm": 0.19042649865150452, |
| "learning_rate": 8.353913788677036e-11, |
| "loss": 1.3512016534805298, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.9571428571428572, |
| "grad_norm": 0.22516337037086487, |
| "learning_rate": 8.340194484844349e-11, |
| "loss": 1.3233355283737183, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.9607142857142857, |
| "grad_norm": 0.17853480577468872, |
| "learning_rate": 8.326431161878958e-11, |
| "loss": 1.3481040000915527, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.9642857142857143, |
| "grad_norm": 0.1799231916666031, |
| "learning_rate": 8.312624033093554e-11, |
| "loss": 1.3993315696716309, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.9678571428571429, |
| "grad_norm": 0.2989877164363861, |
| "learning_rate": 8.298773312479766e-11, |
| "loss": 1.3812036514282227, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.9714285714285714, |
| "grad_norm": 0.17884613573551178, |
| "learning_rate": 8.284879214704834e-11, |
| "loss": 1.3906068801879883, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.975, |
| "grad_norm": 0.28383585810661316, |
| "learning_rate": 8.27094195510828e-11, |
| "loss": 1.3412469625473022, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.9785714285714285, |
| "grad_norm": 0.28323087096214294, |
| "learning_rate": 8.256961749698581e-11, |
| "loss": 1.33486807346344, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.9821428571428571, |
| "grad_norm": 0.21711941063404083, |
| "learning_rate": 8.242938815149817e-11, |
| "loss": 1.370800256729126, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.9857142857142858, |
| "grad_norm": 0.20825232565402985, |
| "learning_rate": 8.228873368798304e-11, |
| "loss": 1.3969084024429321, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.9892857142857143, |
| "grad_norm": 0.19650134444236755, |
| "learning_rate": 8.214765628639235e-11, |
| "loss": 1.4430627822875977, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.9928571428571429, |
| "grad_norm": 0.30671611428260803, |
| "learning_rate": 8.200615813323305e-11, |
| "loss": 2.1070995330810547, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.9964285714285714, |
| "grad_norm": 0.3667857348918915, |
| "learning_rate": 8.18642414215331e-11, |
| "loss": 2.032963752746582, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.47882604598999023, |
| "learning_rate": 8.172190835080758e-11, |
| "loss": 1.951048731803894, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.0035714285714286, |
| "grad_norm": 0.17595674097537994, |
| "learning_rate": 8.157916112702452e-11, |
| "loss": 1.538697361946106, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.0071428571428571, |
| "grad_norm": 0.15145337581634521, |
| "learning_rate": 8.143600196257085e-11, |
| "loss": 1.4741806983947754, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.0107142857142857, |
| "grad_norm": 0.16578371822834015, |
| "learning_rate": 8.129243307621791e-11, |
| "loss": 1.4760806560516357, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.0142857142857142, |
| "grad_norm": 0.208124577999115, |
| "learning_rate": 8.114845669308723e-11, |
| "loss": 1.5404026508331299, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.0178571428571428, |
| "grad_norm": 0.15642480552196503, |
| "learning_rate": 8.100407504461596e-11, |
| "loss": 1.4572526216506958, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.0214285714285714, |
| "grad_norm": 0.17752893269062042, |
| "learning_rate": 8.085929036852235e-11, |
| "loss": 1.4172760248184204, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.025, |
| "grad_norm": 0.16976918280124664, |
| "learning_rate": 8.071410490877097e-11, |
| "loss": 1.3708614110946655, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.0285714285714285, |
| "grad_norm": 0.21366411447525024, |
| "learning_rate": 8.0568520915538e-11, |
| "loss": 1.426504373550415, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.032142857142857, |
| "grad_norm": 0.2278887778520584, |
| "learning_rate": 8.042254064517641e-11, |
| "loss": 1.4279263019561768, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.0357142857142858, |
| "grad_norm": 0.15804490447044373, |
| "learning_rate": 8.027616636018083e-11, |
| "loss": 1.3734034299850464, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.0392857142857144, |
| "grad_norm": 0.18044191598892212, |
| "learning_rate": 8.012940032915264e-11, |
| "loss": 1.3280595541000366, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.042857142857143, |
| "grad_norm": 0.2565247416496277, |
| "learning_rate": 7.998224482676473e-11, |
| "loss": 1.5239272117614746, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.0464285714285715, |
| "grad_norm": 0.20189274847507477, |
| "learning_rate": 7.983470213372624e-11, |
| "loss": 1.4172801971435547, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 0.1864236444234848, |
| "learning_rate": 7.96867745367473e-11, |
| "loss": 1.2826603651046753, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.0535714285714286, |
| "grad_norm": 0.1944390833377838, |
| "learning_rate": 7.953846432850344e-11, |
| "loss": 1.4027470350265503, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.0571428571428572, |
| "grad_norm": 0.19034692645072937, |
| "learning_rate": 7.938977380760024e-11, |
| "loss": 1.35616934299469, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.0607142857142857, |
| "grad_norm": 0.15960095822811127, |
| "learning_rate": 7.924070527853751e-11, |
| "loss": 1.438362717628479, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.0642857142857143, |
| "grad_norm": 0.1991662234067917, |
| "learning_rate": 7.909126105167373e-11, |
| "loss": 1.5469117164611816, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.0678571428571428, |
| "grad_norm": 0.3842826187610626, |
| "learning_rate": 7.894144344319014e-11, |
| "loss": 1.5314031839370728, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.0714285714285714, |
| "grad_norm": 0.19364933669567108, |
| "learning_rate": 7.879125477505494e-11, |
| "loss": 1.2979897260665894, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.075, |
| "grad_norm": 0.22292639315128326, |
| "learning_rate": 7.864069737498722e-11, |
| "loss": 1.4224971532821655, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.0785714285714285, |
| "grad_norm": 0.22089318931102753, |
| "learning_rate": 7.848977357642088e-11, |
| "loss": 1.4035828113555908, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.082142857142857, |
| "grad_norm": 0.17179912328720093, |
| "learning_rate": 7.833848571846855e-11, |
| "loss": 1.4242126941680908, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.0857142857142856, |
| "grad_norm": 0.14348094165325165, |
| "learning_rate": 7.818683614588523e-11, |
| "loss": 1.2999255657196045, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.0892857142857142, |
| "grad_norm": 0.17789550125598907, |
| "learning_rate": 7.803482720903205e-11, |
| "loss": 1.3410260677337646, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.092857142857143, |
| "grad_norm": 0.1673593670129776, |
| "learning_rate": 7.788246126383976e-11, |
| "loss": 1.3250867128372192, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.0964285714285715, |
| "grad_norm": 0.17654156684875488, |
| "learning_rate": 7.77297406717723e-11, |
| "loss": 1.4403133392333984, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 0.14737814664840698, |
| "learning_rate": 7.757666779979006e-11, |
| "loss": 1.4014517068862915, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.1035714285714286, |
| "grad_norm": 0.27064049243927, |
| "learning_rate": 7.742324502031339e-11, |
| "loss": 1.4908621311187744, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.1071428571428572, |
| "grad_norm": 0.19962742924690247, |
| "learning_rate": 7.72694747111857e-11, |
| "loss": 1.557312250137329, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.1107142857142858, |
| "grad_norm": 0.4431476294994354, |
| "learning_rate": 7.711535925563654e-11, |
| "loss": 1.3716031312942505, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.1142857142857143, |
| "grad_norm": 0.26187270879745483, |
| "learning_rate": 7.696090104224491e-11, |
| "loss": 1.5084398984909058, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.1178571428571429, |
| "grad_norm": 0.4299829602241516, |
| "learning_rate": 7.680610246490199e-11, |
| "loss": 1.483479380607605, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.1214285714285714, |
| "grad_norm": 0.25136733055114746, |
| "learning_rate": 7.665096592277414e-11, |
| "loss": 1.3639954328536987, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.125, |
| "grad_norm": 0.3161426782608032, |
| "learning_rate": 7.649549382026574e-11, |
| "loss": 1.4137345552444458, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.1285714285714286, |
| "grad_norm": 0.3137775659561157, |
| "learning_rate": 7.633968856698192e-11, |
| "loss": 1.1740094423294067, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.1321428571428571, |
| "grad_norm": 0.13070949912071228, |
| "learning_rate": 7.618355257769111e-11, |
| "loss": 1.2834131717681885, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.1357142857142857, |
| "grad_norm": 0.2657448947429657, |
| "learning_rate": 7.602708827228779e-11, |
| "loss": 1.3757896423339844, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.1392857142857142, |
| "grad_norm": 0.20995846390724182, |
| "learning_rate": 7.587029807575483e-11, |
| "loss": 1.4640997648239136, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 0.14955592155456543, |
| "learning_rate": 7.571318441812598e-11, |
| "loss": 1.3956358432769775, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.1464285714285714, |
| "grad_norm": 0.2415839433670044, |
| "learning_rate": 7.55557497344482e-11, |
| "loss": 1.3727810382843018, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 0.18018396198749542, |
| "learning_rate": 7.539799646474393e-11, |
| "loss": 1.3874338865280151, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.1535714285714285, |
| "grad_norm": 0.5012511014938354, |
| "learning_rate": 7.52399270539732e-11, |
| "loss": 1.5338712930679321, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.157142857142857, |
| "grad_norm": 0.19371801614761353, |
| "learning_rate": 7.508154395199592e-11, |
| "loss": 1.4757752418518066, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.1607142857142858, |
| "grad_norm": 0.1719474196434021, |
| "learning_rate": 7.492284961353361e-11, |
| "loss": 1.3998007774353027, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.1642857142857144, |
| "grad_norm": 0.2309015393257141, |
| "learning_rate": 7.476384649813167e-11, |
| "loss": 1.4197440147399902, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.167857142857143, |
| "grad_norm": 0.26694613695144653, |
| "learning_rate": 7.460453707012108e-11, |
| "loss": 1.4728981256484985, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.1714285714285715, |
| "grad_norm": 0.24615098536014557, |
| "learning_rate": 7.44449237985802e-11, |
| "loss": 1.36574387550354, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.175, |
| "grad_norm": 0.17588579654693604, |
| "learning_rate": 7.428500915729663e-11, |
| "loss": 1.478152871131897, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.1785714285714286, |
| "grad_norm": 0.21529220044612885, |
| "learning_rate": 7.412479562472872e-11, |
| "loss": 1.4191372394561768, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.1821428571428572, |
| "grad_norm": 0.24417810142040253, |
| "learning_rate": 7.396428568396729e-11, |
| "loss": 1.262759804725647, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.1857142857142857, |
| "grad_norm": 0.22581692039966583, |
| "learning_rate": 7.3803481822697e-11, |
| "loss": 1.380365014076233, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.1892857142857143, |
| "grad_norm": 0.2688646912574768, |
| "learning_rate": 7.364238653315794e-11, |
| "loss": 1.4285253286361694, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.1928571428571428, |
| "grad_norm": 0.19933386147022247, |
| "learning_rate": 7.348100231210697e-11, |
| "loss": 1.3763307332992554, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.1964285714285714, |
| "grad_norm": 0.2015334814786911, |
| "learning_rate": 7.331933166077885e-11, |
| "loss": 1.281455159187317, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.15446756780147552, |
| "learning_rate": 7.315737708484779e-11, |
| "loss": 1.3722114562988281, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.2035714285714285, |
| "grad_norm": 0.24289092421531677, |
| "learning_rate": 7.299514109438834e-11, |
| "loss": 1.385769248008728, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.207142857142857, |
| "grad_norm": 0.18633770942687988, |
| "learning_rate": 7.283262620383664e-11, |
| "loss": 1.3191158771514893, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.2107142857142856, |
| "grad_norm": 0.3428845703601837, |
| "learning_rate": 7.266983493195134e-11, |
| "loss": 1.3687481880187988, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.2142857142857142, |
| "grad_norm": 0.1786336749792099, |
| "learning_rate": 7.250676980177467e-11, |
| "loss": 1.386405110359192, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.217857142857143, |
| "grad_norm": 0.21881183981895447, |
| "learning_rate": 7.234343334059331e-11, |
| "loss": 1.4689749479293823, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.2214285714285715, |
| "grad_norm": 0.2003452330827713, |
| "learning_rate": 7.217982807989915e-11, |
| "loss": 1.3969757556915283, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.225, |
| "grad_norm": 0.23969583213329315, |
| "learning_rate": 7.201595655535011e-11, |
| "loss": 1.5124036073684692, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.2285714285714286, |
| "grad_norm": 0.2236098349094391, |
| "learning_rate": 7.185182130673088e-11, |
| "loss": 1.4658269882202148, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.2321428571428572, |
| "grad_norm": 0.2575554847717285, |
| "learning_rate": 7.168742487791345e-11, |
| "loss": 1.4607081413269043, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.2357142857142858, |
| "grad_norm": 0.22493329644203186, |
| "learning_rate": 7.152276981681781e-11, |
| "loss": 1.4938011169433594, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.2392857142857143, |
| "grad_norm": 0.19840767979621887, |
| "learning_rate": 7.135785867537235e-11, |
| "loss": 1.3276569843292236, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.2428571428571429, |
| "grad_norm": 0.26204267144203186, |
| "learning_rate": 7.119269400947436e-11, |
| "loss": 1.3468204736709595, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.2464285714285714, |
| "grad_norm": 0.20326542854309082, |
| "learning_rate": 7.102727837895048e-11, |
| "loss": 1.2875033617019653, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.8089625835418701, |
| "learning_rate": 7.086161434751684e-11, |
| "loss": 1.326261043548584, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.2535714285714286, |
| "grad_norm": 0.6610464453697205, |
| "learning_rate": 7.06957044827395e-11, |
| "loss": 1.4190644025802612, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.2571428571428571, |
| "grad_norm": 0.3214384615421295, |
| "learning_rate": 7.052955135599468e-11, |
| "loss": 1.5597217082977295, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.2607142857142857, |
| "grad_norm": 0.3793260157108307, |
| "learning_rate": 7.03631575424287e-11, |
| "loss": 1.3309506177902222, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.2642857142857142, |
| "grad_norm": 0.3192151188850403, |
| "learning_rate": 7.019652562091826e-11, |
| "loss": 1.5660918951034546, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.2678571428571428, |
| "grad_norm": 0.2909698188304901, |
| "learning_rate": 7.002965817403042e-11, |
| "loss": 1.4371814727783203, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.2714285714285714, |
| "grad_norm": 0.511368453502655, |
| "learning_rate": 6.986255778798253e-11, |
| "loss": 1.5524672269821167, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.275, |
| "grad_norm": 0.25665637850761414, |
| "learning_rate": 6.969522705260218e-11, |
| "loss": 1.5821752548217773, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.2785714285714285, |
| "grad_norm": 0.4477701187133789, |
| "learning_rate": 6.952766856128708e-11, |
| "loss": 1.3979148864746094, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.282142857142857, |
| "grad_norm": 0.44492456316947937, |
| "learning_rate": 6.935988491096485e-11, |
| "loss": 1.013071894645691, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.2857142857142856, |
| "grad_norm": 0.21502305567264557, |
| "learning_rate": 6.919187870205275e-11, |
| "loss": 1.3727712631225586, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.2892857142857144, |
| "grad_norm": 0.3092592656612396, |
| "learning_rate": 6.902365253841737e-11, |
| "loss": 1.3816754817962646, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.292857142857143, |
| "grad_norm": 0.1694367378950119, |
| "learning_rate": 6.885520902733434e-11, |
| "loss": 1.69234299659729, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.2964285714285715, |
| "grad_norm": 0.44906988739967346, |
| "learning_rate": 6.868655077944788e-11, |
| "loss": 1.3512274026870728, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.21286985278129578, |
| "learning_rate": 6.85176804087303e-11, |
| "loss": 1.219728946685791, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.3035714285714286, |
| "grad_norm": 0.38225606083869934, |
| "learning_rate": 6.834860053244154e-11, |
| "loss": 1.3647881746292114, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.3071428571428572, |
| "grad_norm": 0.19242212176322937, |
| "learning_rate": 6.817931377108863e-11, |
| "loss": 1.635160207748413, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.3107142857142857, |
| "grad_norm": 0.24187368154525757, |
| "learning_rate": 6.800982274838496e-11, |
| "loss": 0.9980994462966919, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.3142857142857143, |
| "grad_norm": 0.22730109095573425, |
| "learning_rate": 6.784013009120974e-11, |
| "loss": 1.61106276512146, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.3178571428571428, |
| "grad_norm": 0.34292080998420715, |
| "learning_rate": 6.767023842956724e-11, |
| "loss": 1.4898104667663574, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.3214285714285714, |
| "grad_norm": 0.6306799650192261, |
| "learning_rate": 6.750015039654603e-11, |
| "loss": 1.2501347064971924, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.325, |
| "grad_norm": 0.5413057208061218, |
| "learning_rate": 6.732986862827812e-11, |
| "loss": 1.9483036994934082, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.3285714285714285, |
| "grad_norm": 0.34432464838027954, |
| "learning_rate": 6.715939576389822e-11, |
| "loss": 1.4203524589538574, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.332142857142857, |
| "grad_norm": 0.2747512459754944, |
| "learning_rate": 6.698873444550271e-11, |
| "loss": 1.3994556665420532, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.3357142857142856, |
| "grad_norm": 0.4280205965042114, |
| "learning_rate": 6.681788731810879e-11, |
| "loss": 1.5723004341125488, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.3392857142857144, |
| "grad_norm": 0.33289003372192383, |
| "learning_rate": 6.664685702961344e-11, |
| "loss": 1.5144555568695068, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.342857142857143, |
| "grad_norm": 0.17391818761825562, |
| "learning_rate": 6.647564623075235e-11, |
| "loss": 1.1204313039779663, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.3464285714285715, |
| "grad_norm": 0.16711686551570892, |
| "learning_rate": 6.630425757505894e-11, |
| "loss": 0.9277492165565491, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 0.20722971856594086, |
| "learning_rate": 6.613269371882309e-11, |
| "loss": 1.4686331748962402, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.3535714285714286, |
| "grad_norm": 0.31218937039375305, |
| "learning_rate": 6.596095732105011e-11, |
| "loss": 1.5793182849884033, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.3571428571428572, |
| "grad_norm": 0.9370469450950623, |
| "learning_rate": 6.578905104341944e-11, |
| "loss": 1.4430747032165527, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.3607142857142858, |
| "grad_norm": 0.15756045281887054, |
| "learning_rate": 6.561697755024343e-11, |
| "loss": 1.2268548011779785, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.3642857142857143, |
| "grad_norm": 0.18516050279140472, |
| "learning_rate": 6.544473950842606e-11, |
| "loss": 1.4649068117141724, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.3678571428571429, |
| "grad_norm": 0.19654791057109833, |
| "learning_rate": 6.527233958742153e-11, |
| "loss": 1.533630609512329, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.3714285714285714, |
| "grad_norm": 0.2150849997997284, |
| "learning_rate": 6.509978045919307e-11, |
| "loss": 1.478027105331421, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.375, |
| "grad_norm": 0.2530464828014374, |
| "learning_rate": 6.492706479817125e-11, |
| "loss": 1.3111753463745117, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.3785714285714286, |
| "grad_norm": 0.3632781207561493, |
| "learning_rate": 6.475419528121279e-11, |
| "loss": 1.6716187000274658, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.3821428571428571, |
| "grad_norm": 0.25495582818984985, |
| "learning_rate": 6.45811745875589e-11, |
| "loss": 1.4510148763656616, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.3857142857142857, |
| "grad_norm": 0.17029410600662231, |
| "learning_rate": 6.440800539879391e-11, |
| "loss": 1.3360023498535156, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.3892857142857142, |
| "grad_norm": 0.17277792096138, |
| "learning_rate": 6.423469039880355e-11, |
| "loss": 1.3302971124649048, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.3928571428571428, |
| "grad_norm": 0.2622109353542328, |
| "learning_rate": 6.406123227373342e-11, |
| "loss": 1.4888077974319458, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.3964285714285714, |
| "grad_norm": 0.3945457935333252, |
| "learning_rate": 6.388763371194741e-11, |
| "loss": 1.3184008598327637, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.28038397431373596, |
| "learning_rate": 6.371389740398596e-11, |
| "loss": 1.4953203201293945, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.4035714285714285, |
| "grad_norm": 0.28005000948905945, |
| "learning_rate": 6.35400260425244e-11, |
| "loss": 1.5821740627288818, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.407142857142857, |
| "grad_norm": 0.2094164341688156, |
| "learning_rate": 6.336602232233117e-11, |
| "loss": 1.5140842199325562, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.4107142857142856, |
| "grad_norm": 0.22712309658527374, |
| "learning_rate": 6.319188894022612e-11, |
| "loss": 1.6447871923446655, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.4142857142857144, |
| "grad_norm": 0.13142111897468567, |
| "learning_rate": 6.301762859503869e-11, |
| "loss": 1.5251423120498657, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.417857142857143, |
| "grad_norm": 0.26605498790740967, |
| "learning_rate": 6.284324398756605e-11, |
| "loss": 1.3444472551345825, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.4214285714285715, |
| "grad_norm": 0.23393483459949493, |
| "learning_rate": 6.266873782053131e-11, |
| "loss": 1.2516331672668457, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.425, |
| "grad_norm": 0.30043426156044006, |
| "learning_rate": 6.249411279854152e-11, |
| "loss": 1.2557588815689087, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.5380626916885376, |
| "learning_rate": 6.231937162804584e-11, |
| "loss": 1.3160637617111206, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.4321428571428572, |
| "grad_norm": 0.24224776029586792, |
| "learning_rate": 6.214451701729363e-11, |
| "loss": 1.4175952672958374, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.4357142857142857, |
| "grad_norm": 0.18135520815849304, |
| "learning_rate": 6.196955167629236e-11, |
| "loss": 1.5174418687820435, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.4392857142857143, |
| "grad_norm": 0.3037535846233368, |
| "learning_rate": 6.179447831676566e-11, |
| "loss": 1.581838846206665, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.4428571428571428, |
| "grad_norm": 0.29367074370384216, |
| "learning_rate": 6.161929965211134e-11, |
| "loss": 1.4578280448913574, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.4464285714285714, |
| "grad_norm": 0.3817584216594696, |
| "learning_rate": 6.14440183973593e-11, |
| "loss": 1.678054690361023, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 0.46394509077072144, |
| "learning_rate": 6.12686372691294e-11, |
| "loss": 1.5505553483963013, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.4535714285714285, |
| "grad_norm": 0.1447513848543167, |
| "learning_rate": 6.109315898558943e-11, |
| "loss": 1.2679914236068726, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.457142857142857, |
| "grad_norm": 0.24315816164016724, |
| "learning_rate": 6.091758626641295e-11, |
| "loss": 1.566739797592163, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.4607142857142856, |
| "grad_norm": 0.5375627875328064, |
| "learning_rate": 6.074192183273714e-11, |
| "loss": 1.3268802165985107, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.4642857142857144, |
| "grad_norm": 0.2522839605808258, |
| "learning_rate": 6.056616840712064e-11, |
| "loss": 1.4653449058532715, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.467857142857143, |
| "grad_norm": 0.40299344062805176, |
| "learning_rate": 6.039032871350136e-11, |
| "loss": 1.4160243272781372, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.4714285714285715, |
| "grad_norm": 0.6600869297981262, |
| "learning_rate": 6.021440547715418e-11, |
| "loss": 1.5625566244125366, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.475, |
| "grad_norm": 0.21130329370498657, |
| "learning_rate": 6.003840142464887e-11, |
| "loss": 1.4704797267913818, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.4785714285714286, |
| "grad_norm": 0.2992205321788788, |
| "learning_rate": 5.986231928380765e-11, |
| "loss": 1.5831502676010132, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.4821428571428572, |
| "grad_norm": 0.18447980284690857, |
| "learning_rate": 5.968616178366304e-11, |
| "loss": 1.2691162824630737, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.4857142857142858, |
| "grad_norm": 0.23359040915966034, |
| "learning_rate": 5.95099316544156e-11, |
| "loss": 1.2079955339431763, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.4892857142857143, |
| "grad_norm": 0.21547919511795044, |
| "learning_rate": 5.933363162739138e-11, |
| "loss": 1.5598586797714233, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.4928571428571429, |
| "grad_norm": 0.41725102066993713, |
| "learning_rate": 5.915726443499991e-11, |
| "loss": 1.6024370193481445, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.4964285714285714, |
| "grad_norm": 0.23062889277935028, |
| "learning_rate": 5.89808328106916e-11, |
| "loss": 1.4700309038162231, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.17646436393260956, |
| "learning_rate": 5.880433948891548e-11, |
| "loss": 1.4606246948242188, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.5035714285714286, |
| "grad_norm": 0.1672886461019516, |
| "learning_rate": 5.862778720507684e-11, |
| "loss": 1.473752498626709, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.5071428571428571, |
| "grad_norm": 0.18604016304016113, |
| "learning_rate": 5.845117869549476e-11, |
| "loss": 1.570174217224121, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.5107142857142857, |
| "grad_norm": 0.2805999517440796, |
| "learning_rate": 5.827451669735976e-11, |
| "loss": 1.525863528251648, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.5142857142857142, |
| "grad_norm": 0.2228318601846695, |
| "learning_rate": 5.80978039486914e-11, |
| "loss": 1.356152892112732, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.5178571428571428, |
| "grad_norm": 0.213277667760849, |
| "learning_rate": 5.79210431882957e-11, |
| "loss": 1.0404657125473022, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.5214285714285714, |
| "grad_norm": 0.23507511615753174, |
| "learning_rate": 5.774423715572289e-11, |
| "loss": 1.117300271987915, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.525, |
| "grad_norm": 0.2990909516811371, |
| "learning_rate": 5.7567388591224835e-11, |
| "loss": 1.3961025476455688, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.5285714285714285, |
| "grad_norm": 0.37220829725265503, |
| "learning_rate": 5.739050023571257e-11, |
| "loss": 1.18937087059021, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.532142857142857, |
| "grad_norm": 0.14452393352985382, |
| "learning_rate": 5.7213574830713854e-11, |
| "loss": 1.3321001529693604, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.5357142857142856, |
| "grad_norm": 0.23967216908931732, |
| "learning_rate": 5.703661511833064e-11, |
| "loss": 1.2635600566864014, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.5392857142857141, |
| "grad_norm": 0.1488959789276123, |
| "learning_rate": 5.6859623841196594e-11, |
| "loss": 1.4870667457580566, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.5428571428571427, |
| "grad_norm": 0.10835379362106323, |
| "learning_rate": 5.668260374243467e-11, |
| "loss": 1.1637285947799683, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.5464285714285713, |
| "grad_norm": 0.26973676681518555, |
| "learning_rate": 5.650555756561439e-11, |
| "loss": 1.4411020278930664, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 0.11648156493902206, |
| "learning_rate": 5.632848805470957e-11, |
| "loss": 1.4853591918945312, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.5535714285714286, |
| "grad_norm": 0.24108168482780457, |
| "learning_rate": 5.6151397954055585e-11, |
| "loss": 1.235764741897583, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.5571428571428572, |
| "grad_norm": 0.18987397849559784, |
| "learning_rate": 5.5974290008306997e-11, |
| "loss": 1.589194655418396, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.5607142857142857, |
| "grad_norm": 0.28706496953964233, |
| "learning_rate": 5.579716696239487e-11, |
| "loss": 1.270675539970398, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.5642857142857143, |
| "grad_norm": 0.38785573840141296, |
| "learning_rate": 5.562003156148434e-11, |
| "loss": 1.3905096054077148, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.5678571428571428, |
| "grad_norm": 0.3089293837547302, |
| "learning_rate": 5.5442886550932024e-11, |
| "loss": 1.4197461605072021, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.5714285714285714, |
| "grad_norm": 0.5964844822883606, |
| "learning_rate": 5.5265734676243505e-11, |
| "loss": 1.466257095336914, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.575, |
| "grad_norm": 0.1559826284646988, |
| "learning_rate": 5.508857868303068e-11, |
| "loss": 1.310103178024292, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.5785714285714287, |
| "grad_norm": 0.1740395873785019, |
| "learning_rate": 5.4911421316969336e-11, |
| "loss": 1.1727169752120972, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.5821428571428573, |
| "grad_norm": 0.2936098873615265, |
| "learning_rate": 5.47342653237565e-11, |
| "loss": 1.482723593711853, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.5857142857142859, |
| "grad_norm": 0.3703981041908264, |
| "learning_rate": 5.4557113449067964e-11, |
| "loss": 1.3175097703933716, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.5892857142857144, |
| "grad_norm": 0.18052178621292114, |
| "learning_rate": 5.4379968438515673e-11, |
| "loss": 1.2715235948562622, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.592857142857143, |
| "grad_norm": 0.4644646644592285, |
| "learning_rate": 5.420283303760515e-11, |
| "loss": 1.1736079454421997, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.5964285714285715, |
| "grad_norm": 0.31031447649002075, |
| "learning_rate": 5.402570999169303e-11, |
| "loss": 1.447801947593689, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.26687949895858765, |
| "learning_rate": 5.384860204594442e-11, |
| "loss": 1.3616970777511597, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.6035714285714286, |
| "grad_norm": 0.3266461193561554, |
| "learning_rate": 5.3671511945290443e-11, |
| "loss": 1.4090511798858643, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.6071428571428572, |
| "grad_norm": 0.270843505859375, |
| "learning_rate": 5.3494442434385626e-11, |
| "loss": 1.4962491989135742, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.6107142857142858, |
| "grad_norm": 0.2539942264556885, |
| "learning_rate": 5.331739625756535e-11, |
| "loss": 1.484360933303833, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.6142857142857143, |
| "grad_norm": 0.2731671929359436, |
| "learning_rate": 5.314037615880341e-11, |
| "loss": 1.333021640777588, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.6178571428571429, |
| "grad_norm": 0.3004004955291748, |
| "learning_rate": 5.2963384881669383e-11, |
| "loss": 1.3362126350402832, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.6214285714285714, |
| "grad_norm": 0.18505540490150452, |
| "learning_rate": 5.278642516928617e-11, |
| "loss": 1.4492074251174927, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.625, |
| "grad_norm": 0.2023400068283081, |
| "learning_rate": 5.260949976428745e-11, |
| "loss": 1.232993721961975, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.6285714285714286, |
| "grad_norm": 0.3517407178878784, |
| "learning_rate": 5.243261140877517e-11, |
| "loss": 1.5753288269042969, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.6321428571428571, |
| "grad_norm": 0.5919647216796875, |
| "learning_rate": 5.225576284427712e-11, |
| "loss": 1.4577170610427856, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.6357142857142857, |
| "grad_norm": 0.37958261370658875, |
| "learning_rate": 5.2078956811704316e-11, |
| "loss": 1.1840314865112305, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.6392857142857142, |
| "grad_norm": 0.48498472571372986, |
| "learning_rate": 5.190219605130863e-11, |
| "loss": 1.0771994590759277, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.6428571428571428, |
| "grad_norm": 0.22175057232379913, |
| "learning_rate": 5.172548330264023e-11, |
| "loss": 1.485005259513855, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.6464285714285714, |
| "grad_norm": 0.15276828408241272, |
| "learning_rate": 5.1548821304505246e-11, |
| "loss": 1.4146548509597778, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 0.338878333568573, |
| "learning_rate": 5.137221279492317e-11, |
| "loss": 1.4840327501296997, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.6535714285714285, |
| "grad_norm": 0.17189446091651917, |
| "learning_rate": 5.119566051108453e-11, |
| "loss": 1.4107526540756226, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.657142857142857, |
| "grad_norm": 0.16768991947174072, |
| "learning_rate": 5.1019167189308406e-11, |
| "loss": 1.46079421043396, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.6607142857142856, |
| "grad_norm": 0.2172195166349411, |
| "learning_rate": 5.08427355650001e-11, |
| "loss": 1.4025652408599854, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.6642857142857141, |
| "grad_norm": 0.20361092686653137, |
| "learning_rate": 5.0666368372608627e-11, |
| "loss": 1.393689513206482, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.6678571428571427, |
| "grad_norm": 0.19188135862350464, |
| "learning_rate": 5.0490068345584426e-11, |
| "loss": 1.3864973783493042, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.6714285714285713, |
| "grad_norm": 0.3993217945098877, |
| "learning_rate": 5.031383821633695e-11, |
| "loss": 1.3697587251663208, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.675, |
| "grad_norm": 0.25149768590927124, |
| "learning_rate": 5.013768071619237e-11, |
| "loss": 1.4470927715301514, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.6785714285714286, |
| "grad_norm": 0.20718373358249664, |
| "learning_rate": 4.9961598575351155e-11, |
| "loss": 1.454539179801941, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.6821428571428572, |
| "grad_norm": 0.5436975955963135, |
| "learning_rate": 4.9785594522845833e-11, |
| "loss": 1.5738086700439453, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.6857142857142857, |
| "grad_norm": 0.3914676904678345, |
| "learning_rate": 4.9609671286498646e-11, |
| "loss": 1.32332181930542, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.6892857142857143, |
| "grad_norm": 0.22571302950382233, |
| "learning_rate": 4.943383159287935e-11, |
| "loss": 1.2254847288131714, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.6928571428571428, |
| "grad_norm": 0.255727618932724, |
| "learning_rate": 4.9258078167262875e-11, |
| "loss": 1.4280102252960205, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.6964285714285714, |
| "grad_norm": 0.17480432987213135, |
| "learning_rate": 4.9082413733587075e-11, |
| "loss": 1.4205875396728516, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 0.3139236569404602, |
| "learning_rate": 4.8906841014410586e-11, |
| "loss": 1.4274033308029175, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.7035714285714287, |
| "grad_norm": 0.1673169583082199, |
| "learning_rate": 4.873136273087061e-11, |
| "loss": 1.2917604446411133, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.7071428571428573, |
| "grad_norm": 0.13775251805782318, |
| "learning_rate": 4.855598160264071e-11, |
| "loss": 1.39280104637146, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.7107142857142859, |
| "grad_norm": 0.29080042243003845, |
| "learning_rate": 4.838070034788865e-11, |
| "loss": 1.3744114637374878, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.7142857142857144, |
| "grad_norm": 0.1755424588918686, |
| "learning_rate": 4.820552168323433e-11, |
| "loss": 1.3679919242858887, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.717857142857143, |
| "grad_norm": 0.26247265934944153, |
| "learning_rate": 4.8030448323707654e-11, |
| "loss": 1.4235137701034546, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.7214285714285715, |
| "grad_norm": 0.191309854388237, |
| "learning_rate": 4.785548298270639e-11, |
| "loss": 1.430955171585083, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.725, |
| "grad_norm": 0.25478798151016235, |
| "learning_rate": 4.7680628371954174e-11, |
| "loss": 1.4475382566452026, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.7285714285714286, |
| "grad_norm": 0.3051491677761078, |
| "learning_rate": 4.7505887201458484e-11, |
| "loss": 1.4724607467651367, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.7321428571428572, |
| "grad_norm": 0.2450665533542633, |
| "learning_rate": 4.73312621794687e-11, |
| "loss": 1.342381477355957, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.7357142857142858, |
| "grad_norm": 0.20480383932590485, |
| "learning_rate": 4.7156756012433956e-11, |
| "loss": 1.4223990440368652, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.7392857142857143, |
| "grad_norm": 0.2236645370721817, |
| "learning_rate": 4.698237140496132e-11, |
| "loss": 1.3705493211746216, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.7428571428571429, |
| "grad_norm": 0.32822078466415405, |
| "learning_rate": 4.680811105977389e-11, |
| "loss": 1.34307062625885, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.7464285714285714, |
| "grad_norm": 0.2703489065170288, |
| "learning_rate": 4.663397767766885e-11, |
| "loss": 1.3283851146697998, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.46878188848495483, |
| "learning_rate": 4.645997395747562e-11, |
| "loss": 1.5840815305709839, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.7535714285714286, |
| "grad_norm": 0.1596301943063736, |
| "learning_rate": 4.628610259601406e-11, |
| "loss": 1.1211912631988525, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.7571428571428571, |
| "grad_norm": 0.1534484177827835, |
| "learning_rate": 4.6112366288052587e-11, |
| "loss": 1.3600302934646606, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.7607142857142857, |
| "grad_norm": 0.1662014126777649, |
| "learning_rate": 4.5938767726266585e-11, |
| "loss": 1.4457441568374634, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.7642857142857142, |
| "grad_norm": 0.14455421268939972, |
| "learning_rate": 4.576530960119646e-11, |
| "loss": 1.3325226306915283, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.7678571428571428, |
| "grad_norm": 0.18354535102844238, |
| "learning_rate": 4.55919946012061e-11, |
| "loss": 1.3722156286239624, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.7714285714285714, |
| "grad_norm": 0.20837841928005219, |
| "learning_rate": 4.5418825412441104e-11, |
| "loss": 1.3799716234207153, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.775, |
| "grad_norm": 0.14612962305545807, |
| "learning_rate": 4.5245804718787233e-11, |
| "loss": 1.208733081817627, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.7785714285714285, |
| "grad_norm": 0.2214983105659485, |
| "learning_rate": 4.507293520182877e-11, |
| "loss": 1.3159799575805664, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.782142857142857, |
| "grad_norm": 0.19904594123363495, |
| "learning_rate": 4.4900219540806954e-11, |
| "loss": 1.276192545890808, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.7857142857142856, |
| "grad_norm": 0.13550381362438202, |
| "learning_rate": 4.4727660412578454e-11, |
| "loss": 1.32045316696167, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.7892857142857141, |
| "grad_norm": 0.16557137668132782, |
| "learning_rate": 4.455526049157396e-11, |
| "loss": 1.293030023574829, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.7928571428571427, |
| "grad_norm": 0.31324100494384766, |
| "learning_rate": 4.438302244975658e-11, |
| "loss": 1.3284529447555542, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.7964285714285713, |
| "grad_norm": 0.16482140123844147, |
| "learning_rate": 4.421094895658058e-11, |
| "loss": 1.298207402229309, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.13380751013755798, |
| "learning_rate": 4.4039042678949905e-11, |
| "loss": 1.2636936902999878, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.8035714285714286, |
| "grad_norm": 0.27826249599456787, |
| "learning_rate": 4.386730628117692e-11, |
| "loss": 1.3365410566329956, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.8071428571428572, |
| "grad_norm": 0.1916886866092682, |
| "learning_rate": 4.369574242494108e-11, |
| "loss": 1.3265316486358643, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.8107142857142857, |
| "grad_norm": 0.1272166520357132, |
| "learning_rate": 4.352435376924766e-11, |
| "loss": 1.215617299079895, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.8142857142857143, |
| "grad_norm": 0.13315260410308838, |
| "learning_rate": 4.335314297038656e-11, |
| "loss": 1.2893967628479004, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.8178571428571428, |
| "grad_norm": 0.4151960611343384, |
| "learning_rate": 4.318211268189121e-11, |
| "loss": 1.2345842123031616, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.8214285714285714, |
| "grad_norm": 0.14745569229125977, |
| "learning_rate": 4.30112655544973e-11, |
| "loss": 1.2733076810836792, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.825, |
| "grad_norm": 0.4383365511894226, |
| "learning_rate": 4.28406042361018e-11, |
| "loss": 1.2510349750518799, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.8285714285714287, |
| "grad_norm": 0.11978907883167267, |
| "learning_rate": 4.2670131371721885e-11, |
| "loss": 1.2697960138320923, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.8321428571428573, |
| "grad_norm": 0.16413728892803192, |
| "learning_rate": 4.249984960345399e-11, |
| "loss": 1.2707985639572144, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.8357142857142859, |
| "grad_norm": 0.14620473980903625, |
| "learning_rate": 4.232976157043277e-11, |
| "loss": 1.2601186037063599, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.8392857142857144, |
| "grad_norm": 0.15290690958499908, |
| "learning_rate": 4.215986990879027e-11, |
| "loss": 1.2626264095306396, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.842857142857143, |
| "grad_norm": 0.1867092102766037, |
| "learning_rate": 4.199017725161505e-11, |
| "loss": 1.3110102415084839, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.8464285714285715, |
| "grad_norm": 0.1754860281944275, |
| "learning_rate": 4.182068622891139e-11, |
| "loss": 1.29032301902771, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 0.1369704008102417, |
| "learning_rate": 4.165139946755847e-11, |
| "loss": 1.2947062253952026, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.8535714285714286, |
| "grad_norm": 0.1493859589099884, |
| "learning_rate": 4.1482319591269726e-11, |
| "loss": 1.3114452362060547, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.8571428571428572, |
| "grad_norm": 0.19140051305294037, |
| "learning_rate": 4.131344922055213e-11, |
| "loss": 1.315718173980713, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.8607142857142858, |
| "grad_norm": 0.21373005211353302, |
| "learning_rate": 4.114479097266567e-11, |
| "loss": 1.2820724248886108, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.8642857142857143, |
| "grad_norm": 0.13293863832950592, |
| "learning_rate": 4.0976347461582655e-11, |
| "loss": 1.3095886707305908, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.8678571428571429, |
| "grad_norm": 0.142401784658432, |
| "learning_rate": 4.080812129794728e-11, |
| "loss": 1.2567329406738281, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.8714285714285714, |
| "grad_norm": 0.1333981454372406, |
| "learning_rate": 4.064011508903516e-11, |
| "loss": 1.2891546487808228, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 0.2096826136112213, |
| "learning_rate": 4.047233143871292e-11, |
| "loss": 1.2824205160140991, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.8785714285714286, |
| "grad_norm": 0.17919301986694336, |
| "learning_rate": 4.030477294739783e-11, |
| "loss": 1.3100366592407227, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.8821428571428571, |
| "grad_norm": 0.16150566935539246, |
| "learning_rate": 4.013744221201749e-11, |
| "loss": 1.3002382516860962, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.8857142857142857, |
| "grad_norm": 0.1178724467754364, |
| "learning_rate": 3.997034182596958e-11, |
| "loss": 1.2348383665084839, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.8892857142857142, |
| "grad_norm": 0.22984254360198975, |
| "learning_rate": 3.980347437908175e-11, |
| "loss": 1.2670952081680298, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.8928571428571428, |
| "grad_norm": 0.13385139405727386, |
| "learning_rate": 3.963684245757131e-11, |
| "loss": 1.2981677055358887, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.8964285714285714, |
| "grad_norm": 0.4410691559314728, |
| "learning_rate": 3.9470448644005344e-11, |
| "loss": 1.3137519359588623, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.3602881133556366, |
| "learning_rate": 3.930429551726049e-11, |
| "loss": 1.2973172664642334, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.9035714285714285, |
| "grad_norm": 0.13422204554080963, |
| "learning_rate": 3.913838565248318e-11, |
| "loss": 1.2817872762680054, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.907142857142857, |
| "grad_norm": 0.1284937709569931, |
| "learning_rate": 3.8972721621049546e-11, |
| "loss": 1.2446856498718262, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.9107142857142856, |
| "grad_norm": 0.14526879787445068, |
| "learning_rate": 3.880730599052564e-11, |
| "loss": 1.2884457111358643, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.9142857142857141, |
| "grad_norm": 0.13001468777656555, |
| "learning_rate": 3.8642141324627655e-11, |
| "loss": 1.3033688068389893, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.9178571428571427, |
| "grad_norm": 0.1109691932797432, |
| "learning_rate": 3.84772301831822e-11, |
| "loss": 1.2417011260986328, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.9214285714285713, |
| "grad_norm": 0.12410853058099747, |
| "learning_rate": 3.831257512208657e-11, |
| "loss": 1.2828723192214966, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.925, |
| "grad_norm": 0.15531232953071594, |
| "learning_rate": 3.8148178693269145e-11, |
| "loss": 1.269210934638977, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.9285714285714286, |
| "grad_norm": 0.16727633774280548, |
| "learning_rate": 3.79840434446499e-11, |
| "loss": 1.327274203300476, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.9321428571428572, |
| "grad_norm": 0.13850118219852448, |
| "learning_rate": 3.782017192010087e-11, |
| "loss": 1.329694390296936, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.9357142857142857, |
| "grad_norm": 0.14651495218276978, |
| "learning_rate": 3.76565666594067e-11, |
| "loss": 1.2990163564682007, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.9392857142857143, |
| "grad_norm": 0.14077956974506378, |
| "learning_rate": 3.749323019822534e-11, |
| "loss": 1.3179551362991333, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.9428571428571428, |
| "grad_norm": 0.14183518290519714, |
| "learning_rate": 3.733016506804867e-11, |
| "loss": 1.2862507104873657, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.9464285714285714, |
| "grad_norm": 0.19720809161663055, |
| "learning_rate": 3.716737379616337e-11, |
| "loss": 1.2341052293777466, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 0.1752225160598755, |
| "learning_rate": 3.700485890561167e-11, |
| "loss": 1.2532352209091187, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.9535714285714287, |
| "grad_norm": 0.15048037469387054, |
| "learning_rate": 3.684262291515223e-11, |
| "loss": 1.2703880071640015, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.9571428571428573, |
| "grad_norm": 0.1569390445947647, |
| "learning_rate": 3.668066833922116e-11, |
| "loss": 1.2509340047836304, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.9607142857142859, |
| "grad_norm": 0.25200825929641724, |
| "learning_rate": 3.6518997687893055e-11, |
| "loss": 1.2597652673721313, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.9642857142857144, |
| "grad_norm": 0.14276815950870514, |
| "learning_rate": 3.635761346684206e-11, |
| "loss": 1.3016998767852783, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.967857142857143, |
| "grad_norm": 0.16960203647613525, |
| "learning_rate": 3.619651817730302e-11, |
| "loss": 1.2928533554077148, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.9714285714285715, |
| "grad_norm": 0.1542951613664627, |
| "learning_rate": 3.603571431603272e-11, |
| "loss": 1.3007863759994507, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.975, |
| "grad_norm": 0.17226935923099518, |
| "learning_rate": 3.5875204375271275e-11, |
| "loss": 1.2495512962341309, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.9785714285714286, |
| "grad_norm": 0.13276343047618866, |
| "learning_rate": 3.571499084270338e-11, |
| "loss": 1.2602217197418213, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.9821428571428572, |
| "grad_norm": 0.18922999501228333, |
| "learning_rate": 3.555507620141981e-11, |
| "loss": 1.2807466983795166, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.9857142857142858, |
| "grad_norm": 0.21290341019630432, |
| "learning_rate": 3.539546292987894e-11, |
| "loss": 1.313105583190918, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.9892857142857143, |
| "grad_norm": 0.1874661147594452, |
| "learning_rate": 3.523615350186834e-11, |
| "loss": 1.3670251369476318, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.9928571428571429, |
| "grad_norm": 0.24585479497909546, |
| "learning_rate": 3.507715038646641e-11, |
| "loss": 1.625638484954834, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.9964285714285714, |
| "grad_norm": 0.26914891600608826, |
| "learning_rate": 3.49184560480041e-11, |
| "loss": 1.5536011457443237, |
| "step": 1118 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.5115702152252197, |
| "learning_rate": 3.476007294602678e-11, |
| "loss": 1.4410558938980103, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.0035714285714286, |
| "grad_norm": 0.19693709909915924, |
| "learning_rate": 3.46020035352561e-11, |
| "loss": 1.42737877368927, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.007142857142857, |
| "grad_norm": 0.14978626370429993, |
| "learning_rate": 3.444425026555182e-11, |
| "loss": 1.3573956489562988, |
| "step": 1124 |
| }, |
| { |
| "epoch": 2.0107142857142857, |
| "grad_norm": 0.13327878713607788, |
| "learning_rate": 3.4286815581874045e-11, |
| "loss": 1.3719631433486938, |
| "step": 1126 |
| }, |
| { |
| "epoch": 2.0142857142857142, |
| "grad_norm": 0.17517639696598053, |
| "learning_rate": 3.412970192424517e-11, |
| "loss": 1.4211511611938477, |
| "step": 1128 |
| }, |
| { |
| "epoch": 2.017857142857143, |
| "grad_norm": 0.1492166668176651, |
| "learning_rate": 3.397291172771221e-11, |
| "loss": 1.362390160560608, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.0214285714285714, |
| "grad_norm": 0.1388062685728073, |
| "learning_rate": 3.3816447422308884e-11, |
| "loss": 1.3313382863998413, |
| "step": 1132 |
| }, |
| { |
| "epoch": 2.025, |
| "grad_norm": 0.13775278627872467, |
| "learning_rate": 3.366031143301811e-11, |
| "loss": 1.2752572298049927, |
| "step": 1134 |
| }, |
| { |
| "epoch": 2.0285714285714285, |
| "grad_norm": 0.15108071267604828, |
| "learning_rate": 3.3504506179734254e-11, |
| "loss": 1.344627022743225, |
| "step": 1136 |
| }, |
| { |
| "epoch": 2.032142857142857, |
| "grad_norm": 0.1777033656835556, |
| "learning_rate": 3.334903407722587e-11, |
| "loss": 1.3156089782714844, |
| "step": 1138 |
| }, |
| { |
| "epoch": 2.0357142857142856, |
| "grad_norm": 0.1836261749267578, |
| "learning_rate": 3.319389753509803e-11, |
| "loss": 1.2969412803649902, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.039285714285714, |
| "grad_norm": 0.1210937574505806, |
| "learning_rate": 3.30390989577551e-11, |
| "loss": 1.2561373710632324, |
| "step": 1142 |
| }, |
| { |
| "epoch": 2.0428571428571427, |
| "grad_norm": 0.22169147431850433, |
| "learning_rate": 3.288464074436346e-11, |
| "loss": 1.418563961982727, |
| "step": 1144 |
| }, |
| { |
| "epoch": 2.0464285714285713, |
| "grad_norm": 0.1952674388885498, |
| "learning_rate": 3.273052528881433e-11, |
| "loss": 1.336297631263733, |
| "step": 1146 |
| }, |
| { |
| "epoch": 2.05, |
| "grad_norm": 0.15283362567424774, |
| "learning_rate": 3.257675497968661e-11, |
| "loss": 1.2213199138641357, |
| "step": 1148 |
| }, |
| { |
| "epoch": 2.0535714285714284, |
| "grad_norm": 0.20565065741539001, |
| "learning_rate": 3.242333220020994e-11, |
| "loss": 1.3224503993988037, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.057142857142857, |
| "grad_norm": 0.15253019332885742, |
| "learning_rate": 3.22702593282277e-11, |
| "loss": 1.2786815166473389, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.0607142857142855, |
| "grad_norm": 0.20985379815101624, |
| "learning_rate": 3.211753873616024e-11, |
| "loss": 1.367148756980896, |
| "step": 1154 |
| }, |
| { |
| "epoch": 2.064285714285714, |
| "grad_norm": 0.1880485862493515, |
| "learning_rate": 3.196517279096797e-11, |
| "loss": 1.4771831035614014, |
| "step": 1156 |
| }, |
| { |
| "epoch": 2.067857142857143, |
| "grad_norm": 0.21174761652946472, |
| "learning_rate": 3.181316385411479e-11, |
| "loss": 1.44190514087677, |
| "step": 1158 |
| }, |
| { |
| "epoch": 2.0714285714285716, |
| "grad_norm": 0.23373731970787048, |
| "learning_rate": 3.166151428153146e-11, |
| "loss": 1.2126356363296509, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.075, |
| "grad_norm": 0.16597457230091095, |
| "learning_rate": 3.1510226423579124e-11, |
| "loss": 1.3093327283859253, |
| "step": 1162 |
| }, |
| { |
| "epoch": 2.0785714285714287, |
| "grad_norm": 0.17415602505207062, |
| "learning_rate": 3.1359302625012785e-11, |
| "loss": 1.3309526443481445, |
| "step": 1164 |
| }, |
| { |
| "epoch": 2.0821428571428573, |
| "grad_norm": 0.2406979203224182, |
| "learning_rate": 3.1208745224945054e-11, |
| "loss": 1.3279399871826172, |
| "step": 1166 |
| }, |
| { |
| "epoch": 2.085714285714286, |
| "grad_norm": 0.15369997918605804, |
| "learning_rate": 3.105855655680986e-11, |
| "loss": 1.2411911487579346, |
| "step": 1168 |
| }, |
| { |
| "epoch": 2.0892857142857144, |
| "grad_norm": 0.1417544037103653, |
| "learning_rate": 3.090873894832628e-11, |
| "loss": 1.2670767307281494, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.092857142857143, |
| "grad_norm": 0.39732953906059265, |
| "learning_rate": 3.0759294721462496e-11, |
| "loss": 1.2473151683807373, |
| "step": 1172 |
| }, |
| { |
| "epoch": 2.0964285714285715, |
| "grad_norm": 0.19093656539916992, |
| "learning_rate": 3.061022619239977e-11, |
| "loss": 1.3719512224197388, |
| "step": 1174 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 0.13845385611057281, |
| "learning_rate": 3.046153567149654e-11, |
| "loss": 1.3432111740112305, |
| "step": 1176 |
| }, |
| { |
| "epoch": 2.1035714285714286, |
| "grad_norm": 0.15389707684516907, |
| "learning_rate": 3.0313225463252715e-11, |
| "loss": 1.421315312385559, |
| "step": 1178 |
| }, |
| { |
| "epoch": 2.107142857142857, |
| "grad_norm": 0.1846085637807846, |
| "learning_rate": 3.0165297866273766e-11, |
| "loss": 1.4859906435012817, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.1107142857142858, |
| "grad_norm": 0.1564885377883911, |
| "learning_rate": 3.00177551732353e-11, |
| "loss": 1.3033545017242432, |
| "step": 1182 |
| }, |
| { |
| "epoch": 2.1142857142857143, |
| "grad_norm": 0.24117198586463928, |
| "learning_rate": 2.9870599670847367e-11, |
| "loss": 1.4248461723327637, |
| "step": 1184 |
| }, |
| { |
| "epoch": 2.117857142857143, |
| "grad_norm": 0.145203098654747, |
| "learning_rate": 2.972383363981917e-11, |
| "loss": 1.3876914978027344, |
| "step": 1186 |
| }, |
| { |
| "epoch": 2.1214285714285714, |
| "grad_norm": 0.20995774865150452, |
| "learning_rate": 2.95774593548236e-11, |
| "loss": 1.2380093336105347, |
| "step": 1188 |
| }, |
| { |
| "epoch": 2.125, |
| "grad_norm": 0.17368246614933014, |
| "learning_rate": 2.943147908446201e-11, |
| "loss": 1.297877311706543, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.1285714285714286, |
| "grad_norm": 0.3502728044986725, |
| "learning_rate": 2.9285895091229044e-11, |
| "loss": 1.0307722091674805, |
| "step": 1192 |
| }, |
| { |
| "epoch": 2.132142857142857, |
| "grad_norm": 0.19696146249771118, |
| "learning_rate": 2.9140709631477665e-11, |
| "loss": 1.2322001457214355, |
| "step": 1194 |
| }, |
| { |
| "epoch": 2.1357142857142857, |
| "grad_norm": 0.1824025809764862, |
| "learning_rate": 2.8995924955384046e-11, |
| "loss": 1.3014825582504272, |
| "step": 1196 |
| }, |
| { |
| "epoch": 2.1392857142857142, |
| "grad_norm": 0.17128019034862518, |
| "learning_rate": 2.8851543306912782e-11, |
| "loss": 1.3863450288772583, |
| "step": 1198 |
| }, |
| { |
| "epoch": 2.142857142857143, |
| "grad_norm": 0.1545657217502594, |
| "learning_rate": 2.8707566923782103e-11, |
| "loss": 1.2946215867996216, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.1464285714285714, |
| "grad_norm": 0.16589367389678955, |
| "learning_rate": 2.8563998037429164e-11, |
| "loss": 1.2779427766799927, |
| "step": 1202 |
| }, |
| { |
| "epoch": 2.15, |
| "grad_norm": 0.30030524730682373, |
| "learning_rate": 2.842083887297548e-11, |
| "loss": 1.2760953903198242, |
| "step": 1204 |
| }, |
| { |
| "epoch": 2.1535714285714285, |
| "grad_norm": 0.18602436780929565, |
| "learning_rate": 2.827809164919244e-11, |
| "loss": 1.4392411708831787, |
| "step": 1206 |
| }, |
| { |
| "epoch": 2.157142857142857, |
| "grad_norm": 0.15597227215766907, |
| "learning_rate": 2.8135758578466896e-11, |
| "loss": 1.3980666399002075, |
| "step": 1208 |
| }, |
| { |
| "epoch": 2.1607142857142856, |
| "grad_norm": 0.1635390818119049, |
| "learning_rate": 2.7993841866766962e-11, |
| "loss": 1.3193120956420898, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.164285714285714, |
| "grad_norm": 0.22947640717029572, |
| "learning_rate": 2.785234371360766e-11, |
| "loss": 1.342287540435791, |
| "step": 1212 |
| }, |
| { |
| "epoch": 2.1678571428571427, |
| "grad_norm": 0.20655785501003265, |
| "learning_rate": 2.7711266312016987e-11, |
| "loss": 1.373154640197754, |
| "step": 1214 |
| }, |
| { |
| "epoch": 2.1714285714285713, |
| "grad_norm": 0.1706349104642868, |
| "learning_rate": 2.757061184850183e-11, |
| "loss": 1.2490075826644897, |
| "step": 1216 |
| }, |
| { |
| "epoch": 2.175, |
| "grad_norm": 0.22908586263656616, |
| "learning_rate": 2.743038250301418e-11, |
| "loss": 1.379451036453247, |
| "step": 1218 |
| }, |
| { |
| "epoch": 2.1785714285714284, |
| "grad_norm": 0.17054013907909393, |
| "learning_rate": 2.7290580448917202e-11, |
| "loss": 1.3195239305496216, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.182142857142857, |
| "grad_norm": 0.2558165490627289, |
| "learning_rate": 2.7151207852951677e-11, |
| "loss": 1.1808172464370728, |
| "step": 1222 |
| }, |
| { |
| "epoch": 2.185714285714286, |
| "grad_norm": 0.1999257206916809, |
| "learning_rate": 2.7012266875202346e-11, |
| "loss": 1.2860252857208252, |
| "step": 1224 |
| }, |
| { |
| "epoch": 2.189285714285714, |
| "grad_norm": 0.23496678471565247, |
| "learning_rate": 2.6873759669064475e-11, |
| "loss": 1.3138054609298706, |
| "step": 1226 |
| }, |
| { |
| "epoch": 2.192857142857143, |
| "grad_norm": 0.2032238245010376, |
| "learning_rate": 2.673568838121045e-11, |
| "loss": 1.291284203529358, |
| "step": 1228 |
| }, |
| { |
| "epoch": 2.1964285714285716, |
| "grad_norm": 0.16216710209846497, |
| "learning_rate": 2.659805515155653e-11, |
| "loss": 1.212812900543213, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.21912680566310883, |
| "learning_rate": 2.6460862113229657e-11, |
| "loss": 1.2930355072021484, |
| "step": 1232 |
| }, |
| { |
| "epoch": 2.2035714285714287, |
| "grad_norm": 0.16590477526187897, |
| "learning_rate": 2.632411139253442e-11, |
| "loss": 1.3133563995361328, |
| "step": 1234 |
| }, |
| { |
| "epoch": 2.2071428571428573, |
| "grad_norm": 0.19360366463661194, |
| "learning_rate": 2.6187805108920104e-11, |
| "loss": 1.2435790300369263, |
| "step": 1236 |
| }, |
| { |
| "epoch": 2.210714285714286, |
| "grad_norm": 0.1824900209903717, |
| "learning_rate": 2.605194537494779e-11, |
| "loss": 1.295372486114502, |
| "step": 1238 |
| }, |
| { |
| "epoch": 2.2142857142857144, |
| "grad_norm": 0.17591625452041626, |
| "learning_rate": 2.5916534296257656e-11, |
| "loss": 1.3211277723312378, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.217857142857143, |
| "grad_norm": 0.18499885499477386, |
| "learning_rate": 2.5781573971536387e-11, |
| "loss": 1.4101108312606812, |
| "step": 1242 |
| }, |
| { |
| "epoch": 2.2214285714285715, |
| "grad_norm": 0.20316849648952484, |
| "learning_rate": 2.564706649248456e-11, |
| "loss": 1.3189499378204346, |
| "step": 1244 |
| }, |
| { |
| "epoch": 2.225, |
| "grad_norm": 0.18995395302772522, |
| "learning_rate": 2.5513013943784236e-11, |
| "loss": 1.4385656118392944, |
| "step": 1246 |
| }, |
| { |
| "epoch": 2.2285714285714286, |
| "grad_norm": 0.1785760372877121, |
| "learning_rate": 2.537941840306669e-11, |
| "loss": 1.375566005706787, |
| "step": 1248 |
| }, |
| { |
| "epoch": 2.232142857142857, |
| "grad_norm": 0.2664630711078644, |
| "learning_rate": 2.5246281940880267e-11, |
| "loss": 1.4056965112686157, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.2357142857142858, |
| "grad_norm": 0.24670979380607605, |
| "learning_rate": 2.5113606620658124e-11, |
| "loss": 1.398148536682129, |
| "step": 1252 |
| }, |
| { |
| "epoch": 2.2392857142857143, |
| "grad_norm": 0.32755303382873535, |
| "learning_rate": 2.4981394498686412e-11, |
| "loss": 1.2407431602478027, |
| "step": 1254 |
| }, |
| { |
| "epoch": 2.242857142857143, |
| "grad_norm": 0.25386714935302734, |
| "learning_rate": 2.484964762407232e-11, |
| "loss": 1.2231903076171875, |
| "step": 1256 |
| }, |
| { |
| "epoch": 2.2464285714285714, |
| "grad_norm": 0.31434598565101624, |
| "learning_rate": 2.4718368038712332e-11, |
| "loss": 1.2211343050003052, |
| "step": 1258 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 0.39853435754776, |
| "learning_rate": 2.45875577772606e-11, |
| "loss": 1.2230669260025024, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.2535714285714286, |
| "grad_norm": 0.4850930869579315, |
| "learning_rate": 2.4457218867097394e-11, |
| "loss": 1.3305819034576416, |
| "step": 1262 |
| }, |
| { |
| "epoch": 2.257142857142857, |
| "grad_norm": 0.3038150370121002, |
| "learning_rate": 2.4327353328297673e-11, |
| "loss": 1.4300968647003174, |
| "step": 1264 |
| }, |
| { |
| "epoch": 2.2607142857142857, |
| "grad_norm": 0.42150551080703735, |
| "learning_rate": 2.4197963173599828e-11, |
| "loss": 1.210048794746399, |
| "step": 1266 |
| }, |
| { |
| "epoch": 2.2642857142857142, |
| "grad_norm": 0.4590936303138733, |
| "learning_rate": 2.4069050408374375e-11, |
| "loss": 1.4437575340270996, |
| "step": 1268 |
| }, |
| { |
| "epoch": 2.267857142857143, |
| "grad_norm": 0.2934648394584656, |
| "learning_rate": 2.3940617030592998e-11, |
| "loss": 1.2751147747039795, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.2714285714285714, |
| "grad_norm": 0.2862495481967926, |
| "learning_rate": 2.3812665030797508e-11, |
| "loss": 1.4070979356765747, |
| "step": 1272 |
| }, |
| { |
| "epoch": 2.275, |
| "grad_norm": 0.2849954068660736, |
| "learning_rate": 2.3685196392069054e-11, |
| "loss": 1.4459384679794312, |
| "step": 1274 |
| }, |
| { |
| "epoch": 2.2785714285714285, |
| "grad_norm": 0.34812119603157043, |
| "learning_rate": 2.3558213089997302e-11, |
| "loss": 1.2700358629226685, |
| "step": 1276 |
| }, |
| { |
| "epoch": 2.282142857142857, |
| "grad_norm": 0.2995881140232086, |
| "learning_rate": 2.343171709264989e-11, |
| "loss": 0.9016294479370117, |
| "step": 1278 |
| }, |
| { |
| "epoch": 2.2857142857142856, |
| "grad_norm": 0.4387514591217041, |
| "learning_rate": 2.3305710360541853e-11, |
| "loss": 1.2820184230804443, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.289285714285714, |
| "grad_norm": 0.20915277302265167, |
| "learning_rate": 2.3180194846605366e-11, |
| "loss": 1.296189308166504, |
| "step": 1282 |
| }, |
| { |
| "epoch": 2.2928571428571427, |
| "grad_norm": 0.32164767384529114, |
| "learning_rate": 2.3055172496159327e-11, |
| "loss": 1.6072280406951904, |
| "step": 1284 |
| }, |
| { |
| "epoch": 2.2964285714285713, |
| "grad_norm": 0.878715455532074, |
| "learning_rate": 2.2930645246879285e-11, |
| "loss": 1.2524617910385132, |
| "step": 1286 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 0.2663787007331848, |
| "learning_rate": 2.2806615028767446e-11, |
| "loss": 1.1642640829086304, |
| "step": 1288 |
| }, |
| { |
| "epoch": 2.3035714285714284, |
| "grad_norm": 0.30167171359062195, |
| "learning_rate": 2.2683083764122627e-11, |
| "loss": 1.2560240030288696, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.307142857142857, |
| "grad_norm": 0.19819043576717377, |
| "learning_rate": 2.2560053367510623e-11, |
| "loss": 1.562985897064209, |
| "step": 1292 |
| }, |
| { |
| "epoch": 2.310714285714286, |
| "grad_norm": 0.3227558135986328, |
| "learning_rate": 2.24375257457344e-11, |
| "loss": 0.941666841506958, |
| "step": 1294 |
| }, |
| { |
| "epoch": 2.314285714285714, |
| "grad_norm": 0.30193987488746643, |
| "learning_rate": 2.2315502797804678e-11, |
| "loss": 1.497104525566101, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.317857142857143, |
| "grad_norm": 0.22983092069625854, |
| "learning_rate": 2.2193986414910348e-11, |
| "loss": 1.3992904424667358, |
| "step": 1298 |
| }, |
| { |
| "epoch": 2.3214285714285716, |
| "grad_norm": 0.3726719617843628, |
| "learning_rate": 2.2072978480389284e-11, |
| "loss": 1.1520694494247437, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.325, |
| "grad_norm": 0.43815112113952637, |
| "learning_rate": 2.1952480869699036e-11, |
| "loss": 1.7719744443893433, |
| "step": 1302 |
| }, |
| { |
| "epoch": 2.3285714285714287, |
| "grad_norm": 0.3036911189556122, |
| "learning_rate": 2.1832495450387936e-11, |
| "loss": 1.3131693601608276, |
| "step": 1304 |
| }, |
| { |
| "epoch": 2.3321428571428573, |
| "grad_norm": 0.4145776331424713, |
| "learning_rate": 2.1713024082065964e-11, |
| "loss": 1.334944486618042, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.335714285714286, |
| "grad_norm": 0.44712337851524353, |
| "learning_rate": 2.1594068616376053e-11, |
| "loss": 1.4629075527191162, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.3392857142857144, |
| "grad_norm": 0.29300954937934875, |
| "learning_rate": 2.1475630896965337e-11, |
| "loss": 1.3952922821044922, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.342857142857143, |
| "grad_norm": 0.2007542997598648, |
| "learning_rate": 2.1357712759456594e-11, |
| "loss": 1.0657954216003418, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.3464285714285715, |
| "grad_norm": 0.24710141122341156, |
| "learning_rate": 2.1240316031419794e-11, |
| "loss": 0.8775244355201721, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.35, |
| "grad_norm": 0.2989410161972046, |
| "learning_rate": 2.1123442532343767e-11, |
| "loss": 1.389277458190918, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.3535714285714286, |
| "grad_norm": 0.3632841408252716, |
| "learning_rate": 2.1007094073607998e-11, |
| "loss": 1.512230634689331, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.357142857142857, |
| "grad_norm": 0.20877733826637268, |
| "learning_rate": 2.089127245845461e-11, |
| "loss": 1.338801383972168, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.3607142857142858, |
| "grad_norm": 0.14346203207969666, |
| "learning_rate": 2.0775979481960342e-11, |
| "loss": 1.1795090436935425, |
| "step": 1322 |
| }, |
| { |
| "epoch": 2.3642857142857143, |
| "grad_norm": 0.32592296600341797, |
| "learning_rate": 2.0661216931008714e-11, |
| "loss": 1.364555835723877, |
| "step": 1324 |
| }, |
| { |
| "epoch": 2.367857142857143, |
| "grad_norm": 2.3708178997039795, |
| "learning_rate": 2.0546986584262438e-11, |
| "loss": 1.4721930027008057, |
| "step": 1326 |
| }, |
| { |
| "epoch": 2.3714285714285714, |
| "grad_norm": 0.19912676513195038, |
| "learning_rate": 2.043329021213577e-11, |
| "loss": 1.4108600616455078, |
| "step": 1328 |
| }, |
| { |
| "epoch": 2.375, |
| "grad_norm": 0.20013919472694397, |
| "learning_rate": 2.032012957676708e-11, |
| "loss": 1.2515854835510254, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.3785714285714286, |
| "grad_norm": 0.29759207367897034, |
| "learning_rate": 2.0207506431991558e-11, |
| "loss": 1.568236231803894, |
| "step": 1332 |
| }, |
| { |
| "epoch": 2.382142857142857, |
| "grad_norm": 0.18850746750831604, |
| "learning_rate": 2.0095422523314016e-11, |
| "loss": 1.3757435083389282, |
| "step": 1334 |
| }, |
| { |
| "epoch": 2.3857142857142857, |
| "grad_norm": 0.16454045474529266, |
| "learning_rate": 1.9983879587881848e-11, |
| "loss": 1.2330865859985352, |
| "step": 1336 |
| }, |
| { |
| "epoch": 2.3892857142857142, |
| "grad_norm": 0.13228176534175873, |
| "learning_rate": 1.9872879354458114e-11, |
| "loss": 1.2531322240829468, |
| "step": 1338 |
| }, |
| { |
| "epoch": 2.392857142857143, |
| "grad_norm": 0.17047590017318726, |
| "learning_rate": 1.976242354339471e-11, |
| "loss": 1.392430305480957, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.3964285714285714, |
| "grad_norm": 0.964780330657959, |
| "learning_rate": 1.9652513866605748e-11, |
| "loss": 1.2138177156448364, |
| "step": 1342 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.20008714497089386, |
| "learning_rate": 1.9543152027541e-11, |
| "loss": 1.3786259889602661, |
| "step": 1344 |
| }, |
| { |
| "epoch": 2.4035714285714285, |
| "grad_norm": 0.2520330250263214, |
| "learning_rate": 1.9434339721159505e-11, |
| "loss": 1.5099730491638184, |
| "step": 1346 |
| }, |
| { |
| "epoch": 2.407142857142857, |
| "grad_norm": 0.26674994826316833, |
| "learning_rate": 1.932607863390329e-11, |
| "loss": 1.4184703826904297, |
| "step": 1348 |
| }, |
| { |
| "epoch": 2.4107142857142856, |
| "grad_norm": 0.3661814332008362, |
| "learning_rate": 1.9218370443671232e-11, |
| "loss": 1.5577000379562378, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.414285714285714, |
| "grad_norm": 0.1575201451778412, |
| "learning_rate": 1.9111216819793098e-11, |
| "loss": 1.4460270404815674, |
| "step": 1352 |
| }, |
| { |
| "epoch": 2.4178571428571427, |
| "grad_norm": 0.2647629678249359, |
| "learning_rate": 1.900461942300359e-11, |
| "loss": 1.242174506187439, |
| "step": 1354 |
| }, |
| { |
| "epoch": 2.4214285714285713, |
| "grad_norm": 0.22450336813926697, |
| "learning_rate": 1.8898579905416678e-11, |
| "loss": 1.143039345741272, |
| "step": 1356 |
| }, |
| { |
| "epoch": 2.425, |
| "grad_norm": 0.14635813236236572, |
| "learning_rate": 1.8793099910499927e-11, |
| "loss": 1.1687003374099731, |
| "step": 1358 |
| }, |
| { |
| "epoch": 2.4285714285714284, |
| "grad_norm": 0.14772412180900574, |
| "learning_rate": 1.8688181073049123e-11, |
| "loss": 1.2316815853118896, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.432142857142857, |
| "grad_norm": 0.1730790138244629, |
| "learning_rate": 1.8583825019162844e-11, |
| "loss": 1.316425085067749, |
| "step": 1362 |
| }, |
| { |
| "epoch": 2.435714285714286, |
| "grad_norm": 0.1888405829668045, |
| "learning_rate": 1.8480033366217288e-11, |
| "loss": 1.437265396118164, |
| "step": 1364 |
| }, |
| { |
| "epoch": 2.439285714285714, |
| "grad_norm": 0.22746612131595612, |
| "learning_rate": 1.837680772284123e-11, |
| "loss": 1.5048937797546387, |
| "step": 1366 |
| }, |
| { |
| "epoch": 2.442857142857143, |
| "grad_norm": 0.24730168282985687, |
| "learning_rate": 1.8274149688891058e-11, |
| "loss": 1.3409751653671265, |
| "step": 1368 |
| }, |
| { |
| "epoch": 2.4464285714285716, |
| "grad_norm": 0.2903384268283844, |
| "learning_rate": 1.8172060855425984e-11, |
| "loss": 1.6011804342269897, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.45, |
| "grad_norm": 0.4629653990268707, |
| "learning_rate": 1.8070542804683405e-11, |
| "loss": 1.39284086227417, |
| "step": 1372 |
| }, |
| { |
| "epoch": 2.4535714285714287, |
| "grad_norm": 0.1703772395849228, |
| "learning_rate": 1.7969597110054342e-11, |
| "loss": 1.1957430839538574, |
| "step": 1374 |
| }, |
| { |
| "epoch": 2.4571428571428573, |
| "grad_norm": 0.46089184284210205, |
| "learning_rate": 1.786922533605913e-11, |
| "loss": 1.4656089544296265, |
| "step": 1376 |
| }, |
| { |
| "epoch": 2.460714285714286, |
| "grad_norm": 0.18819937109947205, |
| "learning_rate": 1.776942903832306e-11, |
| "loss": 1.223941445350647, |
| "step": 1378 |
| }, |
| { |
| "epoch": 2.4642857142857144, |
| "grad_norm": 0.23815391957759857, |
| "learning_rate": 1.7670209763552342e-11, |
| "loss": 1.3691385984420776, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.467857142857143, |
| "grad_norm": 0.17686067521572113, |
| "learning_rate": 1.7571569049510138e-11, |
| "loss": 1.317873239517212, |
| "step": 1382 |
| }, |
| { |
| "epoch": 2.4714285714285715, |
| "grad_norm": 0.38989320397377014, |
| "learning_rate": 1.747350842499271e-11, |
| "loss": 1.466494083404541, |
| "step": 1384 |
| }, |
| { |
| "epoch": 2.475, |
| "grad_norm": 0.22029191255569458, |
| "learning_rate": 1.7376029409805707e-11, |
| "loss": 1.3847404718399048, |
| "step": 1386 |
| }, |
| { |
| "epoch": 2.4785714285714286, |
| "grad_norm": 0.27662548422813416, |
| "learning_rate": 1.7279133514740644e-11, |
| "loss": 1.37192702293396, |
| "step": 1388 |
| }, |
| { |
| "epoch": 2.482142857142857, |
| "grad_norm": 0.23457181453704834, |
| "learning_rate": 1.7182822241551434e-11, |
| "loss": 1.1607106924057007, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.4857142857142858, |
| "grad_norm": 0.1711721569299698, |
| "learning_rate": 1.708709708293121e-11, |
| "loss": 1.1327881813049316, |
| "step": 1392 |
| }, |
| { |
| "epoch": 2.4892857142857143, |
| "grad_norm": 0.24287395179271698, |
| "learning_rate": 1.699195952248908e-11, |
| "loss": 1.4552991390228271, |
| "step": 1394 |
| }, |
| { |
| "epoch": 2.492857142857143, |
| "grad_norm": 0.3649357557296753, |
| "learning_rate": 1.6897411034727216e-11, |
| "loss": 1.497635006904602, |
| "step": 1396 |
| }, |
| { |
| "epoch": 2.4964285714285714, |
| "grad_norm": 0.35778528451919556, |
| "learning_rate": 1.680345308501795e-11, |
| "loss": 1.3948183059692383, |
| "step": 1398 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.1573958396911621, |
| "learning_rate": 1.6710087129581085e-11, |
| "loss": 1.3863242864608765, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.5035714285714286, |
| "grad_norm": 0.20089280605316162, |
| "learning_rate": 1.6617314615461325e-11, |
| "loss": 1.3896986246109009, |
| "step": 1402 |
| }, |
| { |
| "epoch": 2.507142857142857, |
| "grad_norm": 0.23711322247982025, |
| "learning_rate": 1.6525136980505835e-11, |
| "loss": 1.4924380779266357, |
| "step": 1404 |
| }, |
| { |
| "epoch": 2.5107142857142857, |
| "grad_norm": 0.20246915519237518, |
| "learning_rate": 1.6433555653341975e-11, |
| "loss": 1.4420058727264404, |
| "step": 1406 |
| }, |
| { |
| "epoch": 2.5142857142857142, |
| "grad_norm": 0.20075786113739014, |
| "learning_rate": 1.6342572053355166e-11, |
| "loss": 1.2571265697479248, |
| "step": 1408 |
| }, |
| { |
| "epoch": 2.517857142857143, |
| "grad_norm": 0.2846459150314331, |
| "learning_rate": 1.625218759066685e-11, |
| "loss": 0.9605814218521118, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.5214285714285714, |
| "grad_norm": 0.2503245770931244, |
| "learning_rate": 1.6162403666112653e-11, |
| "loss": 1.0498977899551392, |
| "step": 1412 |
| }, |
| { |
| "epoch": 2.525, |
| "grad_norm": 0.2854737639427185, |
| "learning_rate": 1.607322167122069e-11, |
| "loss": 1.3054733276367188, |
| "step": 1414 |
| }, |
| { |
| "epoch": 2.5285714285714285, |
| "grad_norm": 0.4330965280532837, |
| "learning_rate": 1.598464298819002e-11, |
| "loss": 1.088234305381775, |
| "step": 1416 |
| }, |
| { |
| "epoch": 2.532142857142857, |
| "grad_norm": 0.23491685092449188, |
| "learning_rate": 1.5896668989869152e-11, |
| "loss": 1.2604817152023315, |
| "step": 1418 |
| }, |
| { |
| "epoch": 2.5357142857142856, |
| "grad_norm": 0.20269037783145905, |
| "learning_rate": 1.5809301039734815e-11, |
| "loss": 1.2018966674804688, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.539285714285714, |
| "grad_norm": 0.15246133506298065, |
| "learning_rate": 1.572254049187084e-11, |
| "loss": 1.3999316692352295, |
| "step": 1422 |
| }, |
| { |
| "epoch": 2.5428571428571427, |
| "grad_norm": 0.13255059719085693, |
| "learning_rate": 1.5636388690947122e-11, |
| "loss": 1.1051727533340454, |
| "step": 1424 |
| }, |
| { |
| "epoch": 2.5464285714285713, |
| "grad_norm": 0.4702497720718384, |
| "learning_rate": 1.5550846972198852e-11, |
| "loss": 1.3160711526870728, |
| "step": 1426 |
| }, |
| { |
| "epoch": 2.55, |
| "grad_norm": 0.12075657397508621, |
| "learning_rate": 1.546591666140573e-11, |
| "loss": 1.3880804777145386, |
| "step": 1428 |
| }, |
| { |
| "epoch": 2.553571428571429, |
| "grad_norm": 0.2639578580856323, |
| "learning_rate": 1.5381599074871512e-11, |
| "loss": 1.1285425424575806, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.557142857142857, |
| "grad_norm": 0.30509477853775024, |
| "learning_rate": 1.5297895519403562e-11, |
| "loss": 1.5178343057632446, |
| "step": 1432 |
| }, |
| { |
| "epoch": 2.560714285714286, |
| "grad_norm": 0.5000651478767395, |
| "learning_rate": 1.5214807292292565e-11, |
| "loss": 1.17734694480896, |
| "step": 1434 |
| }, |
| { |
| "epoch": 2.564285714285714, |
| "grad_norm": 0.2977011799812317, |
| "learning_rate": 1.513233568129249e-11, |
| "loss": 1.3034253120422363, |
| "step": 1436 |
| }, |
| { |
| "epoch": 2.567857142857143, |
| "grad_norm": 0.2919791638851166, |
| "learning_rate": 1.5050481964600582e-11, |
| "loss": 1.330073595046997, |
| "step": 1438 |
| }, |
| { |
| "epoch": 2.571428571428571, |
| "grad_norm": 0.281448096036911, |
| "learning_rate": 1.4969247410837587e-11, |
| "loss": 1.3406702280044556, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.575, |
| "grad_norm": 0.20965971052646637, |
| "learning_rate": 1.4888633279028066e-11, |
| "loss": 1.233846664428711, |
| "step": 1442 |
| }, |
| { |
| "epoch": 2.5785714285714287, |
| "grad_norm": 0.13012030720710754, |
| "learning_rate": 1.4808640818580886e-11, |
| "loss": 1.1111416816711426, |
| "step": 1444 |
| }, |
| { |
| "epoch": 2.5821428571428573, |
| "grad_norm": 0.2702961266040802, |
| "learning_rate": 1.4729271269269823e-11, |
| "loss": 1.3957263231277466, |
| "step": 1446 |
| }, |
| { |
| "epoch": 2.585714285714286, |
| "grad_norm": 0.5005801320075989, |
| "learning_rate": 1.4650525861214453e-11, |
| "loss": 1.228448748588562, |
| "step": 1448 |
| }, |
| { |
| "epoch": 2.5892857142857144, |
| "grad_norm": 0.2977920472621918, |
| "learning_rate": 1.4572405814860953e-11, |
| "loss": 1.1848669052124023, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.592857142857143, |
| "grad_norm": 0.19575802981853485, |
| "learning_rate": 1.4494912340963285e-11, |
| "loss": 1.0956735610961914, |
| "step": 1452 |
| }, |
| { |
| "epoch": 2.5964285714285715, |
| "grad_norm": 0.285162091255188, |
| "learning_rate": 1.441804664056437e-11, |
| "loss": 1.3711469173431396, |
| "step": 1454 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.3140964210033417, |
| "learning_rate": 1.434180990497751e-11, |
| "loss": 1.2784916162490845, |
| "step": 1456 |
| }, |
| { |
| "epoch": 2.6035714285714286, |
| "grad_norm": 0.2630551755428314, |
| "learning_rate": 1.4266203315767917e-11, |
| "loss": 1.3073674440383911, |
| "step": 1458 |
| }, |
| { |
| "epoch": 2.607142857142857, |
| "grad_norm": 0.276987224817276, |
| "learning_rate": 1.4191228044734386e-11, |
| "loss": 1.3831404447555542, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.6107142857142858, |
| "grad_norm": 0.21835049986839294, |
| "learning_rate": 1.411688525389114e-11, |
| "loss": 1.4132722616195679, |
| "step": 1462 |
| }, |
| { |
| "epoch": 2.6142857142857143, |
| "grad_norm": 0.36612504720687866, |
| "learning_rate": 1.4043176095449842e-11, |
| "loss": 1.2454001903533936, |
| "step": 1464 |
| }, |
| { |
| "epoch": 2.617857142857143, |
| "grad_norm": 0.9990705847740173, |
| "learning_rate": 1.397010171180171e-11, |
| "loss": 1.2597897052764893, |
| "step": 1466 |
| }, |
| { |
| "epoch": 2.6214285714285714, |
| "grad_norm": 0.28593096137046814, |
| "learning_rate": 1.3897663235499797e-11, |
| "loss": 1.3988193273544312, |
| "step": 1468 |
| }, |
| { |
| "epoch": 2.625, |
| "grad_norm": 0.26052477955818176, |
| "learning_rate": 1.382586178924149e-11, |
| "loss": 1.1380650997161865, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.6285714285714286, |
| "grad_norm": 1.1111445426940918, |
| "learning_rate": 1.3754698485851073e-11, |
| "loss": 1.457688808441162, |
| "step": 1472 |
| }, |
| { |
| "epoch": 2.632142857142857, |
| "grad_norm": 0.7538399696350098, |
| "learning_rate": 1.3684174428262489e-11, |
| "loss": 1.3109138011932373, |
| "step": 1474 |
| }, |
| { |
| "epoch": 2.6357142857142857, |
| "grad_norm": 0.23928874731063843, |
| "learning_rate": 1.3614290709502243e-11, |
| "loss": 1.0036344528198242, |
| "step": 1476 |
| }, |
| { |
| "epoch": 2.6392857142857142, |
| "grad_norm": 0.30908122658729553, |
| "learning_rate": 1.354504841267246e-11, |
| "loss": 0.977489709854126, |
| "step": 1478 |
| }, |
| { |
| "epoch": 2.642857142857143, |
| "grad_norm": 0.24736160039901733, |
| "learning_rate": 1.3476448610934103e-11, |
| "loss": 1.356593132019043, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.6464285714285714, |
| "grad_norm": 0.25912997126579285, |
| "learning_rate": 1.3408492367490344e-11, |
| "loss": 1.3192540407180786, |
| "step": 1482 |
| }, |
| { |
| "epoch": 2.65, |
| "grad_norm": 0.24118413031101227, |
| "learning_rate": 1.334118073557008e-11, |
| "loss": 1.3637826442718506, |
| "step": 1484 |
| }, |
| { |
| "epoch": 2.6535714285714285, |
| "grad_norm": 0.1752414107322693, |
| "learning_rate": 1.3274514758411593e-11, |
| "loss": 1.3229981660842896, |
| "step": 1486 |
| }, |
| { |
| "epoch": 2.657142857142857, |
| "grad_norm": 0.19834518432617188, |
| "learning_rate": 1.3208495469246445e-11, |
| "loss": 1.365134596824646, |
| "step": 1488 |
| }, |
| { |
| "epoch": 2.6607142857142856, |
| "grad_norm": 0.19379328191280365, |
| "learning_rate": 1.3143123891283355e-11, |
| "loss": 1.3100693225860596, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.664285714285714, |
| "grad_norm": 0.17539915442466736, |
| "learning_rate": 1.307840103769245e-11, |
| "loss": 1.3072569370269775, |
| "step": 1492 |
| }, |
| { |
| "epoch": 2.6678571428571427, |
| "grad_norm": 0.1835700273513794, |
| "learning_rate": 1.3014327911589493e-11, |
| "loss": 1.2993477582931519, |
| "step": 1494 |
| }, |
| { |
| "epoch": 2.6714285714285713, |
| "grad_norm": 0.16093674302101135, |
| "learning_rate": 1.2950905506020382e-11, |
| "loss": 1.2758489847183228, |
| "step": 1496 |
| }, |
| { |
| "epoch": 2.675, |
| "grad_norm": 0.24639339745044708, |
| "learning_rate": 1.288813480394571e-11, |
| "loss": 1.3632615804672241, |
| "step": 1498 |
| }, |
| { |
| "epoch": 2.678571428571429, |
| "grad_norm": 0.2552201747894287, |
| "learning_rate": 1.2826016778225578e-11, |
| "loss": 1.3620305061340332, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.682142857142857, |
| "grad_norm": 0.24661146104335785, |
| "learning_rate": 1.2764552391604466e-11, |
| "loss": 1.4900641441345215, |
| "step": 1502 |
| }, |
| { |
| "epoch": 2.685714285714286, |
| "grad_norm": 0.3591724634170532, |
| "learning_rate": 1.2703742596696384e-11, |
| "loss": 1.1812725067138672, |
| "step": 1504 |
| }, |
| { |
| "epoch": 2.689285714285714, |
| "grad_norm": 0.221350759267807, |
| "learning_rate": 1.2643588335970021e-11, |
| "loss": 1.1195027828216553, |
| "step": 1506 |
| }, |
| { |
| "epoch": 2.692857142857143, |
| "grad_norm": 0.21987511217594147, |
| "learning_rate": 1.2584090541734216e-11, |
| "loss": 1.333940863609314, |
| "step": 1508 |
| }, |
| { |
| "epoch": 2.696428571428571, |
| "grad_norm": 0.16496390104293823, |
| "learning_rate": 1.252525013612346e-11, |
| "loss": 1.335278034210205, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.7, |
| "grad_norm": 0.2967815399169922, |
| "learning_rate": 1.2467068031083622e-11, |
| "loss": 1.312089204788208, |
| "step": 1512 |
| }, |
| { |
| "epoch": 2.7035714285714287, |
| "grad_norm": 0.16498564183712006, |
| "learning_rate": 1.2409545128357805e-11, |
| "loss": 1.184598445892334, |
| "step": 1514 |
| }, |
| { |
| "epoch": 2.7071428571428573, |
| "grad_norm": 0.16838309168815613, |
| "learning_rate": 1.235268231947238e-11, |
| "loss": 1.2930303812026978, |
| "step": 1516 |
| }, |
| { |
| "epoch": 2.710714285714286, |
| "grad_norm": 0.27942174673080444, |
| "learning_rate": 1.229648048572317e-11, |
| "loss": 1.2529990673065186, |
| "step": 1518 |
| }, |
| { |
| "epoch": 2.7142857142857144, |
| "grad_norm": 0.16519573330879211, |
| "learning_rate": 1.2240940498161798e-11, |
| "loss": 1.2533071041107178, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.717857142857143, |
| "grad_norm": 0.358005553483963, |
| "learning_rate": 1.2186063217582143e-11, |
| "loss": 1.2952263355255127, |
| "step": 1522 |
| }, |
| { |
| "epoch": 2.7214285714285715, |
| "grad_norm": 0.3745562732219696, |
| "learning_rate": 1.2131849494507061e-11, |
| "loss": 1.3314462900161743, |
| "step": 1524 |
| }, |
| { |
| "epoch": 2.725, |
| "grad_norm": 0.20380382239818573, |
| "learning_rate": 1.2078300169175156e-11, |
| "loss": 1.3251063823699951, |
| "step": 1526 |
| }, |
| { |
| "epoch": 2.7285714285714286, |
| "grad_norm": 0.3119201064109802, |
| "learning_rate": 1.20254160715278e-11, |
| "loss": 1.3549195528030396, |
| "step": 1528 |
| }, |
| { |
| "epoch": 2.732142857142857, |
| "grad_norm": 0.22798198461532593, |
| "learning_rate": 1.1973198021196207e-11, |
| "loss": 1.2130204439163208, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.7357142857142858, |
| "grad_norm": 0.2302948534488678, |
| "learning_rate": 1.1921646827488806e-11, |
| "loss": 1.32042396068573, |
| "step": 1532 |
| }, |
| { |
| "epoch": 2.7392857142857143, |
| "grad_norm": 0.22557149827480316, |
| "learning_rate": 1.1870763289378629e-11, |
| "loss": 1.2422351837158203, |
| "step": 1534 |
| }, |
| { |
| "epoch": 2.742857142857143, |
| "grad_norm": 0.15642249584197998, |
| "learning_rate": 1.182054819549098e-11, |
| "loss": 1.2506171464920044, |
| "step": 1536 |
| }, |
| { |
| "epoch": 2.7464285714285714, |
| "grad_norm": 0.32084932923316956, |
| "learning_rate": 1.1771002324091182e-11, |
| "loss": 1.2349700927734375, |
| "step": 1538 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 0.4902629554271698, |
| "learning_rate": 1.1722126443072518e-11, |
| "loss": 1.4166244268417358, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.7535714285714286, |
| "grad_norm": 0.17237702012062073, |
| "learning_rate": 1.1673921309944354e-11, |
| "loss": 1.020308017730713, |
| "step": 1542 |
| }, |
| { |
| "epoch": 2.757142857142857, |
| "grad_norm": 0.1602608561515808, |
| "learning_rate": 1.1626387671820362e-11, |
| "loss": 1.2884690761566162, |
| "step": 1544 |
| }, |
| { |
| "epoch": 2.7607142857142857, |
| "grad_norm": 0.16682331264019012, |
| "learning_rate": 1.1579526265406972e-11, |
| "loss": 1.3690019845962524, |
| "step": 1546 |
| }, |
| { |
| "epoch": 2.7642857142857142, |
| "grad_norm": 0.1700551062822342, |
| "learning_rate": 1.1533337816991932e-11, |
| "loss": 1.2616186141967773, |
| "step": 1548 |
| }, |
| { |
| "epoch": 2.767857142857143, |
| "grad_norm": 0.16919812560081482, |
| "learning_rate": 1.1487823042433062e-11, |
| "loss": 1.307798981666565, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.7714285714285714, |
| "grad_norm": 0.11971700936555862, |
| "learning_rate": 1.1442982647147166e-11, |
| "loss": 1.3186330795288086, |
| "step": 1552 |
| }, |
| { |
| "epoch": 2.775, |
| "grad_norm": 0.15739700198173523, |
| "learning_rate": 1.1398817326099093e-11, |
| "loss": 1.1444510221481323, |
| "step": 1554 |
| }, |
| { |
| "epoch": 2.7785714285714285, |
| "grad_norm": 0.15496599674224854, |
| "learning_rate": 1.1355327763790944e-11, |
| "loss": 1.236445665359497, |
| "step": 1556 |
| }, |
| { |
| "epoch": 2.782142857142857, |
| "grad_norm": 0.2167055755853653, |
| "learning_rate": 1.1312514634251493e-11, |
| "loss": 1.1993296146392822, |
| "step": 1558 |
| }, |
| { |
| "epoch": 2.7857142857142856, |
| "grad_norm": 0.14245696365833282, |
| "learning_rate": 1.1270378601025748e-11, |
| "loss": 1.2451859712600708, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.789285714285714, |
| "grad_norm": 0.15736693143844604, |
| "learning_rate": 1.1228920317164623e-11, |
| "loss": 1.2228260040283203, |
| "step": 1562 |
| }, |
| { |
| "epoch": 2.7928571428571427, |
| "grad_norm": 0.13533014059066772, |
| "learning_rate": 1.1188140425214861e-11, |
| "loss": 1.2670307159423828, |
| "step": 1564 |
| }, |
| { |
| "epoch": 2.7964285714285713, |
| "grad_norm": 0.1510917991399765, |
| "learning_rate": 1.1148039557209056e-11, |
| "loss": 1.227577805519104, |
| "step": 1566 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.12420517206192017, |
| "learning_rate": 1.1108618334655843e-11, |
| "loss": 1.2005560398101807, |
| "step": 1568 |
| }, |
| { |
| "epoch": 2.803571428571429, |
| "grad_norm": 0.18340545892715454, |
| "learning_rate": 1.1069877368530302e-11, |
| "loss": 1.2663429975509644, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.807142857142857, |
| "grad_norm": 0.15353085100650787, |
| "learning_rate": 1.1031817259264454e-11, |
| "loss": 1.251989722251892, |
| "step": 1572 |
| }, |
| { |
| "epoch": 2.810714285714286, |
| "grad_norm": 0.12760750949382782, |
| "learning_rate": 1.0994438596737972e-11, |
| "loss": 1.1478347778320312, |
| "step": 1574 |
| }, |
| { |
| "epoch": 2.814285714285714, |
| "grad_norm": 1.7780303955078125, |
| "learning_rate": 1.0957741960269049e-11, |
| "loss": 1.2149173021316528, |
| "step": 1576 |
| }, |
| { |
| "epoch": 2.817857142857143, |
| "grad_norm": 0.13187356293201447, |
| "learning_rate": 1.092172791860539e-11, |
| "loss": 1.1681101322174072, |
| "step": 1578 |
| }, |
| { |
| "epoch": 2.821428571428571, |
| "grad_norm": 0.18063776195049286, |
| "learning_rate": 1.0886397029915413e-11, |
| "loss": 1.199445366859436, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.825, |
| "grad_norm": 0.1410759687423706, |
| "learning_rate": 1.0851749841779609e-11, |
| "loss": 1.1747792959213257, |
| "step": 1582 |
| }, |
| { |
| "epoch": 2.8285714285714287, |
| "grad_norm": 0.19593161344528198, |
| "learning_rate": 1.0817786891182041e-11, |
| "loss": 1.1981396675109863, |
| "step": 1584 |
| }, |
| { |
| "epoch": 2.8321428571428573, |
| "grad_norm": 0.14246626198291779, |
| "learning_rate": 1.078450870450203e-11, |
| "loss": 1.2036207914352417, |
| "step": 1586 |
| }, |
| { |
| "epoch": 2.835714285714286, |
| "grad_norm": 0.25931403040885925, |
| "learning_rate": 1.0751915797505985e-11, |
| "loss": 1.1870161294937134, |
| "step": 1588 |
| }, |
| { |
| "epoch": 2.8392857142857144, |
| "grad_norm": 0.18567614257335663, |
| "learning_rate": 1.0720008675339403e-11, |
| "loss": 1.1964879035949707, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.842857142857143, |
| "grad_norm": 0.1569967120885849, |
| "learning_rate": 1.0688787832519084e-11, |
| "loss": 1.234360933303833, |
| "step": 1592 |
| }, |
| { |
| "epoch": 2.8464285714285715, |
| "grad_norm": 0.17869971692562103, |
| "learning_rate": 1.0658253752925418e-11, |
| "loss": 1.208680510520935, |
| "step": 1594 |
| }, |
| { |
| "epoch": 2.85, |
| "grad_norm": 1.144254207611084, |
| "learning_rate": 1.0628406909794908e-11, |
| "loss": 1.2222168445587158, |
| "step": 1596 |
| }, |
| { |
| "epoch": 2.8535714285714286, |
| "grad_norm": 0.13791275024414062, |
| "learning_rate": 1.059924776571283e-11, |
| "loss": 1.2378734350204468, |
| "step": 1598 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 0.1584549993276596, |
| "learning_rate": 1.0570776772606056e-11, |
| "loss": 1.2322474718093872, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.8607142857142858, |
| "grad_norm": 0.1755964756011963, |
| "learning_rate": 1.0542994371736075e-11, |
| "loss": 1.1988189220428467, |
| "step": 1602 |
| }, |
| { |
| "epoch": 2.8642857142857143, |
| "grad_norm": 0.21343602240085602, |
| "learning_rate": 1.0515900993692126e-11, |
| "loss": 1.228104829788208, |
| "step": 1604 |
| }, |
| { |
| "epoch": 2.867857142857143, |
| "grad_norm": 0.15894442796707153, |
| "learning_rate": 1.048949705838454e-11, |
| "loss": 1.1797680854797363, |
| "step": 1606 |
| }, |
| { |
| "epoch": 2.8714285714285714, |
| "grad_norm": 0.1659688502550125, |
| "learning_rate": 1.0463782975038227e-11, |
| "loss": 1.2225085496902466, |
| "step": 1608 |
| }, |
| { |
| "epoch": 2.875, |
| "grad_norm": 0.2029978483915329, |
| "learning_rate": 1.0438759142186335e-11, |
| "loss": 1.2150154113769531, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.8785714285714286, |
| "grad_norm": 0.17542648315429688, |
| "learning_rate": 1.0414425947664074e-11, |
| "loss": 1.2423878908157349, |
| "step": 1612 |
| }, |
| { |
| "epoch": 2.882142857142857, |
| "grad_norm": 0.18975086510181427, |
| "learning_rate": 1.0390783768602692e-11, |
| "loss": 1.227378010749817, |
| "step": 1614 |
| }, |
| { |
| "epoch": 2.8857142857142857, |
| "grad_norm": 0.24369192123413086, |
| "learning_rate": 1.0367832971423663e-11, |
| "loss": 1.1613997220993042, |
| "step": 1616 |
| }, |
| { |
| "epoch": 2.8892857142857142, |
| "grad_norm": 0.15854114294052124, |
| "learning_rate": 1.0345573911832976e-11, |
| "loss": 1.1997004747390747, |
| "step": 1618 |
| }, |
| { |
| "epoch": 2.892857142857143, |
| "grad_norm": 0.15320441126823425, |
| "learning_rate": 1.0324006934815622e-11, |
| "loss": 1.2283430099487305, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.8964285714285714, |
| "grad_norm": 0.24903114140033722, |
| "learning_rate": 1.0303132374630275e-11, |
| "loss": 1.2436856031417847, |
| "step": 1622 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 0.13410496711730957, |
| "learning_rate": 1.0282950554804083e-11, |
| "loss": 1.2366242408752441, |
| "step": 1624 |
| }, |
| { |
| "epoch": 2.9035714285714285, |
| "grad_norm": 0.17165103554725647, |
| "learning_rate": 1.0263461788127682e-11, |
| "loss": 1.2125755548477173, |
| "step": 1626 |
| }, |
| { |
| "epoch": 2.907142857142857, |
| "grad_norm": 0.1607826203107834, |
| "learning_rate": 1.0244666376650306e-11, |
| "loss": 1.1719647645950317, |
| "step": 1628 |
| }, |
| { |
| "epoch": 2.9107142857142856, |
| "grad_norm": 0.2778455913066864, |
| "learning_rate": 1.0226564611675146e-11, |
| "loss": 1.2128190994262695, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.914285714285714, |
| "grad_norm": 0.16275779902935028, |
| "learning_rate": 1.020915677375483e-11, |
| "loss": 1.229934811592102, |
| "step": 1632 |
| }, |
| { |
| "epoch": 2.9178571428571427, |
| "grad_norm": 0.1258879005908966, |
| "learning_rate": 1.019244313268704e-11, |
| "loss": 1.1735552549362183, |
| "step": 1634 |
| }, |
| { |
| "epoch": 2.9214285714285713, |
| "grad_norm": 0.2216557413339615, |
| "learning_rate": 1.0176423947510376e-11, |
| "loss": 1.2107999324798584, |
| "step": 1636 |
| }, |
| { |
| "epoch": 2.925, |
| "grad_norm": 0.19989758729934692, |
| "learning_rate": 1.0161099466500318e-11, |
| "loss": 1.1985503435134888, |
| "step": 1638 |
| }, |
| { |
| "epoch": 2.928571428571429, |
| "grad_norm": 0.21431688964366913, |
| "learning_rate": 1.014646992716537e-11, |
| "loss": 1.25062894821167, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.932142857142857, |
| "grad_norm": 0.15615607798099518, |
| "learning_rate": 1.0132535556243399e-11, |
| "loss": 1.2564295530319214, |
| "step": 1642 |
| }, |
| { |
| "epoch": 2.935714285714286, |
| "grad_norm": 0.16791290044784546, |
| "learning_rate": 1.0119296569698111e-11, |
| "loss": 1.2255940437316895, |
| "step": 1644 |
| }, |
| { |
| "epoch": 2.939285714285714, |
| "grad_norm": 0.16198527812957764, |
| "learning_rate": 1.01067531727157e-11, |
| "loss": 1.2428375482559204, |
| "step": 1646 |
| }, |
| { |
| "epoch": 2.942857142857143, |
| "grad_norm": 0.16298729181289673, |
| "learning_rate": 1.0094905559701677e-11, |
| "loss": 1.2208831310272217, |
| "step": 1648 |
| }, |
| { |
| "epoch": 2.946428571428571, |
| "grad_norm": 0.2169935405254364, |
| "learning_rate": 1.0083753914277857e-11, |
| "loss": 1.1699635982513428, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.95, |
| "grad_norm": 0.1867915242910385, |
| "learning_rate": 1.0073298409279491e-11, |
| "loss": 1.1837371587753296, |
| "step": 1652 |
| }, |
| { |
| "epoch": 2.9535714285714287, |
| "grad_norm": 0.19683191180229187, |
| "learning_rate": 1.006353920675263e-11, |
| "loss": 1.2122198343276978, |
| "step": 1654 |
| }, |
| { |
| "epoch": 2.9571428571428573, |
| "grad_norm": 0.16676557064056396, |
| "learning_rate": 1.0054476457951567e-11, |
| "loss": 1.1825740337371826, |
| "step": 1656 |
| }, |
| { |
| "epoch": 2.960714285714286, |
| "grad_norm": 0.651061475276947, |
| "learning_rate": 1.0046110303336517e-11, |
| "loss": 1.1873282194137573, |
| "step": 1658 |
| }, |
| { |
| "epoch": 2.9642857142857144, |
| "grad_norm": 0.16477417945861816, |
| "learning_rate": 1.0038440872571455e-11, |
| "loss": 1.236222743988037, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.967857142857143, |
| "grad_norm": 0.1982717663049698, |
| "learning_rate": 1.0031468284522064e-11, |
| "loss": 1.2251464128494263, |
| "step": 1662 |
| }, |
| { |
| "epoch": 2.9714285714285715, |
| "grad_norm": 0.1778063327074051, |
| "learning_rate": 1.0025192647253938e-11, |
| "loss": 1.2394579648971558, |
| "step": 1664 |
| }, |
| { |
| "epoch": 2.975, |
| "grad_norm": 0.7245147824287415, |
| "learning_rate": 1.0019614058030873e-11, |
| "loss": 1.1742377281188965, |
| "step": 1666 |
| }, |
| { |
| "epoch": 2.9785714285714286, |
| "grad_norm": 0.16432130336761475, |
| "learning_rate": 1.0014732603313377e-11, |
| "loss": 1.194645643234253, |
| "step": 1668 |
| }, |
| { |
| "epoch": 2.982142857142857, |
| "grad_norm": 0.16144277155399323, |
| "learning_rate": 1.0010548358757328e-11, |
| "loss": 1.2105306386947632, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.9857142857142858, |
| "grad_norm": 0.18343304097652435, |
| "learning_rate": 1.0007061389212795e-11, |
| "loss": 1.2389037609100342, |
| "step": 1672 |
| }, |
| { |
| "epoch": 2.9892857142857143, |
| "grad_norm": 0.4671591520309448, |
| "learning_rate": 1.0004271748723041e-11, |
| "loss": 1.2875266075134277, |
| "step": 1674 |
| }, |
| { |
| "epoch": 2.992857142857143, |
| "grad_norm": 0.557140588760376, |
| "learning_rate": 1.0002179480523687e-11, |
| "loss": 1.168911337852478, |
| "step": 1676 |
| }, |
| { |
| "epoch": 2.9964285714285714, |
| "grad_norm": 0.35813069343566895, |
| "learning_rate": 1.0000784617042024e-11, |
| "loss": 1.0809228420257568, |
| "step": 1678 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.509574294090271, |
| "learning_rate": 1.0000087179896533e-11, |
| "loss": 0.9652191400527954, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1680, |
| "total_flos": 2.513699577954042e+18, |
| "train_loss": 1.4121140867471695, |
| "train_runtime": 17187.153, |
| "train_samples_per_second": 1.564, |
| "train_steps_per_second": 0.098 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 1680, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 9999999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.513699577954042e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|