| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1053, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002849002849002849, |
| "grad_norm": 56.0523474942412, |
| "learning_rate": 0.0, |
| "loss": 11.2375, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.005698005698005698, |
| "grad_norm": 56.692305048985084, |
| "learning_rate": 4.7169811320754717e-07, |
| "loss": 11.1498, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.008547008547008548, |
| "grad_norm": 56.20671215302259, |
| "learning_rate": 9.433962264150943e-07, |
| "loss": 11.1995, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.011396011396011397, |
| "grad_norm": 57.07016050341919, |
| "learning_rate": 1.4150943396226415e-06, |
| "loss": 11.17, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.014245014245014245, |
| "grad_norm": 55.67070270531962, |
| "learning_rate": 1.8867924528301887e-06, |
| "loss": 11.2141, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.017094017094017096, |
| "grad_norm": 67.41256511207831, |
| "learning_rate": 2.358490566037736e-06, |
| "loss": 10.8094, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.019943019943019943, |
| "grad_norm": 66.98704800790512, |
| "learning_rate": 2.830188679245283e-06, |
| "loss": 10.6191, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.022792022792022793, |
| "grad_norm": 99.53214132413315, |
| "learning_rate": 3.30188679245283e-06, |
| "loss": 9.2224, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.02564102564102564, |
| "grad_norm": 107.3638343426545, |
| "learning_rate": 3.7735849056603773e-06, |
| "loss": 8.9747, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02849002849002849, |
| "grad_norm": 65.50392578974085, |
| "learning_rate": 4.245283018867925e-06, |
| "loss": 3.7694, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03133903133903134, |
| "grad_norm": 52.753132297048275, |
| "learning_rate": 4.716981132075472e-06, |
| "loss": 3.1517, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.03418803418803419, |
| "grad_norm": 34.22470483345867, |
| "learning_rate": 5.188679245283019e-06, |
| "loss": 2.3701, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.037037037037037035, |
| "grad_norm": 8.289999698115077, |
| "learning_rate": 5.660377358490566e-06, |
| "loss": 1.5114, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.039886039886039885, |
| "grad_norm": 4.907971366358463, |
| "learning_rate": 6.132075471698113e-06, |
| "loss": 1.3167, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.042735042735042736, |
| "grad_norm": 3.878187177104665, |
| "learning_rate": 6.60377358490566e-06, |
| "loss": 1.1878, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.045584045584045586, |
| "grad_norm": 2.614057256962741, |
| "learning_rate": 7.0754716981132075e-06, |
| "loss": 1.134, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.04843304843304843, |
| "grad_norm": 2.1563070672973015, |
| "learning_rate": 7.547169811320755e-06, |
| "loss": 1.0503, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.05128205128205128, |
| "grad_norm": 5.014628550733802, |
| "learning_rate": 8.018867924528302e-06, |
| "loss": 0.968, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.05413105413105413, |
| "grad_norm": 5.938244849443368, |
| "learning_rate": 8.49056603773585e-06, |
| "loss": 0.919, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.05698005698005698, |
| "grad_norm": 1.2998505975680303, |
| "learning_rate": 8.962264150943396e-06, |
| "loss": 0.9074, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05982905982905983, |
| "grad_norm": 1.0231800381234573, |
| "learning_rate": 9.433962264150944e-06, |
| "loss": 0.864, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.06267806267806268, |
| "grad_norm": 0.791900948928139, |
| "learning_rate": 9.905660377358492e-06, |
| "loss": 0.8147, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.06552706552706553, |
| "grad_norm": 0.7091561135310548, |
| "learning_rate": 1.0377358490566038e-05, |
| "loss": 0.8188, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.06837606837606838, |
| "grad_norm": 0.7166203845304848, |
| "learning_rate": 1.0849056603773586e-05, |
| "loss": 0.7601, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.07122507122507123, |
| "grad_norm": 0.8012651357824443, |
| "learning_rate": 1.1320754716981132e-05, |
| "loss": 0.7598, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.07407407407407407, |
| "grad_norm": 0.6383142924108046, |
| "learning_rate": 1.179245283018868e-05, |
| "loss": 0.7514, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.07692307692307693, |
| "grad_norm": 0.5725581821662165, |
| "learning_rate": 1.2264150943396227e-05, |
| "loss": 0.7071, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.07977207977207977, |
| "grad_norm": 0.6425822567656265, |
| "learning_rate": 1.2735849056603775e-05, |
| "loss": 0.7076, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.08262108262108261, |
| "grad_norm": 0.6708098823752852, |
| "learning_rate": 1.320754716981132e-05, |
| "loss": 0.6887, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.08547008547008547, |
| "grad_norm": 0.5685632358364251, |
| "learning_rate": 1.3679245283018869e-05, |
| "loss": 0.6724, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08831908831908832, |
| "grad_norm": 0.44124658499047564, |
| "learning_rate": 1.4150943396226415e-05, |
| "loss": 0.6856, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.09116809116809117, |
| "grad_norm": 0.46800011010506615, |
| "learning_rate": 1.4622641509433963e-05, |
| "loss": 0.6644, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.09401709401709402, |
| "grad_norm": 0.5235273680352841, |
| "learning_rate": 1.509433962264151e-05, |
| "loss": 0.6491, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.09686609686609686, |
| "grad_norm": 0.46201236009771707, |
| "learning_rate": 1.5566037735849056e-05, |
| "loss": 0.6403, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.09971509971509972, |
| "grad_norm": 0.38923522274855954, |
| "learning_rate": 1.6037735849056604e-05, |
| "loss": 0.6325, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.10256410256410256, |
| "grad_norm": 0.38662678714338583, |
| "learning_rate": 1.650943396226415e-05, |
| "loss": 0.6432, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.10541310541310542, |
| "grad_norm": 0.40192513620663517, |
| "learning_rate": 1.69811320754717e-05, |
| "loss": 0.6126, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.10826210826210826, |
| "grad_norm": 0.3948475707755113, |
| "learning_rate": 1.7452830188679244e-05, |
| "loss": 0.647, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 0.3356078989163562, |
| "learning_rate": 1.7924528301886792e-05, |
| "loss": 0.6174, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.11396011396011396, |
| "grad_norm": 0.30046759356107233, |
| "learning_rate": 1.839622641509434e-05, |
| "loss": 0.6072, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1168091168091168, |
| "grad_norm": 0.30888114755625345, |
| "learning_rate": 1.8867924528301888e-05, |
| "loss": 0.5935, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.11965811965811966, |
| "grad_norm": 0.30715993049476403, |
| "learning_rate": 1.9339622641509436e-05, |
| "loss": 0.6021, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.1225071225071225, |
| "grad_norm": 0.29429080617445386, |
| "learning_rate": 1.9811320754716984e-05, |
| "loss": 0.5897, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.12535612535612536, |
| "grad_norm": 0.2806609748601335, |
| "learning_rate": 2.0283018867924532e-05, |
| "loss": 0.5712, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.1282051282051282, |
| "grad_norm": 0.3035543416731401, |
| "learning_rate": 2.0754716981132076e-05, |
| "loss": 0.5879, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.13105413105413105, |
| "grad_norm": 0.2757782348628876, |
| "learning_rate": 2.1226415094339624e-05, |
| "loss": 0.5777, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.1339031339031339, |
| "grad_norm": 0.2639801457025701, |
| "learning_rate": 2.1698113207547172e-05, |
| "loss": 0.5605, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.13675213675213677, |
| "grad_norm": 0.2712816187794645, |
| "learning_rate": 2.216981132075472e-05, |
| "loss": 0.5873, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.1396011396011396, |
| "grad_norm": 0.28871963465497535, |
| "learning_rate": 2.2641509433962265e-05, |
| "loss": 0.5727, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.14245014245014245, |
| "grad_norm": 0.28060397078353755, |
| "learning_rate": 2.3113207547169813e-05, |
| "loss": 0.5602, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1452991452991453, |
| "grad_norm": 0.2771839138643321, |
| "learning_rate": 2.358490566037736e-05, |
| "loss": 0.5543, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.14814814814814814, |
| "grad_norm": 0.25817545404116393, |
| "learning_rate": 2.405660377358491e-05, |
| "loss": 0.5495, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.150997150997151, |
| "grad_norm": 0.2715327298990826, |
| "learning_rate": 2.4528301886792453e-05, |
| "loss": 0.5386, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.15384615384615385, |
| "grad_norm": 0.3009269107284128, |
| "learning_rate": 2.5e-05, |
| "loss": 0.54, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.15669515669515668, |
| "grad_norm": 0.26603937399372396, |
| "learning_rate": 2.547169811320755e-05, |
| "loss": 0.5688, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.15954415954415954, |
| "grad_norm": 0.23331692549766264, |
| "learning_rate": 2.5943396226415094e-05, |
| "loss": 0.5408, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.1623931623931624, |
| "grad_norm": 0.257528511255327, |
| "learning_rate": 2.641509433962264e-05, |
| "loss": 0.5451, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.16524216524216523, |
| "grad_norm": 0.24892299069207233, |
| "learning_rate": 2.688679245283019e-05, |
| "loss": 0.5488, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.16809116809116809, |
| "grad_norm": 0.22893417370533484, |
| "learning_rate": 2.7358490566037738e-05, |
| "loss": 0.5427, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.17094017094017094, |
| "grad_norm": 0.2529033488136545, |
| "learning_rate": 2.7830188679245282e-05, |
| "loss": 0.5667, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1737891737891738, |
| "grad_norm": 0.2363533954890101, |
| "learning_rate": 2.830188679245283e-05, |
| "loss": 0.5535, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.17663817663817663, |
| "grad_norm": 0.22892978820463591, |
| "learning_rate": 2.8773584905660378e-05, |
| "loss": 0.5482, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1794871794871795, |
| "grad_norm": 0.23882495609332313, |
| "learning_rate": 2.9245283018867926e-05, |
| "loss": 0.535, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.18233618233618235, |
| "grad_norm": 0.2506420779209896, |
| "learning_rate": 2.971698113207547e-05, |
| "loss": 0.5454, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.18518518518518517, |
| "grad_norm": 0.3063275398024107, |
| "learning_rate": 3.018867924528302e-05, |
| "loss": 0.5444, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.18803418803418803, |
| "grad_norm": 0.23091797227336122, |
| "learning_rate": 3.0660377358490567e-05, |
| "loss": 0.5524, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.1908831908831909, |
| "grad_norm": 0.26395478944072415, |
| "learning_rate": 3.113207547169811e-05, |
| "loss": 0.5063, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.19373219373219372, |
| "grad_norm": 0.24299930461895702, |
| "learning_rate": 3.160377358490566e-05, |
| "loss": 0.5369, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.19658119658119658, |
| "grad_norm": 0.24673885752288957, |
| "learning_rate": 3.207547169811321e-05, |
| "loss": 0.51, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.19943019943019943, |
| "grad_norm": 0.2706880306834431, |
| "learning_rate": 3.254716981132075e-05, |
| "loss": 0.5264, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2022792022792023, |
| "grad_norm": 0.26249806371875767, |
| "learning_rate": 3.30188679245283e-05, |
| "loss": 0.5451, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.20512820512820512, |
| "grad_norm": 0.3079867132956617, |
| "learning_rate": 3.349056603773585e-05, |
| "loss": 0.5639, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.20797720797720798, |
| "grad_norm": 0.2677449528243656, |
| "learning_rate": 3.39622641509434e-05, |
| "loss": 0.5236, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.21082621082621084, |
| "grad_norm": 0.29313189476756446, |
| "learning_rate": 3.4433962264150943e-05, |
| "loss": 0.5238, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.21367521367521367, |
| "grad_norm": 0.3077114867531552, |
| "learning_rate": 3.490566037735849e-05, |
| "loss": 0.5184, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.21652421652421652, |
| "grad_norm": 0.24841288456262042, |
| "learning_rate": 3.537735849056604e-05, |
| "loss": 0.5164, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.21937321937321938, |
| "grad_norm": 0.28963777073614116, |
| "learning_rate": 3.5849056603773584e-05, |
| "loss": 0.5073, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 0.27823065639727734, |
| "learning_rate": 3.632075471698113e-05, |
| "loss": 0.5269, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.22507122507122507, |
| "grad_norm": 0.32870802313426584, |
| "learning_rate": 3.679245283018868e-05, |
| "loss": 0.5043, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.22792022792022792, |
| "grad_norm": 0.27025584380220774, |
| "learning_rate": 3.7264150943396224e-05, |
| "loss": 0.5024, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.23076923076923078, |
| "grad_norm": 0.33251614434676685, |
| "learning_rate": 3.7735849056603776e-05, |
| "loss": 0.5305, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.2336182336182336, |
| "grad_norm": 0.33641696003904814, |
| "learning_rate": 3.820754716981133e-05, |
| "loss": 0.5161, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.23646723646723647, |
| "grad_norm": 0.27164595148241766, |
| "learning_rate": 3.867924528301887e-05, |
| "loss": 0.4933, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.23931623931623933, |
| "grad_norm": 0.293519778964147, |
| "learning_rate": 3.9150943396226416e-05, |
| "loss": 0.5106, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.24216524216524216, |
| "grad_norm": 0.2711786679654629, |
| "learning_rate": 3.962264150943397e-05, |
| "loss": 0.5056, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.245014245014245, |
| "grad_norm": 0.2909911239118421, |
| "learning_rate": 4.009433962264151e-05, |
| "loss": 0.4906, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.24786324786324787, |
| "grad_norm": 0.2749435561722525, |
| "learning_rate": 4.0566037735849064e-05, |
| "loss": 0.5239, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.25071225071225073, |
| "grad_norm": 0.2966697733485334, |
| "learning_rate": 4.103773584905661e-05, |
| "loss": 0.4857, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.2535612535612536, |
| "grad_norm": 0.2577003407624272, |
| "learning_rate": 4.150943396226415e-05, |
| "loss": 0.5017, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.2564102564102564, |
| "grad_norm": 0.2509274192908608, |
| "learning_rate": 4.1981132075471704e-05, |
| "loss": 0.5094, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.25925925925925924, |
| "grad_norm": 0.26220476456285075, |
| "learning_rate": 4.245283018867925e-05, |
| "loss": 0.521, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2621082621082621, |
| "grad_norm": 0.26233472271330177, |
| "learning_rate": 4.292452830188679e-05, |
| "loss": 0.5039, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.26495726495726496, |
| "grad_norm": 0.24761878468553983, |
| "learning_rate": 4.3396226415094345e-05, |
| "loss": 0.4948, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.2678062678062678, |
| "grad_norm": 0.2525575556034209, |
| "learning_rate": 4.386792452830189e-05, |
| "loss": 0.5044, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.2706552706552707, |
| "grad_norm": 0.27089466017971453, |
| "learning_rate": 4.433962264150944e-05, |
| "loss": 0.5008, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.27350427350427353, |
| "grad_norm": 0.25465583266476105, |
| "learning_rate": 4.4811320754716985e-05, |
| "loss": 0.4925, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.27635327635327633, |
| "grad_norm": 0.2459248256786745, |
| "learning_rate": 4.528301886792453e-05, |
| "loss": 0.4716, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.2792022792022792, |
| "grad_norm": 0.27690227321171584, |
| "learning_rate": 4.575471698113208e-05, |
| "loss": 0.5057, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.28205128205128205, |
| "grad_norm": 0.31505919622587614, |
| "learning_rate": 4.6226415094339625e-05, |
| "loss": 0.5196, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2849002849002849, |
| "grad_norm": 0.2645004241178192, |
| "learning_rate": 4.669811320754717e-05, |
| "loss": 0.4741, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.28774928774928776, |
| "grad_norm": 0.28420887135800427, |
| "learning_rate": 4.716981132075472e-05, |
| "loss": 0.489, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.2905982905982906, |
| "grad_norm": 0.2979244733987184, |
| "learning_rate": 4.7641509433962266e-05, |
| "loss": 0.5049, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.2934472934472934, |
| "grad_norm": 0.31699020934728167, |
| "learning_rate": 4.811320754716982e-05, |
| "loss": 0.4846, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.2962962962962963, |
| "grad_norm": 0.2958211929457519, |
| "learning_rate": 4.858490566037736e-05, |
| "loss": 0.4904, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.29914529914529914, |
| "grad_norm": 0.31430371098933885, |
| "learning_rate": 4.9056603773584906e-05, |
| "loss": 0.4864, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.301994301994302, |
| "grad_norm": 0.31807223918157157, |
| "learning_rate": 4.952830188679246e-05, |
| "loss": 0.496, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.30484330484330485, |
| "grad_norm": 0.3012030394747298, |
| "learning_rate": 5e-05, |
| "loss": 0.4889, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.3076923076923077, |
| "grad_norm": 0.26530051400289184, |
| "learning_rate": 4.994720168954594e-05, |
| "loss": 0.4933, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.31054131054131057, |
| "grad_norm": 0.3230552696656243, |
| "learning_rate": 4.989440337909187e-05, |
| "loss": 0.5068, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.31339031339031337, |
| "grad_norm": 0.25705267366009066, |
| "learning_rate": 4.9841605068637805e-05, |
| "loss": 0.499, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3162393162393162, |
| "grad_norm": 0.3143313651222358, |
| "learning_rate": 4.978880675818374e-05, |
| "loss": 0.4784, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.3190883190883191, |
| "grad_norm": 0.3376761893109501, |
| "learning_rate": 4.973600844772968e-05, |
| "loss": 0.482, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.32193732193732194, |
| "grad_norm": 0.28882046798653027, |
| "learning_rate": 4.968321013727561e-05, |
| "loss": 0.4961, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.3247863247863248, |
| "grad_norm": 0.34419030637706083, |
| "learning_rate": 4.9630411826821544e-05, |
| "loss": 0.4971, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.32763532763532766, |
| "grad_norm": 0.29426005659261695, |
| "learning_rate": 4.957761351636748e-05, |
| "loss": 0.4903, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.33048433048433046, |
| "grad_norm": 0.29019064666682737, |
| "learning_rate": 4.952481520591341e-05, |
| "loss": 0.4897, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.33297662577690634, |
| "learning_rate": 4.947201689545935e-05, |
| "loss": 0.4792, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.33618233618233617, |
| "grad_norm": 0.35409595270488886, |
| "learning_rate": 4.941921858500528e-05, |
| "loss": 0.4925, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.33903133903133903, |
| "grad_norm": 0.292433952733655, |
| "learning_rate": 4.936642027455122e-05, |
| "loss": 0.5069, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.3418803418803419, |
| "grad_norm": 0.3320923132157635, |
| "learning_rate": 4.931362196409715e-05, |
| "loss": 0.4946, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.34472934472934474, |
| "grad_norm": 0.27067630597401476, |
| "learning_rate": 4.9260823653643085e-05, |
| "loss": 0.4871, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.3475783475783476, |
| "grad_norm": 0.30304447173190324, |
| "learning_rate": 4.920802534318902e-05, |
| "loss": 0.487, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.3504273504273504, |
| "grad_norm": 0.3039645539285113, |
| "learning_rate": 4.915522703273496e-05, |
| "loss": 0.4867, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.35327635327635326, |
| "grad_norm": 0.2824982694185181, |
| "learning_rate": 4.9102428722280894e-05, |
| "loss": 0.4611, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.3561253561253561, |
| "grad_norm": 0.28260131702992686, |
| "learning_rate": 4.9049630411826823e-05, |
| "loss": 0.4652, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.358974358974359, |
| "grad_norm": 0.34703216534596504, |
| "learning_rate": 4.899683210137276e-05, |
| "loss": 0.4865, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.36182336182336183, |
| "grad_norm": 0.3162837076477058, |
| "learning_rate": 4.894403379091869e-05, |
| "loss": 0.4954, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.3646723646723647, |
| "grad_norm": 0.3293203697511355, |
| "learning_rate": 4.8891235480464626e-05, |
| "loss": 0.4995, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.36752136752136755, |
| "grad_norm": 0.27295379955106025, |
| "learning_rate": 4.883843717001056e-05, |
| "loss": 0.4872, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.37037037037037035, |
| "grad_norm": 0.3102376623883744, |
| "learning_rate": 4.87856388595565e-05, |
| "loss": 0.481, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3732193732193732, |
| "grad_norm": 0.31091911179304077, |
| "learning_rate": 4.8732840549102435e-05, |
| "loss": 0.4891, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.37606837606837606, |
| "grad_norm": 0.29399727700411593, |
| "learning_rate": 4.8680042238648365e-05, |
| "loss": 0.4678, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.3789173789173789, |
| "grad_norm": 0.3409889899938261, |
| "learning_rate": 4.86272439281943e-05, |
| "loss": 0.4881, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.3817663817663818, |
| "grad_norm": 0.3046568772014337, |
| "learning_rate": 4.857444561774023e-05, |
| "loss": 0.4788, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.38461538461538464, |
| "grad_norm": 0.24665708555682755, |
| "learning_rate": 4.852164730728617e-05, |
| "loss": 0.4572, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.38746438746438744, |
| "grad_norm": 0.3016729598811574, |
| "learning_rate": 4.8468848996832103e-05, |
| "loss": 0.4804, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.3903133903133903, |
| "grad_norm": 0.3530427507474638, |
| "learning_rate": 4.841605068637804e-05, |
| "loss": 0.4843, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.39316239316239315, |
| "grad_norm": 0.2856167787643274, |
| "learning_rate": 4.8363252375923976e-05, |
| "loss": 0.4715, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.396011396011396, |
| "grad_norm": 0.37847572959684284, |
| "learning_rate": 4.8310454065469906e-05, |
| "loss": 0.4968, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.39886039886039887, |
| "grad_norm": 0.28801978777804754, |
| "learning_rate": 4.825765575501584e-05, |
| "loss": 0.4632, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.4017094017094017, |
| "grad_norm": 0.3525074681127096, |
| "learning_rate": 4.820485744456177e-05, |
| "loss": 0.4827, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.4045584045584046, |
| "grad_norm": 0.3526537528509632, |
| "learning_rate": 4.8152059134107715e-05, |
| "loss": 0.4732, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.4074074074074074, |
| "grad_norm": 0.34857652018932117, |
| "learning_rate": 4.8099260823653645e-05, |
| "loss": 0.4867, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.41025641025641024, |
| "grad_norm": 0.3727176334678898, |
| "learning_rate": 4.804646251319958e-05, |
| "loss": 0.4834, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.4131054131054131, |
| "grad_norm": 0.3140976089416083, |
| "learning_rate": 4.799366420274552e-05, |
| "loss": 0.4675, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.41595441595441596, |
| "grad_norm": 0.44474986509411835, |
| "learning_rate": 4.794086589229145e-05, |
| "loss": 0.4914, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.4188034188034188, |
| "grad_norm": 0.3390407952306162, |
| "learning_rate": 4.788806758183738e-05, |
| "loss": 0.4731, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.42165242165242167, |
| "grad_norm": 0.48836879750115086, |
| "learning_rate": 4.783526927138332e-05, |
| "loss": 0.4921, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.42450142450142453, |
| "grad_norm": 0.33589080513403735, |
| "learning_rate": 4.7782470960929256e-05, |
| "loss": 0.4912, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.42735042735042733, |
| "grad_norm": 0.36236282963696415, |
| "learning_rate": 4.7729672650475186e-05, |
| "loss": 0.4601, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4301994301994302, |
| "grad_norm": 0.2797897413436219, |
| "learning_rate": 4.767687434002112e-05, |
| "loss": 0.4672, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.43304843304843305, |
| "grad_norm": 0.4148496937386019, |
| "learning_rate": 4.762407602956706e-05, |
| "loss": 0.4819, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.4358974358974359, |
| "grad_norm": 0.3395999932409137, |
| "learning_rate": 4.757127771911299e-05, |
| "loss": 0.4701, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.43874643874643876, |
| "grad_norm": 0.31394240245627125, |
| "learning_rate": 4.7518479408658925e-05, |
| "loss": 0.4702, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.4415954415954416, |
| "grad_norm": 0.3976902607497981, |
| "learning_rate": 4.746568109820486e-05, |
| "loss": 0.4754, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 0.26007701688937257, |
| "learning_rate": 4.74128827877508e-05, |
| "loss": 0.4719, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.4472934472934473, |
| "grad_norm": 0.3615746631327949, |
| "learning_rate": 4.736008447729673e-05, |
| "loss": 0.483, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.45014245014245013, |
| "grad_norm": 0.3262559663383385, |
| "learning_rate": 4.730728616684266e-05, |
| "loss": 0.4701, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.452991452991453, |
| "grad_norm": 0.3054913734906384, |
| "learning_rate": 4.725448785638859e-05, |
| "loss": 0.4755, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.45584045584045585, |
| "grad_norm": 0.28714899368154406, |
| "learning_rate": 4.720168954593453e-05, |
| "loss": 0.4715, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4586894586894587, |
| "grad_norm": 0.3094277794993974, |
| "learning_rate": 4.7148891235480466e-05, |
| "loss": 0.4621, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.46153846153846156, |
| "grad_norm": 0.3440166736205401, |
| "learning_rate": 4.70960929250264e-05, |
| "loss": 0.485, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.46438746438746437, |
| "grad_norm": 0.32050130614453004, |
| "learning_rate": 4.704329461457234e-05, |
| "loss": 0.4699, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.4672364672364672, |
| "grad_norm": 0.3462845175709499, |
| "learning_rate": 4.699049630411827e-05, |
| "loss": 0.467, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.4700854700854701, |
| "grad_norm": 0.25813062925512487, |
| "learning_rate": 4.6937697993664204e-05, |
| "loss": 0.4678, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.47293447293447294, |
| "grad_norm": 0.2774578719211831, |
| "learning_rate": 4.6884899683210134e-05, |
| "loss": 0.4517, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.4757834757834758, |
| "grad_norm": 0.27947314173878857, |
| "learning_rate": 4.683210137275608e-05, |
| "loss": 0.465, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.47863247863247865, |
| "grad_norm": 0.22733723587768467, |
| "learning_rate": 4.677930306230201e-05, |
| "loss": 0.4537, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.48148148148148145, |
| "grad_norm": 0.29661252845278596, |
| "learning_rate": 4.672650475184794e-05, |
| "loss": 0.4846, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.4843304843304843, |
| "grad_norm": 0.3089830748079026, |
| "learning_rate": 4.667370644139388e-05, |
| "loss": 0.4557, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.48717948717948717, |
| "grad_norm": 0.24997311130541303, |
| "learning_rate": 4.662090813093981e-05, |
| "loss": 0.45, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.49002849002849, |
| "grad_norm": 0.33175156279149914, |
| "learning_rate": 4.6568109820485746e-05, |
| "loss": 0.4689, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.4928774928774929, |
| "grad_norm": 0.2602987055448183, |
| "learning_rate": 4.651531151003168e-05, |
| "loss": 0.4659, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.49572649572649574, |
| "grad_norm": 0.3460341458612977, |
| "learning_rate": 4.646251319957762e-05, |
| "loss": 0.4765, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.4985754985754986, |
| "grad_norm": 0.2961420307282209, |
| "learning_rate": 4.640971488912355e-05, |
| "loss": 0.4924, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5014245014245015, |
| "grad_norm": 0.300448359282463, |
| "learning_rate": 4.6356916578669484e-05, |
| "loss": 0.4772, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.5042735042735043, |
| "grad_norm": 0.27326740002063005, |
| "learning_rate": 4.630411826821542e-05, |
| "loss": 0.4481, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.5071225071225072, |
| "grad_norm": 0.3260005365533567, |
| "learning_rate": 4.625131995776135e-05, |
| "loss": 0.4757, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.50997150997151, |
| "grad_norm": 0.3072044287066991, |
| "learning_rate": 4.619852164730729e-05, |
| "loss": 0.4749, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.5128205128205128, |
| "grad_norm": 0.29686452489417353, |
| "learning_rate": 4.614572333685322e-05, |
| "loss": 0.4599, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5156695156695157, |
| "grad_norm": 0.294950820175362, |
| "learning_rate": 4.609292502639916e-05, |
| "loss": 0.4821, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.5185185185185185, |
| "grad_norm": 0.30491136720622375, |
| "learning_rate": 4.604012671594509e-05, |
| "loss": 0.4537, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.5213675213675214, |
| "grad_norm": 0.30581018423213313, |
| "learning_rate": 4.5987328405491026e-05, |
| "loss": 0.4641, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.5242165242165242, |
| "grad_norm": 0.35426832507345146, |
| "learning_rate": 4.593453009503696e-05, |
| "loss": 0.4671, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.5270655270655271, |
| "grad_norm": 0.33464439271326646, |
| "learning_rate": 4.588173178458289e-05, |
| "loss": 0.4692, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5299145299145299, |
| "grad_norm": 0.30593521291345677, |
| "learning_rate": 4.5828933474128835e-05, |
| "loss": 0.4744, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.5327635327635327, |
| "grad_norm": 0.27674703467670597, |
| "learning_rate": 4.5776135163674764e-05, |
| "loss": 0.4437, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.5356125356125356, |
| "grad_norm": 0.280964484952, |
| "learning_rate": 4.57233368532207e-05, |
| "loss": 0.467, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.5384615384615384, |
| "grad_norm": 0.26185851570092444, |
| "learning_rate": 4.567053854276663e-05, |
| "loss": 0.4426, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.5413105413105413, |
| "grad_norm": 0.2584808882693542, |
| "learning_rate": 4.561774023231257e-05, |
| "loss": 0.4611, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5441595441595442, |
| "grad_norm": 0.26904667580690866, |
| "learning_rate": 4.55649419218585e-05, |
| "loss": 0.4575, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.5470085470085471, |
| "grad_norm": 0.2487167605386707, |
| "learning_rate": 4.551214361140444e-05, |
| "loss": 0.457, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.5498575498575499, |
| "grad_norm": 0.28142301387763136, |
| "learning_rate": 4.5459345300950376e-05, |
| "loss": 0.4586, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.5527065527065527, |
| "grad_norm": 0.31342474806414733, |
| "learning_rate": 4.5406546990496306e-05, |
| "loss": 0.4866, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 0.2847923191286588, |
| "learning_rate": 4.535374868004224e-05, |
| "loss": 0.4643, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.5584045584045584, |
| "grad_norm": 0.3095635932106011, |
| "learning_rate": 4.530095036958817e-05, |
| "loss": 0.4645, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.5612535612535613, |
| "grad_norm": 0.24639525285599803, |
| "learning_rate": 4.524815205913411e-05, |
| "loss": 0.4577, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.5641025641025641, |
| "grad_norm": 0.26955963685848316, |
| "learning_rate": 4.5195353748680044e-05, |
| "loss": 0.4626, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.5669515669515669, |
| "grad_norm": 0.24214389870117736, |
| "learning_rate": 4.514255543822598e-05, |
| "loss": 0.4535, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.5698005698005698, |
| "grad_norm": 0.2610341258337823, |
| "learning_rate": 4.508975712777192e-05, |
| "loss": 0.4583, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5726495726495726, |
| "grad_norm": 0.25907163228127456, |
| "learning_rate": 4.503695881731785e-05, |
| "loss": 0.4535, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.5754985754985755, |
| "grad_norm": 0.2761897395293128, |
| "learning_rate": 4.498416050686378e-05, |
| "loss": 0.4543, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.5783475783475783, |
| "grad_norm": 0.31059563404737606, |
| "learning_rate": 4.493136219640971e-05, |
| "loss": 0.4523, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.5811965811965812, |
| "grad_norm": 0.286738470928712, |
| "learning_rate": 4.487856388595565e-05, |
| "loss": 0.4836, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.584045584045584, |
| "grad_norm": 0.2967602486544982, |
| "learning_rate": 4.4825765575501585e-05, |
| "loss": 0.4637, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.5868945868945868, |
| "grad_norm": 0.2708153047318558, |
| "learning_rate": 4.477296726504752e-05, |
| "loss": 0.4497, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.5897435897435898, |
| "grad_norm": 0.2746307872171804, |
| "learning_rate": 4.472016895459346e-05, |
| "loss": 0.4512, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.5925925925925926, |
| "grad_norm": 0.3328817061733048, |
| "learning_rate": 4.466737064413939e-05, |
| "loss": 0.4613, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.5954415954415955, |
| "grad_norm": 0.2759545614945111, |
| "learning_rate": 4.4614572333685324e-05, |
| "loss": 0.4727, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.5982905982905983, |
| "grad_norm": 0.318009089876618, |
| "learning_rate": 4.4561774023231254e-05, |
| "loss": 0.4747, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6011396011396012, |
| "grad_norm": 0.25640815318642596, |
| "learning_rate": 4.45089757127772e-05, |
| "loss": 0.4719, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.603988603988604, |
| "grad_norm": 0.34436494976967913, |
| "learning_rate": 4.445617740232313e-05, |
| "loss": 0.4646, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.6068376068376068, |
| "grad_norm": 1.7399032191303827, |
| "learning_rate": 4.440337909186906e-05, |
| "loss": 0.5284, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.6096866096866097, |
| "grad_norm": 0.26447171131249575, |
| "learning_rate": 4.4350580781415e-05, |
| "loss": 0.4598, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.6125356125356125, |
| "grad_norm": 0.2571865578633, |
| "learning_rate": 4.429778247096093e-05, |
| "loss": 0.4491, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 0.28498607832783857, |
| "learning_rate": 4.4244984160506865e-05, |
| "loss": 0.4572, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.6182336182336182, |
| "grad_norm": 0.23557920258321058, |
| "learning_rate": 4.41921858500528e-05, |
| "loss": 0.4489, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.6210826210826211, |
| "grad_norm": 0.3049575913824341, |
| "learning_rate": 4.413938753959874e-05, |
| "loss": 0.4505, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.6239316239316239, |
| "grad_norm": 0.2582271333688809, |
| "learning_rate": 4.408658922914467e-05, |
| "loss": 0.4608, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.6267806267806267, |
| "grad_norm": 0.2823458887788208, |
| "learning_rate": 4.4033790918690604e-05, |
| "loss": 0.4533, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6296296296296297, |
| "grad_norm": 0.2386332818361518, |
| "learning_rate": 4.398099260823654e-05, |
| "loss": 0.4559, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.6324786324786325, |
| "grad_norm": 0.28795366606672157, |
| "learning_rate": 4.392819429778247e-05, |
| "loss": 0.4566, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.6353276353276354, |
| "grad_norm": 0.24919391725324347, |
| "learning_rate": 4.3875395987328407e-05, |
| "loss": 0.4716, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.6381766381766382, |
| "grad_norm": 0.2818278431774406, |
| "learning_rate": 4.382259767687434e-05, |
| "loss": 0.4504, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.6410256410256411, |
| "grad_norm": 0.2814493678841764, |
| "learning_rate": 4.376979936642028e-05, |
| "loss": 0.4434, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.6438746438746439, |
| "grad_norm": 0.2666133240725091, |
| "learning_rate": 4.371700105596621e-05, |
| "loss": 0.4552, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.6467236467236467, |
| "grad_norm": 0.2685467121594498, |
| "learning_rate": 4.3664202745512145e-05, |
| "loss": 0.4726, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.6495726495726496, |
| "grad_norm": 0.299879211553589, |
| "learning_rate": 4.361140443505808e-05, |
| "loss": 0.4471, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.6524216524216524, |
| "grad_norm": 0.23994886539791888, |
| "learning_rate": 4.355860612460401e-05, |
| "loss": 0.4588, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.6552706552706553, |
| "grad_norm": 0.32262816469344513, |
| "learning_rate": 4.3505807814149955e-05, |
| "loss": 0.4518, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6581196581196581, |
| "grad_norm": 0.26072493194234536, |
| "learning_rate": 4.3453009503695884e-05, |
| "loss": 0.4659, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.6609686609686609, |
| "grad_norm": 0.31586564589968463, |
| "learning_rate": 4.340021119324182e-05, |
| "loss": 0.456, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.6638176638176638, |
| "grad_norm": 0.253201215778207, |
| "learning_rate": 4.334741288278775e-05, |
| "loss": 0.4519, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.33113742268580176, |
| "learning_rate": 4.3294614572333687e-05, |
| "loss": 0.4533, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.6695156695156695, |
| "grad_norm": 0.2760668240736258, |
| "learning_rate": 4.324181626187962e-05, |
| "loss": 0.4663, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.6723646723646723, |
| "grad_norm": 0.344650634136037, |
| "learning_rate": 4.318901795142556e-05, |
| "loss": 0.4161, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.6752136752136753, |
| "grad_norm": 0.26467354227622525, |
| "learning_rate": 4.3136219640971496e-05, |
| "loss": 0.4558, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.6780626780626781, |
| "grad_norm": 0.310387292035022, |
| "learning_rate": 4.3083421330517425e-05, |
| "loss": 0.4507, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.6809116809116809, |
| "grad_norm": 0.26104826670851183, |
| "learning_rate": 4.303062302006336e-05, |
| "loss": 0.4418, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.6837606837606838, |
| "grad_norm": 0.33335191034296224, |
| "learning_rate": 4.297782470960929e-05, |
| "loss": 0.4696, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.6866096866096866, |
| "grad_norm": 0.23349617986863688, |
| "learning_rate": 4.292502639915523e-05, |
| "loss": 0.4354, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.6894586894586895, |
| "grad_norm": 0.3264954704665932, |
| "learning_rate": 4.2872228088701164e-05, |
| "loss": 0.4495, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.6923076923076923, |
| "grad_norm": 0.23219348921049893, |
| "learning_rate": 4.28194297782471e-05, |
| "loss": 0.4368, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.6951566951566952, |
| "grad_norm": 0.30581540788611133, |
| "learning_rate": 4.276663146779304e-05, |
| "loss": 0.4632, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.698005698005698, |
| "grad_norm": 0.2544190208608847, |
| "learning_rate": 4.2713833157338966e-05, |
| "loss": 0.4289, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.7008547008547008, |
| "grad_norm": 0.28161765276112943, |
| "learning_rate": 4.26610348468849e-05, |
| "loss": 0.4581, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.7037037037037037, |
| "grad_norm": 0.22266448187906657, |
| "learning_rate": 4.260823653643083e-05, |
| "loss": 0.4442, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.7065527065527065, |
| "grad_norm": 0.2845469466464988, |
| "learning_rate": 4.255543822597677e-05, |
| "loss": 0.4574, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.7094017094017094, |
| "grad_norm": 0.27607235956300574, |
| "learning_rate": 4.2502639915522705e-05, |
| "loss": 0.4486, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.7122507122507122, |
| "grad_norm": 0.2771911103653057, |
| "learning_rate": 4.244984160506864e-05, |
| "loss": 0.4502, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7150997150997151, |
| "grad_norm": 0.30336246911969705, |
| "learning_rate": 4.239704329461457e-05, |
| "loss": 0.4521, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.717948717948718, |
| "grad_norm": 0.2530206241923089, |
| "learning_rate": 4.234424498416051e-05, |
| "loss": 0.4548, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.7207977207977208, |
| "grad_norm": 0.3101890920570867, |
| "learning_rate": 4.2291446673706444e-05, |
| "loss": 0.4477, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.7236467236467237, |
| "grad_norm": 0.255737006138244, |
| "learning_rate": 4.2238648363252374e-05, |
| "loss": 0.4494, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.7264957264957265, |
| "grad_norm": 0.3158292287331288, |
| "learning_rate": 4.218585005279832e-05, |
| "loss": 0.4486, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.7293447293447294, |
| "grad_norm": 0.2874817334553929, |
| "learning_rate": 4.2133051742344246e-05, |
| "loss": 0.4492, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.7321937321937322, |
| "grad_norm": 0.2993674086081719, |
| "learning_rate": 4.208025343189018e-05, |
| "loss": 0.446, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.7350427350427351, |
| "grad_norm": 0.26020269412885194, |
| "learning_rate": 4.202745512143611e-05, |
| "loss": 0.4416, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.7378917378917379, |
| "grad_norm": 0.29543250701797663, |
| "learning_rate": 4.197465681098205e-05, |
| "loss": 0.4481, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.7407407407407407, |
| "grad_norm": 0.24853929821482904, |
| "learning_rate": 4.1921858500527985e-05, |
| "loss": 0.46, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7435897435897436, |
| "grad_norm": 0.3195779365953625, |
| "learning_rate": 4.186906019007392e-05, |
| "loss": 0.4508, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.7464387464387464, |
| "grad_norm": 0.2326123884289747, |
| "learning_rate": 4.181626187961986e-05, |
| "loss": 0.446, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.7492877492877493, |
| "grad_norm": 0.2848262848249219, |
| "learning_rate": 4.176346356916579e-05, |
| "loss": 0.4558, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.7521367521367521, |
| "grad_norm": 0.2499757501835736, |
| "learning_rate": 4.1710665258711724e-05, |
| "loss": 0.4426, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.7549857549857549, |
| "grad_norm": 0.2914547474394867, |
| "learning_rate": 4.1657866948257654e-05, |
| "loss": 0.4563, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.7578347578347578, |
| "grad_norm": 0.2576885537277797, |
| "learning_rate": 4.160506863780359e-05, |
| "loss": 0.4396, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.7606837606837606, |
| "grad_norm": 0.26204392829857903, |
| "learning_rate": 4.1552270327349526e-05, |
| "loss": 0.4472, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.7635327635327636, |
| "grad_norm": 0.3265158503996716, |
| "learning_rate": 4.149947201689546e-05, |
| "loss": 0.451, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.7663817663817664, |
| "grad_norm": 0.2609942596958091, |
| "learning_rate": 4.14466737064414e-05, |
| "loss": 0.4595, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 0.268707951552568, |
| "learning_rate": 4.139387539598733e-05, |
| "loss": 0.4447, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.7720797720797721, |
| "grad_norm": 0.28137031045693933, |
| "learning_rate": 4.1341077085533265e-05, |
| "loss": 0.4549, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.7749287749287749, |
| "grad_norm": 0.29190078199597813, |
| "learning_rate": 4.1288278775079195e-05, |
| "loss": 0.4477, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.7777777777777778, |
| "grad_norm": 0.2454715815920555, |
| "learning_rate": 4.123548046462513e-05, |
| "loss": 0.4324, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.7806267806267806, |
| "grad_norm": 0.3094193045635567, |
| "learning_rate": 4.118268215417107e-05, |
| "loss": 0.427, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.7834757834757835, |
| "grad_norm": 0.24434281879639438, |
| "learning_rate": 4.1129883843717004e-05, |
| "loss": 0.4446, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.7863247863247863, |
| "grad_norm": 0.29785841511085437, |
| "learning_rate": 4.107708553326294e-05, |
| "loss": 0.4316, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.7891737891737892, |
| "grad_norm": 0.24685554556974273, |
| "learning_rate": 4.102428722280887e-05, |
| "loss": 0.4376, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.792022792022792, |
| "grad_norm": 0.24898713039939369, |
| "learning_rate": 4.0971488912354806e-05, |
| "loss": 0.4403, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.7948717948717948, |
| "grad_norm": 0.30241270134797515, |
| "learning_rate": 4.0918690601900736e-05, |
| "loss": 0.4416, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.7977207977207977, |
| "grad_norm": 0.24307748159009482, |
| "learning_rate": 4.086589229144668e-05, |
| "loss": 0.4398, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8005698005698005, |
| "grad_norm": 0.26394622572849324, |
| "learning_rate": 4.081309398099261e-05, |
| "loss": 0.4421, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.8034188034188035, |
| "grad_norm": 0.24299948905266622, |
| "learning_rate": 4.0760295670538545e-05, |
| "loss": 0.456, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.8062678062678063, |
| "grad_norm": 0.2538995235843933, |
| "learning_rate": 4.070749736008448e-05, |
| "loss": 0.4411, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.8091168091168092, |
| "grad_norm": 0.24352793364283998, |
| "learning_rate": 4.065469904963041e-05, |
| "loss": 0.4457, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.811965811965812, |
| "grad_norm": 0.754328353026504, |
| "learning_rate": 4.060190073917635e-05, |
| "loss": 0.4749, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.8148148148148148, |
| "grad_norm": 0.2608557320949828, |
| "learning_rate": 4.0549102428722284e-05, |
| "loss": 0.4366, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.8176638176638177, |
| "grad_norm": 0.25680855446240963, |
| "learning_rate": 4.049630411826822e-05, |
| "loss": 0.4435, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.8205128205128205, |
| "grad_norm": 0.23235073760106573, |
| "learning_rate": 4.044350580781415e-05, |
| "loss": 0.4507, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.8233618233618234, |
| "grad_norm": 0.2503349096849241, |
| "learning_rate": 4.0390707497360086e-05, |
| "loss": 0.4474, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.8262108262108262, |
| "grad_norm": 0.24087457888823843, |
| "learning_rate": 4.033790918690602e-05, |
| "loss": 0.4312, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.8290598290598291, |
| "grad_norm": 0.25448968083447765, |
| "learning_rate": 4.028511087645195e-05, |
| "loss": 0.4466, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.8319088319088319, |
| "grad_norm": 0.24829756385015966, |
| "learning_rate": 4.023231256599789e-05, |
| "loss": 0.4545, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.8347578347578347, |
| "grad_norm": 0.25607715347273097, |
| "learning_rate": 4.0179514255543825e-05, |
| "loss": 0.4565, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.8376068376068376, |
| "grad_norm": 0.3091527511324264, |
| "learning_rate": 4.012671594508976e-05, |
| "loss": 0.4561, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.8404558404558404, |
| "grad_norm": 0.24780329551924565, |
| "learning_rate": 4.007391763463569e-05, |
| "loss": 0.4442, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.8433048433048433, |
| "grad_norm": 0.2931917392412753, |
| "learning_rate": 4.002111932418163e-05, |
| "loss": 0.4562, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.8461538461538461, |
| "grad_norm": 0.23945446840779216, |
| "learning_rate": 3.9968321013727564e-05, |
| "loss": 0.4407, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.8490028490028491, |
| "grad_norm": 0.27070479647529605, |
| "learning_rate": 3.991552270327349e-05, |
| "loss": 0.4371, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.8518518518518519, |
| "grad_norm": 0.2692423182003942, |
| "learning_rate": 3.9862724392819437e-05, |
| "loss": 0.4461, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.8547008547008547, |
| "grad_norm": 0.2707107693436041, |
| "learning_rate": 3.9809926082365366e-05, |
| "loss": 0.4435, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8575498575498576, |
| "grad_norm": 0.2500219756684885, |
| "learning_rate": 3.97571277719113e-05, |
| "loss": 0.4288, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.8603988603988604, |
| "grad_norm": 0.2783790061607216, |
| "learning_rate": 3.970432946145723e-05, |
| "loss": 0.4486, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.8632478632478633, |
| "grad_norm": 0.2407111058958332, |
| "learning_rate": 3.965153115100317e-05, |
| "loss": 0.4514, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.8660968660968661, |
| "grad_norm": 0.30074552592413345, |
| "learning_rate": 3.9598732840549105e-05, |
| "loss": 0.4408, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.8689458689458689, |
| "grad_norm": 0.21341465024781467, |
| "learning_rate": 3.954593453009504e-05, |
| "loss": 0.4216, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.8717948717948718, |
| "grad_norm": 0.2938618051639469, |
| "learning_rate": 3.949313621964098e-05, |
| "loss": 0.4416, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.8746438746438746, |
| "grad_norm": 0.48732469986333266, |
| "learning_rate": 3.944033790918691e-05, |
| "loss": 0.4516, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.8774928774928775, |
| "grad_norm": 0.2735554566382607, |
| "learning_rate": 3.9387539598732844e-05, |
| "loss": 0.4383, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.8803418803418803, |
| "grad_norm": 0.28479664265954874, |
| "learning_rate": 3.933474128827877e-05, |
| "loss": 0.4458, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.8831908831908832, |
| "grad_norm": 0.22798637141219172, |
| "learning_rate": 3.928194297782471e-05, |
| "loss": 0.442, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.886039886039886, |
| "grad_norm": 0.24677969245732811, |
| "learning_rate": 3.9229144667370646e-05, |
| "loss": 0.4481, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.29202684528549283, |
| "learning_rate": 3.917634635691658e-05, |
| "loss": 0.4412, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.8917378917378918, |
| "grad_norm": 0.2606008473665173, |
| "learning_rate": 3.912354804646252e-05, |
| "loss": 0.435, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.8945868945868946, |
| "grad_norm": 0.26749609172100086, |
| "learning_rate": 3.907074973600845e-05, |
| "loss": 0.4516, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.8974358974358975, |
| "grad_norm": 0.22520943451800887, |
| "learning_rate": 3.9017951425554385e-05, |
| "loss": 0.4241, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.9002849002849003, |
| "grad_norm": 0.22909468804333016, |
| "learning_rate": 3.8965153115100314e-05, |
| "loss": 0.4379, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.9031339031339032, |
| "grad_norm": 0.2836733485621368, |
| "learning_rate": 3.891235480464625e-05, |
| "loss": 0.4476, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.905982905982906, |
| "grad_norm": 0.244148163521808, |
| "learning_rate": 3.885955649419219e-05, |
| "loss": 0.4546, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.9088319088319088, |
| "grad_norm": 0.24672915009941915, |
| "learning_rate": 3.8806758183738124e-05, |
| "loss": 0.4368, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.9116809116809117, |
| "grad_norm": 0.4272615119652992, |
| "learning_rate": 3.875395987328406e-05, |
| "loss": 0.4334, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9145299145299145, |
| "grad_norm": 0.24895020848554578, |
| "learning_rate": 3.870116156282999e-05, |
| "loss": 0.4442, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.9173789173789174, |
| "grad_norm": 0.2658053036431955, |
| "learning_rate": 3.8648363252375926e-05, |
| "loss": 0.4383, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.9202279202279202, |
| "grad_norm": 0.2643219649864309, |
| "learning_rate": 3.8595564941921856e-05, |
| "loss": 0.4251, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 0.2365954433037453, |
| "learning_rate": 3.85427666314678e-05, |
| "loss": 0.4499, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.9259259259259259, |
| "grad_norm": 0.2633023483060389, |
| "learning_rate": 3.848996832101373e-05, |
| "loss": 0.4384, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.9287749287749287, |
| "grad_norm": 0.2956130036845057, |
| "learning_rate": 3.8437170010559665e-05, |
| "loss": 0.4396, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.9316239316239316, |
| "grad_norm": 0.24882987462496686, |
| "learning_rate": 3.83843717001056e-05, |
| "loss": 0.45, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.9344729344729344, |
| "grad_norm": 1.5891449688358956, |
| "learning_rate": 3.833157338965153e-05, |
| "loss": 0.4415, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.9373219373219374, |
| "grad_norm": 0.3349715325263592, |
| "learning_rate": 3.827877507919747e-05, |
| "loss": 0.4457, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.9401709401709402, |
| "grad_norm": 0.2848178852794376, |
| "learning_rate": 3.8225976768743404e-05, |
| "loss": 0.456, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.9430199430199431, |
| "grad_norm": 0.302751156256454, |
| "learning_rate": 3.817317845828934e-05, |
| "loss": 0.4387, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.9458689458689459, |
| "grad_norm": 0.25955277850770286, |
| "learning_rate": 3.812038014783527e-05, |
| "loss": 0.44, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.9487179487179487, |
| "grad_norm": 0.2502011385796011, |
| "learning_rate": 3.8067581837381206e-05, |
| "loss": 0.4515, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.9515669515669516, |
| "grad_norm": 0.2650885248812146, |
| "learning_rate": 3.801478352692714e-05, |
| "loss": 0.4475, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.9544159544159544, |
| "grad_norm": 0.24800530026732448, |
| "learning_rate": 3.796198521647307e-05, |
| "loss": 0.4589, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.9572649572649573, |
| "grad_norm": 0.24826963064202007, |
| "learning_rate": 3.7909186906019015e-05, |
| "loss": 0.4338, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.9601139601139601, |
| "grad_norm": 0.23027863612091976, |
| "learning_rate": 3.7856388595564945e-05, |
| "loss": 0.4301, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.9629629629629629, |
| "grad_norm": 0.22579121856289947, |
| "learning_rate": 3.780359028511088e-05, |
| "loss": 0.4592, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.9658119658119658, |
| "grad_norm": 0.2548134083345822, |
| "learning_rate": 3.775079197465681e-05, |
| "loss": 0.442, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.9686609686609686, |
| "grad_norm": 0.24004323105561068, |
| "learning_rate": 3.769799366420275e-05, |
| "loss": 0.4379, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.9715099715099715, |
| "grad_norm": 0.22500518319359325, |
| "learning_rate": 3.764519535374868e-05, |
| "loss": 0.4383, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.9743589743589743, |
| "grad_norm": 0.23655137638652632, |
| "learning_rate": 3.759239704329461e-05, |
| "loss": 0.4304, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.9772079772079773, |
| "grad_norm": 0.23186063099612125, |
| "learning_rate": 3.7539598732840556e-05, |
| "loss": 0.457, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.98005698005698, |
| "grad_norm": 0.22345800936056795, |
| "learning_rate": 3.7486800422386486e-05, |
| "loss": 0.4233, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.9829059829059829, |
| "grad_norm": 0.22901659678419103, |
| "learning_rate": 3.743400211193242e-05, |
| "loss": 0.4575, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.9857549857549858, |
| "grad_norm": 0.34541054663294773, |
| "learning_rate": 3.738120380147835e-05, |
| "loss": 0.4539, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.9886039886039886, |
| "grad_norm": 0.24563352556354082, |
| "learning_rate": 3.732840549102429e-05, |
| "loss": 0.4326, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.9914529914529915, |
| "grad_norm": 0.28378321755449876, |
| "learning_rate": 3.727560718057022e-05, |
| "loss": 0.4527, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.9943019943019943, |
| "grad_norm": 0.2376023328816585, |
| "learning_rate": 3.722280887011616e-05, |
| "loss": 0.465, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.9971509971509972, |
| "grad_norm": 0.23913998831345243, |
| "learning_rate": 3.717001055966209e-05, |
| "loss": 0.4502, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.2446661739630635, |
| "learning_rate": 3.711721224920803e-05, |
| "loss": 0.436, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.002849002849003, |
| "grad_norm": 0.30212623378795767, |
| "learning_rate": 3.7064413938753963e-05, |
| "loss": 0.3773, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.0056980056980056, |
| "grad_norm": 0.2727102306693993, |
| "learning_rate": 3.701161562829989e-05, |
| "loss": 0.3721, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.0085470085470085, |
| "grad_norm": 0.24519118257200442, |
| "learning_rate": 3.695881731784583e-05, |
| "loss": 0.3556, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.0113960113960114, |
| "grad_norm": 0.2708532250345628, |
| "learning_rate": 3.6906019007391766e-05, |
| "loss": 0.3631, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.0142450142450143, |
| "grad_norm": 0.2848868957990222, |
| "learning_rate": 3.68532206969377e-05, |
| "loss": 0.3647, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.017094017094017, |
| "grad_norm": 0.2609789117933919, |
| "learning_rate": 3.680042238648363e-05, |
| "loss": 0.3598, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.01994301994302, |
| "grad_norm": 0.30731469194417627, |
| "learning_rate": 3.674762407602957e-05, |
| "loss": 0.3905, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.0227920227920229, |
| "grad_norm": 0.2751477526094167, |
| "learning_rate": 3.6694825765575505e-05, |
| "loss": 0.3766, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.0256410256410255, |
| "grad_norm": 0.3041300782163382, |
| "learning_rate": 3.6642027455121434e-05, |
| "loss": 0.3598, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.0284900284900285, |
| "grad_norm": 0.29025593237743824, |
| "learning_rate": 3.658922914466738e-05, |
| "loss": 0.3656, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.0313390313390314, |
| "grad_norm": 0.26450155835177075, |
| "learning_rate": 3.653643083421331e-05, |
| "loss": 0.3765, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.0341880341880343, |
| "grad_norm": 0.29763564022742284, |
| "learning_rate": 3.648363252375924e-05, |
| "loss": 0.3677, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.037037037037037, |
| "grad_norm": 0.2531649793647418, |
| "learning_rate": 3.643083421330517e-05, |
| "loss": 0.3687, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.03988603988604, |
| "grad_norm": 0.278947460622146, |
| "learning_rate": 3.637803590285111e-05, |
| "loss": 0.3988, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.0427350427350428, |
| "grad_norm": 0.28829274848081526, |
| "learning_rate": 3.6325237592397046e-05, |
| "loss": 0.3724, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.0455840455840455, |
| "grad_norm": 0.2618749521918147, |
| "learning_rate": 3.6272439281942975e-05, |
| "loss": 0.3542, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.0484330484330484, |
| "grad_norm": 2.0062858158052452, |
| "learning_rate": 3.621964097148892e-05, |
| "loss": 0.3987, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.0512820512820513, |
| "grad_norm": 0.28445975801774975, |
| "learning_rate": 3.616684266103485e-05, |
| "loss": 0.3653, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.0541310541310542, |
| "grad_norm": 0.2462050134870002, |
| "learning_rate": 3.6114044350580785e-05, |
| "loss": 0.3609, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.056980056980057, |
| "grad_norm": 0.28062644181944496, |
| "learning_rate": 3.6061246040126714e-05, |
| "loss": 0.3865, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.0598290598290598, |
| "grad_norm": 0.24606665304462752, |
| "learning_rate": 3.600844772967265e-05, |
| "loss": 0.3502, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.0626780626780628, |
| "grad_norm": 0.3129397581417704, |
| "learning_rate": 3.595564941921859e-05, |
| "loss": 0.3373, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.0655270655270654, |
| "grad_norm": 0.2957891434648334, |
| "learning_rate": 3.590285110876452e-05, |
| "loss": 0.3591, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.0683760683760684, |
| "grad_norm": 0.2570065060278186, |
| "learning_rate": 3.585005279831046e-05, |
| "loss": 0.3861, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.0712250712250713, |
| "grad_norm": 0.25605383657422265, |
| "learning_rate": 3.579725448785639e-05, |
| "loss": 0.3834, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.074074074074074, |
| "grad_norm": 0.26319733021304453, |
| "learning_rate": 3.5744456177402326e-05, |
| "loss": 0.3973, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.0769230769230769, |
| "grad_norm": 0.26823999119784286, |
| "learning_rate": 3.5691657866948255e-05, |
| "loss": 0.3843, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.0797720797720798, |
| "grad_norm": 0.2801739766417507, |
| "learning_rate": 3.563885955649419e-05, |
| "loss": 0.3613, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.0826210826210827, |
| "grad_norm": 0.2752489664634236, |
| "learning_rate": 3.558606124604013e-05, |
| "loss": 0.3608, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.0854700854700854, |
| "grad_norm": 0.253420517582108, |
| "learning_rate": 3.5533262935586064e-05, |
| "loss": 0.3654, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.0883190883190883, |
| "grad_norm": 0.2352861368854798, |
| "learning_rate": 3.5480464625132e-05, |
| "loss": 0.3567, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.0911680911680912, |
| "grad_norm": 0.24009458895765762, |
| "learning_rate": 3.542766631467793e-05, |
| "loss": 0.3622, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.0940170940170941, |
| "grad_norm": 0.26857315547032595, |
| "learning_rate": 3.537486800422387e-05, |
| "loss": 0.3654, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.0968660968660968, |
| "grad_norm": 0.3755228777720786, |
| "learning_rate": 3.5322069693769796e-05, |
| "loss": 0.3785, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.0997150997150997, |
| "grad_norm": 0.25523099197348387, |
| "learning_rate": 3.526927138331573e-05, |
| "loss": 0.3703, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.1025641025641026, |
| "grad_norm": 0.25215901520376743, |
| "learning_rate": 3.521647307286167e-05, |
| "loss": 0.3717, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.1054131054131053, |
| "grad_norm": 0.21795121913627405, |
| "learning_rate": 3.5163674762407606e-05, |
| "loss": 0.3613, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.1082621082621082, |
| "grad_norm": 0.21930209265394288, |
| "learning_rate": 3.511087645195354e-05, |
| "loss": 0.3656, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 0.2443453451193516, |
| "learning_rate": 3.505807814149947e-05, |
| "loss": 0.3569, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.1139601139601139, |
| "grad_norm": 0.23464081224607158, |
| "learning_rate": 3.500527983104541e-05, |
| "loss": 0.3733, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.1168091168091168, |
| "grad_norm": 0.23312948395717237, |
| "learning_rate": 3.495248152059134e-05, |
| "loss": 0.3695, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.1196581196581197, |
| "grad_norm": 0.20794997263096304, |
| "learning_rate": 3.489968321013728e-05, |
| "loss": 0.3573, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.1225071225071226, |
| "grad_norm": 0.26627834231921077, |
| "learning_rate": 3.484688489968321e-05, |
| "loss": 0.3801, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.1253561253561253, |
| "grad_norm": 0.210991273677547, |
| "learning_rate": 3.479408658922915e-05, |
| "loss": 0.3447, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.1282051282051282, |
| "grad_norm": 0.23724562353300943, |
| "learning_rate": 3.474128827877508e-05, |
| "loss": 0.356, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.131054131054131, |
| "grad_norm": 0.2629540478309352, |
| "learning_rate": 3.468848996832101e-05, |
| "loss": 0.3808, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.133903133903134, |
| "grad_norm": 0.21190850006514703, |
| "learning_rate": 3.463569165786695e-05, |
| "loss": 0.3606, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.1367521367521367, |
| "grad_norm": 0.22884348327135656, |
| "learning_rate": 3.4582893347412886e-05, |
| "loss": 0.3612, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.1396011396011396, |
| "grad_norm": 0.2387204757429875, |
| "learning_rate": 3.453009503695882e-05, |
| "loss": 0.3663, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.1424501424501425, |
| "grad_norm": 0.2531987312790015, |
| "learning_rate": 3.447729672650475e-05, |
| "loss": 0.3708, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.1452991452991452, |
| "grad_norm": 0.23529785297450195, |
| "learning_rate": 3.442449841605069e-05, |
| "loss": 0.3908, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.1481481481481481, |
| "grad_norm": 0.21187095337907066, |
| "learning_rate": 3.4371700105596624e-05, |
| "loss": 0.3746, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.150997150997151, |
| "grad_norm": 0.20927824341881443, |
| "learning_rate": 3.4318901795142554e-05, |
| "loss": 0.3744, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.1538461538461537, |
| "grad_norm": 0.23470612919324702, |
| "learning_rate": 3.42661034846885e-05, |
| "loss": 0.3546, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.1566951566951567, |
| "grad_norm": 0.20792179111583126, |
| "learning_rate": 3.421330517423443e-05, |
| "loss": 0.3711, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.1595441595441596, |
| "grad_norm": 0.2426450060857107, |
| "learning_rate": 3.416050686378036e-05, |
| "loss": 0.3446, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.1623931623931625, |
| "grad_norm": 0.2583692183849906, |
| "learning_rate": 3.410770855332629e-05, |
| "loss": 0.3725, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.1652421652421652, |
| "grad_norm": 0.24566054945662688, |
| "learning_rate": 3.405491024287223e-05, |
| "loss": 0.3802, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.168091168091168, |
| "grad_norm": 0.26319005374456306, |
| "learning_rate": 3.4002111932418166e-05, |
| "loss": 0.3579, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.170940170940171, |
| "grad_norm": 0.2371042478251275, |
| "learning_rate": 3.3949313621964095e-05, |
| "loss": 0.3713, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.173789173789174, |
| "grad_norm": 0.21150424231515216, |
| "learning_rate": 3.389651531151004e-05, |
| "loss": 0.3617, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.1766381766381766, |
| "grad_norm": 0.2660573468294059, |
| "learning_rate": 3.384371700105597e-05, |
| "loss": 0.3619, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.1794871794871795, |
| "grad_norm": 0.2629889818118525, |
| "learning_rate": 3.3790918690601904e-05, |
| "loss": 0.3729, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.1823361823361824, |
| "grad_norm": 0.21963192309042254, |
| "learning_rate": 3.3738120380147834e-05, |
| "loss": 0.3649, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.1851851851851851, |
| "grad_norm": 0.2676340624788005, |
| "learning_rate": 3.368532206969377e-05, |
| "loss": 0.3557, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.188034188034188, |
| "grad_norm": 0.21995558755670147, |
| "learning_rate": 3.363252375923971e-05, |
| "loss": 0.3645, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.190883190883191, |
| "grad_norm": 0.21570771890834206, |
| "learning_rate": 3.357972544878564e-05, |
| "loss": 0.3576, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.1937321937321936, |
| "grad_norm": 0.26463658898885334, |
| "learning_rate": 3.352692713833158e-05, |
| "loss": 0.3567, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.1965811965811965, |
| "grad_norm": 0.21514849821934176, |
| "learning_rate": 3.347412882787751e-05, |
| "loss": 0.3745, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.1994301994301995, |
| "grad_norm": 0.24477901943499678, |
| "learning_rate": 3.3421330517423445e-05, |
| "loss": 0.3733, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.2022792022792024, |
| "grad_norm": 0.21967390180786867, |
| "learning_rate": 3.3368532206969375e-05, |
| "loss": 0.3848, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.205128205128205, |
| "grad_norm": 0.22210804416610816, |
| "learning_rate": 3.331573389651531e-05, |
| "loss": 0.3502, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.207977207977208, |
| "grad_norm": 0.23341712719478797, |
| "learning_rate": 3.326293558606125e-05, |
| "loss": 0.3788, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.210826210826211, |
| "grad_norm": 0.22474183284157034, |
| "learning_rate": 3.3210137275607184e-05, |
| "loss": 0.3739, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.2136752136752136, |
| "grad_norm": 0.20810663859008569, |
| "learning_rate": 3.315733896515312e-05, |
| "loss": 0.3575, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.2165242165242165, |
| "grad_norm": 0.23332852349670008, |
| "learning_rate": 3.310454065469905e-05, |
| "loss": 0.3431, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.2193732193732194, |
| "grad_norm": 0.2158486368866276, |
| "learning_rate": 3.305174234424499e-05, |
| "loss": 0.3579, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.2222222222222223, |
| "grad_norm": 0.23876477976217, |
| "learning_rate": 3.2998944033790916e-05, |
| "loss": 0.3748, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.225071225071225, |
| "grad_norm": 0.22671754097370178, |
| "learning_rate": 3.294614572333686e-05, |
| "loss": 0.3733, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.227920227920228, |
| "grad_norm": 0.20882908615979884, |
| "learning_rate": 3.289334741288279e-05, |
| "loss": 0.3478, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.2307692307692308, |
| "grad_norm": 0.22921490176990594, |
| "learning_rate": 3.2840549102428725e-05, |
| "loss": 0.3947, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.2336182336182335, |
| "grad_norm": 0.24877033092993325, |
| "learning_rate": 3.278775079197466e-05, |
| "loss": 0.3587, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.2364672364672364, |
| "grad_norm": 0.21983363052940397, |
| "learning_rate": 3.273495248152059e-05, |
| "loss": 0.3621, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.2393162393162394, |
| "grad_norm": 0.23244050435128452, |
| "learning_rate": 3.268215417106653e-05, |
| "loss": 0.3414, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.242165242165242, |
| "grad_norm": 0.3093091073113401, |
| "learning_rate": 3.262935586061246e-05, |
| "loss": 0.3708, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.245014245014245, |
| "grad_norm": 0.21362305859740863, |
| "learning_rate": 3.25765575501584e-05, |
| "loss": 0.3516, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.2478632478632479, |
| "grad_norm": 0.2454599928955601, |
| "learning_rate": 3.252375923970433e-05, |
| "loss": 0.3692, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.2507122507122508, |
| "grad_norm": 0.25257783308579096, |
| "learning_rate": 3.2470960929250267e-05, |
| "loss": 0.3522, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.2535612535612537, |
| "grad_norm": 0.24797928773992586, |
| "learning_rate": 3.2418162618796196e-05, |
| "loss": 0.3681, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.2564102564102564, |
| "grad_norm": 0.2276797048697118, |
| "learning_rate": 3.236536430834213e-05, |
| "loss": 0.3419, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.2592592592592593, |
| "grad_norm": 0.2646126048006256, |
| "learning_rate": 3.231256599788807e-05, |
| "loss": 0.3834, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.2621082621082622, |
| "grad_norm": 0.23971069233365036, |
| "learning_rate": 3.2259767687434005e-05, |
| "loss": 0.3496, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.264957264957265, |
| "grad_norm": 0.21565481165037637, |
| "learning_rate": 3.220696937697994e-05, |
| "loss": 0.3621, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.2678062678062678, |
| "grad_norm": 0.2429214837642576, |
| "learning_rate": 3.215417106652587e-05, |
| "loss": 0.3653, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.2706552706552707, |
| "grad_norm": 0.23114362643819203, |
| "learning_rate": 3.210137275607181e-05, |
| "loss": 0.3563, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.2735042735042734, |
| "grad_norm": 0.22101693642272935, |
| "learning_rate": 3.204857444561774e-05, |
| "loss": 0.3485, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.2763532763532763, |
| "grad_norm": 0.2483813722230642, |
| "learning_rate": 3.1995776135163674e-05, |
| "loss": 0.3679, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.2792022792022792, |
| "grad_norm": 0.21814879797928116, |
| "learning_rate": 3.194297782470961e-05, |
| "loss": 0.3699, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.282051282051282, |
| "grad_norm": 0.22150552594897266, |
| "learning_rate": 3.1890179514255547e-05, |
| "loss": 0.3595, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.2849002849002849, |
| "grad_norm": 0.22366186353090892, |
| "learning_rate": 3.183738120380148e-05, |
| "loss": 0.3599, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.2877492877492878, |
| "grad_norm": 0.23116335927041887, |
| "learning_rate": 3.178458289334741e-05, |
| "loss": 0.3676, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.2905982905982907, |
| "grad_norm": 0.2033308734127439, |
| "learning_rate": 3.173178458289335e-05, |
| "loss": 0.3521, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.2934472934472934, |
| "grad_norm": 0.2165082929329912, |
| "learning_rate": 3.167898627243928e-05, |
| "loss": 0.3542, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.2962962962962963, |
| "grad_norm": 0.19988858957987832, |
| "learning_rate": 3.1626187961985215e-05, |
| "loss": 0.3498, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.2991452991452992, |
| "grad_norm": 0.2292675192834246, |
| "learning_rate": 3.157338965153115e-05, |
| "loss": 0.3739, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.301994301994302, |
| "grad_norm": 0.2110292978902313, |
| "learning_rate": 3.152059134107709e-05, |
| "loss": 0.3585, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.3048433048433048, |
| "grad_norm": 0.22404470257022518, |
| "learning_rate": 3.1467793030623024e-05, |
| "loss": 0.3448, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.3076923076923077, |
| "grad_norm": 0.244584760285099, |
| "learning_rate": 3.1414994720168954e-05, |
| "loss": 0.371, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.3105413105413106, |
| "grad_norm": 0.2014664128466252, |
| "learning_rate": 3.136219640971489e-05, |
| "loss": 0.3568, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.3133903133903133, |
| "grad_norm": 0.23086767835485536, |
| "learning_rate": 3.130939809926082e-05, |
| "loss": 0.359, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.3162393162393162, |
| "grad_norm": 0.23970941434834442, |
| "learning_rate": 3.125659978880676e-05, |
| "loss": 0.3865, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.3190883190883191, |
| "grad_norm": 0.22027324796314413, |
| "learning_rate": 3.120380147835269e-05, |
| "loss": 0.3814, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.3219373219373218, |
| "grad_norm": 0.1997543044951977, |
| "learning_rate": 3.115100316789863e-05, |
| "loss": 0.3612, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.3247863247863247, |
| "grad_norm": 0.2298484065968129, |
| "learning_rate": 3.1098204857444565e-05, |
| "loss": 0.3486, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.3276353276353277, |
| "grad_norm": 0.23459882831026682, |
| "learning_rate": 3.1045406546990495e-05, |
| "loss": 0.3531, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.3304843304843303, |
| "grad_norm": 0.19872573469259944, |
| "learning_rate": 3.099260823653643e-05, |
| "loss": 0.3572, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.22072697065816743, |
| "learning_rate": 3.093980992608237e-05, |
| "loss": 0.3722, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.3361823361823362, |
| "grad_norm": 0.24096874308813088, |
| "learning_rate": 3.0887011615628304e-05, |
| "loss": 0.3596, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.339031339031339, |
| "grad_norm": 0.19914935516155746, |
| "learning_rate": 3.0834213305174234e-05, |
| "loss": 0.3515, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.341880341880342, |
| "grad_norm": 0.2064034362417422, |
| "learning_rate": 3.078141499472017e-05, |
| "loss": 0.3683, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.3447293447293447, |
| "grad_norm": 0.21371146142366845, |
| "learning_rate": 3.0728616684266106e-05, |
| "loss": 0.3517, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.3475783475783476, |
| "grad_norm": 0.21765447741613048, |
| "learning_rate": 3.0675818373812036e-05, |
| "loss": 0.3602, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.3504273504273505, |
| "grad_norm": 0.20977178211422376, |
| "learning_rate": 3.062302006335798e-05, |
| "loss": 0.356, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.3532763532763532, |
| "grad_norm": 0.2204679501825524, |
| "learning_rate": 3.057022175290391e-05, |
| "loss": 0.3638, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.3561253561253561, |
| "grad_norm": 0.2351375455226534, |
| "learning_rate": 3.0517423442449845e-05, |
| "loss": 0.3771, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.358974358974359, |
| "grad_norm": 0.32620489900955313, |
| "learning_rate": 3.0464625131995778e-05, |
| "loss": 0.3753, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.3618233618233617, |
| "grad_norm": 0.222755197443211, |
| "learning_rate": 3.041182682154171e-05, |
| "loss": 0.3674, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.3646723646723646, |
| "grad_norm": 0.22289274030678055, |
| "learning_rate": 3.0359028511087644e-05, |
| "loss": 0.3573, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.3675213675213675, |
| "grad_norm": 0.23379350169681865, |
| "learning_rate": 3.0306230200633577e-05, |
| "loss": 0.3653, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.3703703703703702, |
| "grad_norm": 0.18992629897318228, |
| "learning_rate": 3.0253431890179517e-05, |
| "loss": 0.3364, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.3732193732193732, |
| "grad_norm": 0.21790569721651318, |
| "learning_rate": 3.020063357972545e-05, |
| "loss": 0.3583, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.376068376068376, |
| "grad_norm": 0.21174281018764105, |
| "learning_rate": 3.0147835269271386e-05, |
| "loss": 0.3525, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.378917378917379, |
| "grad_norm": 0.23195477568030867, |
| "learning_rate": 3.009503695881732e-05, |
| "loss": 0.3603, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.381766381766382, |
| "grad_norm": 0.2185865276436869, |
| "learning_rate": 3.0042238648363252e-05, |
| "loss": 0.3751, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.3846153846153846, |
| "grad_norm": 0.21213207655408392, |
| "learning_rate": 2.9989440337909185e-05, |
| "loss": 0.3695, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.3874643874643875, |
| "grad_norm": 0.21005915264245484, |
| "learning_rate": 2.9936642027455125e-05, |
| "loss": 0.356, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.3903133903133904, |
| "grad_norm": 0.21163776861595562, |
| "learning_rate": 2.9883843717001058e-05, |
| "loss": 0.3709, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.393162393162393, |
| "grad_norm": 0.20028548407646818, |
| "learning_rate": 2.983104540654699e-05, |
| "loss": 0.3677, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.396011396011396, |
| "grad_norm": 0.18880981649360443, |
| "learning_rate": 2.9778247096092927e-05, |
| "loss": 0.349, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.398860398860399, |
| "grad_norm": 0.2234668654648058, |
| "learning_rate": 2.972544878563886e-05, |
| "loss": 0.3663, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.4017094017094016, |
| "grad_norm": 0.21853579314131166, |
| "learning_rate": 2.9672650475184793e-05, |
| "loss": 0.3613, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.4045584045584045, |
| "grad_norm": 0.22246500288700016, |
| "learning_rate": 2.9619852164730733e-05, |
| "loss": 0.3803, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.4074074074074074, |
| "grad_norm": 0.20429826743140633, |
| "learning_rate": 2.9567053854276666e-05, |
| "loss": 0.3658, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.4102564102564101, |
| "grad_norm": 0.2539252714453901, |
| "learning_rate": 2.95142555438226e-05, |
| "loss": 0.3734, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.413105413105413, |
| "grad_norm": 0.2176197906573368, |
| "learning_rate": 2.9461457233368532e-05, |
| "loss": 0.3697, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.415954415954416, |
| "grad_norm": 0.20262652133163678, |
| "learning_rate": 2.940865892291447e-05, |
| "loss": 0.3552, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.4188034188034189, |
| "grad_norm": 0.2307771718731254, |
| "learning_rate": 2.93558606124604e-05, |
| "loss": 0.3483, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.4216524216524218, |
| "grad_norm": 0.19342225282003825, |
| "learning_rate": 2.930306230200634e-05, |
| "loss": 0.3689, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.4245014245014245, |
| "grad_norm": 0.20142188241082173, |
| "learning_rate": 2.9250263991552274e-05, |
| "loss": 0.3515, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.4273504273504274, |
| "grad_norm": 0.21412132531144384, |
| "learning_rate": 2.9197465681098207e-05, |
| "loss": 0.3507, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.4301994301994303, |
| "grad_norm": 0.25094985245186724, |
| "learning_rate": 2.914466737064414e-05, |
| "loss": 0.383, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.433048433048433, |
| "grad_norm": 0.21232864199916715, |
| "learning_rate": 2.9091869060190073e-05, |
| "loss": 0.3764, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.435897435897436, |
| "grad_norm": 0.22049102509543148, |
| "learning_rate": 2.903907074973601e-05, |
| "loss": 0.3595, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.4387464387464388, |
| "grad_norm": 0.21682319020739224, |
| "learning_rate": 2.8986272439281943e-05, |
| "loss": 0.3547, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.4415954415954415, |
| "grad_norm": 0.23709929673053004, |
| "learning_rate": 2.8933474128827883e-05, |
| "loss": 0.3687, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.4444444444444444, |
| "grad_norm": 0.2101951752079666, |
| "learning_rate": 2.8880675818373816e-05, |
| "loss": 0.359, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.4472934472934473, |
| "grad_norm": 0.2474601153654613, |
| "learning_rate": 2.882787750791975e-05, |
| "loss": 0.3705, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.45014245014245, |
| "grad_norm": 0.21254139978772235, |
| "learning_rate": 2.877507919746568e-05, |
| "loss": 0.3564, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.452991452991453, |
| "grad_norm": 0.24070048810217534, |
| "learning_rate": 2.8722280887011615e-05, |
| "loss": 0.3706, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.4558404558404558, |
| "grad_norm": 0.2987871246913325, |
| "learning_rate": 2.8669482576557548e-05, |
| "loss": 0.3687, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.4586894586894588, |
| "grad_norm": 0.2300676722177236, |
| "learning_rate": 2.8616684266103487e-05, |
| "loss": 0.3647, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.4615384615384617, |
| "grad_norm": 0.26581166943990137, |
| "learning_rate": 2.856388595564942e-05, |
| "loss": 0.372, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.4643874643874644, |
| "grad_norm": 0.3655749618941395, |
| "learning_rate": 2.8511087645195357e-05, |
| "loss": 0.3776, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.4672364672364673, |
| "grad_norm": 0.26115893033445464, |
| "learning_rate": 2.845828933474129e-05, |
| "loss": 0.3645, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.4700854700854702, |
| "grad_norm": 0.21588849574942176, |
| "learning_rate": 2.8405491024287223e-05, |
| "loss": 0.3554, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.4729344729344729, |
| "grad_norm": 0.22519123294647936, |
| "learning_rate": 2.8352692713833156e-05, |
| "loss": 0.3378, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.4757834757834758, |
| "grad_norm": 0.24778294747275895, |
| "learning_rate": 2.8299894403379096e-05, |
| "loss": 0.3738, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.4786324786324787, |
| "grad_norm": 0.2265365912202125, |
| "learning_rate": 2.824709609292503e-05, |
| "loss": 0.3691, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.4814814814814814, |
| "grad_norm": 0.22460772397621678, |
| "learning_rate": 2.819429778247096e-05, |
| "loss": 0.3691, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.4843304843304843, |
| "grad_norm": 0.21608141296269037, |
| "learning_rate": 2.8141499472016898e-05, |
| "loss": 0.3467, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.4871794871794872, |
| "grad_norm": 0.2213366031457417, |
| "learning_rate": 2.808870116156283e-05, |
| "loss": 0.3622, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.49002849002849, |
| "grad_norm": 0.21175641658464092, |
| "learning_rate": 2.8035902851108764e-05, |
| "loss": 0.3667, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.4928774928774928, |
| "grad_norm": 0.2543379886177831, |
| "learning_rate": 2.7983104540654697e-05, |
| "loss": 0.3909, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.4957264957264957, |
| "grad_norm": 0.2394839478000981, |
| "learning_rate": 2.7930306230200637e-05, |
| "loss": 0.3694, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.4985754985754987, |
| "grad_norm": 0.2275501360298665, |
| "learning_rate": 2.787750791974657e-05, |
| "loss": 0.3559, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.5014245014245016, |
| "grad_norm": 0.2763856292468043, |
| "learning_rate": 2.7824709609292503e-05, |
| "loss": 0.3508, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.5042735042735043, |
| "grad_norm": 0.23957120121880873, |
| "learning_rate": 2.777191129883844e-05, |
| "loss": 0.3589, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.5071225071225072, |
| "grad_norm": 0.23319500553009573, |
| "learning_rate": 2.7719112988384372e-05, |
| "loss": 0.3703, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.50997150997151, |
| "grad_norm": 0.24921375732341508, |
| "learning_rate": 2.7666314677930305e-05, |
| "loss": 0.359, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.5128205128205128, |
| "grad_norm": 0.21008771333534176, |
| "learning_rate": 2.7613516367476245e-05, |
| "loss": 0.3572, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.5156695156695157, |
| "grad_norm": 0.21839097000403634, |
| "learning_rate": 2.7560718057022178e-05, |
| "loss": 0.3657, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.5185185185185186, |
| "grad_norm": 0.22689335860669002, |
| "learning_rate": 2.750791974656811e-05, |
| "loss": 0.3707, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.5213675213675213, |
| "grad_norm": 0.2454238635949199, |
| "learning_rate": 2.7455121436114044e-05, |
| "loss": 0.3665, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.5242165242165242, |
| "grad_norm": 0.21117182341715873, |
| "learning_rate": 2.740232312565998e-05, |
| "loss": 0.3543, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.5270655270655271, |
| "grad_norm": 0.2167859209575041, |
| "learning_rate": 2.7349524815205913e-05, |
| "loss": 0.3648, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.5299145299145298, |
| "grad_norm": 0.25424621395457675, |
| "learning_rate": 2.7296726504751853e-05, |
| "loss": 0.3783, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.5327635327635327, |
| "grad_norm": 0.21880596935336596, |
| "learning_rate": 2.7243928194297786e-05, |
| "loss": 0.3628, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.5356125356125356, |
| "grad_norm": 0.21692887185646115, |
| "learning_rate": 2.719112988384372e-05, |
| "loss": 0.363, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.5384615384615383, |
| "grad_norm": 0.2575849502562457, |
| "learning_rate": 2.7138331573389652e-05, |
| "loss": 0.369, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.5413105413105415, |
| "grad_norm": 0.23885268332551002, |
| "learning_rate": 2.7085533262935585e-05, |
| "loss": 0.3501, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.5441595441595442, |
| "grad_norm": 0.21686275578065198, |
| "learning_rate": 2.703273495248152e-05, |
| "loss": 0.3601, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.547008547008547, |
| "grad_norm": 0.233651783121331, |
| "learning_rate": 2.6979936642027458e-05, |
| "loss": 0.3629, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.54985754985755, |
| "grad_norm": 0.23142549801608311, |
| "learning_rate": 2.6927138331573394e-05, |
| "loss": 0.3756, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.5527065527065527, |
| "grad_norm": 0.23654232452295837, |
| "learning_rate": 2.6874340021119327e-05, |
| "loss": 0.3836, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.5555555555555556, |
| "grad_norm": 0.23955118714830956, |
| "learning_rate": 2.682154171066526e-05, |
| "loss": 0.3599, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.5584045584045585, |
| "grad_norm": 0.22135467807774808, |
| "learning_rate": 2.6768743400211193e-05, |
| "loss": 0.3747, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.5612535612535612, |
| "grad_norm": 0.21590468574896746, |
| "learning_rate": 2.6715945089757126e-05, |
| "loss": 0.3469, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.564102564102564, |
| "grad_norm": 0.24081611853474838, |
| "learning_rate": 2.6663146779303063e-05, |
| "loss": 0.3448, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.566951566951567, |
| "grad_norm": 0.22050273169428228, |
| "learning_rate": 2.6610348468849e-05, |
| "loss": 0.3665, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.5698005698005697, |
| "grad_norm": 0.2358630867918145, |
| "learning_rate": 2.6557550158394935e-05, |
| "loss": 0.3693, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.5726495726495726, |
| "grad_norm": 0.2256231077467112, |
| "learning_rate": 2.650475184794087e-05, |
| "loss": 0.3575, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.5754985754985755, |
| "grad_norm": 0.2193287720903084, |
| "learning_rate": 2.64519535374868e-05, |
| "loss": 0.3602, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.5783475783475782, |
| "grad_norm": 0.23352787405471498, |
| "learning_rate": 2.6399155227032734e-05, |
| "loss": 0.3697, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.5811965811965814, |
| "grad_norm": 0.2395424874218671, |
| "learning_rate": 2.6346356916578667e-05, |
| "loss": 0.3635, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.584045584045584, |
| "grad_norm": 0.43346665073783924, |
| "learning_rate": 2.6293558606124607e-05, |
| "loss": 0.3691, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.5868945868945867, |
| "grad_norm": 0.23220070707086665, |
| "learning_rate": 2.624076029567054e-05, |
| "loss": 0.3731, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.5897435897435899, |
| "grad_norm": 0.20943796833119865, |
| "learning_rate": 2.6187961985216473e-05, |
| "loss": 0.3698, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.5925925925925926, |
| "grad_norm": 0.2257633484876588, |
| "learning_rate": 2.613516367476241e-05, |
| "loss": 0.3635, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.5954415954415955, |
| "grad_norm": 0.22794583966236925, |
| "learning_rate": 2.6082365364308343e-05, |
| "loss": 0.3619, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.5982905982905984, |
| "grad_norm": 0.21844307238312177, |
| "learning_rate": 2.6029567053854276e-05, |
| "loss": 0.3735, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.601139601139601, |
| "grad_norm": 0.25182987504775967, |
| "learning_rate": 2.5976768743400215e-05, |
| "loss": 0.3657, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.603988603988604, |
| "grad_norm": 0.2328339817261743, |
| "learning_rate": 2.5923970432946148e-05, |
| "loss": 0.3617, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.606837606837607, |
| "grad_norm": 0.2198747745202425, |
| "learning_rate": 2.587117212249208e-05, |
| "loss": 0.3567, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.6096866096866096, |
| "grad_norm": 0.22458365808076017, |
| "learning_rate": 2.5818373812038014e-05, |
| "loss": 0.3633, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.6125356125356125, |
| "grad_norm": 0.2050810486479959, |
| "learning_rate": 2.576557550158395e-05, |
| "loss": 0.3662, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.6153846153846154, |
| "grad_norm": 0.23109142394514665, |
| "learning_rate": 2.5712777191129884e-05, |
| "loss": 0.377, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.618233618233618, |
| "grad_norm": 0.20488685566526976, |
| "learning_rate": 2.5659978880675823e-05, |
| "loss": 0.365, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.6210826210826212, |
| "grad_norm": 0.22434853437066676, |
| "learning_rate": 2.5607180570221756e-05, |
| "loss": 0.359, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.623931623931624, |
| "grad_norm": 0.22035628663691662, |
| "learning_rate": 2.555438225976769e-05, |
| "loss": 0.3613, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.6267806267806266, |
| "grad_norm": 0.2296131512377323, |
| "learning_rate": 2.5501583949313622e-05, |
| "loss": 0.3583, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.6296296296296298, |
| "grad_norm": 0.22699499325399286, |
| "learning_rate": 2.5448785638859555e-05, |
| "loss": 0.3615, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.6324786324786325, |
| "grad_norm": 0.23137613302922347, |
| "learning_rate": 2.5395987328405492e-05, |
| "loss": 0.3493, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.6353276353276354, |
| "grad_norm": 0.23513034060758498, |
| "learning_rate": 2.5343189017951425e-05, |
| "loss": 0.3536, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.6381766381766383, |
| "grad_norm": 0.20556474209048517, |
| "learning_rate": 2.5290390707497365e-05, |
| "loss": 0.3494, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.641025641025641, |
| "grad_norm": 0.20951535308942912, |
| "learning_rate": 2.5237592397043298e-05, |
| "loss": 0.3608, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.6438746438746439, |
| "grad_norm": 0.21051061589323755, |
| "learning_rate": 2.518479408658923e-05, |
| "loss": 0.343, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.6467236467236468, |
| "grad_norm": 0.20389495741565292, |
| "learning_rate": 2.5131995776135164e-05, |
| "loss": 0.3515, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.6495726495726495, |
| "grad_norm": 0.20666357754178133, |
| "learning_rate": 2.5079197465681097e-05, |
| "loss": 0.3637, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.6524216524216524, |
| "grad_norm": 0.2124678103193664, |
| "learning_rate": 2.5026399155227033e-05, |
| "loss": 0.3519, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.6552706552706553, |
| "grad_norm": 0.21745017494419377, |
| "learning_rate": 2.497360084477297e-05, |
| "loss": 0.3594, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.658119658119658, |
| "grad_norm": 0.1903592780911563, |
| "learning_rate": 2.4920802534318902e-05, |
| "loss": 0.336, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.660968660968661, |
| "grad_norm": 0.23741085468942869, |
| "learning_rate": 2.486800422386484e-05, |
| "loss": 0.3543, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.6638176638176638, |
| "grad_norm": 0.20157828540341033, |
| "learning_rate": 2.4815205913410772e-05, |
| "loss": 0.3342, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.2204911302874124, |
| "learning_rate": 2.4762407602956705e-05, |
| "loss": 0.3586, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.6695156695156697, |
| "grad_norm": 0.21109017199979024, |
| "learning_rate": 2.470960929250264e-05, |
| "loss": 0.3646, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.6723646723646723, |
| "grad_norm": 0.20646510827582915, |
| "learning_rate": 2.4656810982048574e-05, |
| "loss": 0.3563, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.6752136752136753, |
| "grad_norm": 0.21584768343148578, |
| "learning_rate": 2.460401267159451e-05, |
| "loss": 0.3535, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.6780626780626782, |
| "grad_norm": 0.23185019886698816, |
| "learning_rate": 2.4551214361140447e-05, |
| "loss": 0.3697, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.6809116809116809, |
| "grad_norm": 0.21268814828277405, |
| "learning_rate": 2.449841605068638e-05, |
| "loss": 0.3551, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.6837606837606838, |
| "grad_norm": 0.21604754434232787, |
| "learning_rate": 2.4445617740232313e-05, |
| "loss": 0.3655, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.6866096866096867, |
| "grad_norm": 0.22278622809811766, |
| "learning_rate": 2.439281942977825e-05, |
| "loss": 0.3538, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.6894586894586894, |
| "grad_norm": 0.2052145744246248, |
| "learning_rate": 2.4340021119324182e-05, |
| "loss": 0.356, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.6923076923076923, |
| "grad_norm": 0.19837577056881597, |
| "learning_rate": 2.4287222808870115e-05, |
| "loss": 0.356, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.6951566951566952, |
| "grad_norm": 0.2605768702541718, |
| "learning_rate": 2.4234424498416052e-05, |
| "loss": 0.376, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.698005698005698, |
| "grad_norm": 0.2150615912164544, |
| "learning_rate": 2.4181626187961988e-05, |
| "loss": 0.3442, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.7008547008547008, |
| "grad_norm": 0.20499211512651233, |
| "learning_rate": 2.412882787750792e-05, |
| "loss": 0.3699, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.7037037037037037, |
| "grad_norm": 0.21382275219015714, |
| "learning_rate": 2.4076029567053857e-05, |
| "loss": 0.3565, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.7065527065527064, |
| "grad_norm": 0.20994705780047032, |
| "learning_rate": 2.402323125659979e-05, |
| "loss": 0.3598, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.7094017094017095, |
| "grad_norm": 0.20135559643209863, |
| "learning_rate": 2.3970432946145723e-05, |
| "loss": 0.3596, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.7122507122507122, |
| "grad_norm": 0.20036899067341168, |
| "learning_rate": 2.391763463569166e-05, |
| "loss": 0.3569, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.7150997150997151, |
| "grad_norm": 0.21002141879928451, |
| "learning_rate": 2.3864836325237593e-05, |
| "loss": 0.3654, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.717948717948718, |
| "grad_norm": 0.22764067995733198, |
| "learning_rate": 2.381203801478353e-05, |
| "loss": 0.3625, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.7207977207977208, |
| "grad_norm": 0.19291409387702949, |
| "learning_rate": 2.3759239704329462e-05, |
| "loss": 0.3712, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.7236467236467237, |
| "grad_norm": 0.19990638382401998, |
| "learning_rate": 2.37064413938754e-05, |
| "loss": 0.3466, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.7264957264957266, |
| "grad_norm": 0.21172503078878085, |
| "learning_rate": 2.365364308342133e-05, |
| "loss": 0.373, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.7293447293447293, |
| "grad_norm": 0.21340989500402005, |
| "learning_rate": 2.3600844772967265e-05, |
| "loss": 0.3607, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.7321937321937322, |
| "grad_norm": 0.21348773962215495, |
| "learning_rate": 2.35480464625132e-05, |
| "loss": 0.3618, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.735042735042735, |
| "grad_norm": 0.21866328753111697, |
| "learning_rate": 2.3495248152059134e-05, |
| "loss": 0.3556, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.7378917378917378, |
| "grad_norm": 0.19763848284771224, |
| "learning_rate": 2.3442449841605067e-05, |
| "loss": 0.3607, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.7407407407407407, |
| "grad_norm": 0.2152332174618033, |
| "learning_rate": 2.3389651531151003e-05, |
| "loss": 0.3704, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.7435897435897436, |
| "grad_norm": 0.19269940169730435, |
| "learning_rate": 2.333685322069694e-05, |
| "loss": 0.3638, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.7464387464387463, |
| "grad_norm": 0.1962642279399981, |
| "learning_rate": 2.3284054910242873e-05, |
| "loss": 0.3482, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.7492877492877494, |
| "grad_norm": 0.2364856796721012, |
| "learning_rate": 2.323125659978881e-05, |
| "loss": 0.347, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.7521367521367521, |
| "grad_norm": 0.22509925838340694, |
| "learning_rate": 2.3178458289334742e-05, |
| "loss": 0.3604, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.7549857549857548, |
| "grad_norm": 0.2488709063431033, |
| "learning_rate": 2.3125659978880675e-05, |
| "loss": 0.3657, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.757834757834758, |
| "grad_norm": 0.2398318070372559, |
| "learning_rate": 2.307286166842661e-05, |
| "loss": 0.3629, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.7606837606837606, |
| "grad_norm": 0.22323781791119499, |
| "learning_rate": 2.3020063357972545e-05, |
| "loss": 0.359, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.7635327635327636, |
| "grad_norm": 0.2022321694186785, |
| "learning_rate": 2.296726504751848e-05, |
| "loss": 0.3604, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.7663817663817665, |
| "grad_norm": 0.21110134786679816, |
| "learning_rate": 2.2914466737064417e-05, |
| "loss": 0.3687, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.7692307692307692, |
| "grad_norm": 0.19625112710435982, |
| "learning_rate": 2.286166842661035e-05, |
| "loss": 0.3599, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.772079772079772, |
| "grad_norm": 0.20619851901285047, |
| "learning_rate": 2.2808870116156283e-05, |
| "loss": 0.3723, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.774928774928775, |
| "grad_norm": 0.20469389194337959, |
| "learning_rate": 2.275607180570222e-05, |
| "loss": 0.3661, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "grad_norm": 0.20778072555494564, |
| "learning_rate": 2.2703273495248153e-05, |
| "loss": 0.3585, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.7806267806267806, |
| "grad_norm": 0.19870626339902325, |
| "learning_rate": 2.2650475184794086e-05, |
| "loss": 0.3441, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.7834757834757835, |
| "grad_norm": 0.19114151393977027, |
| "learning_rate": 2.2597676874340022e-05, |
| "loss": 0.3591, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.7863247863247862, |
| "grad_norm": 0.1982172563967693, |
| "learning_rate": 2.254487856388596e-05, |
| "loss": 0.3854, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.7891737891737893, |
| "grad_norm": 0.20351344941450986, |
| "learning_rate": 2.249208025343189e-05, |
| "loss": 0.3822, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.792022792022792, |
| "grad_norm": 0.19819562909909946, |
| "learning_rate": 2.2439281942977825e-05, |
| "loss": 0.3645, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.7948717948717947, |
| "grad_norm": 0.20300969334790384, |
| "learning_rate": 2.238648363252376e-05, |
| "loss": 0.3631, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.7977207977207978, |
| "grad_norm": 0.20862867991682074, |
| "learning_rate": 2.2333685322069694e-05, |
| "loss": 0.3619, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.8005698005698005, |
| "grad_norm": 0.21833179546935874, |
| "learning_rate": 2.2280887011615627e-05, |
| "loss": 0.3608, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.8034188034188035, |
| "grad_norm": 0.6005070390326573, |
| "learning_rate": 2.2228088701161563e-05, |
| "loss": 0.3965, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.8062678062678064, |
| "grad_norm": 0.35943818916514836, |
| "learning_rate": 2.21752903907075e-05, |
| "loss": 0.3594, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.809116809116809, |
| "grad_norm": 0.2159038665602852, |
| "learning_rate": 2.2122492080253433e-05, |
| "loss": 0.3755, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.811965811965812, |
| "grad_norm": 0.20549883442788655, |
| "learning_rate": 2.206969376979937e-05, |
| "loss": 0.3451, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.8148148148148149, |
| "grad_norm": 0.20440135853564803, |
| "learning_rate": 2.2016895459345302e-05, |
| "loss": 0.3497, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.8176638176638176, |
| "grad_norm": 0.25722335101398636, |
| "learning_rate": 2.1964097148891235e-05, |
| "loss": 0.3639, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.8205128205128205, |
| "grad_norm": 0.19953052748781105, |
| "learning_rate": 2.191129883843717e-05, |
| "loss": 0.3481, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.8233618233618234, |
| "grad_norm": 0.2341568009909009, |
| "learning_rate": 2.1858500527983104e-05, |
| "loss": 0.3593, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.826210826210826, |
| "grad_norm": 0.21325737340547804, |
| "learning_rate": 2.180570221752904e-05, |
| "loss": 0.3636, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.8290598290598292, |
| "grad_norm": 0.22284714887271365, |
| "learning_rate": 2.1752903907074977e-05, |
| "loss": 0.3797, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.831908831908832, |
| "grad_norm": 0.20422527863661058, |
| "learning_rate": 2.170010559662091e-05, |
| "loss": 0.3527, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.8347578347578346, |
| "grad_norm": 0.22652999934815843, |
| "learning_rate": 2.1647307286166843e-05, |
| "loss": 0.3717, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.8376068376068377, |
| "grad_norm": 0.24779116320854475, |
| "learning_rate": 2.159450897571278e-05, |
| "loss": 0.3706, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.8404558404558404, |
| "grad_norm": 0.21671225191720797, |
| "learning_rate": 2.1541710665258713e-05, |
| "loss": 0.3386, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.8433048433048433, |
| "grad_norm": 0.22555743360580618, |
| "learning_rate": 2.1488912354804646e-05, |
| "loss": 0.3804, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.8461538461538463, |
| "grad_norm": 0.23617998499869738, |
| "learning_rate": 2.1436114044350582e-05, |
| "loss": 0.3723, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.849002849002849, |
| "grad_norm": 0.23014596171154433, |
| "learning_rate": 2.138331573389652e-05, |
| "loss": 0.3526, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.8518518518518519, |
| "grad_norm": 0.22916111003439332, |
| "learning_rate": 2.133051742344245e-05, |
| "loss": 0.3451, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.8547008547008548, |
| "grad_norm": 0.19810306429063848, |
| "learning_rate": 2.1277719112988384e-05, |
| "loss": 0.349, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.8575498575498575, |
| "grad_norm": 0.2202335756589606, |
| "learning_rate": 2.122492080253432e-05, |
| "loss": 0.3653, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.8603988603988604, |
| "grad_norm": 0.19827409095956613, |
| "learning_rate": 2.1172122492080254e-05, |
| "loss": 0.3457, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.8632478632478633, |
| "grad_norm": 0.19935795905418485, |
| "learning_rate": 2.1119324181626187e-05, |
| "loss": 0.3554, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.866096866096866, |
| "grad_norm": 0.20847219512744983, |
| "learning_rate": 2.1066525871172123e-05, |
| "loss": 0.351, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.868945868945869, |
| "grad_norm": 0.2156361373347463, |
| "learning_rate": 2.1013727560718056e-05, |
| "loss": 0.3618, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.8717948717948718, |
| "grad_norm": 0.20100877639931164, |
| "learning_rate": 2.0960929250263993e-05, |
| "loss": 0.3527, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.8746438746438745, |
| "grad_norm": 0.21961338320882828, |
| "learning_rate": 2.090813093980993e-05, |
| "loss": 0.3655, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.8774928774928776, |
| "grad_norm": 0.22970963785057988, |
| "learning_rate": 2.0855332629355862e-05, |
| "loss": 0.3578, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.8803418803418803, |
| "grad_norm": 0.1985917394092499, |
| "learning_rate": 2.0802534318901795e-05, |
| "loss": 0.3555, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.8831908831908832, |
| "grad_norm": 0.19782225715456844, |
| "learning_rate": 2.074973600844773e-05, |
| "loss": 0.3544, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.8860398860398861, |
| "grad_norm": 0.24471883420488305, |
| "learning_rate": 2.0696937697993664e-05, |
| "loss": 0.3689, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.8888888888888888, |
| "grad_norm": 0.22271236910553516, |
| "learning_rate": 2.0644139387539597e-05, |
| "loss": 0.3871, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.8917378917378918, |
| "grad_norm": 0.19560513729734122, |
| "learning_rate": 2.0591341077085534e-05, |
| "loss": 0.3561, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.8945868945868947, |
| "grad_norm": 0.21617290509605866, |
| "learning_rate": 2.053854276663147e-05, |
| "loss": 0.3286, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.8974358974358974, |
| "grad_norm": 0.22798617788173128, |
| "learning_rate": 2.0485744456177403e-05, |
| "loss": 0.3688, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.9002849002849003, |
| "grad_norm": 0.21021348614356922, |
| "learning_rate": 2.043294614572334e-05, |
| "loss": 0.3562, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.9031339031339032, |
| "grad_norm": 0.20996089592223216, |
| "learning_rate": 2.0380147835269273e-05, |
| "loss": 0.361, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.9059829059829059, |
| "grad_norm": 0.23565900871607817, |
| "learning_rate": 2.0327349524815206e-05, |
| "loss": 0.3754, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.9088319088319088, |
| "grad_norm": 0.18858686585228232, |
| "learning_rate": 2.0274551214361142e-05, |
| "loss": 0.3516, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.9116809116809117, |
| "grad_norm": 0.1893165779352585, |
| "learning_rate": 2.0221752903907075e-05, |
| "loss": 0.3366, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.9145299145299144, |
| "grad_norm": 0.21668943186182438, |
| "learning_rate": 2.016895459345301e-05, |
| "loss": 0.3502, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.9173789173789175, |
| "grad_norm": 0.20149473922057556, |
| "learning_rate": 2.0116156282998944e-05, |
| "loss": 0.3615, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.9202279202279202, |
| "grad_norm": 0.25869865465945596, |
| "learning_rate": 2.006335797254488e-05, |
| "loss": 0.3691, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.9230769230769231, |
| "grad_norm": 0.2036066715728498, |
| "learning_rate": 2.0010559662090814e-05, |
| "loss": 0.3502, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.925925925925926, |
| "grad_norm": 0.2004788360097017, |
| "learning_rate": 1.9957761351636747e-05, |
| "loss": 0.3594, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.9287749287749287, |
| "grad_norm": 0.21360843600832238, |
| "learning_rate": 1.9904963041182683e-05, |
| "loss": 0.3585, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.9316239316239316, |
| "grad_norm": 0.19762168727225163, |
| "learning_rate": 1.9852164730728616e-05, |
| "loss": 0.3675, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.9344729344729346, |
| "grad_norm": 0.20229203944486152, |
| "learning_rate": 1.9799366420274552e-05, |
| "loss": 0.355, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.9373219373219372, |
| "grad_norm": 0.2022552654861909, |
| "learning_rate": 1.974656810982049e-05, |
| "loss": 0.3547, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.9401709401709402, |
| "grad_norm": 0.2217881214201902, |
| "learning_rate": 1.9693769799366422e-05, |
| "loss": 0.3662, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.943019943019943, |
| "grad_norm": 0.21089292714234747, |
| "learning_rate": 1.9640971488912355e-05, |
| "loss": 0.3632, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.9458689458689458, |
| "grad_norm": 0.21183549408551403, |
| "learning_rate": 1.958817317845829e-05, |
| "loss": 0.3685, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.9487179487179487, |
| "grad_norm": 0.22399399219360525, |
| "learning_rate": 1.9535374868004224e-05, |
| "loss": 0.389, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.9515669515669516, |
| "grad_norm": 0.23478634085970435, |
| "learning_rate": 1.9482576557550157e-05, |
| "loss": 0.349, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.9544159544159543, |
| "grad_norm": 0.18663083025931687, |
| "learning_rate": 1.9429778247096094e-05, |
| "loss": 0.3638, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.9572649572649574, |
| "grad_norm": 0.20785309716673614, |
| "learning_rate": 1.937697993664203e-05, |
| "loss": 0.3529, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.96011396011396, |
| "grad_norm": 0.22463804321134564, |
| "learning_rate": 1.9324181626187963e-05, |
| "loss": 0.3594, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.9629629629629628, |
| "grad_norm": 0.1906326731349908, |
| "learning_rate": 1.92713833157339e-05, |
| "loss": 0.3502, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.965811965811966, |
| "grad_norm": 0.2066700222331773, |
| "learning_rate": 1.9218585005279832e-05, |
| "loss": 0.3672, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.9686609686609686, |
| "grad_norm": 0.1998642686060524, |
| "learning_rate": 1.9165786694825765e-05, |
| "loss": 0.364, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.9715099715099715, |
| "grad_norm": 0.1887608555452163, |
| "learning_rate": 1.9112988384371702e-05, |
| "loss": 0.3509, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.9743589743589745, |
| "grad_norm": 0.196768422003315, |
| "learning_rate": 1.9060190073917635e-05, |
| "loss": 0.342, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.9772079772079771, |
| "grad_norm": 0.19163494415125434, |
| "learning_rate": 1.900739176346357e-05, |
| "loss": 0.3461, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.98005698005698, |
| "grad_norm": 0.22754534461400272, |
| "learning_rate": 1.8954593453009508e-05, |
| "loss": 0.3764, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.982905982905983, |
| "grad_norm": 0.23834935791341716, |
| "learning_rate": 1.890179514255544e-05, |
| "loss": 0.3484, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.9857549857549857, |
| "grad_norm": 0.21264794974633236, |
| "learning_rate": 1.8848996832101374e-05, |
| "loss": 0.368, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.9886039886039886, |
| "grad_norm": 0.21453842577290422, |
| "learning_rate": 1.8796198521647307e-05, |
| "loss": 0.3593, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.9914529914529915, |
| "grad_norm": 0.20799231966171178, |
| "learning_rate": 1.8743400211193243e-05, |
| "loss": 0.3523, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.9943019943019942, |
| "grad_norm": 0.2080913590301314, |
| "learning_rate": 1.8690601900739176e-05, |
| "loss": 0.3768, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.9971509971509973, |
| "grad_norm": 0.20083753257614662, |
| "learning_rate": 1.863780359028511e-05, |
| "loss": 0.373, |
| "step": 701 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.25026072457519927, |
| "learning_rate": 1.8585005279831045e-05, |
| "loss": 0.3476, |
| "step": 702 |
| }, |
| { |
| "epoch": 2.0028490028490027, |
| "grad_norm": 0.31749430934031786, |
| "learning_rate": 1.8532206969376982e-05, |
| "loss": 0.2939, |
| "step": 703 |
| }, |
| { |
| "epoch": 2.005698005698006, |
| "grad_norm": 0.24903718426554183, |
| "learning_rate": 1.8479408658922915e-05, |
| "loss": 0.2786, |
| "step": 704 |
| }, |
| { |
| "epoch": 2.0085470085470085, |
| "grad_norm": 0.32095802283737634, |
| "learning_rate": 1.842661034846885e-05, |
| "loss": 0.2838, |
| "step": 705 |
| }, |
| { |
| "epoch": 2.011396011396011, |
| "grad_norm": 0.29328887995626335, |
| "learning_rate": 1.8373812038014784e-05, |
| "loss": 0.2801, |
| "step": 706 |
| }, |
| { |
| "epoch": 2.0142450142450143, |
| "grad_norm": 0.20925973424050445, |
| "learning_rate": 1.8321013727560717e-05, |
| "loss": 0.2631, |
| "step": 707 |
| }, |
| { |
| "epoch": 2.017094017094017, |
| "grad_norm": 0.28435426480239495, |
| "learning_rate": 1.8268215417106653e-05, |
| "loss": 0.2843, |
| "step": 708 |
| }, |
| { |
| "epoch": 2.0199430199430197, |
| "grad_norm": 0.2500899473941078, |
| "learning_rate": 1.8215417106652586e-05, |
| "loss": 0.2874, |
| "step": 709 |
| }, |
| { |
| "epoch": 2.022792022792023, |
| "grad_norm": 0.2642215470553276, |
| "learning_rate": 1.8162618796198523e-05, |
| "loss": 0.2812, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.0256410256410255, |
| "grad_norm": 0.2787623192910837, |
| "learning_rate": 1.810982048574446e-05, |
| "loss": 0.2852, |
| "step": 711 |
| }, |
| { |
| "epoch": 2.0284900284900287, |
| "grad_norm": 0.2532708996249064, |
| "learning_rate": 1.8057022175290392e-05, |
| "loss": 0.2767, |
| "step": 712 |
| }, |
| { |
| "epoch": 2.0313390313390314, |
| "grad_norm": 0.24900158666379366, |
| "learning_rate": 1.8004223864836325e-05, |
| "loss": 0.2737, |
| "step": 713 |
| }, |
| { |
| "epoch": 2.034188034188034, |
| "grad_norm": 0.2635713749224828, |
| "learning_rate": 1.795142555438226e-05, |
| "loss": 0.2705, |
| "step": 714 |
| }, |
| { |
| "epoch": 2.037037037037037, |
| "grad_norm": 0.20086792181553093, |
| "learning_rate": 1.7898627243928195e-05, |
| "loss": 0.2798, |
| "step": 715 |
| }, |
| { |
| "epoch": 2.03988603988604, |
| "grad_norm": 0.2432885568665745, |
| "learning_rate": 1.7845828933474128e-05, |
| "loss": 0.281, |
| "step": 716 |
| }, |
| { |
| "epoch": 2.0427350427350426, |
| "grad_norm": 0.21507703672726577, |
| "learning_rate": 1.7793030623020064e-05, |
| "loss": 0.2762, |
| "step": 717 |
| }, |
| { |
| "epoch": 2.0455840455840457, |
| "grad_norm": 0.21933858359248162, |
| "learning_rate": 1.7740232312566e-05, |
| "loss": 0.2729, |
| "step": 718 |
| }, |
| { |
| "epoch": 2.0484330484330484, |
| "grad_norm": 0.22017588537800314, |
| "learning_rate": 1.7687434002111933e-05, |
| "loss": 0.2855, |
| "step": 719 |
| }, |
| { |
| "epoch": 2.051282051282051, |
| "grad_norm": 0.20337914754232117, |
| "learning_rate": 1.7634635691657866e-05, |
| "loss": 0.2715, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.0541310541310542, |
| "grad_norm": 0.19719485220504646, |
| "learning_rate": 1.7581837381203803e-05, |
| "loss": 0.2611, |
| "step": 721 |
| }, |
| { |
| "epoch": 2.056980056980057, |
| "grad_norm": 0.2031620606156548, |
| "learning_rate": 1.7529039070749736e-05, |
| "loss": 0.2853, |
| "step": 722 |
| }, |
| { |
| "epoch": 2.0598290598290596, |
| "grad_norm": 0.22411987580889667, |
| "learning_rate": 1.747624076029567e-05, |
| "loss": 0.2807, |
| "step": 723 |
| }, |
| { |
| "epoch": 2.0626780626780628, |
| "grad_norm": 0.1997733419374099, |
| "learning_rate": 1.7423442449841605e-05, |
| "loss": 0.2749, |
| "step": 724 |
| }, |
| { |
| "epoch": 2.0655270655270654, |
| "grad_norm": 0.20442777603565418, |
| "learning_rate": 1.737064413938754e-05, |
| "loss": 0.2709, |
| "step": 725 |
| }, |
| { |
| "epoch": 2.0683760683760686, |
| "grad_norm": 0.2053651992461902, |
| "learning_rate": 1.7317845828933475e-05, |
| "loss": 0.2739, |
| "step": 726 |
| }, |
| { |
| "epoch": 2.0712250712250713, |
| "grad_norm": 0.2041941282028662, |
| "learning_rate": 1.726504751847941e-05, |
| "loss": 0.2787, |
| "step": 727 |
| }, |
| { |
| "epoch": 2.074074074074074, |
| "grad_norm": 0.2289119309925019, |
| "learning_rate": 1.7212249208025344e-05, |
| "loss": 0.276, |
| "step": 728 |
| }, |
| { |
| "epoch": 2.076923076923077, |
| "grad_norm": 0.20075289027919085, |
| "learning_rate": 1.7159450897571277e-05, |
| "loss": 0.2754, |
| "step": 729 |
| }, |
| { |
| "epoch": 2.07977207977208, |
| "grad_norm": 0.1910485513723653, |
| "learning_rate": 1.7106652587117213e-05, |
| "loss": 0.274, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.0826210826210825, |
| "grad_norm": 0.2075379358650543, |
| "learning_rate": 1.7053854276663146e-05, |
| "loss": 0.2724, |
| "step": 731 |
| }, |
| { |
| "epoch": 2.0854700854700856, |
| "grad_norm": 0.21528112160170912, |
| "learning_rate": 1.7001055966209083e-05, |
| "loss": 0.2726, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.0883190883190883, |
| "grad_norm": 0.2110995481791853, |
| "learning_rate": 1.694825765575502e-05, |
| "loss": 0.2772, |
| "step": 733 |
| }, |
| { |
| "epoch": 2.091168091168091, |
| "grad_norm": 0.1829351870654865, |
| "learning_rate": 1.6895459345300952e-05, |
| "loss": 0.2704, |
| "step": 734 |
| }, |
| { |
| "epoch": 2.094017094017094, |
| "grad_norm": 0.19540194440796962, |
| "learning_rate": 1.6842661034846885e-05, |
| "loss": 0.2658, |
| "step": 735 |
| }, |
| { |
| "epoch": 2.096866096866097, |
| "grad_norm": 0.21657896164501325, |
| "learning_rate": 1.678986272439282e-05, |
| "loss": 0.2746, |
| "step": 736 |
| }, |
| { |
| "epoch": 2.0997150997150995, |
| "grad_norm": 0.19563365784205655, |
| "learning_rate": 1.6737064413938755e-05, |
| "loss": 0.2652, |
| "step": 737 |
| }, |
| { |
| "epoch": 2.1025641025641026, |
| "grad_norm": 0.19212356110155093, |
| "learning_rate": 1.6684266103484688e-05, |
| "loss": 0.263, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.1054131054131053, |
| "grad_norm": 0.20460727383155605, |
| "learning_rate": 1.6631467793030624e-05, |
| "loss": 0.2742, |
| "step": 739 |
| }, |
| { |
| "epoch": 2.1082621082621085, |
| "grad_norm": 0.21427650674256282, |
| "learning_rate": 1.657866948257656e-05, |
| "loss": 0.2825, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.111111111111111, |
| "grad_norm": 0.20672190281174416, |
| "learning_rate": 1.6525871172122493e-05, |
| "loss": 0.2839, |
| "step": 741 |
| }, |
| { |
| "epoch": 2.113960113960114, |
| "grad_norm": 0.20211237414090671, |
| "learning_rate": 1.647307286166843e-05, |
| "loss": 0.2826, |
| "step": 742 |
| }, |
| { |
| "epoch": 2.116809116809117, |
| "grad_norm": 0.19195099076895822, |
| "learning_rate": 1.6420274551214363e-05, |
| "loss": 0.2759, |
| "step": 743 |
| }, |
| { |
| "epoch": 2.1196581196581197, |
| "grad_norm": 0.20560314548089362, |
| "learning_rate": 1.6367476240760296e-05, |
| "loss": 0.2681, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.1225071225071224, |
| "grad_norm": 0.21253013838923676, |
| "learning_rate": 1.631467793030623e-05, |
| "loss": 0.2691, |
| "step": 745 |
| }, |
| { |
| "epoch": 2.1253561253561255, |
| "grad_norm": 0.19213504270638337, |
| "learning_rate": 1.6261879619852165e-05, |
| "loss": 0.2709, |
| "step": 746 |
| }, |
| { |
| "epoch": 2.128205128205128, |
| "grad_norm": 0.19810360169754043, |
| "learning_rate": 1.6209081309398098e-05, |
| "loss": 0.2681, |
| "step": 747 |
| }, |
| { |
| "epoch": 2.131054131054131, |
| "grad_norm": 0.8925954469518941, |
| "learning_rate": 1.6156282998944034e-05, |
| "loss": 0.3358, |
| "step": 748 |
| }, |
| { |
| "epoch": 2.133903133903134, |
| "grad_norm": 0.21118384990812827, |
| "learning_rate": 1.610348468848997e-05, |
| "loss": 0.2834, |
| "step": 749 |
| }, |
| { |
| "epoch": 2.1367521367521367, |
| "grad_norm": 0.22061422429768562, |
| "learning_rate": 1.6050686378035904e-05, |
| "loss": 0.2782, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.1396011396011394, |
| "grad_norm": 0.2124786481646926, |
| "learning_rate": 1.5997888067581837e-05, |
| "loss": 0.2998, |
| "step": 751 |
| }, |
| { |
| "epoch": 2.1424501424501425, |
| "grad_norm": 0.20503289997556293, |
| "learning_rate": 1.5945089757127773e-05, |
| "loss": 0.2807, |
| "step": 752 |
| }, |
| { |
| "epoch": 2.1452991452991452, |
| "grad_norm": 0.22522249962157764, |
| "learning_rate": 1.5892291446673706e-05, |
| "loss": 0.2806, |
| "step": 753 |
| }, |
| { |
| "epoch": 2.148148148148148, |
| "grad_norm": 0.22270532746805854, |
| "learning_rate": 1.583949313621964e-05, |
| "loss": 0.273, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.150997150997151, |
| "grad_norm": 0.19772734044545245, |
| "learning_rate": 1.5786694825765576e-05, |
| "loss": 0.272, |
| "step": 755 |
| }, |
| { |
| "epoch": 2.1538461538461537, |
| "grad_norm": 0.1967943644277297, |
| "learning_rate": 1.5733896515311512e-05, |
| "loss": 0.2753, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.156695156695157, |
| "grad_norm": 0.19154676700453233, |
| "learning_rate": 1.5681098204857445e-05, |
| "loss": 0.2709, |
| "step": 757 |
| }, |
| { |
| "epoch": 2.1595441595441596, |
| "grad_norm": 0.2221016196227244, |
| "learning_rate": 1.562829989440338e-05, |
| "loss": 0.2823, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.1623931623931623, |
| "grad_norm": 0.1937702666618624, |
| "learning_rate": 1.5575501583949314e-05, |
| "loss": 0.2645, |
| "step": 759 |
| }, |
| { |
| "epoch": 2.1652421652421654, |
| "grad_norm": 0.1969269538576539, |
| "learning_rate": 1.5522703273495247e-05, |
| "loss": 0.2761, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.168091168091168, |
| "grad_norm": 0.20395502697309204, |
| "learning_rate": 1.5469904963041184e-05, |
| "loss": 0.2792, |
| "step": 761 |
| }, |
| { |
| "epoch": 2.1709401709401708, |
| "grad_norm": 0.20751492020213136, |
| "learning_rate": 1.5417106652587117e-05, |
| "loss": 0.2623, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.173789173789174, |
| "grad_norm": 0.19379484649828319, |
| "learning_rate": 1.5364308342133053e-05, |
| "loss": 0.2606, |
| "step": 763 |
| }, |
| { |
| "epoch": 2.1766381766381766, |
| "grad_norm": 0.18718035792100948, |
| "learning_rate": 1.531151003167899e-05, |
| "loss": 0.2751, |
| "step": 764 |
| }, |
| { |
| "epoch": 2.1794871794871793, |
| "grad_norm": 0.19066401480212206, |
| "learning_rate": 1.5258711721224923e-05, |
| "loss": 0.2732, |
| "step": 765 |
| }, |
| { |
| "epoch": 2.1823361823361824, |
| "grad_norm": 0.20925353974218774, |
| "learning_rate": 1.5205913410770856e-05, |
| "loss": 0.2881, |
| "step": 766 |
| }, |
| { |
| "epoch": 2.185185185185185, |
| "grad_norm": 0.20547467764539895, |
| "learning_rate": 1.5153115100316789e-05, |
| "loss": 0.2638, |
| "step": 767 |
| }, |
| { |
| "epoch": 2.1880341880341883, |
| "grad_norm": 0.19527197604782076, |
| "learning_rate": 1.5100316789862725e-05, |
| "loss": 0.2783, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.190883190883191, |
| "grad_norm": 0.1942190592627724, |
| "learning_rate": 1.504751847940866e-05, |
| "loss": 0.2709, |
| "step": 769 |
| }, |
| { |
| "epoch": 2.1937321937321936, |
| "grad_norm": 0.2103546914669498, |
| "learning_rate": 1.4994720168954593e-05, |
| "loss": 0.2703, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.1965811965811968, |
| "grad_norm": 0.20158953401192725, |
| "learning_rate": 1.4941921858500529e-05, |
| "loss": 0.2786, |
| "step": 771 |
| }, |
| { |
| "epoch": 2.1994301994301995, |
| "grad_norm": 0.18874398829424063, |
| "learning_rate": 1.4889123548046464e-05, |
| "loss": 0.2625, |
| "step": 772 |
| }, |
| { |
| "epoch": 2.202279202279202, |
| "grad_norm": 0.1919940823226646, |
| "learning_rate": 1.4836325237592397e-05, |
| "loss": 0.2819, |
| "step": 773 |
| }, |
| { |
| "epoch": 2.2051282051282053, |
| "grad_norm": 0.20270992368788238, |
| "learning_rate": 1.4783526927138333e-05, |
| "loss": 0.2746, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.207977207977208, |
| "grad_norm": 0.19972156431692997, |
| "learning_rate": 1.4730728616684266e-05, |
| "loss": 0.277, |
| "step": 775 |
| }, |
| { |
| "epoch": 2.2108262108262107, |
| "grad_norm": 0.2060428169408646, |
| "learning_rate": 1.46779303062302e-05, |
| "loss": 0.2866, |
| "step": 776 |
| }, |
| { |
| "epoch": 2.213675213675214, |
| "grad_norm": 0.2180940293343127, |
| "learning_rate": 1.4625131995776137e-05, |
| "loss": 0.2664, |
| "step": 777 |
| }, |
| { |
| "epoch": 2.2165242165242165, |
| "grad_norm": 0.19764083611553848, |
| "learning_rate": 1.457233368532207e-05, |
| "loss": 0.2698, |
| "step": 778 |
| }, |
| { |
| "epoch": 2.219373219373219, |
| "grad_norm": 0.18295950495897126, |
| "learning_rate": 1.4519535374868005e-05, |
| "loss": 0.266, |
| "step": 779 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 0.20138778009514838, |
| "learning_rate": 1.4466737064413941e-05, |
| "loss": 0.2732, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.225071225071225, |
| "grad_norm": 0.21789436891588496, |
| "learning_rate": 1.4413938753959874e-05, |
| "loss": 0.2842, |
| "step": 781 |
| }, |
| { |
| "epoch": 2.2279202279202277, |
| "grad_norm": 0.19845024094048266, |
| "learning_rate": 1.4361140443505807e-05, |
| "loss": 0.2617, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.230769230769231, |
| "grad_norm": 0.20136450371041287, |
| "learning_rate": 1.4308342133051744e-05, |
| "loss": 0.2673, |
| "step": 783 |
| }, |
| { |
| "epoch": 2.2336182336182335, |
| "grad_norm": 0.20064270708774173, |
| "learning_rate": 1.4255543822597678e-05, |
| "loss": 0.2635, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.2364672364672367, |
| "grad_norm": 0.20230154595496835, |
| "learning_rate": 1.4202745512143611e-05, |
| "loss": 0.2929, |
| "step": 785 |
| }, |
| { |
| "epoch": 2.2393162393162394, |
| "grad_norm": 0.201820653024005, |
| "learning_rate": 1.4149947201689548e-05, |
| "loss": 0.2834, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.242165242165242, |
| "grad_norm": 0.19121793530935455, |
| "learning_rate": 1.409714889123548e-05, |
| "loss": 0.2625, |
| "step": 787 |
| }, |
| { |
| "epoch": 2.245014245014245, |
| "grad_norm": 0.19330950900844268, |
| "learning_rate": 1.4044350580781415e-05, |
| "loss": 0.2772, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.247863247863248, |
| "grad_norm": 0.19306556715969658, |
| "learning_rate": 1.3991552270327348e-05, |
| "loss": 0.2627, |
| "step": 789 |
| }, |
| { |
| "epoch": 2.2507122507122506, |
| "grad_norm": 0.19901233397623183, |
| "learning_rate": 1.3938753959873285e-05, |
| "loss": 0.2696, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.2535612535612537, |
| "grad_norm": 0.19925310664104037, |
| "learning_rate": 1.388595564941922e-05, |
| "loss": 0.2584, |
| "step": 791 |
| }, |
| { |
| "epoch": 2.2564102564102564, |
| "grad_norm": 0.2034999029639149, |
| "learning_rate": 1.3833157338965153e-05, |
| "loss": 0.2966, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.259259259259259, |
| "grad_norm": 0.19015133424936778, |
| "learning_rate": 1.3780359028511089e-05, |
| "loss": 0.2865, |
| "step": 793 |
| }, |
| { |
| "epoch": 2.262108262108262, |
| "grad_norm": 0.22328712353887642, |
| "learning_rate": 1.3727560718057022e-05, |
| "loss": 0.2772, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.264957264957265, |
| "grad_norm": 0.19798853546327766, |
| "learning_rate": 1.3674762407602957e-05, |
| "loss": 0.2632, |
| "step": 795 |
| }, |
| { |
| "epoch": 2.267806267806268, |
| "grad_norm": 0.18631225104292679, |
| "learning_rate": 1.3621964097148893e-05, |
| "loss": 0.2775, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.2706552706552707, |
| "grad_norm": 0.1909878525568524, |
| "learning_rate": 1.3569165786694826e-05, |
| "loss": 0.2848, |
| "step": 797 |
| }, |
| { |
| "epoch": 2.2735042735042734, |
| "grad_norm": 0.2068674181031461, |
| "learning_rate": 1.351636747624076e-05, |
| "loss": 0.2754, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.2763532763532766, |
| "grad_norm": 0.20247748116558922, |
| "learning_rate": 1.3463569165786697e-05, |
| "loss": 0.2681, |
| "step": 799 |
| }, |
| { |
| "epoch": 2.2792022792022792, |
| "grad_norm": 0.19577250906503782, |
| "learning_rate": 1.341077085533263e-05, |
| "loss": 0.2698, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.282051282051282, |
| "grad_norm": 0.192461914460849, |
| "learning_rate": 1.3357972544878563e-05, |
| "loss": 0.2624, |
| "step": 801 |
| }, |
| { |
| "epoch": 2.284900284900285, |
| "grad_norm": 0.21973379890078096, |
| "learning_rate": 1.33051742344245e-05, |
| "loss": 0.2754, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.2877492877492878, |
| "grad_norm": 0.19582967491065492, |
| "learning_rate": 1.3252375923970434e-05, |
| "loss": 0.277, |
| "step": 803 |
| }, |
| { |
| "epoch": 2.2905982905982905, |
| "grad_norm": 0.19580304220995717, |
| "learning_rate": 1.3199577613516367e-05, |
| "loss": 0.2716, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.2934472934472936, |
| "grad_norm": 0.2588055907574961, |
| "learning_rate": 1.3146779303062304e-05, |
| "loss": 0.2772, |
| "step": 805 |
| }, |
| { |
| "epoch": 2.2962962962962963, |
| "grad_norm": 0.1985796106477907, |
| "learning_rate": 1.3093980992608237e-05, |
| "loss": 0.2725, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.299145299145299, |
| "grad_norm": 0.21599405795203994, |
| "learning_rate": 1.3041182682154171e-05, |
| "loss": 0.2924, |
| "step": 807 |
| }, |
| { |
| "epoch": 2.301994301994302, |
| "grad_norm": 0.20158022481963558, |
| "learning_rate": 1.2988384371700108e-05, |
| "loss": 0.2885, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.304843304843305, |
| "grad_norm": 0.18745667148509224, |
| "learning_rate": 1.293558606124604e-05, |
| "loss": 0.2943, |
| "step": 809 |
| }, |
| { |
| "epoch": 2.3076923076923075, |
| "grad_norm": 0.2065772682516858, |
| "learning_rate": 1.2882787750791975e-05, |
| "loss": 0.2778, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.3105413105413106, |
| "grad_norm": 0.2170049193233547, |
| "learning_rate": 1.2829989440337912e-05, |
| "loss": 0.2842, |
| "step": 811 |
| }, |
| { |
| "epoch": 2.3133903133903133, |
| "grad_norm": 0.20199308409143069, |
| "learning_rate": 1.2777191129883845e-05, |
| "loss": 0.2717, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.316239316239316, |
| "grad_norm": 0.20699440291292984, |
| "learning_rate": 1.2724392819429778e-05, |
| "loss": 0.2925, |
| "step": 813 |
| }, |
| { |
| "epoch": 2.319088319088319, |
| "grad_norm": 0.19508346223108827, |
| "learning_rate": 1.2671594508975712e-05, |
| "loss": 0.277, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.321937321937322, |
| "grad_norm": 0.19079264766629775, |
| "learning_rate": 1.2618796198521649e-05, |
| "loss": 0.2778, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.324786324786325, |
| "grad_norm": 0.18964264325936672, |
| "learning_rate": 1.2565997888067582e-05, |
| "loss": 0.2782, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.3276353276353277, |
| "grad_norm": 0.20280215258566556, |
| "learning_rate": 1.2513199577613517e-05, |
| "loss": 0.2893, |
| "step": 817 |
| }, |
| { |
| "epoch": 2.3304843304843303, |
| "grad_norm": 0.2156167038301731, |
| "learning_rate": 1.2460401267159451e-05, |
| "loss": 0.2854, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 0.1964934145520702, |
| "learning_rate": 1.2407602956705386e-05, |
| "loss": 0.2789, |
| "step": 819 |
| }, |
| { |
| "epoch": 2.336182336182336, |
| "grad_norm": 0.20506845724808467, |
| "learning_rate": 1.235480464625132e-05, |
| "loss": 0.2837, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.339031339031339, |
| "grad_norm": 0.20608756180445, |
| "learning_rate": 1.2302006335797255e-05, |
| "loss": 0.2867, |
| "step": 821 |
| }, |
| { |
| "epoch": 2.341880341880342, |
| "grad_norm": 0.22728463316725367, |
| "learning_rate": 1.224920802534319e-05, |
| "loss": 0.2879, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.3447293447293447, |
| "grad_norm": 0.21134582426808612, |
| "learning_rate": 1.2196409714889125e-05, |
| "loss": 0.2706, |
| "step": 823 |
| }, |
| { |
| "epoch": 2.347578347578348, |
| "grad_norm": 0.2117202491888567, |
| "learning_rate": 1.2143611404435058e-05, |
| "loss": 0.2901, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.3504273504273505, |
| "grad_norm": 0.20417464138588973, |
| "learning_rate": 1.2090813093980994e-05, |
| "loss": 0.2838, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.353276353276353, |
| "grad_norm": 0.1996078258358169, |
| "learning_rate": 1.2038014783526929e-05, |
| "loss": 0.2669, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.3561253561253563, |
| "grad_norm": 0.22247317080848442, |
| "learning_rate": 1.1985216473072862e-05, |
| "loss": 0.2855, |
| "step": 827 |
| }, |
| { |
| "epoch": 2.358974358974359, |
| "grad_norm": 0.20677945710130288, |
| "learning_rate": 1.1932418162618796e-05, |
| "loss": 0.2617, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.3618233618233617, |
| "grad_norm": 0.22160179441363054, |
| "learning_rate": 1.1879619852164731e-05, |
| "loss": 0.2756, |
| "step": 829 |
| }, |
| { |
| "epoch": 2.364672364672365, |
| "grad_norm": 0.22587987413436983, |
| "learning_rate": 1.1826821541710666e-05, |
| "loss": 0.2817, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.3675213675213675, |
| "grad_norm": 0.20154794971028572, |
| "learning_rate": 1.17740232312566e-05, |
| "loss": 0.2651, |
| "step": 831 |
| }, |
| { |
| "epoch": 2.3703703703703702, |
| "grad_norm": 0.20403481294874842, |
| "learning_rate": 1.1721224920802534e-05, |
| "loss": 0.2775, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.3732193732193734, |
| "grad_norm": 0.22073919271992984, |
| "learning_rate": 1.166842661034847e-05, |
| "loss": 0.2778, |
| "step": 833 |
| }, |
| { |
| "epoch": 2.376068376068376, |
| "grad_norm": 0.22085826451951224, |
| "learning_rate": 1.1615628299894405e-05, |
| "loss": 0.2716, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.3789173789173788, |
| "grad_norm": 0.20286442068216853, |
| "learning_rate": 1.1562829989440338e-05, |
| "loss": 0.2725, |
| "step": 835 |
| }, |
| { |
| "epoch": 2.381766381766382, |
| "grad_norm": 0.20066512198655337, |
| "learning_rate": 1.1510031678986272e-05, |
| "loss": 0.2791, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.3846153846153846, |
| "grad_norm": 0.2138581117365153, |
| "learning_rate": 1.1457233368532209e-05, |
| "loss": 0.2748, |
| "step": 837 |
| }, |
| { |
| "epoch": 2.3874643874643873, |
| "grad_norm": 0.21527795194888655, |
| "learning_rate": 1.1404435058078142e-05, |
| "loss": 0.2742, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.3903133903133904, |
| "grad_norm": 0.20801036949744942, |
| "learning_rate": 1.1351636747624076e-05, |
| "loss": 0.2797, |
| "step": 839 |
| }, |
| { |
| "epoch": 2.393162393162393, |
| "grad_norm": 0.20792622413478573, |
| "learning_rate": 1.1298838437170011e-05, |
| "loss": 0.282, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.396011396011396, |
| "grad_norm": 0.22033246310576216, |
| "learning_rate": 1.1246040126715946e-05, |
| "loss": 0.279, |
| "step": 841 |
| }, |
| { |
| "epoch": 2.398860398860399, |
| "grad_norm": 0.1967422885160002, |
| "learning_rate": 1.119324181626188e-05, |
| "loss": 0.2811, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.4017094017094016, |
| "grad_norm": 0.1814858766804614, |
| "learning_rate": 1.1140443505807813e-05, |
| "loss": 0.2608, |
| "step": 843 |
| }, |
| { |
| "epoch": 2.4045584045584047, |
| "grad_norm": 0.19499452238873044, |
| "learning_rate": 1.108764519535375e-05, |
| "loss": 0.2678, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.4074074074074074, |
| "grad_norm": 0.2102124248647648, |
| "learning_rate": 1.1034846884899685e-05, |
| "loss": 0.2818, |
| "step": 845 |
| }, |
| { |
| "epoch": 2.41025641025641, |
| "grad_norm": 0.19814574523643397, |
| "learning_rate": 1.0982048574445618e-05, |
| "loss": 0.2653, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.4131054131054133, |
| "grad_norm": 0.18054162144741087, |
| "learning_rate": 1.0929250263991552e-05, |
| "loss": 0.2695, |
| "step": 847 |
| }, |
| { |
| "epoch": 2.415954415954416, |
| "grad_norm": 0.1808318357229325, |
| "learning_rate": 1.0876451953537489e-05, |
| "loss": 0.2801, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.4188034188034186, |
| "grad_norm": 0.19987403185338987, |
| "learning_rate": 1.0823653643083422e-05, |
| "loss": 0.2721, |
| "step": 849 |
| }, |
| { |
| "epoch": 2.421652421652422, |
| "grad_norm": 0.2248142211042877, |
| "learning_rate": 1.0770855332629356e-05, |
| "loss": 0.2843, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.4245014245014245, |
| "grad_norm": 0.1964174663784782, |
| "learning_rate": 1.0718057022175291e-05, |
| "loss": 0.2834, |
| "step": 851 |
| }, |
| { |
| "epoch": 2.427350427350427, |
| "grad_norm": 0.18315937428538173, |
| "learning_rate": 1.0665258711721226e-05, |
| "loss": 0.267, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.4301994301994303, |
| "grad_norm": 0.1926077433772735, |
| "learning_rate": 1.061246040126716e-05, |
| "loss": 0.2748, |
| "step": 853 |
| }, |
| { |
| "epoch": 2.433048433048433, |
| "grad_norm": 0.20408583222643678, |
| "learning_rate": 1.0559662090813093e-05, |
| "loss": 0.2776, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.435897435897436, |
| "grad_norm": 0.1973007081934127, |
| "learning_rate": 1.0506863780359028e-05, |
| "loss": 0.2748, |
| "step": 855 |
| }, |
| { |
| "epoch": 2.438746438746439, |
| "grad_norm": 0.18467705527546302, |
| "learning_rate": 1.0454065469904964e-05, |
| "loss": 0.2767, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.4415954415954415, |
| "grad_norm": 0.18392804629969942, |
| "learning_rate": 1.0401267159450897e-05, |
| "loss": 0.2803, |
| "step": 857 |
| }, |
| { |
| "epoch": 2.4444444444444446, |
| "grad_norm": 0.19726145099036824, |
| "learning_rate": 1.0348468848996832e-05, |
| "loss": 0.2763, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.4472934472934473, |
| "grad_norm": 0.19595567470607117, |
| "learning_rate": 1.0295670538542767e-05, |
| "loss": 0.288, |
| "step": 859 |
| }, |
| { |
| "epoch": 2.45014245014245, |
| "grad_norm": 0.19049627669313532, |
| "learning_rate": 1.0242872228088702e-05, |
| "loss": 0.2804, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.452991452991453, |
| "grad_norm": 0.19465038042198368, |
| "learning_rate": 1.0190073917634636e-05, |
| "loss": 0.2853, |
| "step": 861 |
| }, |
| { |
| "epoch": 2.455840455840456, |
| "grad_norm": 0.18907018318647903, |
| "learning_rate": 1.0137275607180571e-05, |
| "loss": 0.262, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.4586894586894585, |
| "grad_norm": 0.2117775134978185, |
| "learning_rate": 1.0084477296726506e-05, |
| "loss": 0.2832, |
| "step": 863 |
| }, |
| { |
| "epoch": 2.4615384615384617, |
| "grad_norm": 0.18369057037488082, |
| "learning_rate": 1.003167898627244e-05, |
| "loss": 0.2818, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.4643874643874644, |
| "grad_norm": 0.1938139353246623, |
| "learning_rate": 9.978880675818373e-06, |
| "loss": 0.2687, |
| "step": 865 |
| }, |
| { |
| "epoch": 2.467236467236467, |
| "grad_norm": 0.21926317040236148, |
| "learning_rate": 9.926082365364308e-06, |
| "loss": 0.2762, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.47008547008547, |
| "grad_norm": 0.20036724393686023, |
| "learning_rate": 9.873284054910244e-06, |
| "loss": 0.2788, |
| "step": 867 |
| }, |
| { |
| "epoch": 2.472934472934473, |
| "grad_norm": 0.195455632630333, |
| "learning_rate": 9.820485744456177e-06, |
| "loss": 0.2712, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.4757834757834756, |
| "grad_norm": 0.19688632965075772, |
| "learning_rate": 9.767687434002112e-06, |
| "loss": 0.2928, |
| "step": 869 |
| }, |
| { |
| "epoch": 2.4786324786324787, |
| "grad_norm": 0.196362275701545, |
| "learning_rate": 9.714889123548047e-06, |
| "loss": 0.2856, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.4814814814814814, |
| "grad_norm": 0.19446432785612933, |
| "learning_rate": 9.662090813093982e-06, |
| "loss": 0.279, |
| "step": 871 |
| }, |
| { |
| "epoch": 2.484330484330484, |
| "grad_norm": 0.1879688895780496, |
| "learning_rate": 9.609292502639916e-06, |
| "loss": 0.2854, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.4871794871794872, |
| "grad_norm": 0.19437610153561674, |
| "learning_rate": 9.556494192185851e-06, |
| "loss": 0.2738, |
| "step": 873 |
| }, |
| { |
| "epoch": 2.49002849002849, |
| "grad_norm": 0.20143365503846905, |
| "learning_rate": 9.503695881731786e-06, |
| "loss": 0.2759, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.492877492877493, |
| "grad_norm": 0.1935287651459583, |
| "learning_rate": 9.45089757127772e-06, |
| "loss": 0.2835, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.4957264957264957, |
| "grad_norm": 0.19317522985073163, |
| "learning_rate": 9.398099260823653e-06, |
| "loss": 0.2858, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.4985754985754984, |
| "grad_norm": 0.2012616552901727, |
| "learning_rate": 9.345300950369588e-06, |
| "loss": 0.2686, |
| "step": 877 |
| }, |
| { |
| "epoch": 2.5014245014245016, |
| "grad_norm": 0.18496934505004578, |
| "learning_rate": 9.292502639915523e-06, |
| "loss": 0.2649, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.5042735042735043, |
| "grad_norm": 0.1952709023463477, |
| "learning_rate": 9.239704329461457e-06, |
| "loss": 0.2765, |
| "step": 879 |
| }, |
| { |
| "epoch": 2.5071225071225074, |
| "grad_norm": 0.1879189333509987, |
| "learning_rate": 9.186906019007392e-06, |
| "loss": 0.2832, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.50997150997151, |
| "grad_norm": 0.18950447218698951, |
| "learning_rate": 9.134107708553327e-06, |
| "loss": 0.2843, |
| "step": 881 |
| }, |
| { |
| "epoch": 2.5128205128205128, |
| "grad_norm": 0.18653191496871754, |
| "learning_rate": 9.081309398099261e-06, |
| "loss": 0.277, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.515669515669516, |
| "grad_norm": 0.17645739186127074, |
| "learning_rate": 9.028511087645196e-06, |
| "loss": 0.2686, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.5185185185185186, |
| "grad_norm": 0.18764217708119302, |
| "learning_rate": 8.97571277719113e-06, |
| "loss": 0.284, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.5213675213675213, |
| "grad_norm": 0.19467937140297095, |
| "learning_rate": 8.922914466737064e-06, |
| "loss": 0.2717, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.5242165242165244, |
| "grad_norm": 0.19792747219286208, |
| "learning_rate": 8.870116156283e-06, |
| "loss": 0.2855, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.527065527065527, |
| "grad_norm": 0.18656641235510737, |
| "learning_rate": 8.817317845828933e-06, |
| "loss": 0.274, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.52991452991453, |
| "grad_norm": 0.19826113067880127, |
| "learning_rate": 8.764519535374868e-06, |
| "loss": 0.2973, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.532763532763533, |
| "grad_norm": 0.19491004642921803, |
| "learning_rate": 8.711721224920803e-06, |
| "loss": 0.2755, |
| "step": 889 |
| }, |
| { |
| "epoch": 2.5356125356125356, |
| "grad_norm": 0.2012953347472746, |
| "learning_rate": 8.658922914466737e-06, |
| "loss": 0.2836, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.5384615384615383, |
| "grad_norm": 0.19357544104347177, |
| "learning_rate": 8.606124604012672e-06, |
| "loss": 0.2671, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.5413105413105415, |
| "grad_norm": 0.18892143464854252, |
| "learning_rate": 8.553326293558607e-06, |
| "loss": 0.2733, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.544159544159544, |
| "grad_norm": 0.6265508514551001, |
| "learning_rate": 8.500527983104541e-06, |
| "loss": 0.3024, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.547008547008547, |
| "grad_norm": 0.180843319457107, |
| "learning_rate": 8.447729672650476e-06, |
| "loss": 0.2758, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.54985754985755, |
| "grad_norm": 0.18824519820336835, |
| "learning_rate": 8.39493136219641e-06, |
| "loss": 0.2777, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.5527065527065527, |
| "grad_norm": 0.18679719864151892, |
| "learning_rate": 8.342133051742344e-06, |
| "loss": 0.273, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.5555555555555554, |
| "grad_norm": 0.18954916069648725, |
| "learning_rate": 8.28933474128828e-06, |
| "loss": 0.271, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.5584045584045585, |
| "grad_norm": 0.18840475356548164, |
| "learning_rate": 8.236536430834215e-06, |
| "loss": 0.2634, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.561253561253561, |
| "grad_norm": 0.19100598305938737, |
| "learning_rate": 8.183738120380148e-06, |
| "loss": 0.281, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.564102564102564, |
| "grad_norm": 0.1759624339368046, |
| "learning_rate": 8.130939809926083e-06, |
| "loss": 0.2651, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.566951566951567, |
| "grad_norm": 0.19734582647437332, |
| "learning_rate": 8.078141499472017e-06, |
| "loss": 0.2638, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.5698005698005697, |
| "grad_norm": 0.19411668207145158, |
| "learning_rate": 8.025343189017952e-06, |
| "loss": 0.2736, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.5726495726495724, |
| "grad_norm": 0.19879584464405567, |
| "learning_rate": 7.972544878563887e-06, |
| "loss": 0.2769, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.5754985754985755, |
| "grad_norm": 0.1927813426522461, |
| "learning_rate": 7.91974656810982e-06, |
| "loss": 0.2764, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.578347578347578, |
| "grad_norm": 0.19856911323488508, |
| "learning_rate": 7.866948257655756e-06, |
| "loss": 0.292, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.5811965811965814, |
| "grad_norm": 0.17679245135236604, |
| "learning_rate": 7.81414994720169e-06, |
| "loss": 0.2785, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.584045584045584, |
| "grad_norm": 0.19567448364102336, |
| "learning_rate": 7.761351636747624e-06, |
| "loss": 0.2867, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.5868945868945867, |
| "grad_norm": 0.20115854063114447, |
| "learning_rate": 7.708553326293558e-06, |
| "loss": 0.2713, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.58974358974359, |
| "grad_norm": 0.18545206529638672, |
| "learning_rate": 7.655755015839495e-06, |
| "loss": 0.2634, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.5925925925925926, |
| "grad_norm": 0.19495758015732068, |
| "learning_rate": 7.602956705385428e-06, |
| "loss": 0.2723, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.5954415954415957, |
| "grad_norm": 0.18613675067205288, |
| "learning_rate": 7.5501583949313625e-06, |
| "loss": 0.29, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.5982905982905984, |
| "grad_norm": 0.19754545334787374, |
| "learning_rate": 7.497360084477296e-06, |
| "loss": 0.2716, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.601139601139601, |
| "grad_norm": 0.1949333474464277, |
| "learning_rate": 7.444561774023232e-06, |
| "loss": 0.2786, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.603988603988604, |
| "grad_norm": 0.1866184206464396, |
| "learning_rate": 7.3917634635691666e-06, |
| "loss": 0.2845, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.606837606837607, |
| "grad_norm": 0.195676886907894, |
| "learning_rate": 7.3389651531151e-06, |
| "loss": 0.2769, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.6096866096866096, |
| "grad_norm": 0.18429715351385098, |
| "learning_rate": 7.286166842661035e-06, |
| "loss": 0.2756, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.6125356125356127, |
| "grad_norm": 0.18772973110071034, |
| "learning_rate": 7.233368532206971e-06, |
| "loss": 0.2559, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.6153846153846154, |
| "grad_norm": 0.1836610271469343, |
| "learning_rate": 7.180570221752904e-06, |
| "loss": 0.2681, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.618233618233618, |
| "grad_norm": 0.1754155052969751, |
| "learning_rate": 7.127771911298839e-06, |
| "loss": 0.272, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.6210826210826212, |
| "grad_norm": 0.19652318272004426, |
| "learning_rate": 7.074973600844774e-06, |
| "loss": 0.2686, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.623931623931624, |
| "grad_norm": 0.17933290326611778, |
| "learning_rate": 7.022175290390708e-06, |
| "loss": 0.2759, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.6267806267806266, |
| "grad_norm": 0.18155759050319922, |
| "learning_rate": 6.9693769799366424e-06, |
| "loss": 0.2701, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.6296296296296298, |
| "grad_norm": 0.18892897612236667, |
| "learning_rate": 6.916578669482576e-06, |
| "loss": 0.2769, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.6324786324786325, |
| "grad_norm": 0.18477414784333626, |
| "learning_rate": 6.863780359028511e-06, |
| "loss": 0.2696, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.635327635327635, |
| "grad_norm": 0.1797448137751472, |
| "learning_rate": 6.8109820485744465e-06, |
| "loss": 0.2721, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.6381766381766383, |
| "grad_norm": 0.18868102200145584, |
| "learning_rate": 6.75818373812038e-06, |
| "loss": 0.2654, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.641025641025641, |
| "grad_norm": 0.18760249991357172, |
| "learning_rate": 6.705385427666315e-06, |
| "loss": 0.2883, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.6438746438746437, |
| "grad_norm": 0.1792089439057089, |
| "learning_rate": 6.65258711721225e-06, |
| "loss": 0.2631, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.646723646723647, |
| "grad_norm": 0.18382428762667938, |
| "learning_rate": 6.599788806758184e-06, |
| "loss": 0.2832, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.6495726495726495, |
| "grad_norm": 0.18732937470339528, |
| "learning_rate": 6.546990496304118e-06, |
| "loss": 0.2885, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.652421652421652, |
| "grad_norm": 0.18238422692769168, |
| "learning_rate": 6.494192185850054e-06, |
| "loss": 0.2714, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.6552706552706553, |
| "grad_norm": 0.6796049281047194, |
| "learning_rate": 6.441393875395988e-06, |
| "loss": 0.2736, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.658119658119658, |
| "grad_norm": 0.1843964132149436, |
| "learning_rate": 6.388595564941922e-06, |
| "loss": 0.2812, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.6609686609686607, |
| "grad_norm": 0.1883869870506764, |
| "learning_rate": 6.335797254487856e-06, |
| "loss": 0.2876, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.663817663817664, |
| "grad_norm": 0.19372865429873587, |
| "learning_rate": 6.282998944033791e-06, |
| "loss": 0.2721, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.17475184508734368, |
| "learning_rate": 6.230200633579726e-06, |
| "loss": 0.2678, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.6695156695156697, |
| "grad_norm": 0.19140736568353386, |
| "learning_rate": 6.17740232312566e-06, |
| "loss": 0.2694, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.6723646723646723, |
| "grad_norm": 0.19095212031796968, |
| "learning_rate": 6.124604012671595e-06, |
| "loss": 0.2714, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.6752136752136755, |
| "grad_norm": 0.18688967708133333, |
| "learning_rate": 6.071805702217529e-06, |
| "loss": 0.2777, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.678062678062678, |
| "grad_norm": 0.1808730509793612, |
| "learning_rate": 6.019007391763464e-06, |
| "loss": 0.2867, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.680911680911681, |
| "grad_norm": 0.1914570882813849, |
| "learning_rate": 5.966209081309398e-06, |
| "loss": 0.2752, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.683760683760684, |
| "grad_norm": 0.18796118324257888, |
| "learning_rate": 5.913410770855333e-06, |
| "loss": 0.282, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.6866096866096867, |
| "grad_norm": 0.19242846471973257, |
| "learning_rate": 5.860612460401267e-06, |
| "loss": 0.2815, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.6894586894586894, |
| "grad_norm": 0.19514299446898078, |
| "learning_rate": 5.807814149947202e-06, |
| "loss": 0.2655, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.6923076923076925, |
| "grad_norm": 0.1879893908540662, |
| "learning_rate": 5.755015839493136e-06, |
| "loss": 0.2857, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.695156695156695, |
| "grad_norm": 0.19017667637491129, |
| "learning_rate": 5.702217529039071e-06, |
| "loss": 0.2807, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.698005698005698, |
| "grad_norm": 0.18886407972543068, |
| "learning_rate": 5.6494192185850055e-06, |
| "loss": 0.2822, |
| "step": 947 |
| }, |
| { |
| "epoch": 2.700854700854701, |
| "grad_norm": 0.2030341917370533, |
| "learning_rate": 5.59662090813094e-06, |
| "loss": 0.2979, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.7037037037037037, |
| "grad_norm": 0.18801018868503175, |
| "learning_rate": 5.543822597676875e-06, |
| "loss": 0.2754, |
| "step": 949 |
| }, |
| { |
| "epoch": 2.7065527065527064, |
| "grad_norm": 0.18722591061825838, |
| "learning_rate": 5.491024287222809e-06, |
| "loss": 0.291, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.7094017094017095, |
| "grad_norm": 0.18419313321990788, |
| "learning_rate": 5.438225976768744e-06, |
| "loss": 0.2786, |
| "step": 951 |
| }, |
| { |
| "epoch": 2.7122507122507122, |
| "grad_norm": 0.1795053639312283, |
| "learning_rate": 5.385427666314678e-06, |
| "loss": 0.2616, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.715099715099715, |
| "grad_norm": 0.19092668654205353, |
| "learning_rate": 5.332629355860613e-06, |
| "loss": 0.266, |
| "step": 953 |
| }, |
| { |
| "epoch": 2.717948717948718, |
| "grad_norm": 0.18515878208885198, |
| "learning_rate": 5.279831045406547e-06, |
| "loss": 0.2683, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.7207977207977208, |
| "grad_norm": 0.18512141190512552, |
| "learning_rate": 5.227032734952482e-06, |
| "loss": 0.2613, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.7236467236467234, |
| "grad_norm": 0.17864087041783647, |
| "learning_rate": 5.174234424498416e-06, |
| "loss": 0.2728, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.7264957264957266, |
| "grad_norm": 0.18397723270469116, |
| "learning_rate": 5.121436114044351e-06, |
| "loss": 0.2878, |
| "step": 957 |
| }, |
| { |
| "epoch": 2.7293447293447293, |
| "grad_norm": 0.19206914593194782, |
| "learning_rate": 5.0686378035902855e-06, |
| "loss": 0.2898, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.732193732193732, |
| "grad_norm": 0.18145822655495156, |
| "learning_rate": 5.01583949313622e-06, |
| "loss": 0.2811, |
| "step": 959 |
| }, |
| { |
| "epoch": 2.735042735042735, |
| "grad_norm": 0.18983105326320673, |
| "learning_rate": 4.963041182682154e-06, |
| "loss": 0.2961, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.737891737891738, |
| "grad_norm": 0.18514943684352905, |
| "learning_rate": 4.910242872228089e-06, |
| "loss": 0.2687, |
| "step": 961 |
| }, |
| { |
| "epoch": 2.7407407407407405, |
| "grad_norm": 0.19048747070950345, |
| "learning_rate": 4.857444561774023e-06, |
| "loss": 0.2742, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.7435897435897436, |
| "grad_norm": 0.18667552629580247, |
| "learning_rate": 4.804646251319958e-06, |
| "loss": 0.2778, |
| "step": 963 |
| }, |
| { |
| "epoch": 2.7464387464387463, |
| "grad_norm": 0.34574568987357907, |
| "learning_rate": 4.751847940865893e-06, |
| "loss": 0.2956, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.7492877492877494, |
| "grad_norm": 0.18370077080612704, |
| "learning_rate": 4.699049630411827e-06, |
| "loss": 0.2873, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.752136752136752, |
| "grad_norm": 0.18353727774729006, |
| "learning_rate": 4.646251319957761e-06, |
| "loss": 0.2729, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.754985754985755, |
| "grad_norm": 0.19726651604825912, |
| "learning_rate": 4.593453009503696e-06, |
| "loss": 0.2774, |
| "step": 967 |
| }, |
| { |
| "epoch": 2.757834757834758, |
| "grad_norm": 0.18831766661505295, |
| "learning_rate": 4.540654699049631e-06, |
| "loss": 0.2918, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.7606837606837606, |
| "grad_norm": 0.1822011598025739, |
| "learning_rate": 4.487856388595565e-06, |
| "loss": 0.2686, |
| "step": 969 |
| }, |
| { |
| "epoch": 2.763532763532764, |
| "grad_norm": 0.1845843568198463, |
| "learning_rate": 4.4350580781415e-06, |
| "loss": 0.2835, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.7663817663817665, |
| "grad_norm": 0.1801611398952323, |
| "learning_rate": 4.382259767687434e-06, |
| "loss": 0.2738, |
| "step": 971 |
| }, |
| { |
| "epoch": 2.769230769230769, |
| "grad_norm": 0.1798470399085861, |
| "learning_rate": 4.329461457233369e-06, |
| "loss": 0.2668, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.7720797720797723, |
| "grad_norm": 0.18404634330655054, |
| "learning_rate": 4.276663146779303e-06, |
| "loss": 0.2685, |
| "step": 973 |
| }, |
| { |
| "epoch": 2.774928774928775, |
| "grad_norm": 0.19542597110088794, |
| "learning_rate": 4.223864836325238e-06, |
| "loss": 0.2854, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.7777777777777777, |
| "grad_norm": 0.17947114319794663, |
| "learning_rate": 4.171066525871172e-06, |
| "loss": 0.2757, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.780626780626781, |
| "grad_norm": 0.17988682607436737, |
| "learning_rate": 4.1182682154171074e-06, |
| "loss": 0.2623, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.7834757834757835, |
| "grad_norm": 0.18466891151924947, |
| "learning_rate": 4.065469904963041e-06, |
| "loss": 0.2673, |
| "step": 977 |
| }, |
| { |
| "epoch": 2.786324786324786, |
| "grad_norm": 0.1803744986096463, |
| "learning_rate": 4.012671594508976e-06, |
| "loss": 0.2769, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.7891737891737893, |
| "grad_norm": 0.18375712425725002, |
| "learning_rate": 3.95987328405491e-06, |
| "loss": 0.2736, |
| "step": 979 |
| }, |
| { |
| "epoch": 2.792022792022792, |
| "grad_norm": 0.1823276203279524, |
| "learning_rate": 3.907074973600845e-06, |
| "loss": 0.2689, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.7948717948717947, |
| "grad_norm": 0.1896182512304451, |
| "learning_rate": 3.854276663146779e-06, |
| "loss": 0.29, |
| "step": 981 |
| }, |
| { |
| "epoch": 2.797720797720798, |
| "grad_norm": 0.20688431780425465, |
| "learning_rate": 3.801478352692714e-06, |
| "loss": 0.2788, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.8005698005698005, |
| "grad_norm": 0.1758754882132954, |
| "learning_rate": 3.748680042238648e-06, |
| "loss": 0.264, |
| "step": 983 |
| }, |
| { |
| "epoch": 2.8034188034188032, |
| "grad_norm": 0.17784331145573304, |
| "learning_rate": 3.6958817317845833e-06, |
| "loss": 0.266, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.8062678062678064, |
| "grad_norm": 0.2198776065570171, |
| "learning_rate": 3.6430834213305176e-06, |
| "loss": 0.2898, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.809116809116809, |
| "grad_norm": 0.21195194560926758, |
| "learning_rate": 3.590285110876452e-06, |
| "loss": 0.2743, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.8119658119658117, |
| "grad_norm": 0.17653709190418312, |
| "learning_rate": 3.537486800422387e-06, |
| "loss": 0.2684, |
| "step": 987 |
| }, |
| { |
| "epoch": 2.814814814814815, |
| "grad_norm": 0.17452382412070946, |
| "learning_rate": 3.4846884899683212e-06, |
| "loss": 0.2672, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.8176638176638176, |
| "grad_norm": 0.1710375478613801, |
| "learning_rate": 3.4318901795142555e-06, |
| "loss": 0.2704, |
| "step": 989 |
| }, |
| { |
| "epoch": 2.8205128205128203, |
| "grad_norm": 0.17775988353031527, |
| "learning_rate": 3.37909186906019e-06, |
| "loss": 0.277, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.8233618233618234, |
| "grad_norm": 0.17938796003091406, |
| "learning_rate": 3.326293558606125e-06, |
| "loss": 0.2844, |
| "step": 991 |
| }, |
| { |
| "epoch": 2.826210826210826, |
| "grad_norm": 0.1762865102421063, |
| "learning_rate": 3.273495248152059e-06, |
| "loss": 0.2761, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.8290598290598292, |
| "grad_norm": 0.18155573837120068, |
| "learning_rate": 3.220696937697994e-06, |
| "loss": 0.2783, |
| "step": 993 |
| }, |
| { |
| "epoch": 2.831908831908832, |
| "grad_norm": 0.1815146018987946, |
| "learning_rate": 3.167898627243928e-06, |
| "loss": 0.264, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.8347578347578346, |
| "grad_norm": 0.17387563307830614, |
| "learning_rate": 3.115100316789863e-06, |
| "loss": 0.2672, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.8376068376068377, |
| "grad_norm": 0.17754589749967897, |
| "learning_rate": 3.0623020063357975e-06, |
| "loss": 0.2604, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.8404558404558404, |
| "grad_norm": 0.1788982942306512, |
| "learning_rate": 3.009503695881732e-06, |
| "loss": 0.2779, |
| "step": 997 |
| }, |
| { |
| "epoch": 2.8433048433048436, |
| "grad_norm": 0.17594751406364015, |
| "learning_rate": 2.9567053854276665e-06, |
| "loss": 0.279, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.8461538461538463, |
| "grad_norm": 0.17837671634444546, |
| "learning_rate": 2.903907074973601e-06, |
| "loss": 0.2701, |
| "step": 999 |
| }, |
| { |
| "epoch": 2.849002849002849, |
| "grad_norm": 0.17682311169747053, |
| "learning_rate": 2.8511087645195354e-06, |
| "loss": 0.2823, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.851851851851852, |
| "grad_norm": 0.18269585079770145, |
| "learning_rate": 2.79831045406547e-06, |
| "loss": 0.2813, |
| "step": 1001 |
| }, |
| { |
| "epoch": 2.8547008547008548, |
| "grad_norm": 0.17761877789216213, |
| "learning_rate": 2.7455121436114044e-06, |
| "loss": 0.2776, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.8575498575498575, |
| "grad_norm": 0.1827216995625995, |
| "learning_rate": 2.692713833157339e-06, |
| "loss": 0.2763, |
| "step": 1003 |
| }, |
| { |
| "epoch": 2.8603988603988606, |
| "grad_norm": 0.17763354260264558, |
| "learning_rate": 2.6399155227032734e-06, |
| "loss": 0.2717, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.8632478632478633, |
| "grad_norm": 0.17915576491895774, |
| "learning_rate": 2.587117212249208e-06, |
| "loss": 0.2806, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.866096866096866, |
| "grad_norm": 0.18114152761979074, |
| "learning_rate": 2.5343189017951427e-06, |
| "loss": 0.2666, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.868945868945869, |
| "grad_norm": 0.18225553283170726, |
| "learning_rate": 2.481520591341077e-06, |
| "loss": 0.2624, |
| "step": 1007 |
| }, |
| { |
| "epoch": 2.871794871794872, |
| "grad_norm": 0.1728604793697752, |
| "learning_rate": 2.4287222808870117e-06, |
| "loss": 0.2737, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.8746438746438745, |
| "grad_norm": 0.18389395414319457, |
| "learning_rate": 2.3759239704329464e-06, |
| "loss": 0.2775, |
| "step": 1009 |
| }, |
| { |
| "epoch": 2.8774928774928776, |
| "grad_norm": 0.1796113765481641, |
| "learning_rate": 2.3231256599788807e-06, |
| "loss": 0.2817, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.8803418803418803, |
| "grad_norm": 0.18478152892147015, |
| "learning_rate": 2.2703273495248154e-06, |
| "loss": 0.2618, |
| "step": 1011 |
| }, |
| { |
| "epoch": 2.883190883190883, |
| "grad_norm": 0.17453347268398672, |
| "learning_rate": 2.21752903907075e-06, |
| "loss": 0.2749, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.886039886039886, |
| "grad_norm": 0.1839452873289296, |
| "learning_rate": 2.1647307286166843e-06, |
| "loss": 0.2713, |
| "step": 1013 |
| }, |
| { |
| "epoch": 2.888888888888889, |
| "grad_norm": 0.2010428027513303, |
| "learning_rate": 2.111932418162619e-06, |
| "loss": 0.2704, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.8917378917378915, |
| "grad_norm": 0.18767478898568274, |
| "learning_rate": 2.0591341077085537e-06, |
| "loss": 0.2806, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.8945868945868947, |
| "grad_norm": 0.1752592201926614, |
| "learning_rate": 2.006335797254488e-06, |
| "loss": 0.2781, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.8974358974358974, |
| "grad_norm": 0.18503198686977332, |
| "learning_rate": 1.9535374868004227e-06, |
| "loss": 0.2829, |
| "step": 1017 |
| }, |
| { |
| "epoch": 2.9002849002849, |
| "grad_norm": 0.17491938775507743, |
| "learning_rate": 1.900739176346357e-06, |
| "loss": 0.2735, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.903133903133903, |
| "grad_norm": 0.18604953590453358, |
| "learning_rate": 1.8479408658922916e-06, |
| "loss": 0.2822, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.905982905982906, |
| "grad_norm": 0.17680948230251936, |
| "learning_rate": 1.795142555438226e-06, |
| "loss": 0.2654, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.9088319088319086, |
| "grad_norm": 0.17779057298202855, |
| "learning_rate": 1.7423442449841606e-06, |
| "loss": 0.2809, |
| "step": 1021 |
| }, |
| { |
| "epoch": 2.9116809116809117, |
| "grad_norm": 0.17367317767184776, |
| "learning_rate": 1.689545934530095e-06, |
| "loss": 0.2553, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.9145299145299144, |
| "grad_norm": 0.1849086381558193, |
| "learning_rate": 1.6367476240760296e-06, |
| "loss": 0.2704, |
| "step": 1023 |
| }, |
| { |
| "epoch": 2.9173789173789175, |
| "grad_norm": 0.2504975572538142, |
| "learning_rate": 1.583949313621964e-06, |
| "loss": 0.2855, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.92022792022792, |
| "grad_norm": 0.17838279902935134, |
| "learning_rate": 1.5311510031678987e-06, |
| "loss": 0.2633, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.9230769230769234, |
| "grad_norm": 0.187133440325405, |
| "learning_rate": 1.4783526927138332e-06, |
| "loss": 0.2681, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.925925925925926, |
| "grad_norm": 0.17495304668665154, |
| "learning_rate": 1.4255543822597677e-06, |
| "loss": 0.2747, |
| "step": 1027 |
| }, |
| { |
| "epoch": 2.9287749287749287, |
| "grad_norm": 0.1768663958665078, |
| "learning_rate": 1.3727560718057022e-06, |
| "loss": 0.2646, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.931623931623932, |
| "grad_norm": 0.17508470828848635, |
| "learning_rate": 1.3199577613516367e-06, |
| "loss": 0.2664, |
| "step": 1029 |
| }, |
| { |
| "epoch": 2.9344729344729346, |
| "grad_norm": 0.17563477250519552, |
| "learning_rate": 1.2671594508975714e-06, |
| "loss": 0.276, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.9373219373219372, |
| "grad_norm": 0.18333810882390714, |
| "learning_rate": 1.2143611404435059e-06, |
| "loss": 0.2724, |
| "step": 1031 |
| }, |
| { |
| "epoch": 2.9401709401709404, |
| "grad_norm": 0.18258423976716023, |
| "learning_rate": 1.1615628299894403e-06, |
| "loss": 0.2688, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.943019943019943, |
| "grad_norm": 0.1740940942606421, |
| "learning_rate": 1.108764519535375e-06, |
| "loss": 0.2628, |
| "step": 1033 |
| }, |
| { |
| "epoch": 2.9458689458689458, |
| "grad_norm": 0.18677299556807853, |
| "learning_rate": 1.0559662090813095e-06, |
| "loss": 0.2727, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.948717948717949, |
| "grad_norm": 0.17102730623765852, |
| "learning_rate": 1.003167898627244e-06, |
| "loss": 0.2601, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.9515669515669516, |
| "grad_norm": 0.17386263200587151, |
| "learning_rate": 9.503695881731785e-07, |
| "loss": 0.2657, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.9544159544159543, |
| "grad_norm": 0.1693040202829608, |
| "learning_rate": 8.97571277719113e-07, |
| "loss": 0.2639, |
| "step": 1037 |
| }, |
| { |
| "epoch": 2.9572649572649574, |
| "grad_norm": 0.1823134515198666, |
| "learning_rate": 8.447729672650475e-07, |
| "loss": 0.2809, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.96011396011396, |
| "grad_norm": 0.18713660780573363, |
| "learning_rate": 7.91974656810982e-07, |
| "loss": 0.2985, |
| "step": 1039 |
| }, |
| { |
| "epoch": 2.962962962962963, |
| "grad_norm": 0.17859164741168496, |
| "learning_rate": 7.391763463569166e-07, |
| "loss": 0.2728, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.965811965811966, |
| "grad_norm": 0.17765435111212308, |
| "learning_rate": 6.863780359028511e-07, |
| "loss": 0.273, |
| "step": 1041 |
| }, |
| { |
| "epoch": 2.9686609686609686, |
| "grad_norm": 0.172193961059641, |
| "learning_rate": 6.335797254487857e-07, |
| "loss": 0.2777, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.9715099715099713, |
| "grad_norm": 0.17703694074644172, |
| "learning_rate": 5.807814149947202e-07, |
| "loss": 0.2729, |
| "step": 1043 |
| }, |
| { |
| "epoch": 2.9743589743589745, |
| "grad_norm": 0.17094716083563843, |
| "learning_rate": 5.279831045406548e-07, |
| "loss": 0.2565, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.977207977207977, |
| "grad_norm": 0.17381058656887102, |
| "learning_rate": 4.7518479408658924e-07, |
| "loss": 0.2676, |
| "step": 1045 |
| }, |
| { |
| "epoch": 2.98005698005698, |
| "grad_norm": 0.17011520350387407, |
| "learning_rate": 4.2238648363252377e-07, |
| "loss": 0.2711, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.982905982905983, |
| "grad_norm": 0.1693608759086481, |
| "learning_rate": 3.695881731784583e-07, |
| "loss": 0.2777, |
| "step": 1047 |
| }, |
| { |
| "epoch": 2.9857549857549857, |
| "grad_norm": 0.1691539977152749, |
| "learning_rate": 3.1678986272439284e-07, |
| "loss": 0.258, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.9886039886039883, |
| "grad_norm": 0.1725795825235304, |
| "learning_rate": 2.639915522703274e-07, |
| "loss": 0.2698, |
| "step": 1049 |
| }, |
| { |
| "epoch": 2.9914529914529915, |
| "grad_norm": 0.17564106791013198, |
| "learning_rate": 2.1119324181626189e-07, |
| "loss": 0.2727, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.994301994301994, |
| "grad_norm": 0.17354288175962146, |
| "learning_rate": 1.5839493136219642e-07, |
| "loss": 0.2743, |
| "step": 1051 |
| }, |
| { |
| "epoch": 2.9971509971509973, |
| "grad_norm": 0.17083245194414953, |
| "learning_rate": 1.0559662090813094e-07, |
| "loss": 0.2757, |
| "step": 1052 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.17469201616735772, |
| "learning_rate": 5.279831045406547e-08, |
| "loss": 0.2663, |
| "step": 1053 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1053, |
| "total_flos": 1.1708723554781495e+19, |
| "train_loss": 0.4741822677856384, |
| "train_runtime": 33666.7653, |
| "train_samples_per_second": 0.5, |
| "train_steps_per_second": 0.031 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1053, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1708723554781495e+19, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|