| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 340, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.014705882352941176, |
| "grad_norm": 2.547836181388219, |
| "learning_rate": 2.3529411764705885e-06, |
| "loss": 0.693, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.029411764705882353, |
| "grad_norm": 2.5270813414533366, |
| "learning_rate": 4.705882352941177e-06, |
| "loss": 0.6892, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.04411764705882353, |
| "grad_norm": 2.355483907211656, |
| "learning_rate": 7.058823529411766e-06, |
| "loss": 0.6851, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.058823529411764705, |
| "grad_norm": 1.8130870424308736, |
| "learning_rate": 9.411764705882354e-06, |
| "loss": 0.6513, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.07352941176470588, |
| "grad_norm": 1.2216520407231852, |
| "learning_rate": 1.1764705882352942e-05, |
| "loss": 0.6231, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.08823529411764706, |
| "grad_norm": 1.503718312141216, |
| "learning_rate": 1.4117647058823532e-05, |
| "loss": 0.6169, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.10294117647058823, |
| "grad_norm": 1.7964660884238277, |
| "learning_rate": 1.647058823529412e-05, |
| "loss": 0.5687, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 1.7269912533351832, |
| "learning_rate": 1.8823529411764708e-05, |
| "loss": 0.5586, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.1323529411764706, |
| "grad_norm": 0.7251113424301481, |
| "learning_rate": 2.1176470588235296e-05, |
| "loss": 0.5208, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.14705882352941177, |
| "grad_norm": 1.1160352667226052, |
| "learning_rate": 2.3529411764705884e-05, |
| "loss": 0.5086, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.16176470588235295, |
| "grad_norm": 0.9015674485810162, |
| "learning_rate": 2.5882352941176475e-05, |
| "loss": 0.4903, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.17647058823529413, |
| "grad_norm": 0.7153137232697248, |
| "learning_rate": 2.8235294117647063e-05, |
| "loss": 0.4841, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.19117647058823528, |
| "grad_norm": 0.8088993429376143, |
| "learning_rate": 3.0588235294117644e-05, |
| "loss": 0.4711, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.20588235294117646, |
| "grad_norm": 0.5653053624299016, |
| "learning_rate": 3.294117647058824e-05, |
| "loss": 0.4567, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.22058823529411764, |
| "grad_norm": 0.5534586516972508, |
| "learning_rate": 3.529411764705883e-05, |
| "loss": 0.4522, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 0.5736228520847013, |
| "learning_rate": 3.7647058823529415e-05, |
| "loss": 0.4415, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.4337208100637674, |
| "learning_rate": 4e-05, |
| "loss": 0.4327, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.2647058823529412, |
| "grad_norm": 0.4876347786470063, |
| "learning_rate": 4.235294117647059e-05, |
| "loss": 0.43, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.27941176470588236, |
| "grad_norm": 0.4061097022997626, |
| "learning_rate": 4.470588235294118e-05, |
| "loss": 0.4317, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 0.4103637458773689, |
| "learning_rate": 4.705882352941177e-05, |
| "loss": 0.43, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.3088235294117647, |
| "grad_norm": 0.3888995331551954, |
| "learning_rate": 4.941176470588236e-05, |
| "loss": 0.4199, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.3235294117647059, |
| "grad_norm": 0.3268371484433097, |
| "learning_rate": 5.176470588235295e-05, |
| "loss": 0.4185, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.3382352941176471, |
| "grad_norm": 0.39223698739269497, |
| "learning_rate": 5.411764705882354e-05, |
| "loss": 0.4128, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 0.2779761293219138, |
| "learning_rate": 5.6470588235294126e-05, |
| "loss": 0.4043, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.36764705882352944, |
| "grad_norm": 0.3989443078505564, |
| "learning_rate": 5.8823529411764714e-05, |
| "loss": 0.4089, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.38235294117647056, |
| "grad_norm": 0.38093200095401203, |
| "learning_rate": 6.117647058823529e-05, |
| "loss": 0.4017, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.39705882352941174, |
| "grad_norm": 0.4064320796173135, |
| "learning_rate": 6.352941176470589e-05, |
| "loss": 0.4011, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.4117647058823529, |
| "grad_norm": 0.4971422289820653, |
| "learning_rate": 6.588235294117648e-05, |
| "loss": 0.3989, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.4264705882352941, |
| "grad_norm": 0.7406572881589351, |
| "learning_rate": 6.823529411764707e-05, |
| "loss": 0.4051, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.4411764705882353, |
| "grad_norm": 0.8206793701783639, |
| "learning_rate": 7.058823529411765e-05, |
| "loss": 0.406, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.45588235294117646, |
| "grad_norm": 0.46999206766579715, |
| "learning_rate": 7.294117647058824e-05, |
| "loss": 0.3959, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 0.7832181937714577, |
| "learning_rate": 7.529411764705883e-05, |
| "loss": 0.3979, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.4852941176470588, |
| "grad_norm": 0.6729630572440819, |
| "learning_rate": 7.764705882352942e-05, |
| "loss": 0.4037, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.5845843059576046, |
| "learning_rate": 8e-05, |
| "loss": 0.3981, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.5147058823529411, |
| "grad_norm": 0.5179039508771461, |
| "learning_rate": 7.999789193948694e-05, |
| "loss": 0.3899, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "grad_norm": 0.8446247503414208, |
| "learning_rate": 7.999156798014366e-05, |
| "loss": 0.3919, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.5441176470588235, |
| "grad_norm": 0.5811147704566235, |
| "learning_rate": 7.998102878853464e-05, |
| "loss": 0.3842, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.5588235294117647, |
| "grad_norm": 0.7042141225204365, |
| "learning_rate": 7.996627547552256e-05, |
| "loss": 0.3887, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.5735294117647058, |
| "grad_norm": 0.5162453656112366, |
| "learning_rate": 7.994730959615125e-05, |
| "loss": 0.3801, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 0.6306069956051037, |
| "learning_rate": 7.992413314948177e-05, |
| "loss": 0.3872, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.6029411764705882, |
| "grad_norm": 0.4954620508512063, |
| "learning_rate": 7.989674857838173e-05, |
| "loss": 0.3799, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.6176470588235294, |
| "grad_norm": 0.4655956434769946, |
| "learning_rate": 7.986515876926777e-05, |
| "loss": 0.3795, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.6323529411764706, |
| "grad_norm": 0.4180599863638677, |
| "learning_rate": 7.982936705180139e-05, |
| "loss": 0.3797, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "grad_norm": 0.3370827795593815, |
| "learning_rate": 7.978937719853786e-05, |
| "loss": 0.3713, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.6617647058823529, |
| "grad_norm": 0.302709681434872, |
| "learning_rate": 7.974519342452872e-05, |
| "loss": 0.37, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.6764705882352942, |
| "grad_norm": 0.35605679848702854, |
| "learning_rate": 7.969682038687744e-05, |
| "loss": 0.3706, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.6911764705882353, |
| "grad_norm": 0.3386095408702867, |
| "learning_rate": 7.964426318424855e-05, |
| "loss": 0.3717, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 0.3740443449230433, |
| "learning_rate": 7.958752735633022e-05, |
| "loss": 0.3678, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.7205882352941176, |
| "grad_norm": 0.291877051555131, |
| "learning_rate": 7.952661888325038e-05, |
| "loss": 0.3667, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.7352941176470589, |
| "grad_norm": 0.22798970572724098, |
| "learning_rate": 7.946154418494639e-05, |
| "loss": 0.3658, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.3017794055631277, |
| "learning_rate": 7.939231012048833e-05, |
| "loss": 0.366, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "grad_norm": 0.2183488887265105, |
| "learning_rate": 7.931892398735608e-05, |
| "loss": 0.364, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.7794117647058824, |
| "grad_norm": 0.2651745781496878, |
| "learning_rate": 7.92413935206701e-05, |
| "loss": 0.36, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.7941176470588235, |
| "grad_norm": 0.25030378975718287, |
| "learning_rate": 7.915972689237618e-05, |
| "loss": 0.3629, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.8088235294117647, |
| "grad_norm": 0.2531618636697317, |
| "learning_rate": 7.907393271038403e-05, |
| "loss": 0.3548, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 0.33260642234735066, |
| "learning_rate": 7.898402001766002e-05, |
| "loss": 0.364, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.8382352941176471, |
| "grad_norm": 0.41115101334491694, |
| "learning_rate": 7.888999829127398e-05, |
| "loss": 0.3578, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.8529411764705882, |
| "grad_norm": 0.44590061545961934, |
| "learning_rate": 7.879187744140039e-05, |
| "loss": 0.3607, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.8676470588235294, |
| "grad_norm": 0.522962751461851, |
| "learning_rate": 7.868966781027367e-05, |
| "loss": 0.3592, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "grad_norm": 0.6601512591505266, |
| "learning_rate": 7.858338017109822e-05, |
| "loss": 0.3657, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.8970588235294118, |
| "grad_norm": 0.5825651454852345, |
| "learning_rate": 7.847302572691277e-05, |
| "loss": 0.3625, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.9117647058823529, |
| "grad_norm": 0.42428578770971376, |
| "learning_rate": 7.835861610940965e-05, |
| "loss": 0.3613, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.9264705882352942, |
| "grad_norm": 0.3717888932030909, |
| "learning_rate": 7.824016337770872e-05, |
| "loss": 0.3573, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 0.42447915248441553, |
| "learning_rate": 7.811768001708627e-05, |
| "loss": 0.362, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.9558823529411765, |
| "grad_norm": 0.3876778292466403, |
| "learning_rate": 7.799117893765913e-05, |
| "loss": 0.3557, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.9705882352941176, |
| "grad_norm": 0.26608755562026015, |
| "learning_rate": 7.786067347302379e-05, |
| "loss": 0.3545, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.9852941176470589, |
| "grad_norm": 0.3434111610187913, |
| "learning_rate": 7.77261773788511e-05, |
| "loss": 0.353, |
| "step": 67 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.3497537599784584, |
| "learning_rate": 7.758770483143634e-05, |
| "loss": 0.3545, |
| "step": 68 |
| }, |
| { |
| "epoch": 1.0147058823529411, |
| "grad_norm": 0.3391085192527439, |
| "learning_rate": 7.744527042620496e-05, |
| "loss": 0.3419, |
| "step": 69 |
| }, |
| { |
| "epoch": 1.0294117647058822, |
| "grad_norm": 0.5077627087325893, |
| "learning_rate": 7.729888917617424e-05, |
| "loss": 0.3471, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.0441176470588236, |
| "grad_norm": 0.5532598557050799, |
| "learning_rate": 7.714857651037081e-05, |
| "loss": 0.3412, |
| "step": 71 |
| }, |
| { |
| "epoch": 1.0588235294117647, |
| "grad_norm": 0.5780960309028261, |
| "learning_rate": 7.699434827220448e-05, |
| "loss": 0.3428, |
| "step": 72 |
| }, |
| { |
| "epoch": 1.0735294117647058, |
| "grad_norm": 0.49974865026294507, |
| "learning_rate": 7.683622071779816e-05, |
| "loss": 0.3364, |
| "step": 73 |
| }, |
| { |
| "epoch": 1.088235294117647, |
| "grad_norm": 0.36747177299557693, |
| "learning_rate": 7.667421051427453e-05, |
| "loss": 0.3389, |
| "step": 74 |
| }, |
| { |
| "epoch": 1.1029411764705883, |
| "grad_norm": 0.3669907335719105, |
| "learning_rate": 7.650833473799922e-05, |
| "loss": 0.3312, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.1176470588235294, |
| "grad_norm": 0.3922235437218937, |
| "learning_rate": 7.633861087278095e-05, |
| "loss": 0.3293, |
| "step": 76 |
| }, |
| { |
| "epoch": 1.1323529411764706, |
| "grad_norm": 0.3719011690300337, |
| "learning_rate": 7.616505680802863e-05, |
| "loss": 0.3375, |
| "step": 77 |
| }, |
| { |
| "epoch": 1.1470588235294117, |
| "grad_norm": 0.3377098123124342, |
| "learning_rate": 7.598769083686582e-05, |
| "loss": 0.3405, |
| "step": 78 |
| }, |
| { |
| "epoch": 1.161764705882353, |
| "grad_norm": 0.29899552501628507, |
| "learning_rate": 7.58065316542025e-05, |
| "loss": 0.3351, |
| "step": 79 |
| }, |
| { |
| "epoch": 1.1764705882352942, |
| "grad_norm": 0.32828082894171606, |
| "learning_rate": 7.562159835476466e-05, |
| "loss": 0.3389, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.1911764705882353, |
| "grad_norm": 0.3445874209931112, |
| "learning_rate": 7.543291043108159e-05, |
| "loss": 0.3378, |
| "step": 81 |
| }, |
| { |
| "epoch": 1.2058823529411764, |
| "grad_norm": 0.3023978628206444, |
| "learning_rate": 7.524048777143139e-05, |
| "loss": 0.3387, |
| "step": 82 |
| }, |
| { |
| "epoch": 1.2205882352941178, |
| "grad_norm": 0.25558483521205383, |
| "learning_rate": 7.504435065774455e-05, |
| "loss": 0.3306, |
| "step": 83 |
| }, |
| { |
| "epoch": 1.2352941176470589, |
| "grad_norm": 0.32953601128265847, |
| "learning_rate": 7.48445197634663e-05, |
| "loss": 0.3349, |
| "step": 84 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.33756326919937646, |
| "learning_rate": 7.464101615137756e-05, |
| "loss": 0.3324, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.2647058823529411, |
| "grad_norm": 0.24120088917587124, |
| "learning_rate": 7.443386127137472e-05, |
| "loss": 0.3317, |
| "step": 86 |
| }, |
| { |
| "epoch": 1.2794117647058822, |
| "grad_norm": 0.2914122682482861, |
| "learning_rate": 7.422307695820893e-05, |
| "loss": 0.3346, |
| "step": 87 |
| }, |
| { |
| "epoch": 1.2941176470588236, |
| "grad_norm": 0.32020365300061576, |
| "learning_rate": 7.400868542918457e-05, |
| "loss": 0.3303, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.3088235294117647, |
| "grad_norm": 0.2516025057415832, |
| "learning_rate": 7.379070928181747e-05, |
| "loss": 0.3351, |
| "step": 89 |
| }, |
| { |
| "epoch": 1.3235294117647058, |
| "grad_norm": 0.25001986331755705, |
| "learning_rate": 7.356917149145308e-05, |
| "loss": 0.3353, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.3382352941176472, |
| "grad_norm": 0.2792197235528977, |
| "learning_rate": 7.334409540884479e-05, |
| "loss": 0.3294, |
| "step": 91 |
| }, |
| { |
| "epoch": 1.3529411764705883, |
| "grad_norm": 0.28078783596981044, |
| "learning_rate": 7.311550475769272e-05, |
| "loss": 0.3248, |
| "step": 92 |
| }, |
| { |
| "epoch": 1.3676470588235294, |
| "grad_norm": 0.37967936203746205, |
| "learning_rate": 7.288342363214313e-05, |
| "loss": 0.3328, |
| "step": 93 |
| }, |
| { |
| "epoch": 1.3823529411764706, |
| "grad_norm": 0.5149190442137033, |
| "learning_rate": 7.264787649424888e-05, |
| "loss": 0.3312, |
| "step": 94 |
| }, |
| { |
| "epoch": 1.3970588235294117, |
| "grad_norm": 0.6122843221799549, |
| "learning_rate": 7.240888817139094e-05, |
| "loss": 0.3348, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.4117647058823528, |
| "grad_norm": 0.5886981731078635, |
| "learning_rate": 7.216648385366167e-05, |
| "loss": 0.3395, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.4264705882352942, |
| "grad_norm": 0.34409848388448383, |
| "learning_rate": 7.192068909120959e-05, |
| "loss": 0.3306, |
| "step": 97 |
| }, |
| { |
| "epoch": 1.4411764705882353, |
| "grad_norm": 0.2978091494500524, |
| "learning_rate": 7.167152979154634e-05, |
| "loss": 0.3334, |
| "step": 98 |
| }, |
| { |
| "epoch": 1.4558823529411764, |
| "grad_norm": 0.4400729737039225, |
| "learning_rate": 7.141903221681595e-05, |
| "loss": 0.3404, |
| "step": 99 |
| }, |
| { |
| "epoch": 1.4705882352941178, |
| "grad_norm": 0.3694283042397561, |
| "learning_rate": 7.116322298102681e-05, |
| "loss": 0.3332, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.4852941176470589, |
| "grad_norm": 0.2639067391873521, |
| "learning_rate": 7.090412904724636e-05, |
| "loss": 0.3313, |
| "step": 101 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.37416820779021187, |
| "learning_rate": 7.064177772475912e-05, |
| "loss": 0.3285, |
| "step": 102 |
| }, |
| { |
| "epoch": 1.5147058823529411, |
| "grad_norm": 0.33275488946271775, |
| "learning_rate": 7.037619666618829e-05, |
| "loss": 0.3361, |
| "step": 103 |
| }, |
| { |
| "epoch": 1.5294117647058822, |
| "grad_norm": 0.23894606263596457, |
| "learning_rate": 7.010741386458099e-05, |
| "loss": 0.3388, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.5441176470588234, |
| "grad_norm": 0.3668325550727754, |
| "learning_rate": 6.983545765045774e-05, |
| "loss": 0.3311, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.5588235294117647, |
| "grad_norm": 0.2399104316842811, |
| "learning_rate": 6.956035668882637e-05, |
| "loss": 0.3297, |
| "step": 106 |
| }, |
| { |
| "epoch": 1.5735294117647058, |
| "grad_norm": 0.2357871283562126, |
| "learning_rate": 6.928213997616059e-05, |
| "loss": 0.3318, |
| "step": 107 |
| }, |
| { |
| "epoch": 1.5882352941176472, |
| "grad_norm": 0.2849131707817035, |
| "learning_rate": 6.900083683734372e-05, |
| "loss": 0.3304, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.6029411764705883, |
| "grad_norm": 0.17423798370987492, |
| "learning_rate": 6.871647692257768e-05, |
| "loss": 0.3276, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.6176470588235294, |
| "grad_norm": 0.2548925157793133, |
| "learning_rate": 6.842909020425789e-05, |
| "loss": 0.334, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.6323529411764706, |
| "grad_norm": 0.24942793454876405, |
| "learning_rate": 6.8138706973814e-05, |
| "loss": 0.3286, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.6470588235294117, |
| "grad_norm": 0.18689295644396056, |
| "learning_rate": 6.784535783851708e-05, |
| "loss": 0.3266, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.6617647058823528, |
| "grad_norm": 0.2911151755362797, |
| "learning_rate": 6.754907371825355e-05, |
| "loss": 0.3262, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.6764705882352942, |
| "grad_norm": 0.27837463790987516, |
| "learning_rate": 6.724988584226616e-05, |
| "loss": 0.3279, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.6911764705882353, |
| "grad_norm": 0.28908336337027807, |
| "learning_rate": 6.69478257458623e-05, |
| "loss": 0.3281, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.7058823529411766, |
| "grad_norm": 0.34403162565227147, |
| "learning_rate": 6.664292526709001e-05, |
| "loss": 0.3313, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.7205882352941178, |
| "grad_norm": 0.29186151217355694, |
| "learning_rate": 6.633521654338233e-05, |
| "loss": 0.3334, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.7352941176470589, |
| "grad_norm": 0.3213370329801203, |
| "learning_rate": 6.602473200816969e-05, |
| "loss": 0.3267, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.30160846132752556, |
| "learning_rate": 6.571150438746157e-05, |
| "loss": 0.3242, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.7647058823529411, |
| "grad_norm": 0.26892209589390914, |
| "learning_rate": 6.539556669639691e-05, |
| "loss": 0.3244, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.7794117647058822, |
| "grad_norm": 0.2440948850515908, |
| "learning_rate": 6.507695223576428e-05, |
| "loss": 0.3229, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.7941176470588234, |
| "grad_norm": 0.20672290158202897, |
| "learning_rate": 6.475569458849178e-05, |
| "loss": 0.331, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.8088235294117647, |
| "grad_norm": 0.2459670080944092, |
| "learning_rate": 6.443182761610752e-05, |
| "loss": 0.3321, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.8235294117647058, |
| "grad_norm": 0.2704407966484609, |
| "learning_rate": 6.410538545517026e-05, |
| "loss": 0.3288, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.8382352941176472, |
| "grad_norm": 0.23662938749712678, |
| "learning_rate": 6.377640251367148e-05, |
| "loss": 0.3285, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.8529411764705883, |
| "grad_norm": 0.23508733004486604, |
| "learning_rate": 6.344491346740859e-05, |
| "loss": 0.3265, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.8676470588235294, |
| "grad_norm": 0.15905797582955938, |
| "learning_rate": 6.311095325633006e-05, |
| "loss": 0.3287, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.8823529411764706, |
| "grad_norm": 0.16194725388549308, |
| "learning_rate": 6.277455708085255e-05, |
| "loss": 0.3193, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.8970588235294117, |
| "grad_norm": 0.19431781452308503, |
| "learning_rate": 6.24357603981508e-05, |
| "loss": 0.3218, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.9117647058823528, |
| "grad_norm": 0.1530375931457355, |
| "learning_rate": 6.209459891842023e-05, |
| "loss": 0.3232, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.9264705882352942, |
| "grad_norm": 0.20552563428946305, |
| "learning_rate": 6.175110860111307e-05, |
| "loss": 0.3291, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.9411764705882353, |
| "grad_norm": 0.16800259989430927, |
| "learning_rate": 6.140532565114801e-05, |
| "loss": 0.3255, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.9558823529411766, |
| "grad_norm": 0.15539283189220082, |
| "learning_rate": 6.105728651509424e-05, |
| "loss": 0.3254, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.9705882352941178, |
| "grad_norm": 0.16103547056804585, |
| "learning_rate": 6.070702787732971e-05, |
| "loss": 0.3249, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.9852941176470589, |
| "grad_norm": 0.16239367515131709, |
| "learning_rate": 6.0354586656174606e-05, |
| "loss": 0.3288, |
| "step": 135 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.20166055674169706, |
| "learning_rate": 6.000000000000001e-05, |
| "loss": 0.3099, |
| "step": 136 |
| }, |
| { |
| "epoch": 2.014705882352941, |
| "grad_norm": 0.24260127096827225, |
| "learning_rate": 5.964330528331234e-05, |
| "loss": 0.3056, |
| "step": 137 |
| }, |
| { |
| "epoch": 2.0294117647058822, |
| "grad_norm": 0.33127332158017103, |
| "learning_rate": 5.9284540102813964e-05, |
| "loss": 0.3, |
| "step": 138 |
| }, |
| { |
| "epoch": 2.0441176470588234, |
| "grad_norm": 0.43353541229148657, |
| "learning_rate": 5.892374227344041e-05, |
| "loss": 0.308, |
| "step": 139 |
| }, |
| { |
| "epoch": 2.0588235294117645, |
| "grad_norm": 0.49315921894412995, |
| "learning_rate": 5.856094982437454e-05, |
| "loss": 0.3067, |
| "step": 140 |
| }, |
| { |
| "epoch": 2.073529411764706, |
| "grad_norm": 0.450512729936066, |
| "learning_rate": 5.819620099503818e-05, |
| "loss": 0.3101, |
| "step": 141 |
| }, |
| { |
| "epoch": 2.088235294117647, |
| "grad_norm": 0.343920919271517, |
| "learning_rate": 5.782953423106154e-05, |
| "loss": 0.3046, |
| "step": 142 |
| }, |
| { |
| "epoch": 2.1029411764705883, |
| "grad_norm": 0.2814078808698335, |
| "learning_rate": 5.746098818023093e-05, |
| "loss": 0.2988, |
| "step": 143 |
| }, |
| { |
| "epoch": 2.1176470588235294, |
| "grad_norm": 0.3529797938787429, |
| "learning_rate": 5.709060168841524e-05, |
| "loss": 0.3033, |
| "step": 144 |
| }, |
| { |
| "epoch": 2.1323529411764706, |
| "grad_norm": 0.33042737999638, |
| "learning_rate": 5.6718413795471346e-05, |
| "loss": 0.3028, |
| "step": 145 |
| }, |
| { |
| "epoch": 2.1470588235294117, |
| "grad_norm": 0.16519735683489525, |
| "learning_rate": 5.634446373112926e-05, |
| "loss": 0.3035, |
| "step": 146 |
| }, |
| { |
| "epoch": 2.161764705882353, |
| "grad_norm": 0.23206099454265375, |
| "learning_rate": 5.596879091085724e-05, |
| "loss": 0.3001, |
| "step": 147 |
| }, |
| { |
| "epoch": 2.176470588235294, |
| "grad_norm": 0.2771142062084271, |
| "learning_rate": 5.5591434931707176e-05, |
| "loss": 0.3005, |
| "step": 148 |
| }, |
| { |
| "epoch": 2.1911764705882355, |
| "grad_norm": 0.2431821354495366, |
| "learning_rate": 5.5212435568141036e-05, |
| "loss": 0.3045, |
| "step": 149 |
| }, |
| { |
| "epoch": 2.2058823529411766, |
| "grad_norm": 0.18676533826823546, |
| "learning_rate": 5.4831832767838436e-05, |
| "loss": 0.2993, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.2205882352941178, |
| "grad_norm": 0.21179962464182328, |
| "learning_rate": 5.444966664748613e-05, |
| "loss": 0.2967, |
| "step": 151 |
| }, |
| { |
| "epoch": 2.235294117647059, |
| "grad_norm": 0.2277766014598405, |
| "learning_rate": 5.406597748854947e-05, |
| "loss": 0.2993, |
| "step": 152 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 0.19342351829324006, |
| "learning_rate": 5.368080573302676e-05, |
| "loss": 0.3044, |
| "step": 153 |
| }, |
| { |
| "epoch": 2.264705882352941, |
| "grad_norm": 0.21064512781524572, |
| "learning_rate": 5.329419197918639e-05, |
| "loss": 0.3062, |
| "step": 154 |
| }, |
| { |
| "epoch": 2.2794117647058822, |
| "grad_norm": 0.15702816077320908, |
| "learning_rate": 5.29061769772878e-05, |
| "loss": 0.2995, |
| "step": 155 |
| }, |
| { |
| "epoch": 2.2941176470588234, |
| "grad_norm": 0.17947407344934332, |
| "learning_rate": 5.251680162528618e-05, |
| "loss": 0.3013, |
| "step": 156 |
| }, |
| { |
| "epoch": 2.3088235294117645, |
| "grad_norm": 0.18466536636530687, |
| "learning_rate": 5.212610696452174e-05, |
| "loss": 0.3036, |
| "step": 157 |
| }, |
| { |
| "epoch": 2.323529411764706, |
| "grad_norm": 0.1680929482668945, |
| "learning_rate": 5.173413417539385e-05, |
| "loss": 0.3029, |
| "step": 158 |
| }, |
| { |
| "epoch": 2.338235294117647, |
| "grad_norm": 0.1654731382812225, |
| "learning_rate": 5.134092457302044e-05, |
| "loss": 0.3024, |
| "step": 159 |
| }, |
| { |
| "epoch": 2.3529411764705883, |
| "grad_norm": 0.15258036230353525, |
| "learning_rate": 5.0946519602883326e-05, |
| "loss": 0.3037, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.3676470588235294, |
| "grad_norm": 0.1491955575134207, |
| "learning_rate": 5.0550960836459674e-05, |
| "loss": 0.3044, |
| "step": 161 |
| }, |
| { |
| "epoch": 2.3823529411764706, |
| "grad_norm": 0.14545439455177017, |
| "learning_rate": 5.0154289966840315e-05, |
| "loss": 0.2954, |
| "step": 162 |
| }, |
| { |
| "epoch": 2.3970588235294117, |
| "grad_norm": 0.13499905174566654, |
| "learning_rate": 4.975654880433509e-05, |
| "loss": 0.2991, |
| "step": 163 |
| }, |
| { |
| "epoch": 2.411764705882353, |
| "grad_norm": 0.13529530394095032, |
| "learning_rate": 4.935777927206595e-05, |
| "loss": 0.301, |
| "step": 164 |
| }, |
| { |
| "epoch": 2.426470588235294, |
| "grad_norm": 0.14447147193835977, |
| "learning_rate": 4.895802340154813e-05, |
| "loss": 0.3038, |
| "step": 165 |
| }, |
| { |
| "epoch": 2.4411764705882355, |
| "grad_norm": 0.1368779663418461, |
| "learning_rate": 4.85573233282599e-05, |
| "loss": 0.3022, |
| "step": 166 |
| }, |
| { |
| "epoch": 2.4558823529411766, |
| "grad_norm": 0.13420641137376216, |
| "learning_rate": 4.815572128720138e-05, |
| "loss": 0.3049, |
| "step": 167 |
| }, |
| { |
| "epoch": 2.4705882352941178, |
| "grad_norm": 0.12163815573974876, |
| "learning_rate": 4.7753259608442804e-05, |
| "loss": 0.2998, |
| "step": 168 |
| }, |
| { |
| "epoch": 2.485294117647059, |
| "grad_norm": 0.15356753682422525, |
| "learning_rate": 4.734998071266282e-05, |
| "loss": 0.298, |
| "step": 169 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.11474062387695687, |
| "learning_rate": 4.694592710667723e-05, |
| "loss": 0.3068, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.514705882352941, |
| "grad_norm": 0.15293267687494932, |
| "learning_rate": 4.65411413789586e-05, |
| "loss": 0.3005, |
| "step": 171 |
| }, |
| { |
| "epoch": 2.5294117647058822, |
| "grad_norm": 0.13885259280209167, |
| "learning_rate": 4.6135666195147426e-05, |
| "loss": 0.2994, |
| "step": 172 |
| }, |
| { |
| "epoch": 2.5441176470588234, |
| "grad_norm": 0.11212797930990655, |
| "learning_rate": 4.572954429355487e-05, |
| "loss": 0.3026, |
| "step": 173 |
| }, |
| { |
| "epoch": 2.5588235294117645, |
| "grad_norm": 0.1441373090862667, |
| "learning_rate": 4.532281848065816e-05, |
| "loss": 0.3014, |
| "step": 174 |
| }, |
| { |
| "epoch": 2.5735294117647056, |
| "grad_norm": 0.1438877611890079, |
| "learning_rate": 4.491553162658857e-05, |
| "loss": 0.3044, |
| "step": 175 |
| }, |
| { |
| "epoch": 2.588235294117647, |
| "grad_norm": 0.12603413712308525, |
| "learning_rate": 4.450772666061285e-05, |
| "loss": 0.301, |
| "step": 176 |
| }, |
| { |
| "epoch": 2.6029411764705883, |
| "grad_norm": 0.12633770238098366, |
| "learning_rate": 4.409944656660828e-05, |
| "loss": 0.2965, |
| "step": 177 |
| }, |
| { |
| "epoch": 2.6176470588235294, |
| "grad_norm": 0.13789374969079662, |
| "learning_rate": 4.369073437853208e-05, |
| "loss": 0.3009, |
| "step": 178 |
| }, |
| { |
| "epoch": 2.6323529411764706, |
| "grad_norm": 0.1402520516162208, |
| "learning_rate": 4.328163317588552e-05, |
| "loss": 0.298, |
| "step": 179 |
| }, |
| { |
| "epoch": 2.6470588235294117, |
| "grad_norm": 0.09904937633936901, |
| "learning_rate": 4.2872186079173106e-05, |
| "loss": 0.3013, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.661764705882353, |
| "grad_norm": 0.1312793347901254, |
| "learning_rate": 4.2462436245357724e-05, |
| "loss": 0.3, |
| "step": 181 |
| }, |
| { |
| "epoch": 2.6764705882352944, |
| "grad_norm": 0.09621059323368814, |
| "learning_rate": 4.205242686331159e-05, |
| "loss": 0.3029, |
| "step": 182 |
| }, |
| { |
| "epoch": 2.6911764705882355, |
| "grad_norm": 0.10807537972219632, |
| "learning_rate": 4.164220114926414e-05, |
| "loss": 0.2978, |
| "step": 183 |
| }, |
| { |
| "epoch": 2.7058823529411766, |
| "grad_norm": 0.10722934994246927, |
| "learning_rate": 4.123180234224682e-05, |
| "loss": 0.2998, |
| "step": 184 |
| }, |
| { |
| "epoch": 2.7205882352941178, |
| "grad_norm": 0.1064704662992324, |
| "learning_rate": 4.0821273699535625e-05, |
| "loss": 0.3013, |
| "step": 185 |
| }, |
| { |
| "epoch": 2.735294117647059, |
| "grad_norm": 0.10969792661155021, |
| "learning_rate": 4.04106584920916e-05, |
| "loss": 0.3006, |
| "step": 186 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 0.12146189911454472, |
| "learning_rate": 4e-05, |
| "loss": 0.3031, |
| "step": 187 |
| }, |
| { |
| "epoch": 2.764705882352941, |
| "grad_norm": 0.11305408445478238, |
| "learning_rate": 3.9589341507908415e-05, |
| "loss": 0.3037, |
| "step": 188 |
| }, |
| { |
| "epoch": 2.7794117647058822, |
| "grad_norm": 0.13033799624199852, |
| "learning_rate": 3.917872630046439e-05, |
| "loss": 0.3032, |
| "step": 189 |
| }, |
| { |
| "epoch": 2.7941176470588234, |
| "grad_norm": 0.10937208565515465, |
| "learning_rate": 3.8768197657753194e-05, |
| "loss": 0.3035, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.8088235294117645, |
| "grad_norm": 0.11386446489403948, |
| "learning_rate": 3.835779885073588e-05, |
| "loss": 0.2985, |
| "step": 191 |
| }, |
| { |
| "epoch": 2.8235294117647056, |
| "grad_norm": 0.10573108090134906, |
| "learning_rate": 3.794757313668841e-05, |
| "loss": 0.3025, |
| "step": 192 |
| }, |
| { |
| "epoch": 2.838235294117647, |
| "grad_norm": 0.09522893522850687, |
| "learning_rate": 3.753756375464229e-05, |
| "loss": 0.2964, |
| "step": 193 |
| }, |
| { |
| "epoch": 2.8529411764705883, |
| "grad_norm": 0.10763504421587497, |
| "learning_rate": 3.71278139208269e-05, |
| "loss": 0.2983, |
| "step": 194 |
| }, |
| { |
| "epoch": 2.8676470588235294, |
| "grad_norm": 0.09673967344497875, |
| "learning_rate": 3.67183668241145e-05, |
| "loss": 0.303, |
| "step": 195 |
| }, |
| { |
| "epoch": 2.8823529411764706, |
| "grad_norm": 0.11019580337657149, |
| "learning_rate": 3.630926562146792e-05, |
| "loss": 0.2977, |
| "step": 196 |
| }, |
| { |
| "epoch": 2.8970588235294117, |
| "grad_norm": 0.09457792059050557, |
| "learning_rate": 3.5900553433391724e-05, |
| "loss": 0.2987, |
| "step": 197 |
| }, |
| { |
| "epoch": 2.911764705882353, |
| "grad_norm": 0.08738325053473636, |
| "learning_rate": 3.549227333938716e-05, |
| "loss": 0.2986, |
| "step": 198 |
| }, |
| { |
| "epoch": 2.9264705882352944, |
| "grad_norm": 0.09681306300823637, |
| "learning_rate": 3.5084468373411444e-05, |
| "loss": 0.3013, |
| "step": 199 |
| }, |
| { |
| "epoch": 2.9411764705882355, |
| "grad_norm": 0.09249395424589109, |
| "learning_rate": 3.467718151934187e-05, |
| "loss": 0.2972, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.9558823529411766, |
| "grad_norm": 0.09703820251292773, |
| "learning_rate": 3.427045570644515e-05, |
| "loss": 0.2979, |
| "step": 201 |
| }, |
| { |
| "epoch": 2.9705882352941178, |
| "grad_norm": 0.10540258782003765, |
| "learning_rate": 3.386433380485258e-05, |
| "loss": 0.2992, |
| "step": 202 |
| }, |
| { |
| "epoch": 2.985294117647059, |
| "grad_norm": 0.08015062850340511, |
| "learning_rate": 3.34588586210414e-05, |
| "loss": 0.3011, |
| "step": 203 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.14706722768632058, |
| "learning_rate": 3.305407289332279e-05, |
| "loss": 0.2806, |
| "step": 204 |
| }, |
| { |
| "epoch": 3.014705882352941, |
| "grad_norm": 0.10603496844146229, |
| "learning_rate": 3.2650019287337184e-05, |
| "loss": 0.2799, |
| "step": 205 |
| }, |
| { |
| "epoch": 3.0294117647058822, |
| "grad_norm": 0.15530639149269276, |
| "learning_rate": 3.22467403915572e-05, |
| "loss": 0.2781, |
| "step": 206 |
| }, |
| { |
| "epoch": 3.0441176470588234, |
| "grad_norm": 0.1421223150968687, |
| "learning_rate": 3.184427871279863e-05, |
| "loss": 0.282, |
| "step": 207 |
| }, |
| { |
| "epoch": 3.0588235294117645, |
| "grad_norm": 0.14419949652005803, |
| "learning_rate": 3.144267667174011e-05, |
| "loss": 0.2776, |
| "step": 208 |
| }, |
| { |
| "epoch": 3.073529411764706, |
| "grad_norm": 0.12994406412161302, |
| "learning_rate": 3.1041976598451884e-05, |
| "loss": 0.2781, |
| "step": 209 |
| }, |
| { |
| "epoch": 3.088235294117647, |
| "grad_norm": 0.14614171687593527, |
| "learning_rate": 3.064222072793407e-05, |
| "loss": 0.2765, |
| "step": 210 |
| }, |
| { |
| "epoch": 3.1029411764705883, |
| "grad_norm": 0.12016869114374024, |
| "learning_rate": 3.0243451195664914e-05, |
| "loss": 0.2783, |
| "step": 211 |
| }, |
| { |
| "epoch": 3.1176470588235294, |
| "grad_norm": 0.1310851029344073, |
| "learning_rate": 2.984571003315969e-05, |
| "loss": 0.2781, |
| "step": 212 |
| }, |
| { |
| "epoch": 3.1323529411764706, |
| "grad_norm": 0.11922587006224326, |
| "learning_rate": 2.944903916354032e-05, |
| "loss": 0.2795, |
| "step": 213 |
| }, |
| { |
| "epoch": 3.1470588235294117, |
| "grad_norm": 0.12710795880675843, |
| "learning_rate": 2.905348039711669e-05, |
| "loss": 0.2784, |
| "step": 214 |
| }, |
| { |
| "epoch": 3.161764705882353, |
| "grad_norm": 0.11445734817339984, |
| "learning_rate": 2.865907542697957e-05, |
| "loss": 0.2758, |
| "step": 215 |
| }, |
| { |
| "epoch": 3.176470588235294, |
| "grad_norm": 0.1300689688100464, |
| "learning_rate": 2.8265865824606165e-05, |
| "loss": 0.2758, |
| "step": 216 |
| }, |
| { |
| "epoch": 3.1911764705882355, |
| "grad_norm": 0.09831374725437492, |
| "learning_rate": 2.7873893035478265e-05, |
| "loss": 0.2748, |
| "step": 217 |
| }, |
| { |
| "epoch": 3.2058823529411766, |
| "grad_norm": 0.1272913228663667, |
| "learning_rate": 2.7483198374713836e-05, |
| "loss": 0.2746, |
| "step": 218 |
| }, |
| { |
| "epoch": 3.2205882352941178, |
| "grad_norm": 0.0993483978526809, |
| "learning_rate": 2.7093823022712217e-05, |
| "loss": 0.2739, |
| "step": 219 |
| }, |
| { |
| "epoch": 3.235294117647059, |
| "grad_norm": 0.09886695511684795, |
| "learning_rate": 2.6705808020813622e-05, |
| "loss": 0.2832, |
| "step": 220 |
| }, |
| { |
| "epoch": 3.25, |
| "grad_norm": 0.0998986520318052, |
| "learning_rate": 2.6319194266973256e-05, |
| "loss": 0.2743, |
| "step": 221 |
| }, |
| { |
| "epoch": 3.264705882352941, |
| "grad_norm": 0.0964816428580283, |
| "learning_rate": 2.5934022511450528e-05, |
| "loss": 0.2762, |
| "step": 222 |
| }, |
| { |
| "epoch": 3.2794117647058822, |
| "grad_norm": 0.09255980694012796, |
| "learning_rate": 2.5550333352513885e-05, |
| "loss": 0.2782, |
| "step": 223 |
| }, |
| { |
| "epoch": 3.2941176470588234, |
| "grad_norm": 0.0988289109757557, |
| "learning_rate": 2.5168167232161574e-05, |
| "loss": 0.2748, |
| "step": 224 |
| }, |
| { |
| "epoch": 3.3088235294117645, |
| "grad_norm": 0.0884234456508421, |
| "learning_rate": 2.4787564431858977e-05, |
| "loss": 0.2753, |
| "step": 225 |
| }, |
| { |
| "epoch": 3.323529411764706, |
| "grad_norm": 0.09774336930093334, |
| "learning_rate": 2.4408565068292827e-05, |
| "loss": 0.2751, |
| "step": 226 |
| }, |
| { |
| "epoch": 3.338235294117647, |
| "grad_norm": 0.0883002824327121, |
| "learning_rate": 2.4031209089142773e-05, |
| "loss": 0.2773, |
| "step": 227 |
| }, |
| { |
| "epoch": 3.3529411764705883, |
| "grad_norm": 0.09215211229010013, |
| "learning_rate": 2.3655536268870744e-05, |
| "loss": 0.2752, |
| "step": 228 |
| }, |
| { |
| "epoch": 3.3676470588235294, |
| "grad_norm": 0.08094179064778044, |
| "learning_rate": 2.328158620452868e-05, |
| "loss": 0.2729, |
| "step": 229 |
| }, |
| { |
| "epoch": 3.3823529411764706, |
| "grad_norm": 0.09692135730954927, |
| "learning_rate": 2.2909398311584775e-05, |
| "loss": 0.2731, |
| "step": 230 |
| }, |
| { |
| "epoch": 3.3970588235294117, |
| "grad_norm": 0.0813183018452283, |
| "learning_rate": 2.2539011819769056e-05, |
| "loss": 0.2782, |
| "step": 231 |
| }, |
| { |
| "epoch": 3.411764705882353, |
| "grad_norm": 0.08149910156743696, |
| "learning_rate": 2.2170465768938473e-05, |
| "loss": 0.275, |
| "step": 232 |
| }, |
| { |
| "epoch": 3.426470588235294, |
| "grad_norm": 0.08503586167349093, |
| "learning_rate": 2.1803799004961824e-05, |
| "loss": 0.2766, |
| "step": 233 |
| }, |
| { |
| "epoch": 3.4411764705882355, |
| "grad_norm": 0.07599564665486494, |
| "learning_rate": 2.1439050175625474e-05, |
| "loss": 0.2759, |
| "step": 234 |
| }, |
| { |
| "epoch": 3.4558823529411766, |
| "grad_norm": 0.08451424035355402, |
| "learning_rate": 2.1076257726559603e-05, |
| "loss": 0.2795, |
| "step": 235 |
| }, |
| { |
| "epoch": 3.4705882352941178, |
| "grad_norm": 0.07467914550010295, |
| "learning_rate": 2.0715459897186046e-05, |
| "loss": 0.2767, |
| "step": 236 |
| }, |
| { |
| "epoch": 3.485294117647059, |
| "grad_norm": 0.09085541610657201, |
| "learning_rate": 2.0356694716687687e-05, |
| "loss": 0.2785, |
| "step": 237 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 0.0734615712405951, |
| "learning_rate": 2.0000000000000012e-05, |
| "loss": 0.2739, |
| "step": 238 |
| }, |
| { |
| "epoch": 3.514705882352941, |
| "grad_norm": 0.07717163991035296, |
| "learning_rate": 1.964541334382541e-05, |
| "loss": 0.2729, |
| "step": 239 |
| }, |
| { |
| "epoch": 3.5294117647058822, |
| "grad_norm": 0.07496301816518236, |
| "learning_rate": 1.9292972122670303e-05, |
| "loss": 0.2752, |
| "step": 240 |
| }, |
| { |
| "epoch": 3.5441176470588234, |
| "grad_norm": 0.08080347359960849, |
| "learning_rate": 1.8942713484905762e-05, |
| "loss": 0.2801, |
| "step": 241 |
| }, |
| { |
| "epoch": 3.5588235294117645, |
| "grad_norm": 0.0738257225960406, |
| "learning_rate": 1.8594674348851992e-05, |
| "loss": 0.2767, |
| "step": 242 |
| }, |
| { |
| "epoch": 3.5735294117647056, |
| "grad_norm": 0.07389506773645138, |
| "learning_rate": 1.824889139888694e-05, |
| "loss": 0.2773, |
| "step": 243 |
| }, |
| { |
| "epoch": 3.588235294117647, |
| "grad_norm": 0.070294801390775, |
| "learning_rate": 1.790540108157977e-05, |
| "loss": 0.2763, |
| "step": 244 |
| }, |
| { |
| "epoch": 3.6029411764705883, |
| "grad_norm": 0.068704655766595, |
| "learning_rate": 1.756423960184922e-05, |
| "loss": 0.2781, |
| "step": 245 |
| }, |
| { |
| "epoch": 3.6176470588235294, |
| "grad_norm": 0.06594242125553404, |
| "learning_rate": 1.7225442919147467e-05, |
| "loss": 0.2757, |
| "step": 246 |
| }, |
| { |
| "epoch": 3.6323529411764706, |
| "grad_norm": 0.06969707402390993, |
| "learning_rate": 1.6889046743669957e-05, |
| "loss": 0.2776, |
| "step": 247 |
| }, |
| { |
| "epoch": 3.6470588235294117, |
| "grad_norm": 0.06492677835336866, |
| "learning_rate": 1.6555086532591425e-05, |
| "loss": 0.2781, |
| "step": 248 |
| }, |
| { |
| "epoch": 3.661764705882353, |
| "grad_norm": 0.06331911499148075, |
| "learning_rate": 1.6223597486328534e-05, |
| "loss": 0.279, |
| "step": 249 |
| }, |
| { |
| "epoch": 3.6764705882352944, |
| "grad_norm": 0.06722098051730128, |
| "learning_rate": 1.589461454482975e-05, |
| "loss": 0.2802, |
| "step": 250 |
| }, |
| { |
| "epoch": 3.6911764705882355, |
| "grad_norm": 0.06266474278629547, |
| "learning_rate": 1.556817238389249e-05, |
| "loss": 0.2781, |
| "step": 251 |
| }, |
| { |
| "epoch": 3.7058823529411766, |
| "grad_norm": 0.07000155789305507, |
| "learning_rate": 1.5244305411508217e-05, |
| "loss": 0.278, |
| "step": 252 |
| }, |
| { |
| "epoch": 3.7205882352941178, |
| "grad_norm": 0.061574644987438566, |
| "learning_rate": 1.4923047764235752e-05, |
| "loss": 0.2767, |
| "step": 253 |
| }, |
| { |
| "epoch": 3.735294117647059, |
| "grad_norm": 0.0682684963649869, |
| "learning_rate": 1.4604433303603092e-05, |
| "loss": 0.2732, |
| "step": 254 |
| }, |
| { |
| "epoch": 3.75, |
| "grad_norm": 0.0642818898133848, |
| "learning_rate": 1.4288495612538427e-05, |
| "loss": 0.2743, |
| "step": 255 |
| }, |
| { |
| "epoch": 3.764705882352941, |
| "grad_norm": 0.06713851434875447, |
| "learning_rate": 1.3975267991830327e-05, |
| "loss": 0.2817, |
| "step": 256 |
| }, |
| { |
| "epoch": 3.7794117647058822, |
| "grad_norm": 0.06828899506125703, |
| "learning_rate": 1.3664783456617703e-05, |
| "loss": 0.2725, |
| "step": 257 |
| }, |
| { |
| "epoch": 3.7941176470588234, |
| "grad_norm": 0.07069453573008218, |
| "learning_rate": 1.3357074732909996e-05, |
| "loss": 0.2775, |
| "step": 258 |
| }, |
| { |
| "epoch": 3.8088235294117645, |
| "grad_norm": 0.06390051742059577, |
| "learning_rate": 1.3052174254137713e-05, |
| "loss": 0.2771, |
| "step": 259 |
| }, |
| { |
| "epoch": 3.8235294117647056, |
| "grad_norm": 0.06129666220940109, |
| "learning_rate": 1.275011415773383e-05, |
| "loss": 0.2789, |
| "step": 260 |
| }, |
| { |
| "epoch": 3.838235294117647, |
| "grad_norm": 0.065817213580326, |
| "learning_rate": 1.2450926281746458e-05, |
| "loss": 0.274, |
| "step": 261 |
| }, |
| { |
| "epoch": 3.8529411764705883, |
| "grad_norm": 0.06519904576862424, |
| "learning_rate": 1.2154642161482939e-05, |
| "loss": 0.2771, |
| "step": 262 |
| }, |
| { |
| "epoch": 3.8676470588235294, |
| "grad_norm": 0.06313901880567484, |
| "learning_rate": 1.1861293026186007e-05, |
| "loss": 0.2754, |
| "step": 263 |
| }, |
| { |
| "epoch": 3.8823529411764706, |
| "grad_norm": 0.06296057703800705, |
| "learning_rate": 1.1570909795742118e-05, |
| "loss": 0.2732, |
| "step": 264 |
| }, |
| { |
| "epoch": 3.8970588235294117, |
| "grad_norm": 0.06544715501817833, |
| "learning_rate": 1.1283523077422327e-05, |
| "loss": 0.2797, |
| "step": 265 |
| }, |
| { |
| "epoch": 3.911764705882353, |
| "grad_norm": 0.0650053871447722, |
| "learning_rate": 1.0999163162656296e-05, |
| "loss": 0.279, |
| "step": 266 |
| }, |
| { |
| "epoch": 3.9264705882352944, |
| "grad_norm": 0.06600064138970108, |
| "learning_rate": 1.0717860023839424e-05, |
| "loss": 0.276, |
| "step": 267 |
| }, |
| { |
| "epoch": 3.9411764705882355, |
| "grad_norm": 0.06210716465473454, |
| "learning_rate": 1.0439643311173642e-05, |
| "loss": 0.2768, |
| "step": 268 |
| }, |
| { |
| "epoch": 3.9558823529411766, |
| "grad_norm": 0.06401894182204264, |
| "learning_rate": 1.0164542349542273e-05, |
| "loss": 0.2788, |
| "step": 269 |
| }, |
| { |
| "epoch": 3.9705882352941178, |
| "grad_norm": 0.06372281222639016, |
| "learning_rate": 9.892586135419022e-06, |
| "loss": 0.2777, |
| "step": 270 |
| }, |
| { |
| "epoch": 3.985294117647059, |
| "grad_norm": 0.05892561579737352, |
| "learning_rate": 9.623803333811713e-06, |
| "loss": 0.2771, |
| "step": 271 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.12086246389399692, |
| "learning_rate": 9.358222275240884e-06, |
| "loss": 0.2599, |
| "step": 272 |
| }, |
| { |
| "epoch": 4.014705882352941, |
| "grad_norm": 0.08602389508274347, |
| "learning_rate": 9.095870952753647e-06, |
| "loss": 0.2593, |
| "step": 273 |
| }, |
| { |
| "epoch": 4.029411764705882, |
| "grad_norm": 0.07197077150088015, |
| "learning_rate": 8.83677701897318e-06, |
| "loss": 0.262, |
| "step": 274 |
| }, |
| { |
| "epoch": 4.044117647058823, |
| "grad_norm": 0.0968580360252349, |
| "learning_rate": 8.580967783184055e-06, |
| "loss": 0.261, |
| "step": 275 |
| }, |
| { |
| "epoch": 4.0588235294117645, |
| "grad_norm": 0.09014393537599821, |
| "learning_rate": 8.328470208453683e-06, |
| "loss": 0.2622, |
| "step": 276 |
| }, |
| { |
| "epoch": 4.073529411764706, |
| "grad_norm": 0.0789945543394142, |
| "learning_rate": 8.07931090879042e-06, |
| "loss": 0.2577, |
| "step": 277 |
| }, |
| { |
| "epoch": 4.088235294117647, |
| "grad_norm": 0.08815856516590073, |
| "learning_rate": 7.833516146338329e-06, |
| "loss": 0.2617, |
| "step": 278 |
| }, |
| { |
| "epoch": 4.102941176470588, |
| "grad_norm": 0.08015266822256034, |
| "learning_rate": 7.591111828609059e-06, |
| "loss": 0.2641, |
| "step": 279 |
| }, |
| { |
| "epoch": 4.117647058823529, |
| "grad_norm": 0.08154629673882077, |
| "learning_rate": 7.3521235057511364e-06, |
| "loss": 0.2638, |
| "step": 280 |
| }, |
| { |
| "epoch": 4.132352941176471, |
| "grad_norm": 0.08290035992126818, |
| "learning_rate": 7.116576367856871e-06, |
| "loss": 0.2606, |
| "step": 281 |
| }, |
| { |
| "epoch": 4.147058823529412, |
| "grad_norm": 0.07674741527367407, |
| "learning_rate": 6.884495242307285e-06, |
| "loss": 0.2613, |
| "step": 282 |
| }, |
| { |
| "epoch": 4.161764705882353, |
| "grad_norm": 0.07292754427620451, |
| "learning_rate": 6.655904591155224e-06, |
| "loss": 0.2618, |
| "step": 283 |
| }, |
| { |
| "epoch": 4.176470588235294, |
| "grad_norm": 0.0736899911159241, |
| "learning_rate": 6.430828508546936e-06, |
| "loss": 0.2637, |
| "step": 284 |
| }, |
| { |
| "epoch": 4.1911764705882355, |
| "grad_norm": 0.07217050587422956, |
| "learning_rate": 6.209290718182539e-06, |
| "loss": 0.2615, |
| "step": 285 |
| }, |
| { |
| "epoch": 4.205882352941177, |
| "grad_norm": 0.07553232921098052, |
| "learning_rate": 5.991314570815441e-06, |
| "loss": 0.265, |
| "step": 286 |
| }, |
| { |
| "epoch": 4.220588235294118, |
| "grad_norm": 0.06886285952389541, |
| "learning_rate": 5.776923041791076e-06, |
| "loss": 0.2602, |
| "step": 287 |
| }, |
| { |
| "epoch": 4.235294117647059, |
| "grad_norm": 0.06173078424810273, |
| "learning_rate": 5.566138728625294e-06, |
| "loss": 0.2575, |
| "step": 288 |
| }, |
| { |
| "epoch": 4.25, |
| "grad_norm": 0.06592763687997258, |
| "learning_rate": 5.358983848622452e-06, |
| "loss": 0.2566, |
| "step": 289 |
| }, |
| { |
| "epoch": 4.264705882352941, |
| "grad_norm": 0.06767164583052036, |
| "learning_rate": 5.15548023653369e-06, |
| "loss": 0.2595, |
| "step": 290 |
| }, |
| { |
| "epoch": 4.279411764705882, |
| "grad_norm": 0.06893875689961178, |
| "learning_rate": 4.955649342255462e-06, |
| "loss": 0.2622, |
| "step": 291 |
| }, |
| { |
| "epoch": 4.294117647058823, |
| "grad_norm": 0.0627731194283305, |
| "learning_rate": 4.7595122285686215e-06, |
| "loss": 0.2605, |
| "step": 292 |
| }, |
| { |
| "epoch": 4.3088235294117645, |
| "grad_norm": 0.06183425105989223, |
| "learning_rate": 4.567089568918403e-06, |
| "loss": 0.262, |
| "step": 293 |
| }, |
| { |
| "epoch": 4.323529411764706, |
| "grad_norm": 0.08304190651695828, |
| "learning_rate": 4.3784016452353526e-06, |
| "loss": 0.2577, |
| "step": 294 |
| }, |
| { |
| "epoch": 4.338235294117647, |
| "grad_norm": 0.06162011756699104, |
| "learning_rate": 4.193468345797511e-06, |
| "loss": 0.2626, |
| "step": 295 |
| }, |
| { |
| "epoch": 4.352941176470588, |
| "grad_norm": 0.06077784990223975, |
| "learning_rate": 4.012309163134194e-06, |
| "loss": 0.2631, |
| "step": 296 |
| }, |
| { |
| "epoch": 4.367647058823529, |
| "grad_norm": 0.059344232351091784, |
| "learning_rate": 3.8349431919713655e-06, |
| "loss": 0.2606, |
| "step": 297 |
| }, |
| { |
| "epoch": 4.382352941176471, |
| "grad_norm": 0.053110563118761965, |
| "learning_rate": 3.6613891272190506e-06, |
| "loss": 0.2584, |
| "step": 298 |
| }, |
| { |
| "epoch": 4.397058823529412, |
| "grad_norm": 0.0550813714546073, |
| "learning_rate": 3.49166526200079e-06, |
| "loss": 0.2623, |
| "step": 299 |
| }, |
| { |
| "epoch": 4.411764705882353, |
| "grad_norm": 0.05497952308193732, |
| "learning_rate": 3.325789485725488e-06, |
| "loss": 0.2607, |
| "step": 300 |
| }, |
| { |
| "epoch": 4.426470588235294, |
| "grad_norm": 0.056505023992606394, |
| "learning_rate": 3.163779282201853e-06, |
| "loss": 0.2648, |
| "step": 301 |
| }, |
| { |
| "epoch": 4.4411764705882355, |
| "grad_norm": 0.055167426784272444, |
| "learning_rate": 3.0056517277955357e-06, |
| "loss": 0.2612, |
| "step": 302 |
| }, |
| { |
| "epoch": 4.455882352941177, |
| "grad_norm": 0.052033455176057224, |
| "learning_rate": 2.8514234896291904e-06, |
| "loss": 0.2617, |
| "step": 303 |
| }, |
| { |
| "epoch": 4.470588235294118, |
| "grad_norm": 0.05049118136446942, |
| "learning_rate": 2.7011108238257723e-06, |
| "loss": 0.2656, |
| "step": 304 |
| }, |
| { |
| "epoch": 4.485294117647059, |
| "grad_norm": 0.04973325645683792, |
| "learning_rate": 2.5547295737950475e-06, |
| "loss": 0.2651, |
| "step": 305 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 0.05470312702591553, |
| "learning_rate": 2.4122951685636674e-06, |
| "loss": 0.2647, |
| "step": 306 |
| }, |
| { |
| "epoch": 4.514705882352941, |
| "grad_norm": 0.04975562581567009, |
| "learning_rate": 2.2738226211489024e-06, |
| "loss": 0.2588, |
| "step": 307 |
| }, |
| { |
| "epoch": 4.529411764705882, |
| "grad_norm": 0.04990586736807332, |
| "learning_rate": 2.1393265269762197e-06, |
| "loss": 0.2628, |
| "step": 308 |
| }, |
| { |
| "epoch": 4.544117647058823, |
| "grad_norm": 0.048763186017272454, |
| "learning_rate": 2.008821062340891e-06, |
| "loss": 0.2621, |
| "step": 309 |
| }, |
| { |
| "epoch": 4.5588235294117645, |
| "grad_norm": 0.04772811522858683, |
| "learning_rate": 1.8823199829137406e-06, |
| "loss": 0.2604, |
| "step": 310 |
| }, |
| { |
| "epoch": 4.573529411764706, |
| "grad_norm": 0.04514011191017183, |
| "learning_rate": 1.7598366222912933e-06, |
| "loss": 0.2626, |
| "step": 311 |
| }, |
| { |
| "epoch": 4.588235294117647, |
| "grad_norm": 0.0447568436808381, |
| "learning_rate": 1.6413838905903556e-06, |
| "loss": 0.2567, |
| "step": 312 |
| }, |
| { |
| "epoch": 4.602941176470588, |
| "grad_norm": 0.04828872191610463, |
| "learning_rate": 1.5269742730872384e-06, |
| "loss": 0.2618, |
| "step": 313 |
| }, |
| { |
| "epoch": 4.617647058823529, |
| "grad_norm": 0.048070892531791296, |
| "learning_rate": 1.4166198289017952e-06, |
| "loss": 0.2624, |
| "step": 314 |
| }, |
| { |
| "epoch": 4.632352941176471, |
| "grad_norm": 0.0481947096054082, |
| "learning_rate": 1.3103321897263421e-06, |
| "loss": 0.2624, |
| "step": 315 |
| }, |
| { |
| "epoch": 4.647058823529412, |
| "grad_norm": 0.0498006739485343, |
| "learning_rate": 1.2081225585996248e-06, |
| "loss": 0.2594, |
| "step": 316 |
| }, |
| { |
| "epoch": 4.661764705882353, |
| "grad_norm": 0.046785729335567995, |
| "learning_rate": 1.1100017087260205e-06, |
| "loss": 0.2622, |
| "step": 317 |
| }, |
| { |
| "epoch": 4.676470588235294, |
| "grad_norm": 0.04518076179941004, |
| "learning_rate": 1.015979982339994e-06, |
| "loss": 0.2636, |
| "step": 318 |
| }, |
| { |
| "epoch": 4.6911764705882355, |
| "grad_norm": 0.04627910929165888, |
| "learning_rate": 9.260672896159728e-07, |
| "loss": 0.2603, |
| "step": 319 |
| }, |
| { |
| "epoch": 4.705882352941177, |
| "grad_norm": 0.04505892744233274, |
| "learning_rate": 8.402731076238191e-07, |
| "loss": 0.2606, |
| "step": 320 |
| }, |
| { |
| "epoch": 4.720588235294118, |
| "grad_norm": 0.04552316822622753, |
| "learning_rate": 7.586064793298998e-07, |
| "loss": 0.2593, |
| "step": 321 |
| }, |
| { |
| "epoch": 4.735294117647059, |
| "grad_norm": 0.045512369955118724, |
| "learning_rate": 6.810760126439287e-07, |
| "loss": 0.2654, |
| "step": 322 |
| }, |
| { |
| "epoch": 4.75, |
| "grad_norm": 0.0451772973693403, |
| "learning_rate": 6.076898795116792e-07, |
| "loss": 0.2621, |
| "step": 323 |
| }, |
| { |
| "epoch": 4.764705882352941, |
| "grad_norm": 0.044079852747311694, |
| "learning_rate": 5.384558150536201e-07, |
| "loss": 0.2615, |
| "step": 324 |
| }, |
| { |
| "epoch": 4.779411764705882, |
| "grad_norm": 0.04455441736236055, |
| "learning_rate": 4.7338111674962495e-07, |
| "loss": 0.2606, |
| "step": 325 |
| }, |
| { |
| "epoch": 4.794117647058823, |
| "grad_norm": 0.04405689466623866, |
| "learning_rate": 4.124726436697879e-07, |
| "loss": 0.2621, |
| "step": 326 |
| }, |
| { |
| "epoch": 4.8088235294117645, |
| "grad_norm": 0.044529538816460816, |
| "learning_rate": 3.557368157514596e-07, |
| "loss": 0.2646, |
| "step": 327 |
| }, |
| { |
| "epoch": 4.823529411764706, |
| "grad_norm": 0.04553425966459204, |
| "learning_rate": 3.031796131225706e-07, |
| "loss": 0.2625, |
| "step": 328 |
| }, |
| { |
| "epoch": 4.838235294117647, |
| "grad_norm": 0.04331163075687581, |
| "learning_rate": 2.548065754712914e-07, |
| "loss": 0.2592, |
| "step": 329 |
| }, |
| { |
| "epoch": 4.852941176470588, |
| "grad_norm": 0.045474788941501976, |
| "learning_rate": 2.1062280146215252e-07, |
| "loss": 0.2625, |
| "step": 330 |
| }, |
| { |
| "epoch": 4.867647058823529, |
| "grad_norm": 0.04615519000321619, |
| "learning_rate": 1.706329481986213e-07, |
| "loss": 0.2635, |
| "step": 331 |
| }, |
| { |
| "epoch": 4.882352941176471, |
| "grad_norm": 0.04427563364549322, |
| "learning_rate": 1.3484123073222332e-07, |
| "loss": 0.2637, |
| "step": 332 |
| }, |
| { |
| "epoch": 4.897058823529412, |
| "grad_norm": 0.04265147614885937, |
| "learning_rate": 1.0325142161827561e-07, |
| "loss": 0.2637, |
| "step": 333 |
| }, |
| { |
| "epoch": 4.911764705882353, |
| "grad_norm": 0.04295525693989313, |
| "learning_rate": 7.586685051823584e-08, |
| "loss": 0.2586, |
| "step": 334 |
| }, |
| { |
| "epoch": 4.926470588235294, |
| "grad_norm": 0.04243683788724124, |
| "learning_rate": 5.2690403848760785e-08, |
| "loss": 0.2603, |
| "step": 335 |
| }, |
| { |
| "epoch": 4.9411764705882355, |
| "grad_norm": 0.043654171470091575, |
| "learning_rate": 3.3724524477447564e-08, |
| "loss": 0.2622, |
| "step": 336 |
| }, |
| { |
| "epoch": 4.955882352941177, |
| "grad_norm": 0.04272527321797204, |
| "learning_rate": 1.897121146536396e-08, |
| "loss": 0.2542, |
| "step": 337 |
| }, |
| { |
| "epoch": 4.970588235294118, |
| "grad_norm": 0.04827925979026659, |
| "learning_rate": 8.432019856345896e-09, |
| "loss": 0.2582, |
| "step": 338 |
| }, |
| { |
| "epoch": 4.985294117647059, |
| "grad_norm": 0.042423900810093146, |
| "learning_rate": 2.1080605130752162e-09, |
| "loss": 0.2564, |
| "step": 339 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.07128915321863381, |
| "learning_rate": 0.0, |
| "loss": 0.2511, |
| "step": 340 |
| }, |
| { |
| "epoch": 5.0, |
| "step": 340, |
| "total_flos": 1.915314895847424e+16, |
| "train_loss": 0.3188589934040518, |
| "train_runtime": 19815.4609, |
| "train_samples_per_second": 8.67, |
| "train_steps_per_second": 0.017 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 340, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.915314895847424e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|