{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.5026548672566372,
  "eval_steps": 500,
  "global_step": 284,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 4.807692307692308e-06,
      "loss": 2.0125,
      "step": 1
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.615384615384616e-06,
      "loss": 1.9307,
      "step": 2
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4423076923076924e-05,
      "loss": 1.6104,
      "step": 3
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.923076923076923e-05,
      "loss": 1.9208,
      "step": 4
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.4038461538461542e-05,
      "loss": 1.7502,
      "step": 5
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.884615384615385e-05,
      "loss": 2.2112,
      "step": 6
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.365384615384615e-05,
      "loss": 1.5895,
      "step": 7
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.846153846153846e-05,
      "loss": 1.9856,
      "step": 8
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.3269230769230766e-05,
      "loss": 2.1565,
      "step": 9
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8076923076923084e-05,
      "loss": 1.6874,
      "step": 10
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.288461538461539e-05,
      "loss": 1.614,
      "step": 11
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.76923076923077e-05,
      "loss": 1.8226,
      "step": 12
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.25e-05,
      "loss": 1.4058,
      "step": 13
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.73076923076923e-05,
      "loss": 1.4717,
      "step": 14
    },
    {
      "epoch": 0.03,
      "learning_rate": 7.211538461538461e-05,
      "loss": 1.5335,
      "step": 15
    },
    {
      "epoch": 0.03,
      "learning_rate": 7.692307692307693e-05,
      "loss": 2.1125,
      "step": 16
    },
    {
      "epoch": 0.03,
      "learning_rate": 8.173076923076923e-05,
      "loss": 1.9451,
      "step": 17
    },
    {
      "epoch": 0.03,
      "learning_rate": 8.653846153846153e-05,
      "loss": 1.7484,
      "step": 18
    },
    {
      "epoch": 0.03,
      "learning_rate": 9.134615384615384e-05,
      "loss": 1.7573,
      "step": 19
    },
    {
      "epoch": 0.04,
      "learning_rate": 9.615384615384617e-05,
      "loss": 2.1968,
      "step": 20
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00010096153846153847,
      "loss": 1.7941,
      "step": 21
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00010576923076923077,
      "loss": 1.8685,
      "step": 22
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00011057692307692308,
      "loss": 2.0065,
      "step": 23
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0001153846153846154,
      "loss": 1.9018,
      "step": 24
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0001201923076923077,
      "loss": 2.0752,
      "step": 25
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.000125,
      "loss": 1.716,
      "step": 26
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00012980769230769233,
      "loss": 1.6542,
      "step": 27
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0001346153846153846,
      "loss": 1.7198,
      "step": 28
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00013942307692307694,
      "loss": 1.8383,
      "step": 29
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00014423076923076922,
      "loss": 1.6938,
      "step": 30
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00014903846153846155,
      "loss": 1.9142,
      "step": 31
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00015384615384615385,
      "loss": 1.7715,
      "step": 32
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00015865384615384616,
      "loss": 1.467,
      "step": 33
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00016346153846153846,
      "loss": 1.7608,
      "step": 34
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0001682692307692308,
      "loss": 1.5371,
      "step": 35
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00017307692307692307,
      "loss": 1.6211,
      "step": 36
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0001778846153846154,
      "loss": 1.7275,
      "step": 37
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00018269230769230767,
      "loss": 1.7063,
      "step": 38
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0001875,
      "loss": 1.9367,
      "step": 39
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00019230769230769233,
      "loss": 1.6608,
      "step": 40
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0001971153846153846,
      "loss": 1.9212,
      "step": 41
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00020192307692307694,
      "loss": 1.611,
      "step": 42
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00020673076923076922,
      "loss": 1.8582,
      "step": 43
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00021153846153846155,
      "loss": 1.5658,
      "step": 44
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00021634615384615385,
      "loss": 1.8543,
      "step": 45
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00022115384615384616,
      "loss": 1.7291,
      "step": 46
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00022596153846153846,
      "loss": 1.9411,
      "step": 47
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0002307692307692308,
      "loss": 1.8138,
      "step": 48
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00023557692307692307,
      "loss": 1.708,
      "step": 49
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0002403846153846154,
      "loss": 1.675,
      "step": 50
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0002451923076923077,
      "loss": 1.658,
      "step": 51
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00025,
      "loss": 1.9797,
      "step": 52
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.000249997656075194,
      "loss": 1.5857,
      "step": 53
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002499906243886798,
      "loss": 1.8837,
      "step": 54
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00024997890520416535,
      "loss": 1.8022,
      "step": 55
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002499624989611527,
      "loss": 1.9101,
      "step": 56
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00024994140627492207,
      "loss": 1.5614,
      "step": 57
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00024991562793650793,
      "loss": 1.8988,
      "step": 58
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002498851649126703,
      "loss": 1.7089,
      "step": 59
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00024985001834585763,
      "loss": 1.7782,
      "step": 60
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0002498101895541645,
      "loss": 1.5338,
      "step": 61
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0002497656800312821,
      "loss": 1.6484,
      "step": 62
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0002497164914464419,
      "loss": 1.733,
      "step": 63
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00024966262564435343,
      "loss": 1.8893,
      "step": 64
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.000249604084645135,
      "loss": 1.6361,
      "step": 65
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0002495408706442377,
      "loss": 1.9827,
      "step": 66
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00024947298601236343,
      "loss": 1.6801,
      "step": 67
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0002494004332953758,
      "loss": 1.9678,
      "step": 68
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00024932321521420456,
      "loss": 1.9245,
      "step": 69
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0002492413346647437,
      "loss": 1.5254,
      "step": 70
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00024915479471774286,
      "loss": 1.7283,
      "step": 71
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00024906359861869216,
      "loss": 1.9968,
      "step": 72
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0002489677497877003,
      "loss": 1.6559,
      "step": 73
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0002488672518193665,
      "loss": 1.5518,
      "step": 74
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0002487621084826458,
      "loss": 1.6201,
      "step": 75
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0002486523237207072,
      "loss": 1.7592,
      "step": 76
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00024853790165078654,
      "loss": 1.5929,
      "step": 77
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0002484188465640313,
      "loss": 1.4261,
      "step": 78
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0002482951629253403,
      "loss": 1.8929,
      "step": 79
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0002481668553731959,
      "loss": 1.5752,
      "step": 80
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00024803392871949013,
      "loss": 1.8596,
      "step": 81
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00024789638794934436,
      "loss": 1.838,
      "step": 82
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00024775423822092214,
      "loss": 1.7938,
      "step": 83
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002476074848652358,
      "loss": 1.6448,
      "step": 84
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002474561333859467,
      "loss": 1.7674,
      "step": 85
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00024730018945915864,
      "loss": 1.6526,
      "step": 86
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.000247139658933205,
      "loss": 1.6921,
      "step": 87
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00024697454782842944,
      "loss": 1.8208,
      "step": 88
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0002468048623369603,
      "loss": 1.9528,
      "step": 89
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00024663060882247796,
      "loss": 1.8188,
      "step": 90
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00024645179381997673,
      "loss": 1.921,
      "step": 91
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00024626842403551927,
      "loss": 1.9332,
      "step": 92
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0002460805063459853,
      "loss": 1.967,
      "step": 93
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00024588804779881383,
      "loss": 1.4963,
      "step": 94
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00024569105561173866,
      "loss": 1.6944,
      "step": 95
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00024548953717251783,
      "loss": 1.9083,
      "step": 96
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0002452835000386563,
      "loss": 1.7368,
      "step": 97
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.000245072951937123,
      "loss": 1.6183,
      "step": 98
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00024485790076406047,
      "loss": 1.7917,
      "step": 99
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00024463835458448925,
      "loss": 2.1032,
      "step": 100
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0002444143216320052,
      "loss": 1.8451,
      "step": 101
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0002441858103084705,
      "loss": 1.7479,
      "step": 102
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.000243952829183699,
      "loss": 1.8158,
      "step": 103
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00024371538699513443,
      "loss": 1.9275,
      "step": 104
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00024347349264752303,
      "loss": 1.7759,
      "step": 105
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00024322715521257933,
      "loss": 2.0328,
      "step": 106
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00024297638392864617,
      "loss": 1.8839,
      "step": 107
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00024272118820034804,
      "loss": 1.8443,
      "step": 108
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00024246157759823855,
      "loss": 1.9779,
      "step": 109
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00024219756185844132,
      "loss": 1.6495,
      "step": 110
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00024192915088228512,
      "loss": 1.6645,
      "step": 111
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00024165635473593215,
      "loss": 1.4214,
      "step": 112
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00024137918365000095,
      "loss": 1.6603,
      "step": 113
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00024109764801918244,
      "loss": 1.4227,
      "step": 114
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0002408117584018502,
      "loss": 1.3033,
      "step": 115
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00024052152551966457,
      "loss": 2.0312,
      "step": 116
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00024022696025717023,
      "loss": 1.8867,
      "step": 117
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00023992807366138847,
      "loss": 1.6906,
      "step": 118
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00023962487694140263,
      "loss": 1.595,
      "step": 119
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00023931738146793763,
      "loss": 1.5583,
      "step": 120
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00023900559877293383,
      "loss": 1.9068,
      "step": 121
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00023868954054911428,
      "loss": 1.4536,
      "step": 122
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00023836921864954635,
      "loss": 1.7105,
      "step": 123
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002380446450871972,
      "loss": 1.4355,
      "step": 124
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00023771583203448322,
      "loss": 1.7782,
      "step": 125
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00023738279182281352,
      "loss": 1.9977,
      "step": 126
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00023704553694212752,
      "loss": 1.5658,
      "step": 127
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00023670408004042653,
      "loss": 1.5389,
      "step": 128
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00023635843392329938,
      "loss": 1.7435,
      "step": 129
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00023600861155344223,
      "loss": 1.5706,
      "step": 130
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00023565462605017228,
      "loss": 1.8396,
      "step": 131
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00023529649068893598,
      "loss": 1.8073,
      "step": 132
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00023493421890081112,
      "loss": 1.9954,
      "step": 133
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00023456782427200295,
      "loss": 1.9553,
      "step": 134
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0002341973205433348,
      "loss": 1.2249,
      "step": 135
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0002338227216097328,
      "loss": 1.8021,
      "step": 136
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00023344404151970464,
      "loss": 1.8086,
      "step": 137
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00023306129447481282,
      "loss": 1.4283,
      "step": 138
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00023267449482914203,
      "loss": 1.8477,
      "step": 139
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0002322836570887608,
      "loss": 1.3233,
      "step": 140
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0002318887959111776,
      "loss": 1.7511,
      "step": 141
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0002314899261047909,
      "loss": 1.9737,
      "step": 142
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00023108706262833407,
      "loss": 1.6347,
      "step": 143
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00023068022059031425,
      "loss": 1.9142,
      "step": 144
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00023026941524844572,
      "loss": 1.7408,
      "step": 145
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00022985466200907783,
      "loss": 1.3576,
      "step": 146
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00022943597642661705,
      "loss": 1.6058,
      "step": 147
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00022901337420294378,
      "loss": 1.6575,
      "step": 148
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0002285868711868233,
      "loss": 1.3605,
      "step": 149
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00022815648337331168,
      "loss": 1.4417,
      "step": 150
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00022772222690315563,
      "loss": 1.684,
      "step": 151
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0002272841180621874,
      "loss": 1.8368,
      "step": 152
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00022684217328071383,
      "loss": 1.6624,
      "step": 153
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00022639640913290027,
      "loss": 1.8384,
      "step": 154
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00022594684233614908,
      "loss": 1.4881,
      "step": 155
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00022549348975047252,
      "loss": 1.7802,
      "step": 156
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00022503636837786052,
      "loss": 1.6981,
      "step": 157
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00022457549536164306,
      "loss": 1.6845,
      "step": 158
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00022411088798584728,
      "loss": 1.8524,
      "step": 159
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00022364256367454923,
      "loss": 1.7893,
      "step": 160
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00022317053999122038,
      "loss": 1.5371,
      "step": 161
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00022269483463806917,
      "loss": 1.7511,
      "step": 162
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00022221546545537674,
      "loss": 1.4536,
      "step": 163
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00022173245042082822,
      "loss": 1.7809,
      "step": 164
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002212458076488384,
      "loss": 1.7447,
      "step": 165
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00022075555538987224,
      "loss": 2.1818,
      "step": 166
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0002202617120297607,
      "loss": 1.8901,
      "step": 167
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00021976429608901093,
      "loss": 1.6585,
      "step": 168
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00021926332622211205,
      "loss": 1.9877,
      "step": 169
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0002187588212168351,
      "loss": 1.4926,
      "step": 170
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00021825079999352893,
      "loss": 1.7696,
      "step": 171
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0002177392816044102,
      "loss": 1.9484,
      "step": 172
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00021722428523284927,
      "loss": 1.6379,
      "step": 173
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00021670583019265034,
      "loss": 1.4783,
      "step": 174
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0002161839359273276,
      "loss": 1.6047,
      "step": 175
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00021565862200937565,
      "loss": 1.6504,
      "step": 176
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00021512990813953562,
      "loss": 1.6248,
      "step": 177
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00021459781414605642,
      "loss": 2.0182,
      "step": 178
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.000214062359983951,
      "loss": 1.7979,
      "step": 179
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00021352356573424807,
      "loss": 1.7023,
      "step": 180
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00021298145160323896,
      "loss": 1.6911,
      "step": 181
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00021243603792171976,
      "loss": 1.8985,
      "step": 182
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00021188734514422902,
      "loss": 2.1085,
      "step": 183
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00021133539384828054,
      "loss": 1.8299,
      "step": 184
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00021078020473359172,
      "loss": 1.8975,
      "step": 185
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00021022179862130704,
      "loss": 1.5791,
      "step": 186
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00020966019645321765,
      "loss": 1.7475,
      "step": 187
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.0002090954192909755,
      "loss": 1.6978,
      "step": 188
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00020852748831530382,
      "loss": 1.7039,
      "step": 189
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00020795642482520266,
      "loss": 1.5206,
      "step": 190
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00020738225023715013,
      "loss": 1.6684,
      "step": 191
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00020680498608429914,
      "loss": 1.7339,
      "step": 192
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00020622465401566999,
      "loss": 1.6174,
      "step": 193
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00020564127579533831,
      "loss": 1.6451,
      "step": 194
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00020505487330161915,
      "loss": 1.6214,
      "step": 195
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00020446546852624604,
      "loss": 1.7493,
      "step": 196
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00020387308357354655,
      "loss": 1.7067,
      "step": 197
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0002032777406596133,
      "loss": 1.6918,
      "step": 198
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00020267946211147058,
      "loss": 1.9448,
      "step": 199
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00020207827036623744,
      "loss": 1.8517,
      "step": 200
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002014741879702856,
      "loss": 2.0169,
      "step": 201
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002008672375783946,
      "loss": 2.1721,
      "step": 202
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00020025744195290167,
      "loss": 1.5175,
      "step": 203
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00019964482396284827,
      "loss": 1.6105,
      "step": 204
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0001990294065831225,
      "loss": 1.9778,
      "step": 205
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00019841121289359737,
      "loss": 1.6379,
      "step": 206
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0001977902660782652,
      "loss": 1.7973,
      "step": 207
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00019716658942436834,
      "loss": 1.6022,
      "step": 208
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00019654020632152563,
      "loss": 1.4371,
      "step": 209
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00019591114026085537,
      "loss": 1.8489,
      "step": 210
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0001952794148340943,
      "loss": 1.6484,
      "step": 211
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00019464505373271274,
      "loss": 1.841,
      "step": 212
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00019400808074702624,
      "loss": 1.4165,
      "step": 213
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00019336851976530338,
      "loss": 1.8259,
      "step": 214
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0001927263947728697,
      "loss": 1.8661,
      "step": 215
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00019208172985120837,
      "loss": 1.4957,
      "step": 216
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.000191434549177057,
      "loss": 1.3717,
      "step": 217
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00019078487702150102,
      "loss": 1.5568,
      "step": 218
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.0001901327377490633,
      "loss": 1.512,
      "step": 219
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00018947815581679052,
      "loss": 1.7289,
      "step": 220
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00018882115577333592,
      "loss": 1.6628,
      "step": 221
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00018816176225803876,
      "loss": 1.8897,
      "step": 222
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.0001875,
      "loss": 1.2269,
      "step": 223
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00018683589381715532,
      "loss": 1.9278,
      "step": 224
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00018616946861534396,
      "loss": 1.5643,
      "step": 225
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0001855007493873749,
      "loss": 1.8058,
      "step": 226
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0001848297612120895,
      "loss": 1.6883,
      "step": 227
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00018415652925342105,
      "loss": 1.4985,
      "step": 228
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.000183481078759451,
      "loss": 1.9467,
      "step": 229
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00018280343506146197,
      "loss": 1.6599,
      "step": 230
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00018212362357298797,
      "loss": 1.6791,
      "step": 231
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0001814416697888612,
      "loss": 2.1714,
      "step": 232
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00018075759928425582,
      "loss": 1.6677,
      "step": 233
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00018007143771372916,
      "loss": 1.9834,
      "step": 234
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00017938321081025917,
      "loss": 1.8803,
      "step": 235
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00017869294438427964,
      "loss": 1.8268,
      "step": 236
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.0001780006643227121,
      "loss": 1.9126,
      "step": 237
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00017730639658799512,
      "loss": 1.3397,
      "step": 238
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00017661016721711063,
      "loss": 1.5539,
      "step": 239
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00017591200232060719,
      "loss": 1.8692,
      "step": 240
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0001752119280816212,
      "loss": 1.5348,
      "step": 241
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00017450997075489462,
      "loss": 1.9957,
      "step": 242
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00017380615666579054,
      "loss": 1.7454,
      "step": 243
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00017310051220930574,
      "loss": 1.8623,
      "step": 244
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00017239306384908096,
      "loss": 1.9291,
      "step": 245
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00017168383811640842,
      "loss": 1.6059,
      "step": 246
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00017097286160923668,
      "loss": 1.8271,
      "step": 247
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0001702601609911733,
      "loss": 1.8032,
      "step": 248
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0001695457629904848,
      "loss": 1.6174,
      "step": 249
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00016882969439909433,
      "loss": 1.4768,
      "step": 250
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0001681119820715768,
      "loss": 1.9159,
      "step": 251
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00016739265292415185,
      "loss": 1.5289,
      "step": 252
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00016667173393367446,
      "loss": 1.8026,
      "step": 253
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00016594925213662303,
      "loss": 1.6027,
      "step": 254
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00016522523462808572,
      "loss": 1.8634,
      "step": 255
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0001644997085607441,
      "loss": 1.7965,
      "step": 256
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00016377270114385485,
      "loss": 1.8084,
      "step": 257
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0001630442396422295,
      "loss": 1.5158,
      "step": 258
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00016231435137521182,
      "loss": 1.8523,
      "step": 259
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00016158306371565322,
      "loss": 1.2613,
      "step": 260
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0001608504040888863,
      "loss": 1.6135,
      "step": 261
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0001601163999716962,
      "loss": 1.4411,
      "step": 262
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00015938107889129023,
      "loss": 1.7133,
      "step": 263
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00015864446842426554,
      "loss": 1.5927,
      "step": 264
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0001579065961955749,
      "loss": 1.5994,
      "step": 265
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00015716748987749065,
      "loss": 1.6315,
      "step": 266
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0001564271771885668,
      "loss": 2.0437,
      "step": 267
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0001556856858925999,
      "loss": 1.9095,
      "step": 268
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00015494304379758735,
      "loss": 1.5502,
      "step": 269
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00015419927875468485,
      "loss": 1.4151,
      "step": 270
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0001534544186571617,
      "loss": 1.6274,
      "step": 271
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00015270849143935483,
      "loss": 1.6849,
      "step": 272
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00015196152507562127,
      "loss": 1.9244,
      "step": 273
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0001512135475792888,
      "loss": 2.0862,
      "step": 274
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00015046458700160553,
      "loss": 1.7078,
      "step": 275
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00014971467143068791,
      "loss": 1.4565,
      "step": 276
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.0001489638289904673,
      "loss": 1.8075,
      "step": 277
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00014821208783963522,
      "loss": 2.2061,
      "step": 278
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00014745947617058735,
      "loss": 1.6676,
      "step": 279
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00014670602220836633,
      "loss": 2.0041,
      "step": 280
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00014595175420960293,
      "loss": 1.5538,
      "step": 281
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00014519670046145685,
      "loss": 1.5781,
      "step": 282
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0001444408892805554,
      "loss": 1.703,
      "step": 283
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0001436843490119318,
      "loss": 1.8105,
      "step": 284
    }
  ],
  "logging_steps": 1,
  "max_steps": 565,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 142,
  "total_flos": 2.627459909913936e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
|
|