| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0182835197862747, |
| "eval_steps": 500, |
| "global_step": 763, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 2e-08, |
| "loss": 1.1427, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4e-08, |
| "loss": 1.1555, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6.000000000000001e-08, |
| "loss": 1.1633, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 8e-08, |
| "loss": 1.1405, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 1e-07, |
| "loss": 1.1607, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.2000000000000002e-07, |
| "loss": 1.1587, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.4e-07, |
| "loss": 1.1463, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.6e-07, |
| "loss": 1.1764, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.8e-07, |
| "loss": 1.1509, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 2e-07, |
| "loss": 1.1516, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 2.2e-07, |
| "loss": 1.1589, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 2.4000000000000003e-07, |
| "loss": 1.1416, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 2.6e-07, |
| "loss": 1.1474, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 2.8e-07, |
| "loss": 1.1785, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 3e-07, |
| "loss": 1.1259, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 3.2e-07, |
| "loss": 1.1578, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 3.4e-07, |
| "loss": 1.1406, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 3.6e-07, |
| "loss": 1.1533, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 3.7999999999999996e-07, |
| "loss": 1.1593, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4e-07, |
| "loss": 1.1401, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.2e-07, |
| "loss": 1.1595, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.4e-07, |
| "loss": 1.1482, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.6e-07, |
| "loss": 1.1682, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.800000000000001e-07, |
| "loss": 1.1453, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 5e-07, |
| "loss": 1.1329, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 5.2e-07, |
| "loss": 1.1488, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 5.399999999999999e-07, |
| "loss": 1.1528, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 5.6e-07, |
| "loss": 1.1463, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 5.8e-07, |
| "loss": 1.1445, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 6e-07, |
| "loss": 1.1578, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 6.2e-07, |
| "loss": 1.1523, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 6.4e-07, |
| "loss": 1.1547, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 6.6e-07, |
| "loss": 1.1309, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 6.8e-07, |
| "loss": 1.1249, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 1.1774, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 7.2e-07, |
| "loss": 1.124, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 7.4e-07, |
| "loss": 1.1485, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 7.599999999999999e-07, |
| "loss": 1.1442, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 7.8e-07, |
| "loss": 1.1106, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 8e-07, |
| "loss": 1.1494, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 8.2e-07, |
| "loss": 1.1084, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 8.4e-07, |
| "loss": 1.1244, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 8.6e-07, |
| "loss": 1.1482, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 8.8e-07, |
| "loss": 1.1164, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 8.999999999999999e-07, |
| "loss": 1.1382, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 9.2e-07, |
| "loss": 1.131, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 9.4e-07, |
| "loss": 1.1241, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 9.600000000000001e-07, |
| "loss": 1.1159, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 9.800000000000001e-07, |
| "loss": 1.1291, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1e-06, |
| "loss": 1.1361, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.02e-06, |
| "loss": 1.126, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.04e-06, |
| "loss": 1.1184, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.06e-06, |
| "loss": 1.1193, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.0799999999999998e-06, |
| "loss": 1.1187, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.1e-06, |
| "loss": 1.1145, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.12e-06, |
| "loss": 1.1019, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.14e-06, |
| "loss": 1.1157, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.16e-06, |
| "loss": 1.1024, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.18e-06, |
| "loss": 1.098, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.2e-06, |
| "loss": 1.0946, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.22e-06, |
| "loss": 1.0818, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.24e-06, |
| "loss": 1.121, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.26e-06, |
| "loss": 1.09, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.28e-06, |
| "loss": 1.0958, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.3e-06, |
| "loss": 1.0888, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.32e-06, |
| "loss": 1.0793, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.3399999999999999e-06, |
| "loss": 1.0929, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.36e-06, |
| "loss": 1.0685, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.38e-06, |
| "loss": 1.0888, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 1.0688, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.42e-06, |
| "loss": 1.0627, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.44e-06, |
| "loss": 1.0627, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.46e-06, |
| "loss": 1.0858, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.48e-06, |
| "loss": 1.0517, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.4999999999999998e-06, |
| "loss": 1.0626, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.5199999999999998e-06, |
| "loss": 1.0708, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.54e-06, |
| "loss": 1.0391, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.56e-06, |
| "loss": 1.0436, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.58e-06, |
| "loss": 1.0396, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.6e-06, |
| "loss": 1.0334, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.62e-06, |
| "loss": 1.0291, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.64e-06, |
| "loss": 1.0421, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.66e-06, |
| "loss": 1.0493, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.68e-06, |
| "loss": 1.0293, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7e-06, |
| "loss": 1.0766, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.72e-06, |
| "loss": 1.058, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.74e-06, |
| "loss": 1.0244, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.76e-06, |
| "loss": 1.0399, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.78e-06, |
| "loss": 1.0272, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7999999999999997e-06, |
| "loss": 1.029, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.82e-06, |
| "loss": 1.0477, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.84e-06, |
| "loss": 1.0166, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.8600000000000002e-06, |
| "loss": 1.0309, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.88e-06, |
| "loss": 1.0135, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.8999999999999998e-06, |
| "loss": 1.0226, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.9200000000000003e-06, |
| "loss": 1.0402, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.94e-06, |
| "loss": 1.0252, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.9600000000000003e-06, |
| "loss": 1.0064, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.98e-06, |
| "loss": 0.9967, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 2e-06, |
| "loss": 1.0094, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 2.02e-06, |
| "loss": 0.9938, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 2.04e-06, |
| "loss": 1.0199, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 2.0599999999999998e-06, |
| "loss": 1.005, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 2.08e-06, |
| "loss": 1.0016, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 2.1e-06, |
| "loss": 1.0169, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 2.12e-06, |
| "loss": 0.978, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 2.14e-06, |
| "loss": 0.9787, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 2.1599999999999996e-06, |
| "loss": 0.9992, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 2.18e-06, |
| "loss": 0.985, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 2.2e-06, |
| "loss": 0.9832, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 2.22e-06, |
| "loss": 0.9922, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 2.24e-06, |
| "loss": 0.9882, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 2.26e-06, |
| "loss": 0.979, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 2.28e-06, |
| "loss": 0.9866, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 2.3e-06, |
| "loss": 0.97, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 2.32e-06, |
| "loss": 1.0042, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 2.34e-06, |
| "loss": 0.9778, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 2.36e-06, |
| "loss": 0.9906, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 2.38e-06, |
| "loss": 0.9547, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 2.4e-06, |
| "loss": 0.9792, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 2.4199999999999997e-06, |
| "loss": 0.964, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 2.44e-06, |
| "loss": 0.9797, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 2.4599999999999997e-06, |
| "loss": 0.9576, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 2.48e-06, |
| "loss": 0.9795, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 2.5e-06, |
| "loss": 0.9619, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 2.52e-06, |
| "loss": 0.9756, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 2.5400000000000002e-06, |
| "loss": 0.9678, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 2.56e-06, |
| "loss": 0.973, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 2.58e-06, |
| "loss": 0.9581, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 2.6e-06, |
| "loss": 0.9559, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 2.62e-06, |
| "loss": 0.9623, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 2.64e-06, |
| "loss": 0.9608, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 2.66e-06, |
| "loss": 0.9599, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 2.6799999999999998e-06, |
| "loss": 0.9481, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 2.7e-06, |
| "loss": 0.951, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 2.72e-06, |
| "loss": 0.9553, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 2.7399999999999996e-06, |
| "loss": 0.9471, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 2.76e-06, |
| "loss": 0.9228, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 2.78e-06, |
| "loss": 0.9411, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 0.9541, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 2.82e-06, |
| "loss": 0.9318, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 2.84e-06, |
| "loss": 0.9113, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 2.86e-06, |
| "loss": 0.9316, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 2.88e-06, |
| "loss": 0.9326, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 2.9e-06, |
| "loss": 0.9471, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 2.92e-06, |
| "loss": 0.9234, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 2.94e-06, |
| "loss": 0.9384, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 2.96e-06, |
| "loss": 0.9317, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 2.98e-06, |
| "loss": 0.9157, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 2.9999999999999997e-06, |
| "loss": 0.9411, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 3.02e-06, |
| "loss": 0.9566, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 3.0399999999999997e-06, |
| "loss": 0.9443, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 3.06e-06, |
| "loss": 0.9215, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 3.08e-06, |
| "loss": 0.9129, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 3.1e-06, |
| "loss": 0.9175, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 3.12e-06, |
| "loss": 0.9309, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 3.14e-06, |
| "loss": 0.9185, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 3.16e-06, |
| "loss": 0.9196, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 3.18e-06, |
| "loss": 0.9038, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 3.2e-06, |
| "loss": 0.9205, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 3.22e-06, |
| "loss": 0.9308, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 3.24e-06, |
| "loss": 0.939, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 3.2599999999999997e-06, |
| "loss": 0.9211, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 3.28e-06, |
| "loss": 0.9214, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 3.2999999999999997e-06, |
| "loss": 0.9243, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 3.32e-06, |
| "loss": 0.9098, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 3.3399999999999998e-06, |
| "loss": 0.9223, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 3.36e-06, |
| "loss": 0.9213, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 3.3800000000000002e-06, |
| "loss": 0.9059, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 3.4e-06, |
| "loss": 0.8938, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 3.42e-06, |
| "loss": 0.9139, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 3.44e-06, |
| "loss": 0.898, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 3.46e-06, |
| "loss": 0.9103, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 3.48e-06, |
| "loss": 0.9042, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 3.5e-06, |
| "loss": 0.8833, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 3.52e-06, |
| "loss": 0.8933, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 3.5399999999999996e-06, |
| "loss": 0.9022, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 3.56e-06, |
| "loss": 0.9141, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 3.58e-06, |
| "loss": 0.9089, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 3.5999999999999994e-06, |
| "loss": 0.9007, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 3.6199999999999996e-06, |
| "loss": 0.8985, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 3.64e-06, |
| "loss": 0.8977, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 3.66e-06, |
| "loss": 0.8996, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 3.68e-06, |
| "loss": 0.8999, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 3.7e-06, |
| "loss": 0.9141, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 3.7200000000000004e-06, |
| "loss": 0.8918, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 3.7399999999999998e-06, |
| "loss": 0.9008, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 3.76e-06, |
| "loss": 0.9021, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 3.7800000000000002e-06, |
| "loss": 0.8951, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 3.7999999999999996e-06, |
| "loss": 0.9089, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 3.82e-06, |
| "loss": 0.8966, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 3.8400000000000005e-06, |
| "loss": 0.8937, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 3.8599999999999995e-06, |
| "loss": 0.8969, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 3.88e-06, |
| "loss": 0.8719, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 3.9e-06, |
| "loss": 0.884, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 3.920000000000001e-06, |
| "loss": 0.8809, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 3.9399999999999995e-06, |
| "loss": 0.8711, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 3.96e-06, |
| "loss": 0.877, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 3.98e-06, |
| "loss": 0.8892, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4e-06, |
| "loss": 0.8781, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.02e-06, |
| "loss": 0.8697, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.04e-06, |
| "loss": 0.8745, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.059999999999999e-06, |
| "loss": 0.8738, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.08e-06, |
| "loss": 0.8804, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.1000000000000006e-06, |
| "loss": 0.8843, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.1199999999999995e-06, |
| "loss": 0.8736, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.14e-06, |
| "loss": 0.8689, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.16e-06, |
| "loss": 0.8614, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.180000000000001e-06, |
| "loss": 0.866, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.2e-06, |
| "loss": 0.8598, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.22e-06, |
| "loss": 0.878, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.24e-06, |
| "loss": 0.8624, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.26e-06, |
| "loss": 0.8788, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.28e-06, |
| "loss": 0.8713, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.3e-06, |
| "loss": 0.8682, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.319999999999999e-06, |
| "loss": 0.8827, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.34e-06, |
| "loss": 0.8754, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.36e-06, |
| "loss": 0.8867, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 4.3799999999999996e-06, |
| "loss": 0.8707, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 4.4e-06, |
| "loss": 0.8655, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 4.42e-06, |
| "loss": 0.8503, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 4.44e-06, |
| "loss": 0.8674, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 4.46e-06, |
| "loss": 0.8507, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 4.48e-06, |
| "loss": 0.8668, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 4.5e-06, |
| "loss": 0.8548, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 4.52e-06, |
| "loss": 0.8756, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 4.54e-06, |
| "loss": 0.8424, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 4.56e-06, |
| "loss": 0.8501, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 4.579999999999999e-06, |
| "loss": 0.8633, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 4.6e-06, |
| "loss": 0.859, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 4.62e-06, |
| "loss": 0.8497, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 4.64e-06, |
| "loss": 0.8491, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 4.6599999999999994e-06, |
| "loss": 0.8596, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.68e-06, |
| "loss": 0.8636, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.7e-06, |
| "loss": 0.8677, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.72e-06, |
| "loss": 0.8605, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.74e-06, |
| "loss": 0.8493, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 4.76e-06, |
| "loss": 0.8367, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 4.78e-06, |
| "loss": 0.8443, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 4.8e-06, |
| "loss": 0.8464, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 4.8200000000000004e-06, |
| "loss": 0.8588, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 4.839999999999999e-06, |
| "loss": 0.8502, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 4.86e-06, |
| "loss": 0.849, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 4.88e-06, |
| "loss": 0.8576, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 4.9e-06, |
| "loss": 0.8418, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 4.9199999999999995e-06, |
| "loss": 0.8717, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 4.94e-06, |
| "loss": 0.8529, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 4.96e-06, |
| "loss": 0.8328, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 4.98e-06, |
| "loss": 0.85, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5e-06, |
| "loss": 0.8376, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.02e-06, |
| "loss": 0.8431, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.04e-06, |
| "loss": 0.8446, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 5.06e-06, |
| "loss": 0.8424, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 5.0800000000000005e-06, |
| "loss": 0.8377, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 5.0999999999999995e-06, |
| "loss": 0.8453, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 5.12e-06, |
| "loss": 0.8591, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 5.14e-06, |
| "loss": 0.8536, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 5.16e-06, |
| "loss": 0.8399, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 5.1799999999999995e-06, |
| "loss": 0.8259, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 5.2e-06, |
| "loss": 0.8399, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 5.219999999999999e-06, |
| "loss": 0.8443, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 5.24e-06, |
| "loss": 0.8331, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 5.2600000000000005e-06, |
| "loss": 0.8268, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 5.28e-06, |
| "loss": 0.8338, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 5.3e-06, |
| "loss": 0.8287, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 5.32e-06, |
| "loss": 0.8273, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 5.3400000000000005e-06, |
| "loss": 0.8471, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 5.3599999999999995e-06, |
| "loss": 0.8364, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 5.38e-06, |
| "loss": 0.8167, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 5.4e-06, |
| "loss": 0.8433, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 5.42e-06, |
| "loss": 0.8163, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 5.44e-06, |
| "loss": 0.8186, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 5.46e-06, |
| "loss": 0.8491, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 5.479999999999999e-06, |
| "loss": 0.8222, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 5.5e-06, |
| "loss": 0.8362, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 5.52e-06, |
| "loss": 0.8375, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 5.54e-06, |
| "loss": 0.7934, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 5.56e-06, |
| "loss": 0.8096, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 5.58e-06, |
| "loss": 0.8185, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 5.600000000000001e-06, |
| "loss": 0.8126, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 5.6199999999999996e-06, |
| "loss": 0.8297, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 5.64e-06, |
| "loss": 0.8196, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 5.66e-06, |
| "loss": 0.8251, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 5.68e-06, |
| "loss": 0.8291, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 5.7e-06, |
| "loss": 0.8067, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 5.72e-06, |
| "loss": 0.8171, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 5.739999999999999e-06, |
| "loss": 0.8091, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 5.76e-06, |
| "loss": 0.8149, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 5.78e-06, |
| "loss": 0.8393, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 5.8e-06, |
| "loss": 0.8157, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 5.819999999999999e-06, |
| "loss": 0.8043, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 5.84e-06, |
| "loss": 0.8166, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 5.860000000000001e-06, |
| "loss": 0.8022, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 5.88e-06, |
| "loss": 0.7949, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 5.9e-06, |
| "loss": 0.8102, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 5.92e-06, |
| "loss": 0.81, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 5.94e-06, |
| "loss": 0.8104, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 5.96e-06, |
| "loss": 0.8089, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 5.98e-06, |
| "loss": 0.824, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 5.999999999999999e-06, |
| "loss": 0.8145, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 6.02e-06, |
| "loss": 0.8085, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 6.04e-06, |
| "loss": 0.8183, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 6.0600000000000004e-06, |
| "loss": 0.8114, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 6.079999999999999e-06, |
| "loss": 0.8112, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 6.1e-06, |
| "loss": 0.8308, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 6.12e-06, |
| "loss": 0.8303, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 6.14e-06, |
| "loss": 0.8287, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 6.16e-06, |
| "loss": 0.7957, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 6.18e-06, |
| "loss": 0.817, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 6.2e-06, |
| "loss": 0.8209, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 6.22e-06, |
| "loss": 0.8019, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 6.24e-06, |
| "loss": 0.8099, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 6.259999999999999e-06, |
| "loss": 0.8275, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 6.28e-06, |
| "loss": 0.814, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 6.3e-06, |
| "loss": 0.7992, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 6.32e-06, |
| "loss": 0.7964, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 6.3399999999999994e-06, |
| "loss": 0.8013, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 6.36e-06, |
| "loss": 0.8023, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 6.38e-06, |
| "loss": 0.8074, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 6.4e-06, |
| "loss": 0.8154, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 6.42e-06, |
| "loss": 0.7984, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 6.44e-06, |
| "loss": 0.7987, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 6.46e-06, |
| "loss": 0.8007, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 6.48e-06, |
| "loss": 0.7999, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 6.5000000000000004e-06, |
| "loss": 0.8107, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 6.519999999999999e-06, |
| "loss": 0.7928, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 6.54e-06, |
| "loss": 0.8033, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 6.56e-06, |
| "loss": 0.802, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 6.58e-06, |
| "loss": 0.8056, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 6.5999999999999995e-06, |
| "loss": 0.7857, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 6.62e-06, |
| "loss": 0.8046, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.64e-06, |
| "loss": 0.7865, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.66e-06, |
| "loss": 0.814, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.6799999999999996e-06, |
| "loss": 0.7934, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.7e-06, |
| "loss": 0.7893, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 6.72e-06, |
| "loss": 0.7984, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 6.74e-06, |
| "loss": 0.7932, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 6.7600000000000005e-06, |
| "loss": 0.7886, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 6.7799999999999995e-06, |
| "loss": 0.7925, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 6.8e-06, |
| "loss": 0.7872, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 6.82e-06, |
| "loss": 0.8191, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 6.84e-06, |
| "loss": 0.7994, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 6.8599999999999995e-06, |
| "loss": 0.8014, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 6.88e-06, |
| "loss": 0.7998, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 6.9e-06, |
| "loss": 0.7809, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 6.92e-06, |
| "loss": 0.7845, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 6.94e-06, |
| "loss": 0.7795, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 6.96e-06, |
| "loss": 0.7959, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 6.979999999999999e-06, |
| "loss": 0.7848, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 7e-06, |
| "loss": 0.7915, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 6.999986848712049e-06, |
| "loss": 0.7891, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 6.999947394947031e-06, |
| "loss": 0.7838, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 6.999881639001441e-06, |
| "loss": 0.8101, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 6.999789581369437e-06, |
| "loss": 0.8062, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 6.999671222742834e-06, |
| "loss": 0.7923, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 6.999526564011098e-06, |
| "loss": 0.8033, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 6.999355606261343e-06, |
| "loss": 0.7905, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 6.999158350778321e-06, |
| "loss": 0.7833, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 6.998934799044413e-06, |
| "loss": 0.7916, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 6.9986849527396114e-06, |
| "loss": 0.7848, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 6.998408813741518e-06, |
| "loss": 0.7888, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 6.9981063841253256e-06, |
| "loss": 0.7971, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 6.997777666163796e-06, |
| "loss": 0.7947, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 6.997422662327253e-06, |
| "loss": 0.7943, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 6.9970413752835595e-06, |
| "loss": 0.7864, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 6.996633807898094e-06, |
| "loss": 0.8132, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 6.996199963233736e-06, |
| "loss": 0.7814, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 6.995739844550836e-06, |
| "loss": 0.8083, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 6.995253455307197e-06, |
| "loss": 0.7831, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 6.994740799158044e-06, |
| "loss": 0.7775, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 6.994201879955999e-06, |
| "loss": 0.7969, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 6.993636701751052e-06, |
| "loss": 0.7726, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 6.993045268790529e-06, |
| "loss": 0.783, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 6.9924275855190615e-06, |
| "loss": 0.7802, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 6.991783656578554e-06, |
| "loss": 0.7885, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 6.991113486808145e-06, |
| "loss": 0.7916, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 6.990417081244175e-06, |
| "loss": 0.7961, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 6.989694445120147e-06, |
| "loss": 0.7912, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 6.9889455838666875e-06, |
| "loss": 0.7711, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 6.988170503111504e-06, |
| "loss": 0.7695, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 6.987369208679347e-06, |
| "loss": 0.7693, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 6.986541706591961e-06, |
| "loss": 0.7944, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 6.985688003068041e-06, |
| "loss": 0.7792, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 6.984808104523189e-06, |
| "loss": 0.7852, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 6.983902017569859e-06, |
| "loss": 0.7866, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 6.982969749017318e-06, |
| "loss": 0.7708, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 6.982011305871581e-06, |
| "loss": 0.7979, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 6.981026695335371e-06, |
| "loss": 0.7729, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 6.980015924808057e-06, |
| "loss": 0.7846, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 6.978979001885602e-06, |
| "loss": 0.8077, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 6.9779159343605024e-06, |
| "loss": 0.7805, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 6.9768267302217355e-06, |
| "loss": 0.7984, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 6.975711397654693e-06, |
| "loss": 0.775, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 6.974569945041124e-06, |
| "loss": 0.7771, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 6.97340238095907e-06, |
| "loss": 0.7963, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 6.972208714182799e-06, |
| "loss": 0.7831, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 6.970988953682744e-06, |
| "loss": 0.7892, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 6.96974310862543e-06, |
| "loss": 0.7852, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 6.9684711883734115e-06, |
| "loss": 0.7965, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 6.9671732024851965e-06, |
| "loss": 0.766, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 6.965849160715176e-06, |
| "loss": 0.7918, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 6.964499073013553e-06, |
| "loss": 0.767, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 6.963122949526267e-06, |
| "loss": 0.7807, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 6.961720800594914e-06, |
| "loss": 0.7976, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 6.960292636756675e-06, |
| "loss": 0.7732, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 6.9588384687442315e-06, |
| "loss": 0.7791, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 6.957358307485689e-06, |
| "loss": 0.7776, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 6.9558521641044894e-06, |
| "loss": 0.7885, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 6.954320049919333e-06, |
| "loss": 0.7852, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 6.952761976444094e-06, |
| "loss": 0.7919, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 6.9511779553877245e-06, |
| "loss": 0.7863, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 6.949567998654181e-06, |
| "loss": 0.7738, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 6.947932118342319e-06, |
| "loss": 0.7629, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 6.946270326745818e-06, |
| "loss": 0.7802, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 6.944582636353076e-06, |
| "loss": 0.7803, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 6.942869059847123e-06, |
| "loss": 0.8036, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 6.941129610105525e-06, |
| "loss": 0.772, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 6.939364300200283e-06, |
| "loss": 0.7958, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 6.93757314339774e-06, |
| "loss": 0.7837, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 6.935756153158478e-06, |
| "loss": 0.7632, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 6.933913343137219e-06, |
| "loss": 0.7921, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 6.9320447271827186e-06, |
| "loss": 0.793, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 6.930150319337667e-06, |
| "loss": 0.789, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 6.92823013383858e-06, |
| "loss": 0.7785, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 6.9262841851156935e-06, |
| "loss": 0.7719, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 6.924312487792855e-06, |
| "loss": 0.791, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 6.922315056687412e-06, |
| "loss": 0.795, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 6.920291906810102e-06, |
| "loss": 0.7837, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 6.918243053364941e-06, |
| "loss": 0.8042, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 6.916168511749106e-06, |
| "loss": 0.78, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 6.914068297552825e-06, |
| "loss": 0.7781, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 6.9119424265592495e-06, |
| "loss": 0.7679, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 6.909790914744349e-06, |
| "loss": 0.761, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 6.907613778276779e-06, |
| "loss": 0.7773, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 6.90541103351777e-06, |
| "loss": 0.7817, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 6.903182697020994e-06, |
| "loss": 0.7634, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 6.90092878553245e-06, |
| "loss": 0.7739, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 6.898649315990332e-06, |
| "loss": 0.7679, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 6.896344305524902e-06, |
| "loss": 0.7755, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 6.894013771458362e-06, |
| "loss": 0.7964, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 6.891657731304729e-06, |
| "loss": 0.7756, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 6.889276202769695e-06, |
| "loss": 0.7938, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 6.886869203750498e-06, |
| "loss": 0.7752, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 6.884436752335787e-06, |
| "loss": 0.7724, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 6.881978866805488e-06, |
| "loss": 0.7867, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 6.879495565630666e-06, |
| "loss": 0.7843, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 6.87698686747338e-06, |
| "loss": 0.7912, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 6.8744527911865535e-06, |
| "loss": 0.7851, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 6.871893355813823e-06, |
| "loss": 0.7682, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 6.869308580589402e-06, |
| "loss": 0.7538, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 6.866698484937932e-06, |
| "loss": 0.7906, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 6.864063088474338e-06, |
| "loss": 0.7771, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 6.861402411003682e-06, |
| "loss": 0.7749, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 6.858716472521012e-06, |
| "loss": 0.77, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 6.856005293211217e-06, |
| "loss": 0.7723, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 6.853268893448865e-06, |
| "loss": 0.7714, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 6.850507293798059e-06, |
| "loss": 0.7736, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 6.847720515012284e-06, |
| "loss": 0.7748, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 6.8449085780342395e-06, |
| "loss": 0.7886, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 6.842071503995695e-06, |
| "loss": 0.7771, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 6.839209314217322e-06, |
| "loss": 0.8013, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 6.83632203020854e-06, |
| "loss": 0.7785, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 6.8334096736673505e-06, |
| "loss": 0.778, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 6.830472266480176e-06, |
| "loss": 0.7811, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 6.827509830721694e-06, |
| "loss": 0.7668, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 6.824522388654676e-06, |
| "loss": 0.7761, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 6.821509962729811e-06, |
| "loss": 0.7784, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 6.818472575585549e-06, |
| "loss": 0.7844, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 6.8154102500479155e-06, |
| "loss": 0.7892, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 6.812323009130357e-06, |
| "loss": 0.7758, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 6.809210876033554e-06, |
| "loss": 0.7931, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 6.806073874145257e-06, |
| "loss": 0.771, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 6.802912027040099e-06, |
| "loss": 0.7969, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 6.799725358479433e-06, |
| "loss": 0.7696, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 6.79651389241114e-06, |
| "loss": 0.7863, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 6.793277652969458e-06, |
| "loss": 0.778, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 6.790016664474797e-06, |
| "loss": 0.768, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 6.786730951433557e-06, |
| "loss": 0.7675, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 6.78342053853794e-06, |
| "loss": 0.7711, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 6.780085450665775e-06, |
| "loss": 0.7677, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 6.776725712880315e-06, |
| "loss": 0.7544, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 6.773341350430065e-06, |
| "loss": 0.7687, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 6.769932388748583e-06, |
| "loss": 0.7768, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 6.766498853454287e-06, |
| "loss": 0.7783, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 6.763040770350272e-06, |
| "loss": 0.7832, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 6.759558165424105e-06, |
| "loss": 0.763, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 6.75605106484764e-06, |
| "loss": 0.7656, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 6.752519494976812e-06, |
| "loss": 0.7685, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 6.748963482351447e-06, |
| "loss": 0.7724, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 6.745383053695056e-06, |
| "loss": 0.7693, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 6.741778235914637e-06, |
| "loss": 0.7815, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 6.738149056100475e-06, |
| "loss": 0.7702, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 6.734495541525934e-06, |
| "loss": 0.7744, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 6.730817719647257e-06, |
| "loss": 0.766, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 6.727115618103354e-06, |
| "loss": 0.7582, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 6.7233892647156e-06, |
| "loss": 0.7821, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 6.719638687487618e-06, |
| "loss": 0.7831, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 6.71586391460508e-06, |
| "loss": 0.7501, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 6.712064974435485e-06, |
| "loss": 0.783, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 6.708241895527952e-06, |
| "loss": 0.7857, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 6.704394706613e-06, |
| "loss": 0.7757, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 6.700523436602338e-06, |
| "loss": 0.7651, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 6.696628114588645e-06, |
| "loss": 0.7759, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 6.69270876984535e-06, |
| "loss": 0.7801, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 6.688765431826413e-06, |
| "loss": 0.7662, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 6.6847981301661046e-06, |
| "loss": 0.7566, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 6.680806894678784e-06, |
| "loss": 0.7733, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 6.676791755358671e-06, |
| "loss": 0.754, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 6.6727527423796255e-06, |
| "loss": 0.7871, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 6.668689886094918e-06, |
| "loss": 0.7614, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 6.664603217037001e-06, |
| "loss": 0.7691, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 6.66049276591728e-06, |
| "loss": 0.7669, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 6.656358563625887e-06, |
| "loss": 0.7487, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 6.6522006412314404e-06, |
| "loss": 0.7689, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 6.648019029980816e-06, |
| "loss": 0.7628, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 6.643813761298915e-06, |
| "loss": 0.7426, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 6.6395848667884215e-06, |
| "loss": 0.7693, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 6.635332378229571e-06, |
| "loss": 0.7734, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 6.631056327579905e-06, |
| "loss": 0.7633, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 6.626756746974038e-06, |
| "loss": 0.7947, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 6.622433668723412e-06, |
| "loss": 0.7693, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 6.6180871253160525e-06, |
| "loss": 0.7843, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 6.613717149416329e-06, |
| "loss": 0.7569, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 6.609323773864704e-06, |
| "loss": 0.7652, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 6.60490703167749e-06, |
| "loss": 0.7715, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 6.6004669560466e-06, |
| "loss": 0.778, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 6.596003580339301e-06, |
| "loss": 0.7888, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 6.5915169380979565e-06, |
| "loss": 0.7567, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 6.58700706303978e-06, |
| "loss": 0.768, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 6.58247398905658e-06, |
| "loss": 0.7729, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 6.577917750214508e-06, |
| "loss": 0.767, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 6.573338380753795e-06, |
| "loss": 0.7601, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 6.568735915088504e-06, |
| "loss": 0.7714, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 6.564110387806263e-06, |
| "loss": 0.7579, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 6.5594618336680095e-06, |
| "loss": 0.7693, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 6.55479028760773e-06, |
| "loss": 0.7818, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 6.550095784732193e-06, |
| "loss": 0.7641, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 6.54537836032069e-06, |
| "loss": 0.7658, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 6.5406380498247695e-06, |
| "loss": 0.7655, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 6.535874888867964e-06, |
| "loss": 0.7664, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 6.531088913245536e-06, |
| "loss": 0.7706, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 6.526280158924192e-06, |
| "loss": 0.771, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 6.521448662041825e-06, |
| "loss": 0.7582, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 6.516594458907243e-06, |
| "loss": 0.7743, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 6.511717585999884e-06, |
| "loss": 0.7657, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 6.5068180799695556e-06, |
| "loss": 0.7699, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 6.501895977636151e-06, |
| "loss": 0.7675, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 6.4969513159893755e-06, |
| "loss": 0.7887, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 6.49198413218847e-06, |
| "loss": 0.7647, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 6.4869944635619266e-06, |
| "loss": 0.7672, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 6.4819823476072154e-06, |
| "loss": 0.7506, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 6.476947821990495e-06, |
| "loss": 0.7772, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 6.471890924546335e-06, |
| "loss": 0.7484, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 6.466811693277429e-06, |
| "loss": 0.7668, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 6.4617101663543115e-06, |
| "loss": 0.7762, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 6.456586382115066e-06, |
| "loss": 0.7797, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 6.4514403790650435e-06, |
| "loss": 0.7401, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 6.4462721958765695e-06, |
| "loss": 0.7589, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 6.441081871388649e-06, |
| "loss": 0.7648, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 6.435869444606687e-06, |
| "loss": 0.7714, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 6.430634954702182e-06, |
| "loss": 0.7544, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 6.42537844101244e-06, |
| "loss": 0.7706, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 6.420099943040274e-06, |
| "loss": 0.7833, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 6.414799500453712e-06, |
| "loss": 0.7769, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 6.409477153085693e-06, |
| "loss": 0.7537, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 6.404132940933776e-06, |
| "loss": 0.773, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 6.398766904159829e-06, |
| "loss": 0.7794, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 6.393379083089736e-06, |
| "loss": 0.7687, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 6.387969518213087e-06, |
| "loss": 0.7838, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 6.382538250182883e-06, |
| "loss": 0.7789, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 6.377085319815218e-06, |
| "loss": 0.7616, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 6.371610768088981e-06, |
| "loss": 0.772, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 6.366114636145552e-06, |
| "loss": 0.7507, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 6.360596965288476e-06, |
| "loss": 0.7573, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 6.3550577969831725e-06, |
| "loss": 0.7706, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 6.3494971728566115e-06, |
| "loss": 0.7783, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 6.343915134697002e-06, |
| "loss": 0.7667, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 6.338311724453484e-06, |
| "loss": 0.7681, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 6.332686984235804e-06, |
| "loss": 0.7756, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 6.327040956314011e-06, |
| "loss": 0.7616, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 6.321373683118123e-06, |
| "loss": 0.7762, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 6.315685207237823e-06, |
| "loss": 0.773, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 6.309975571422131e-06, |
| "loss": 0.7471, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 6.304244818579083e-06, |
| "loss": 0.7537, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 6.298492991775411e-06, |
| "loss": 0.7712, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 6.292720134236218e-06, |
| "loss": 0.7735, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 6.286926289344656e-06, |
| "loss": 0.7456, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 6.2811115006415916e-06, |
| "loss": 0.7729, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 6.275275811825288e-06, |
| "loss": 0.7844, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 6.269419266751077e-06, |
| "loss": 0.7748, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 6.263541909431018e-06, |
| "loss": 0.7601, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 6.257643784033581e-06, |
| "loss": 0.7783, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 6.251724934883305e-06, |
| "loss": 0.7934, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 6.245785406460471e-06, |
| "loss": 0.7638, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 6.239825243400762e-06, |
| "loss": 0.7636, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 6.233844490494934e-06, |
| "loss": 0.764, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 6.227843192688475e-06, |
| "loss": 0.7677, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 6.221821395081267e-06, |
| "loss": 0.7657, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 6.21577914292725e-06, |
| "loss": 0.7745, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 6.209716481634081e-06, |
| "loss": 0.7658, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 6.20363345676279e-06, |
| "loss": 0.7726, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 6.1975301140274415e-06, |
| "loss": 0.7627, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 6.1914064992947875e-06, |
| "loss": 0.7601, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 6.1852626585839265e-06, |
| "loss": 0.758, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 6.179098638065954e-06, |
| "loss": 0.7636, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 6.172914484063619e-06, |
| "loss": 0.7678, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 6.166710243050972e-06, |
| "loss": 0.7489, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 6.160485961653018e-06, |
| "loss": 0.7629, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 6.154241686645368e-06, |
| "loss": 0.7684, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 6.147977464953884e-06, |
| "loss": 0.7567, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 6.141693343654327e-06, |
| "loss": 0.7699, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 6.135389369972006e-06, |
| "loss": 0.7728, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 6.129065591281419e-06, |
| "loss": 0.7455, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 6.1227220551059e-06, |
| "loss": 0.7448, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 6.116358809117262e-06, |
| "loss": 0.7585, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 6.109975901135435e-06, |
| "loss": 0.7339, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 6.1035733791281125e-06, |
| "loss": 0.7715, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 6.097151291210385e-06, |
| "loss": 0.7648, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 6.090709685644382e-06, |
| "loss": 0.7566, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 6.084248610838911e-06, |
| "loss": 0.753, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 6.077768115349088e-06, |
| "loss": 0.7709, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 6.071268247875976e-06, |
| "loss": 0.756, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 6.064749057266223e-06, |
| "loss": 0.7812, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 6.058210592511687e-06, |
| "loss": 0.7728, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 6.05165290274907e-06, |
| "loss": 0.7775, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 6.045076037259554e-06, |
| "loss": 0.7655, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 6.038480045468426e-06, |
| "loss": 0.7668, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 6.03186497694471e-06, |
| "loss": 0.7593, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 6.025230881400785e-06, |
| "loss": 0.7666, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 6.0185778086920245e-06, |
| "loss": 0.7841, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 6.011905808816416e-06, |
| "loss": 0.7709, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 6.0052149319141795e-06, |
| "loss": 0.7763, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 5.998505228267403e-06, |
| "loss": 0.7759, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 5.991776748299656e-06, |
| "loss": 0.7607, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 5.985029542575608e-06, |
| "loss": 0.7639, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 5.978263661800657e-06, |
| "loss": 0.7745, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 5.971479156820547e-06, |
| "loss": 0.7635, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 5.964676078620975e-06, |
| "loss": 0.7504, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 5.957854478327225e-06, |
| "loss": 0.7548, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 5.95101440720377e-06, |
| "loss": 0.7677, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 5.944155916653893e-06, |
| "loss": 0.7543, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 5.9372790582192985e-06, |
| "loss": 0.7651, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 5.930383883579727e-06, |
| "loss": 0.7578, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 5.923470444552567e-06, |
| "loss": 0.7588, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 5.91653879309246e-06, |
| "loss": 0.755, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 5.909588981290919e-06, |
| "loss": 0.7546, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 5.90262106137593e-06, |
| "loss": 0.752, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 5.895635085711562e-06, |
| "loss": 0.7806, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 5.8886311067975745e-06, |
| "loss": 0.7765, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 5.8816091772690205e-06, |
| "loss": 0.7637, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 5.87456934989585e-06, |
| "loss": 0.7792, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 5.867511677582522e-06, |
| "loss": 0.7492, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 5.860436213367597e-06, |
| "loss": 0.7509, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 5.85334301042334e-06, |
| "loss": 0.7514, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 5.846232122055325e-06, |
| "loss": 0.7695, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 5.839103601702033e-06, |
| "loss": 0.7693, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 5.83195750293445e-06, |
| "loss": 0.7466, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 5.824793879455662e-06, |
| "loss": 0.7652, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 5.817612785100454e-06, |
| "loss": 0.7558, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 5.8104142738349076e-06, |
| "loss": 0.7638, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 5.803198399755989e-06, |
| "loss": 0.7717, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 5.795965217091152e-06, |
| "loss": 0.7551, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 5.788714780197918e-06, |
| "loss": 0.7692, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 5.7814471435634775e-06, |
| "loss": 0.7454, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 5.774162361804279e-06, |
| "loss": 0.7591, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 5.7668604896656145e-06, |
| "loss": 0.7511, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 5.7595415820212105e-06, |
| "loss": 0.757, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 5.752205693872819e-06, |
| "loss": 0.7593, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 5.7448528803497975e-06, |
| "loss": 0.7667, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 5.737483196708698e-06, |
| "loss": 0.766, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 5.730096698332854e-06, |
| "loss": 0.7675, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 5.722693440731961e-06, |
| "loss": 0.7549, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 5.715273479541661e-06, |
| "loss": 0.7377, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 5.707836870523121e-06, |
| "loss": 0.748, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 5.700383669562621e-06, |
| "loss": 0.7765, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 5.692913932671127e-06, |
| "loss": 0.7583, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 5.685427715983874e-06, |
| "loss": 0.7747, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 5.677925075759944e-06, |
| "loss": 0.7733, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 5.670406068381839e-06, |
| "loss": 0.7632, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 5.662870750355062e-06, |
| "loss": 0.7824, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 5.655319178307693e-06, |
| "loss": 0.7623, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 5.647751408989961e-06, |
| "loss": 0.7601, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 5.640167499273813e-06, |
| "loss": 0.7556, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 5.632567506152498e-06, |
| "loss": 0.7711, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 5.624951486740128e-06, |
| "loss": 0.7567, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 5.617319498271254e-06, |
| "loss": 0.7585, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 5.609671598100434e-06, |
| "loss": 0.7467, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 5.6020078437018046e-06, |
| "loss": 0.7504, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 5.594328292668647e-06, |
| "loss": 0.7736, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 5.586633002712952e-06, |
| "loss": 0.7626, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 5.578922031664992e-06, |
| "loss": 0.7627, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 5.571195437472881e-06, |
| "loss": 0.7522, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 5.563453278202141e-06, |
| "loss": 0.7572, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 5.555695612035269e-06, |
| "loss": 0.7501, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 5.547922497271293e-06, |
| "loss": 0.7641, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 5.54013399232534e-06, |
| "loss": 0.769, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 5.532330155728192e-06, |
| "loss": 0.7741, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 5.524511046125852e-06, |
| "loss": 0.7799, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 5.516676722279097e-06, |
| "loss": 0.7538, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 5.50882724306304e-06, |
| "loss": 0.769, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 5.500962667466689e-06, |
| "loss": 0.7714, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 5.4930830545925e-06, |
| "loss": 0.7627, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 5.485188463655933e-06, |
| "loss": 0.7669, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 5.477278953985013e-06, |
| "loss": 0.7449, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 5.469354585019874e-06, |
| "loss": 0.7427, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 5.461415416312322e-06, |
| "loss": 0.7405, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 5.453461507525382e-06, |
| "loss": 0.7655, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 5.4454929184328505e-06, |
| "loss": 0.7592, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 5.437509708918847e-06, |
| "loss": 0.7678, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 5.429511938977365e-06, |
| "loss": 0.7652, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 5.421499668711819e-06, |
| "loss": 0.7534, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 5.4134729583345936e-06, |
| "loss": 0.7434, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 5.405431868166591e-06, |
| "loss": 0.7676, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 5.3973764586367775e-06, |
| "loss": 0.7609, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 5.389306790281732e-06, |
| "loss": 0.776, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 5.381222923745186e-06, |
| "loss": 0.7525, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 5.3731249197775734e-06, |
| "loss": 0.7476, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 5.365012839235568e-06, |
| "loss": 0.7695, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 5.356886743081632e-06, |
| "loss": 0.7484, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 5.348746692383553e-06, |
| "loss": 0.7758, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 5.34059274831399e-06, |
| "loss": 0.7764, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 5.33242497215001e-06, |
| "loss": 0.7557, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 5.324243425272624e-06, |
| "loss": 0.7578, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 5.316048169166339e-06, |
| "loss": 0.7841, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 5.30783926541868e-06, |
| "loss": 0.7584, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 5.299616775719737e-06, |
| "loss": 0.7628, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 5.2913807618616975e-06, |
| "loss": 0.7618, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 5.283131285738386e-06, |
| "loss": 0.7585, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 5.274868409344793e-06, |
| "loss": 0.7537, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 5.266592194776616e-06, |
| "loss": 0.7753, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 5.258302704229784e-06, |
| "loss": 0.7528, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 5.25e-06, |
| "loss": 0.7833, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 5.241684144482266e-06, |
| "loss": 0.7568, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 5.233355200170416e-06, |
| "loss": 0.7591, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 5.225013229656649e-06, |
| "loss": 0.7611, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 5.216658295631052e-06, |
| "loss": 0.7563, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 5.208290460881136e-06, |
| "loss": 0.7676, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 5.199909788291361e-06, |
| "loss": 0.7736, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 5.1915163408426645e-06, |
| "loss": 0.7544, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 5.183110181611983e-06, |
| "loss": 0.7441, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 5.174691373771788e-06, |
| "loss": 0.7579, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 5.166259980589603e-06, |
| "loss": 0.7773, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 5.157816065427529e-06, |
| "loss": 0.7492, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 5.149359691741772e-06, |
| "loss": 0.7707, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 5.1408909230821635e-06, |
| "loss": 0.7464, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 5.132409823091685e-06, |
| "loss": 0.7513, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 5.123916455505986e-06, |
| "loss": 0.7552, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 5.1154108841529075e-06, |
| "loss": 0.7706, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 5.106893172952004e-06, |
| "loss": 0.7581, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 5.098363385914059e-06, |
| "loss": 0.7519, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 5.089821587140607e-06, |
| "loss": 0.7698, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 5.081267840823451e-06, |
| "loss": 0.747, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 5.0727022112441795e-06, |
| "loss": 0.7374, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 5.064124762773687e-06, |
| "loss": 0.7555, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 5.055535559871682e-06, |
| "loss": 0.7688, |
| "step": 755 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 5.046934667086212e-06, |
| "loss": 0.75, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 5.038322149053172e-06, |
| "loss": 0.7457, |
| "step": 757 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 5.029698070495822e-06, |
| "loss": 0.7543, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 5.021062496224301e-06, |
| "loss": 0.7317, |
| "step": 759 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 5.012415491135133e-06, |
| "loss": 0.7653, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 5.0037571202107525e-06, |
| "loss": 0.7702, |
| "step": 761 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 4.995087448519005e-06, |
| "loss": 0.7647, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 4.986406541212659e-06, |
| "loss": 0.7484, |
| "step": 763 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1496, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "total_flos": 1.2700841023456543e+19, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|