| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.19515368352577656, |
| "eval_steps": 500, |
| "global_step": 600, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0003252561392096276, |
| "grad_norm": 7.644000053405762, |
| "learning_rate": 5e-05, |
| "loss": 3.7348, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0006505122784192552, |
| "grad_norm": 6.572476387023926, |
| "learning_rate": 5e-05, |
| "loss": 3.5202, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0009757684176288828, |
| "grad_norm": 5.915395736694336, |
| "learning_rate": 5e-05, |
| "loss": 3.2423, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0013010245568385104, |
| "grad_norm": 5.39382266998291, |
| "learning_rate": 5e-05, |
| "loss": 3.2308, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0016262806960481379, |
| "grad_norm": 5.474543571472168, |
| "learning_rate": 5e-05, |
| "loss": 3.3753, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0019515368352577655, |
| "grad_norm": 5.402736663818359, |
| "learning_rate": 5e-05, |
| "loss": 3.3042, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.002276792974467393, |
| "grad_norm": 5.727195739746094, |
| "learning_rate": 5e-05, |
| "loss": 3.2722, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.002602049113677021, |
| "grad_norm": 5.997256755828857, |
| "learning_rate": 5e-05, |
| "loss": 3.2708, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.002927305252886648, |
| "grad_norm": 8.144789695739746, |
| "learning_rate": 5e-05, |
| "loss": 3.4531, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0032525613920962758, |
| "grad_norm": 11.264220237731934, |
| "learning_rate": 5e-05, |
| "loss": 3.3755, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0035778175313059034, |
| "grad_norm": 10.535292625427246, |
| "learning_rate": 5e-05, |
| "loss": 3.5809, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.003903073670515531, |
| "grad_norm": 8.04301929473877, |
| "learning_rate": 5e-05, |
| "loss": 3.3787, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.004228329809725159, |
| "grad_norm": 9.441449165344238, |
| "learning_rate": 5e-05, |
| "loss": 3.4919, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.004553585948934786, |
| "grad_norm": 9.322367668151855, |
| "learning_rate": 5e-05, |
| "loss": 3.6405, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.004878842088144414, |
| "grad_norm": 10.059698104858398, |
| "learning_rate": 5e-05, |
| "loss": 3.425, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.005204098227354042, |
| "grad_norm": 11.285538673400879, |
| "learning_rate": 5e-05, |
| "loss": 3.6643, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0055293543665636685, |
| "grad_norm": 10.433178901672363, |
| "learning_rate": 5e-05, |
| "loss": 3.4485, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.005854610505773296, |
| "grad_norm": 11.724845886230469, |
| "learning_rate": 5e-05, |
| "loss": 3.8379, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.006179866644982924, |
| "grad_norm": 11.558403968811035, |
| "learning_rate": 5e-05, |
| "loss": 3.8067, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0065051227841925515, |
| "grad_norm": 9.274937629699707, |
| "learning_rate": 5e-05, |
| "loss": 3.5213, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.006830378923402179, |
| "grad_norm": 11.487302780151367, |
| "learning_rate": 5e-05, |
| "loss": 3.8464, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.007155635062611807, |
| "grad_norm": 11.910959243774414, |
| "learning_rate": 5e-05, |
| "loss": 3.7159, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0074808912018214345, |
| "grad_norm": 10.441876411437988, |
| "learning_rate": 5e-05, |
| "loss": 4.3052, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.007806147341031062, |
| "grad_norm": 11.492648124694824, |
| "learning_rate": 5e-05, |
| "loss": 4.0298, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.00813140348024069, |
| "grad_norm": 11.218420028686523, |
| "learning_rate": 5e-05, |
| "loss": 3.9014, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.008456659619450317, |
| "grad_norm": 9.615971565246582, |
| "learning_rate": 5e-05, |
| "loss": 3.8972, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.008781915758659945, |
| "grad_norm": 9.325116157531738, |
| "learning_rate": 5e-05, |
| "loss": 4.0339, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.009107171897869573, |
| "grad_norm": 10.537034034729004, |
| "learning_rate": 5e-05, |
| "loss": 4.3475, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0094324280370792, |
| "grad_norm": 9.163812637329102, |
| "learning_rate": 5e-05, |
| "loss": 4.4702, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.009757684176288828, |
| "grad_norm": 8.232535362243652, |
| "learning_rate": 5e-05, |
| "loss": 4.5234, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.010082940315498456, |
| "grad_norm": 7.85621976852417, |
| "learning_rate": 5e-05, |
| "loss": 4.665, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.010408196454708083, |
| "grad_norm": 7.693171501159668, |
| "learning_rate": 5e-05, |
| "loss": 4.5524, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.01073345259391771, |
| "grad_norm": 6.418099403381348, |
| "learning_rate": 5e-05, |
| "loss": 4.3471, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.011058708733127337, |
| "grad_norm": 9.231449127197266, |
| "learning_rate": 5e-05, |
| "loss": 4.5491, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.011383964872336965, |
| "grad_norm": 8.351770401000977, |
| "learning_rate": 5e-05, |
| "loss": 4.6851, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.011709221011546592, |
| "grad_norm": 8.412186622619629, |
| "learning_rate": 5e-05, |
| "loss": 4.4453, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01203447715075622, |
| "grad_norm": 7.4988932609558105, |
| "learning_rate": 5e-05, |
| "loss": 4.2595, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.012359733289965848, |
| "grad_norm": 6.0955424308776855, |
| "learning_rate": 5e-05, |
| "loss": 4.1402, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.012684989429175475, |
| "grad_norm": 5.853550434112549, |
| "learning_rate": 5e-05, |
| "loss": 4.2133, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.013010245568385103, |
| "grad_norm": 6.071702480316162, |
| "learning_rate": 5e-05, |
| "loss": 3.9406, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01333550170759473, |
| "grad_norm": 5.181666374206543, |
| "learning_rate": 5e-05, |
| "loss": 3.9559, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.013660757846804358, |
| "grad_norm": 5.067506313323975, |
| "learning_rate": 5e-05, |
| "loss": 4.0526, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.013986013986013986, |
| "grad_norm": 4.593716621398926, |
| "learning_rate": 5e-05, |
| "loss": 4.0814, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.014311270125223614, |
| "grad_norm": 5.379826545715332, |
| "learning_rate": 5e-05, |
| "loss": 4.13, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.014636526264433241, |
| "grad_norm": 6.113279819488525, |
| "learning_rate": 5e-05, |
| "loss": 4.0842, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.014961782403642869, |
| "grad_norm": 4.885502815246582, |
| "learning_rate": 5e-05, |
| "loss": 4.147, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.015287038542852497, |
| "grad_norm": 5.065276622772217, |
| "learning_rate": 5e-05, |
| "loss": 4.0597, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.015612294682062124, |
| "grad_norm": 5.306569576263428, |
| "learning_rate": 5e-05, |
| "loss": 4.4288, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.01593755082127175, |
| "grad_norm": 6.738716125488281, |
| "learning_rate": 5e-05, |
| "loss": 4.2153, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.01626280696048138, |
| "grad_norm": 7.264622211456299, |
| "learning_rate": 5e-05, |
| "loss": 4.3567, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.016588063099691006, |
| "grad_norm": 4.799393177032471, |
| "learning_rate": 5e-05, |
| "loss": 4.1491, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.016913319238900635, |
| "grad_norm": 4.5021071434021, |
| "learning_rate": 5e-05, |
| "loss": 4.0725, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.01723857537811026, |
| "grad_norm": 5.524833679199219, |
| "learning_rate": 5e-05, |
| "loss": 4.405, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.01756383151731989, |
| "grad_norm": 4.327210426330566, |
| "learning_rate": 5e-05, |
| "loss": 4.198, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.017889087656529516, |
| "grad_norm": 4.141977787017822, |
| "learning_rate": 5e-05, |
| "loss": 4.0445, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.018214343795739146, |
| "grad_norm": 4.746036529541016, |
| "learning_rate": 5e-05, |
| "loss": 4.5174, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.01853959993494877, |
| "grad_norm": 5.5715837478637695, |
| "learning_rate": 5e-05, |
| "loss": 4.2292, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.0188648560741584, |
| "grad_norm": 5.887129306793213, |
| "learning_rate": 5e-05, |
| "loss": 4.0795, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.019190112213368027, |
| "grad_norm": 4.8050150871276855, |
| "learning_rate": 5e-05, |
| "loss": 4.1816, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.019515368352577656, |
| "grad_norm": 4.179840564727783, |
| "learning_rate": 5e-05, |
| "loss": 4.337, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.019840624491787282, |
| "grad_norm": 5.042575359344482, |
| "learning_rate": 5e-05, |
| "loss": 4.1466, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.02016588063099691, |
| "grad_norm": 4.339786052703857, |
| "learning_rate": 5e-05, |
| "loss": 3.7189, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.020491136770206538, |
| "grad_norm": 8.27956771850586, |
| "learning_rate": 5e-05, |
| "loss": 4.1767, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.020816392909416167, |
| "grad_norm": 6.273040294647217, |
| "learning_rate": 5e-05, |
| "loss": 4.363, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.021141649048625793, |
| "grad_norm": 7.342176914215088, |
| "learning_rate": 5e-05, |
| "loss": 4.3939, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.02146690518783542, |
| "grad_norm": 6.060370445251465, |
| "learning_rate": 5e-05, |
| "loss": 4.1033, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.021792161327045048, |
| "grad_norm": 6.683494567871094, |
| "learning_rate": 5e-05, |
| "loss": 3.9883, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.022117417466254674, |
| "grad_norm": 6.418432712554932, |
| "learning_rate": 5e-05, |
| "loss": 4.1472, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.022442673605464303, |
| "grad_norm": 6.342174053192139, |
| "learning_rate": 5e-05, |
| "loss": 4.414, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.02276792974467393, |
| "grad_norm": 10.055042266845703, |
| "learning_rate": 5e-05, |
| "loss": 4.1925, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02309318588388356, |
| "grad_norm": 6.6624932289123535, |
| "learning_rate": 5e-05, |
| "loss": 4.1111, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.023418442023093185, |
| "grad_norm": 8.38736343383789, |
| "learning_rate": 5e-05, |
| "loss": 4.5028, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.023743698162302814, |
| "grad_norm": 8.479351997375488, |
| "learning_rate": 5e-05, |
| "loss": 4.2832, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.02406895430151244, |
| "grad_norm": 8.613444328308105, |
| "learning_rate": 5e-05, |
| "loss": 4.5189, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.02439421044072207, |
| "grad_norm": 6.932406425476074, |
| "learning_rate": 5e-05, |
| "loss": 4.1228, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.024719466579931695, |
| "grad_norm": 5.989908695220947, |
| "learning_rate": 5e-05, |
| "loss": 4.0289, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.025044722719141325, |
| "grad_norm": 5.118892192840576, |
| "learning_rate": 5e-05, |
| "loss": 3.8781, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.02536997885835095, |
| "grad_norm": 5.232855796813965, |
| "learning_rate": 5e-05, |
| "loss": 4.2731, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.02569523499756058, |
| "grad_norm": 4.9437103271484375, |
| "learning_rate": 5e-05, |
| "loss": 3.1985, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.026020491136770206, |
| "grad_norm": 7.929747581481934, |
| "learning_rate": 5e-05, |
| "loss": 4.1925, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.026345747275979835, |
| "grad_norm": 9.323763847351074, |
| "learning_rate": 5e-05, |
| "loss": 4.2925, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.02667100341518946, |
| "grad_norm": 6.18411111831665, |
| "learning_rate": 5e-05, |
| "loss": 4.0956, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.02699625955439909, |
| "grad_norm": 4.424221038818359, |
| "learning_rate": 5e-05, |
| "loss": 4.1803, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.027321515693608717, |
| "grad_norm": 6.044656276702881, |
| "learning_rate": 5e-05, |
| "loss": 4.0551, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.027646771832818346, |
| "grad_norm": 9.380243301391602, |
| "learning_rate": 5e-05, |
| "loss": 4.2753, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.027972027972027972, |
| "grad_norm": 10.203062057495117, |
| "learning_rate": 5e-05, |
| "loss": 4.5481, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.028297284111237598, |
| "grad_norm": 12.21309757232666, |
| "learning_rate": 5e-05, |
| "loss": 4.2921, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.028622540250447227, |
| "grad_norm": 7.666494369506836, |
| "learning_rate": 5e-05, |
| "loss": 4.2344, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.028947796389656853, |
| "grad_norm": 5.388767719268799, |
| "learning_rate": 5e-05, |
| "loss": 4.3914, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.029273052528866483, |
| "grad_norm": 7.598433971405029, |
| "learning_rate": 5e-05, |
| "loss": 4.242, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.02959830866807611, |
| "grad_norm": 11.387866020202637, |
| "learning_rate": 5e-05, |
| "loss": 4.0014, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.029923564807285738, |
| "grad_norm": 10.232786178588867, |
| "learning_rate": 5e-05, |
| "loss": 4.025, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.030248820946495364, |
| "grad_norm": 9.431289672851562, |
| "learning_rate": 5e-05, |
| "loss": 4.2743, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.030574077085704993, |
| "grad_norm": 6.723931789398193, |
| "learning_rate": 5e-05, |
| "loss": 4.111, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.03089933322491462, |
| "grad_norm": 5.171010971069336, |
| "learning_rate": 5e-05, |
| "loss": 3.8832, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.03122458936412425, |
| "grad_norm": 7.667996883392334, |
| "learning_rate": 5e-05, |
| "loss": 4.1839, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.031549845503333875, |
| "grad_norm": 9.853692054748535, |
| "learning_rate": 5e-05, |
| "loss": 4.2596, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.0318751016425435, |
| "grad_norm": 9.22080135345459, |
| "learning_rate": 5e-05, |
| "loss": 3.7551, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.03220035778175313, |
| "grad_norm": 8.230822563171387, |
| "learning_rate": 5e-05, |
| "loss": 4.1223, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.03252561392096276, |
| "grad_norm": 4.844912052154541, |
| "learning_rate": 5e-05, |
| "loss": 3.9264, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.032850870060172385, |
| "grad_norm": 5.341676712036133, |
| "learning_rate": 5e-05, |
| "loss": 4.2083, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.03317612619938201, |
| "grad_norm": 10.346305847167969, |
| "learning_rate": 5e-05, |
| "loss": 4.6745, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.033501382338591644, |
| "grad_norm": 5.822200298309326, |
| "learning_rate": 5e-05, |
| "loss": 3.9329, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.03382663847780127, |
| "grad_norm": 4.412308692932129, |
| "learning_rate": 5e-05, |
| "loss": 4.2581, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.034151894617010896, |
| "grad_norm": 6.643288612365723, |
| "learning_rate": 5e-05, |
| "loss": 4.0638, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.03447715075622052, |
| "grad_norm": 4.771259784698486, |
| "learning_rate": 5e-05, |
| "loss": 3.9147, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.034802406895430155, |
| "grad_norm": 4.471869945526123, |
| "learning_rate": 5e-05, |
| "loss": 3.8011, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.03512766303463978, |
| "grad_norm": 6.949775218963623, |
| "learning_rate": 5e-05, |
| "loss": 4.3229, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.035452919173849406, |
| "grad_norm": 5.095446586608887, |
| "learning_rate": 5e-05, |
| "loss": 3.616, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.03577817531305903, |
| "grad_norm": 6.592041015625, |
| "learning_rate": 5e-05, |
| "loss": 3.7697, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.03610343145226866, |
| "grad_norm": 7.455766677856445, |
| "learning_rate": 5e-05, |
| "loss": 4.0375, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.03642868759147829, |
| "grad_norm": 4.540219306945801, |
| "learning_rate": 5e-05, |
| "loss": 3.7951, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.03675394373068792, |
| "grad_norm": 5.230220794677734, |
| "learning_rate": 5e-05, |
| "loss": 4.0304, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.03707919986989754, |
| "grad_norm": 5.179874420166016, |
| "learning_rate": 5e-05, |
| "loss": 4.0191, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.03740445600910717, |
| "grad_norm": 6.374222755432129, |
| "learning_rate": 5e-05, |
| "loss": 4.4934, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0377297121483168, |
| "grad_norm": 6.96058988571167, |
| "learning_rate": 5e-05, |
| "loss": 4.1446, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.03805496828752643, |
| "grad_norm": 6.299279689788818, |
| "learning_rate": 5e-05, |
| "loss": 3.9077, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.038380224426736054, |
| "grad_norm": 6.169437408447266, |
| "learning_rate": 5e-05, |
| "loss": 4.1457, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.03870548056594568, |
| "grad_norm": 5.159611701965332, |
| "learning_rate": 5e-05, |
| "loss": 3.7067, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.03903073670515531, |
| "grad_norm": 6.676630973815918, |
| "learning_rate": 5e-05, |
| "loss": 4.0493, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.03935599284436494, |
| "grad_norm": 6.488524436950684, |
| "learning_rate": 5e-05, |
| "loss": 4.109, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.039681248983574564, |
| "grad_norm": 6.670077800750732, |
| "learning_rate": 5e-05, |
| "loss": 3.6645, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.04000650512278419, |
| "grad_norm": 6.173693656921387, |
| "learning_rate": 5e-05, |
| "loss": 4.4163, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.04033176126199382, |
| "grad_norm": 6.306183815002441, |
| "learning_rate": 5e-05, |
| "loss": 3.9382, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.04065701740120345, |
| "grad_norm": 6.007297039031982, |
| "learning_rate": 5e-05, |
| "loss": 3.956, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.040982273540413075, |
| "grad_norm": 6.0243730545043945, |
| "learning_rate": 5e-05, |
| "loss": 3.9613, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.0413075296796227, |
| "grad_norm": 5.6909871101379395, |
| "learning_rate": 5e-05, |
| "loss": 3.9213, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.041632785818832334, |
| "grad_norm": 5.5652265548706055, |
| "learning_rate": 5e-05, |
| "loss": 3.9325, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.04195804195804196, |
| "grad_norm": 7.6173272132873535, |
| "learning_rate": 5e-05, |
| "loss": 4.0422, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.042283298097251586, |
| "grad_norm": 10.900376319885254, |
| "learning_rate": 5e-05, |
| "loss": 3.9173, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.04260855423646121, |
| "grad_norm": 12.899847984313965, |
| "learning_rate": 5e-05, |
| "loss": 4.0328, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.04293381037567084, |
| "grad_norm": 11.928502082824707, |
| "learning_rate": 5e-05, |
| "loss": 3.9763, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.04325906651488047, |
| "grad_norm": 8.4597749710083, |
| "learning_rate": 5e-05, |
| "loss": 4.0895, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.043584322654090096, |
| "grad_norm": 5.162694931030273, |
| "learning_rate": 5e-05, |
| "loss": 4.1564, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.04390957879329972, |
| "grad_norm": 13.066299438476562, |
| "learning_rate": 5e-05, |
| "loss": 4.1774, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.04423483493250935, |
| "grad_norm": 14.013510704040527, |
| "learning_rate": 5e-05, |
| "loss": 3.5203, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.04456009107171898, |
| "grad_norm": 15.885542869567871, |
| "learning_rate": 5e-05, |
| "loss": 4.1405, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.04488534721092861, |
| "grad_norm": 7.15226411819458, |
| "learning_rate": 5e-05, |
| "loss": 4.043, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.04521060335013823, |
| "grad_norm": 5.1085686683654785, |
| "learning_rate": 5e-05, |
| "loss": 3.7764, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.04553585948934786, |
| "grad_norm": 6.9343390464782715, |
| "learning_rate": 5e-05, |
| "loss": 4.2056, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.04586111562855749, |
| "grad_norm": 11.624869346618652, |
| "learning_rate": 5e-05, |
| "loss": 3.7097, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.04618637176776712, |
| "grad_norm": 12.546487808227539, |
| "learning_rate": 5e-05, |
| "loss": 4.2067, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.046511627906976744, |
| "grad_norm": 5.573001861572266, |
| "learning_rate": 5e-05, |
| "loss": 3.6916, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.04683688404618637, |
| "grad_norm": 6.0869140625, |
| "learning_rate": 5e-05, |
| "loss": 4.1239, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.047162140185396, |
| "grad_norm": 7.625162124633789, |
| "learning_rate": 5e-05, |
| "loss": 3.9154, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.04748739632460563, |
| "grad_norm": 5.8476762771606445, |
| "learning_rate": 5e-05, |
| "loss": 3.9673, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.047812652463815254, |
| "grad_norm": 6.097865104675293, |
| "learning_rate": 5e-05, |
| "loss": 3.994, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.04813790860302488, |
| "grad_norm": 5.200497150421143, |
| "learning_rate": 5e-05, |
| "loss": 4.1332, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.04846316474223451, |
| "grad_norm": 8.92606258392334, |
| "learning_rate": 5e-05, |
| "loss": 4.1005, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.04878842088144414, |
| "grad_norm": 5.433960437774658, |
| "learning_rate": 5e-05, |
| "loss": 4.099, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.049113677020653765, |
| "grad_norm": 4.350966453552246, |
| "learning_rate": 5e-05, |
| "loss": 3.7492, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.04943893315986339, |
| "grad_norm": 8.3677978515625, |
| "learning_rate": 5e-05, |
| "loss": 3.8048, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.04976418929907302, |
| "grad_norm": 10.74728012084961, |
| "learning_rate": 5e-05, |
| "loss": 4.3122, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.05008944543828265, |
| "grad_norm": 8.89576530456543, |
| "learning_rate": 5e-05, |
| "loss": 4.432, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.050414701577492275, |
| "grad_norm": 6.710874080657959, |
| "learning_rate": 5e-05, |
| "loss": 3.9353, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.0507399577167019, |
| "grad_norm": 10.725092887878418, |
| "learning_rate": 5e-05, |
| "loss": 4.2776, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.05106521385591153, |
| "grad_norm": 11.6733980178833, |
| "learning_rate": 5e-05, |
| "loss": 4.1354, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.05139046999512116, |
| "grad_norm": 9.846784591674805, |
| "learning_rate": 5e-05, |
| "loss": 3.9646, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.051715726134330786, |
| "grad_norm": 8.646893501281738, |
| "learning_rate": 5e-05, |
| "loss": 3.9413, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.05204098227354041, |
| "grad_norm": 7.0959062576293945, |
| "learning_rate": 5e-05, |
| "loss": 4.0745, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.05236623841275004, |
| "grad_norm": 7.378256797790527, |
| "learning_rate": 5e-05, |
| "loss": 4.1712, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.05269149455195967, |
| "grad_norm": 7.3625640869140625, |
| "learning_rate": 5e-05, |
| "loss": 3.9466, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.0530167506911693, |
| "grad_norm": 4.918821811676025, |
| "learning_rate": 5e-05, |
| "loss": 3.8243, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.05334200683037892, |
| "grad_norm": 6.653145790100098, |
| "learning_rate": 5e-05, |
| "loss": 3.8895, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.05366726296958855, |
| "grad_norm": 8.699049949645996, |
| "learning_rate": 5e-05, |
| "loss": 4.1705, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.05399251910879818, |
| "grad_norm": 7.855594158172607, |
| "learning_rate": 5e-05, |
| "loss": 3.6343, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.05431777524800781, |
| "grad_norm": 5.597055435180664, |
| "learning_rate": 5e-05, |
| "loss": 3.7043, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.05464303138721743, |
| "grad_norm": 9.524121284484863, |
| "learning_rate": 5e-05, |
| "loss": 3.8942, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.05496828752642706, |
| "grad_norm": 5.389048099517822, |
| "learning_rate": 5e-05, |
| "loss": 3.4256, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.05529354366563669, |
| "grad_norm": 5.568866729736328, |
| "learning_rate": 5e-05, |
| "loss": 3.9784, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.05561879980484632, |
| "grad_norm": 12.199308395385742, |
| "learning_rate": 5e-05, |
| "loss": 3.7894, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.055944055944055944, |
| "grad_norm": 15.115793228149414, |
| "learning_rate": 5e-05, |
| "loss": 3.6162, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.05626931208326557, |
| "grad_norm": 5.6520538330078125, |
| "learning_rate": 5e-05, |
| "loss": 3.8633, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.056594568222475196, |
| "grad_norm": 5.969440937042236, |
| "learning_rate": 5e-05, |
| "loss": 4.0392, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.05691982436168483, |
| "grad_norm": 8.82860279083252, |
| "learning_rate": 5e-05, |
| "loss": 4.1596, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.057245080500894455, |
| "grad_norm": 8.133511543273926, |
| "learning_rate": 5e-05, |
| "loss": 4.3075, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.05757033664010408, |
| "grad_norm": 5.794802665710449, |
| "learning_rate": 5e-05, |
| "loss": 3.7082, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.057895592779313707, |
| "grad_norm": 6.0018744468688965, |
| "learning_rate": 5e-05, |
| "loss": 3.67, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.05822084891852334, |
| "grad_norm": 9.123400688171387, |
| "learning_rate": 5e-05, |
| "loss": 3.6293, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.058546105057732965, |
| "grad_norm": 12.262410163879395, |
| "learning_rate": 5e-05, |
| "loss": 4.0337, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.05887136119694259, |
| "grad_norm": 5.367374897003174, |
| "learning_rate": 5e-05, |
| "loss": 3.7243, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.05919661733615222, |
| "grad_norm": 5.942975997924805, |
| "learning_rate": 5e-05, |
| "loss": 3.691, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.05952187347536185, |
| "grad_norm": 5.772192001342773, |
| "learning_rate": 5e-05, |
| "loss": 3.8556, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.059847129614571476, |
| "grad_norm": 6.091885566711426, |
| "learning_rate": 5e-05, |
| "loss": 3.8396, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.0601723857537811, |
| "grad_norm": 6.4458231925964355, |
| "learning_rate": 5e-05, |
| "loss": 4.3947, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.06049764189299073, |
| "grad_norm": 6.378884315490723, |
| "learning_rate": 5e-05, |
| "loss": 4.375, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.06082289803220036, |
| "grad_norm": 7.179290294647217, |
| "learning_rate": 5e-05, |
| "loss": 3.6237, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.06114815417140999, |
| "grad_norm": 5.786200046539307, |
| "learning_rate": 5e-05, |
| "loss": 4.2066, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.06147341031061961, |
| "grad_norm": 5.186939239501953, |
| "learning_rate": 5e-05, |
| "loss": 3.9935, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.06179866644982924, |
| "grad_norm": 4.02333402633667, |
| "learning_rate": 5e-05, |
| "loss": 3.6273, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06212392258903887, |
| "grad_norm": 6.622715473175049, |
| "learning_rate": 5e-05, |
| "loss": 4.2028, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.0624491787282485, |
| "grad_norm": 9.464071273803711, |
| "learning_rate": 5e-05, |
| "loss": 4.0506, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.06277443486745812, |
| "grad_norm": 6.995242595672607, |
| "learning_rate": 5e-05, |
| "loss": 4.1047, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.06309969100666775, |
| "grad_norm": 9.435445785522461, |
| "learning_rate": 5e-05, |
| "loss": 3.8718, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.06342494714587738, |
| "grad_norm": 7.273919582366943, |
| "learning_rate": 5e-05, |
| "loss": 3.9176, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.063750203285087, |
| "grad_norm": 6.896090030670166, |
| "learning_rate": 5e-05, |
| "loss": 3.8408, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.06407545942429663, |
| "grad_norm": 7.282253265380859, |
| "learning_rate": 5e-05, |
| "loss": 3.6903, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.06440071556350627, |
| "grad_norm": 9.39031982421875, |
| "learning_rate": 5e-05, |
| "loss": 3.9139, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.06472597170271589, |
| "grad_norm": 7.485379695892334, |
| "learning_rate": 5e-05, |
| "loss": 3.8522, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.06505122784192552, |
| "grad_norm": 7.848803997039795, |
| "learning_rate": 5e-05, |
| "loss": 3.879, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.06537648398113514, |
| "grad_norm": 7.829058647155762, |
| "learning_rate": 5e-05, |
| "loss": 4.0283, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.06570174012034477, |
| "grad_norm": 8.984028816223145, |
| "learning_rate": 5e-05, |
| "loss": 3.8663, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.0660269962595544, |
| "grad_norm": 7.604732513427734, |
| "learning_rate": 5e-05, |
| "loss": 3.7871, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.06635225239876402, |
| "grad_norm": 6.779748916625977, |
| "learning_rate": 5e-05, |
| "loss": 3.9423, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.06667750853797365, |
| "grad_norm": 8.93659782409668, |
| "learning_rate": 5e-05, |
| "loss": 4.0208, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.06700276467718329, |
| "grad_norm": 6.093626022338867, |
| "learning_rate": 5e-05, |
| "loss": 3.8828, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.06732802081639291, |
| "grad_norm": 6.105995178222656, |
| "learning_rate": 5e-05, |
| "loss": 4.0983, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.06765327695560254, |
| "grad_norm": 7.379316329956055, |
| "learning_rate": 5e-05, |
| "loss": 4.0493, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.06797853309481217, |
| "grad_norm": 6.404873847961426, |
| "learning_rate": 5e-05, |
| "loss": 3.9271, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.06830378923402179, |
| "grad_norm": 7.560967445373535, |
| "learning_rate": 5e-05, |
| "loss": 3.8633, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.06862904537323142, |
| "grad_norm": 6.042522430419922, |
| "learning_rate": 5e-05, |
| "loss": 3.7389, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.06895430151244104, |
| "grad_norm": 6.4881367683410645, |
| "learning_rate": 5e-05, |
| "loss": 3.8196, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.06927955765165067, |
| "grad_norm": 6.52613639831543, |
| "learning_rate": 5e-05, |
| "loss": 3.7737, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.06960481379086031, |
| "grad_norm": 4.999444007873535, |
| "learning_rate": 5e-05, |
| "loss": 3.8722, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.06993006993006994, |
| "grad_norm": 7.060845851898193, |
| "learning_rate": 5e-05, |
| "loss": 3.774, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.07025532606927956, |
| "grad_norm": 8.545415878295898, |
| "learning_rate": 5e-05, |
| "loss": 3.8479, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.07058058220848919, |
| "grad_norm": 7.625663757324219, |
| "learning_rate": 5e-05, |
| "loss": 3.5068, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.07090583834769881, |
| "grad_norm": 7.191437244415283, |
| "learning_rate": 5e-05, |
| "loss": 3.7885, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.07123109448690844, |
| "grad_norm": 7.386499881744385, |
| "learning_rate": 5e-05, |
| "loss": 3.9494, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.07155635062611806, |
| "grad_norm": 5.616601943969727, |
| "learning_rate": 5e-05, |
| "loss": 3.8093, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.07188160676532769, |
| "grad_norm": 6.17822265625, |
| "learning_rate": 5e-05, |
| "loss": 3.8981, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.07220686290453732, |
| "grad_norm": 6.138426303863525, |
| "learning_rate": 5e-05, |
| "loss": 4.0091, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.07253211904374696, |
| "grad_norm": 6.6297831535339355, |
| "learning_rate": 5e-05, |
| "loss": 3.9291, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.07285737518295658, |
| "grad_norm": 6.557385444641113, |
| "learning_rate": 5e-05, |
| "loss": 3.8338, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.07318263132216621, |
| "grad_norm": 6.9579291343688965, |
| "learning_rate": 5e-05, |
| "loss": 3.8158, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.07350788746137583, |
| "grad_norm": 7.129207611083984, |
| "learning_rate": 5e-05, |
| "loss": 4.1731, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.07383314360058546, |
| "grad_norm": 6.645360946655273, |
| "learning_rate": 5e-05, |
| "loss": 3.6926, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.07415839973979509, |
| "grad_norm": 8.101895332336426, |
| "learning_rate": 5e-05, |
| "loss": 4.3064, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.07448365587900471, |
| "grad_norm": 7.812802791595459, |
| "learning_rate": 5e-05, |
| "loss": 3.7983, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.07480891201821434, |
| "grad_norm": 7.278988838195801, |
| "learning_rate": 5e-05, |
| "loss": 3.9539, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.07513416815742398, |
| "grad_norm": 7.909803867340088, |
| "learning_rate": 5e-05, |
| "loss": 3.5888, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.0754594242966336, |
| "grad_norm": 5.668457984924316, |
| "learning_rate": 5e-05, |
| "loss": 3.8219, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.07578468043584323, |
| "grad_norm": 6.159639358520508, |
| "learning_rate": 5e-05, |
| "loss": 4.0184, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.07610993657505286, |
| "grad_norm": 6.18869161605835, |
| "learning_rate": 5e-05, |
| "loss": 3.7595, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.07643519271426248, |
| "grad_norm": 5.471868991851807, |
| "learning_rate": 5e-05, |
| "loss": 3.9409, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.07676044885347211, |
| "grad_norm": 7.921130180358887, |
| "learning_rate": 5e-05, |
| "loss": 4.1452, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.07708570499268173, |
| "grad_norm": 6.49941349029541, |
| "learning_rate": 5e-05, |
| "loss": 3.8261, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.07741096113189136, |
| "grad_norm": 10.190372467041016, |
| "learning_rate": 5e-05, |
| "loss": 3.6134, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.07773621727110099, |
| "grad_norm": 5.995229244232178, |
| "learning_rate": 5e-05, |
| "loss": 3.7563, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.07806147341031063, |
| "grad_norm": 8.94497299194336, |
| "learning_rate": 5e-05, |
| "loss": 3.5379, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.07838672954952025, |
| "grad_norm": 10.990089416503906, |
| "learning_rate": 5e-05, |
| "loss": 3.9176, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.07871198568872988, |
| "grad_norm": 7.899653434753418, |
| "learning_rate": 5e-05, |
| "loss": 3.7961, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.0790372418279395, |
| "grad_norm": 7.264082908630371, |
| "learning_rate": 5e-05, |
| "loss": 3.5957, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.07936249796714913, |
| "grad_norm": 5.855433940887451, |
| "learning_rate": 5e-05, |
| "loss": 3.5913, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.07968775410635875, |
| "grad_norm": 6.854794979095459, |
| "learning_rate": 5e-05, |
| "loss": 3.6167, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.08001301024556838, |
| "grad_norm": 7.06243896484375, |
| "learning_rate": 5e-05, |
| "loss": 3.7909, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.080338266384778, |
| "grad_norm": 8.033863067626953, |
| "learning_rate": 5e-05, |
| "loss": 4.046, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.08066352252398765, |
| "grad_norm": 6.078402519226074, |
| "learning_rate": 5e-05, |
| "loss": 3.3269, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.08098877866319727, |
| "grad_norm": 9.511942863464355, |
| "learning_rate": 5e-05, |
| "loss": 3.5749, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.0813140348024069, |
| "grad_norm": 9.74225902557373, |
| "learning_rate": 5e-05, |
| "loss": 3.9162, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.08163929094161652, |
| "grad_norm": 6.432509422302246, |
| "learning_rate": 5e-05, |
| "loss": 3.8754, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.08196454708082615, |
| "grad_norm": 7.885379314422607, |
| "learning_rate": 5e-05, |
| "loss": 3.7214, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.08228980322003578, |
| "grad_norm": 11.552560806274414, |
| "learning_rate": 5e-05, |
| "loss": 3.7367, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.0826150593592454, |
| "grad_norm": 9.054500579833984, |
| "learning_rate": 5e-05, |
| "loss": 3.5476, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.08294031549845503, |
| "grad_norm": 5.916128635406494, |
| "learning_rate": 5e-05, |
| "loss": 3.695, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.08326557163766467, |
| "grad_norm": 10.65311050415039, |
| "learning_rate": 5e-05, |
| "loss": 4.1597, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.0835908277768743, |
| "grad_norm": 13.63244342803955, |
| "learning_rate": 5e-05, |
| "loss": 3.5796, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.08391608391608392, |
| "grad_norm": 10.830595970153809, |
| "learning_rate": 5e-05, |
| "loss": 3.4825, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.08424134005529355, |
| "grad_norm": 5.9953718185424805, |
| "learning_rate": 5e-05, |
| "loss": 3.792, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.08456659619450317, |
| "grad_norm": 12.76282787322998, |
| "learning_rate": 5e-05, |
| "loss": 3.5532, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.0848918523337128, |
| "grad_norm": 18.605255126953125, |
| "learning_rate": 5e-05, |
| "loss": 3.7788, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.08521710847292242, |
| "grad_norm": 12.753776550292969, |
| "learning_rate": 5e-05, |
| "loss": 3.7542, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.08554236461213205, |
| "grad_norm": 7.8098673820495605, |
| "learning_rate": 5e-05, |
| "loss": 4.4529, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.08586762075134168, |
| "grad_norm": 9.642732620239258, |
| "learning_rate": 5e-05, |
| "loss": 3.8515, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.08619287689055131, |
| "grad_norm": 5.820125102996826, |
| "learning_rate": 5e-05, |
| "loss": 4.3286, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.08651813302976094, |
| "grad_norm": 9.613585472106934, |
| "learning_rate": 5e-05, |
| "loss": 3.9569, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.08684338916897057, |
| "grad_norm": 9.211997985839844, |
| "learning_rate": 5e-05, |
| "loss": 3.9244, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.08716864530818019, |
| "grad_norm": 6.351746559143066, |
| "learning_rate": 5e-05, |
| "loss": 3.6584, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.08749390144738982, |
| "grad_norm": 6.802426815032959, |
| "learning_rate": 5e-05, |
| "loss": 3.8361, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.08781915758659944, |
| "grad_norm": 7.498976707458496, |
| "learning_rate": 5e-05, |
| "loss": 3.6589, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.08814441372580907, |
| "grad_norm": 6.058091163635254, |
| "learning_rate": 5e-05, |
| "loss": 3.6699, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.0884696698650187, |
| "grad_norm": 5.347617149353027, |
| "learning_rate": 5e-05, |
| "loss": 3.586, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.08879492600422834, |
| "grad_norm": 6.720355033874512, |
| "learning_rate": 5e-05, |
| "loss": 3.9508, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.08912018214343796, |
| "grad_norm": 5.8187174797058105, |
| "learning_rate": 5e-05, |
| "loss": 4.1922, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.08944543828264759, |
| "grad_norm": 4.981271743774414, |
| "learning_rate": 5e-05, |
| "loss": 3.5997, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.08977069442185721, |
| "grad_norm": 6.238826751708984, |
| "learning_rate": 5e-05, |
| "loss": 3.6929, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.09009595056106684, |
| "grad_norm": 7.073620796203613, |
| "learning_rate": 5e-05, |
| "loss": 3.7813, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.09042120670027647, |
| "grad_norm": 7.445998668670654, |
| "learning_rate": 5e-05, |
| "loss": 3.5475, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.09074646283948609, |
| "grad_norm": 6.120103359222412, |
| "learning_rate": 5e-05, |
| "loss": 3.7499, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.09107171897869572, |
| "grad_norm": 8.138337135314941, |
| "learning_rate": 5e-05, |
| "loss": 3.8753, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.09139697511790534, |
| "grad_norm": 7.516664981842041, |
| "learning_rate": 5e-05, |
| "loss": 3.6093, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.09172223125711498, |
| "grad_norm": 5.630866050720215, |
| "learning_rate": 5e-05, |
| "loss": 3.7269, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.09204748739632461, |
| "grad_norm": 5.958463668823242, |
| "learning_rate": 5e-05, |
| "loss": 3.7448, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.09237274353553424, |
| "grad_norm": 6.990318298339844, |
| "learning_rate": 5e-05, |
| "loss": 3.6994, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.09269799967474386, |
| "grad_norm": 6.364505767822266, |
| "learning_rate": 5e-05, |
| "loss": 3.5316, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.09302325581395349, |
| "grad_norm": 6.308237552642822, |
| "learning_rate": 5e-05, |
| "loss": 4.1899, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.09334851195316311, |
| "grad_norm": 8.585831642150879, |
| "learning_rate": 5e-05, |
| "loss": 3.7078, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.09367376809237274, |
| "grad_norm": 6.02251672744751, |
| "learning_rate": 5e-05, |
| "loss": 3.7081, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.09399902423158236, |
| "grad_norm": 6.891519546508789, |
| "learning_rate": 5e-05, |
| "loss": 3.9432, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.094324280370792, |
| "grad_norm": 8.65449047088623, |
| "learning_rate": 5e-05, |
| "loss": 3.4704, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.09464953651000163, |
| "grad_norm": 6.133912563323975, |
| "learning_rate": 5e-05, |
| "loss": 3.8847, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.09497479264921126, |
| "grad_norm": 6.619656085968018, |
| "learning_rate": 5e-05, |
| "loss": 3.5818, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.09530004878842088, |
| "grad_norm": 9.706931114196777, |
| "learning_rate": 5e-05, |
| "loss": 3.6839, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.09562530492763051, |
| "grad_norm": 6.43947172164917, |
| "learning_rate": 5e-05, |
| "loss": 4.0691, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.09595056106684013, |
| "grad_norm": 7.45628547668457, |
| "learning_rate": 5e-05, |
| "loss": 3.9546, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.09627581720604976, |
| "grad_norm": 9.464739799499512, |
| "learning_rate": 5e-05, |
| "loss": 3.8893, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.09660107334525939, |
| "grad_norm": 9.263232231140137, |
| "learning_rate": 5e-05, |
| "loss": 4.3718, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.09692632948446903, |
| "grad_norm": 6.793147087097168, |
| "learning_rate": 5e-05, |
| "loss": 3.7036, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.09725158562367865, |
| "grad_norm": 12.16869831085205, |
| "learning_rate": 5e-05, |
| "loss": 4.2924, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.09757684176288828, |
| "grad_norm": 10.058348655700684, |
| "learning_rate": 5e-05, |
| "loss": 3.852, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0979020979020979, |
| "grad_norm": 5.966858386993408, |
| "learning_rate": 5e-05, |
| "loss": 3.7514, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.09822735404130753, |
| "grad_norm": 9.075318336486816, |
| "learning_rate": 5e-05, |
| "loss": 3.6808, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.09855261018051716, |
| "grad_norm": 11.372644424438477, |
| "learning_rate": 5e-05, |
| "loss": 4.0671, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.09887786631972678, |
| "grad_norm": 12.080697059631348, |
| "learning_rate": 5e-05, |
| "loss": 3.2312, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.09920312245893641, |
| "grad_norm": 5.336820125579834, |
| "learning_rate": 5e-05, |
| "loss": 3.5947, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.09952837859814603, |
| "grad_norm": 7.266972064971924, |
| "learning_rate": 5e-05, |
| "loss": 3.1227, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.09985363473735567, |
| "grad_norm": 6.334729194641113, |
| "learning_rate": 5e-05, |
| "loss": 3.6156, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.1001788908765653, |
| "grad_norm": 5.643017768859863, |
| "learning_rate": 5e-05, |
| "loss": 3.6258, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.10050414701577493, |
| "grad_norm": 5.640905380249023, |
| "learning_rate": 5e-05, |
| "loss": 3.9051, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.10082940315498455, |
| "grad_norm": 6.985749244689941, |
| "learning_rate": 5e-05, |
| "loss": 3.6725, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.10115465929419418, |
| "grad_norm": 6.893199443817139, |
| "learning_rate": 5e-05, |
| "loss": 3.4541, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.1014799154334038, |
| "grad_norm": 6.803256511688232, |
| "learning_rate": 5e-05, |
| "loss": 3.0762, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.10180517157261343, |
| "grad_norm": 8.491405487060547, |
| "learning_rate": 5e-05, |
| "loss": 3.719, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.10213042771182305, |
| "grad_norm": 5.912895202636719, |
| "learning_rate": 5e-05, |
| "loss": 3.6613, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.1024556838510327, |
| "grad_norm": 6.211380958557129, |
| "learning_rate": 5e-05, |
| "loss": 3.632, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.10278093999024232, |
| "grad_norm": 8.083343505859375, |
| "learning_rate": 5e-05, |
| "loss": 3.5327, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.10310619612945195, |
| "grad_norm": 8.091614723205566, |
| "learning_rate": 5e-05, |
| "loss": 3.8166, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.10343145226866157, |
| "grad_norm": 5.631373405456543, |
| "learning_rate": 5e-05, |
| "loss": 3.6482, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1037567084078712, |
| "grad_norm": 12.532264709472656, |
| "learning_rate": 5e-05, |
| "loss": 3.5283, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.10408196454708082, |
| "grad_norm": 5.990050315856934, |
| "learning_rate": 5e-05, |
| "loss": 3.9679, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.10440722068629045, |
| "grad_norm": 6.2988667488098145, |
| "learning_rate": 5e-05, |
| "loss": 3.8285, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.10473247682550008, |
| "grad_norm": 8.320550918579102, |
| "learning_rate": 5e-05, |
| "loss": 4.0295, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.10505773296470972, |
| "grad_norm": 6.640725612640381, |
| "learning_rate": 5e-05, |
| "loss": 3.5094, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.10538298910391934, |
| "grad_norm": 6.340143203735352, |
| "learning_rate": 5e-05, |
| "loss": 3.6144, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.10570824524312897, |
| "grad_norm": 7.403520584106445, |
| "learning_rate": 5e-05, |
| "loss": 3.9676, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.1060335013823386, |
| "grad_norm": 7.462515354156494, |
| "learning_rate": 5e-05, |
| "loss": 3.7215, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.10635875752154822, |
| "grad_norm": 5.612910747528076, |
| "learning_rate": 5e-05, |
| "loss": 3.5211, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.10668401366075785, |
| "grad_norm": 7.502828598022461, |
| "learning_rate": 5e-05, |
| "loss": 4.0133, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.10700926979996747, |
| "grad_norm": 8.6078462600708, |
| "learning_rate": 5e-05, |
| "loss": 4.1303, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.1073345259391771, |
| "grad_norm": 9.176727294921875, |
| "learning_rate": 5e-05, |
| "loss": 3.8978, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.10765978207838672, |
| "grad_norm": 10.041065216064453, |
| "learning_rate": 5e-05, |
| "loss": 4.0287, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.10798503821759636, |
| "grad_norm": 9.741332054138184, |
| "learning_rate": 5e-05, |
| "loss": 3.5463, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.10831029435680599, |
| "grad_norm": 7.8142499923706055, |
| "learning_rate": 5e-05, |
| "loss": 3.7777, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.10863555049601561, |
| "grad_norm": 8.65985107421875, |
| "learning_rate": 5e-05, |
| "loss": 3.7169, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.10896080663522524, |
| "grad_norm": 9.955862998962402, |
| "learning_rate": 5e-05, |
| "loss": 3.4762, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.10928606277443487, |
| "grad_norm": 8.422538757324219, |
| "learning_rate": 5e-05, |
| "loss": 3.8524, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.10961131891364449, |
| "grad_norm": 6.849399089813232, |
| "learning_rate": 5e-05, |
| "loss": 3.5617, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.10993657505285412, |
| "grad_norm": 10.709142684936523, |
| "learning_rate": 5e-05, |
| "loss": 3.8691, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.11026183119206374, |
| "grad_norm": 6.636946678161621, |
| "learning_rate": 5e-05, |
| "loss": 3.9301, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.11058708733127338, |
| "grad_norm": 7.364269256591797, |
| "learning_rate": 5e-05, |
| "loss": 3.485, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.11091234347048301, |
| "grad_norm": 12.705086708068848, |
| "learning_rate": 5e-05, |
| "loss": 3.8413, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.11123759960969264, |
| "grad_norm": 6.6930928230285645, |
| "learning_rate": 5e-05, |
| "loss": 3.8912, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.11156285574890226, |
| "grad_norm": 6.823209285736084, |
| "learning_rate": 5e-05, |
| "loss": 4.2082, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.11188811188811189, |
| "grad_norm": 9.133801460266113, |
| "learning_rate": 5e-05, |
| "loss": 4.0229, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.11221336802732151, |
| "grad_norm": 7.471242904663086, |
| "learning_rate": 5e-05, |
| "loss": 3.6206, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.11253862416653114, |
| "grad_norm": 7.450990676879883, |
| "learning_rate": 5e-05, |
| "loss": 3.7254, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.11286388030574077, |
| "grad_norm": 7.53968620300293, |
| "learning_rate": 5e-05, |
| "loss": 4.0496, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.11318913644495039, |
| "grad_norm": 12.383916854858398, |
| "learning_rate": 5e-05, |
| "loss": 3.6815, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.11351439258416003, |
| "grad_norm": 8.754898071289062, |
| "learning_rate": 5e-05, |
| "loss": 3.5597, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.11383964872336966, |
| "grad_norm": 7.65074348449707, |
| "learning_rate": 5e-05, |
| "loss": 4.0558, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.11416490486257928, |
| "grad_norm": 6.735880374908447, |
| "learning_rate": 5e-05, |
| "loss": 3.5702, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.11449016100178891, |
| "grad_norm": 6.371280670166016, |
| "learning_rate": 5e-05, |
| "loss": 3.7325, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.11481541714099854, |
| "grad_norm": 6.2961745262146, |
| "learning_rate": 5e-05, |
| "loss": 3.8299, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.11514067328020816, |
| "grad_norm": 8.073019027709961, |
| "learning_rate": 5e-05, |
| "loss": 3.77, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.11546592941941779, |
| "grad_norm": 7.552728176116943, |
| "learning_rate": 5e-05, |
| "loss": 3.6974, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.11579118555862741, |
| "grad_norm": 6.595133304595947, |
| "learning_rate": 5e-05, |
| "loss": 3.6737, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.11611644169783705, |
| "grad_norm": 7.287491321563721, |
| "learning_rate": 5e-05, |
| "loss": 3.5135, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.11644169783704668, |
| "grad_norm": 8.068704605102539, |
| "learning_rate": 5e-05, |
| "loss": 3.9036, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.1167669539762563, |
| "grad_norm": 6.8040618896484375, |
| "learning_rate": 5e-05, |
| "loss": 3.8034, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.11709221011546593, |
| "grad_norm": 9.113652229309082, |
| "learning_rate": 5e-05, |
| "loss": 3.4709, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.11741746625467556, |
| "grad_norm": 8.15011978149414, |
| "learning_rate": 5e-05, |
| "loss": 4.111, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.11774272239388518, |
| "grad_norm": 6.63869047164917, |
| "learning_rate": 5e-05, |
| "loss": 3.8713, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.11806797853309481, |
| "grad_norm": 6.785707473754883, |
| "learning_rate": 5e-05, |
| "loss": 3.1996, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.11839323467230443, |
| "grad_norm": 7.099983215332031, |
| "learning_rate": 5e-05, |
| "loss": 3.4236, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.11871849081151407, |
| "grad_norm": 7.014822006225586, |
| "learning_rate": 5e-05, |
| "loss": 3.7598, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.1190437469507237, |
| "grad_norm": 7.138816833496094, |
| "learning_rate": 5e-05, |
| "loss": 3.6681, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.11936900308993333, |
| "grad_norm": 6.563411235809326, |
| "learning_rate": 5e-05, |
| "loss": 3.82, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.11969425922914295, |
| "grad_norm": 6.389061450958252, |
| "learning_rate": 5e-05, |
| "loss": 3.7305, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.12001951536835258, |
| "grad_norm": 8.009288787841797, |
| "learning_rate": 5e-05, |
| "loss": 3.9673, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.1203447715075622, |
| "grad_norm": 6.436244964599609, |
| "learning_rate": 5e-05, |
| "loss": 3.6934, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.12067002764677183, |
| "grad_norm": 7.818999767303467, |
| "learning_rate": 5e-05, |
| "loss": 3.8564, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.12099528378598146, |
| "grad_norm": 7.891193866729736, |
| "learning_rate": 5e-05, |
| "loss": 3.9254, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.12132053992519108, |
| "grad_norm": 8.381048202514648, |
| "learning_rate": 5e-05, |
| "loss": 3.8966, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.12164579606440072, |
| "grad_norm": 7.328115940093994, |
| "learning_rate": 5e-05, |
| "loss": 3.4804, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.12197105220361035, |
| "grad_norm": 7.0525922775268555, |
| "learning_rate": 5e-05, |
| "loss": 4.0177, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.12229630834281997, |
| "grad_norm": 7.212350845336914, |
| "learning_rate": 5e-05, |
| "loss": 3.4131, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.1226215644820296, |
| "grad_norm": 6.808897972106934, |
| "learning_rate": 5e-05, |
| "loss": 3.5174, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.12294682062123923, |
| "grad_norm": 7.094473838806152, |
| "learning_rate": 5e-05, |
| "loss": 3.7852, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.12327207676044885, |
| "grad_norm": 6.998628616333008, |
| "learning_rate": 5e-05, |
| "loss": 3.8094, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.12359733289965848, |
| "grad_norm": 7.043560028076172, |
| "learning_rate": 5e-05, |
| "loss": 3.6862, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.1239225890388681, |
| "grad_norm": 6.198429107666016, |
| "learning_rate": 5e-05, |
| "loss": 3.5462, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.12424784517807774, |
| "grad_norm": 6.5926513671875, |
| "learning_rate": 5e-05, |
| "loss": 3.9851, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.12457310131728737, |
| "grad_norm": 5.893482208251953, |
| "learning_rate": 5e-05, |
| "loss": 3.6685, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.124898357456497, |
| "grad_norm": 5.886164665222168, |
| "learning_rate": 5e-05, |
| "loss": 3.7367, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.1252236135957066, |
| "grad_norm": 7.275190353393555, |
| "learning_rate": 5e-05, |
| "loss": 3.4105, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.12554886973491625, |
| "grad_norm": 8.864086151123047, |
| "learning_rate": 5e-05, |
| "loss": 4.0736, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.1258741258741259, |
| "grad_norm": 9.517216682434082, |
| "learning_rate": 5e-05, |
| "loss": 3.6961, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.1261993820133355, |
| "grad_norm": 8.437984466552734, |
| "learning_rate": 5e-05, |
| "loss": 4.1894, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.12652463815254514, |
| "grad_norm": 9.38377571105957, |
| "learning_rate": 5e-05, |
| "loss": 3.9029, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.12684989429175475, |
| "grad_norm": 8.621910095214844, |
| "learning_rate": 5e-05, |
| "loss": 3.9334, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.1271751504309644, |
| "grad_norm": 7.772785186767578, |
| "learning_rate": 5e-05, |
| "loss": 3.4678, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.127500406570174, |
| "grad_norm": 8.752019882202148, |
| "learning_rate": 5e-05, |
| "loss": 3.62, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.12782566270938364, |
| "grad_norm": 7.4593706130981445, |
| "learning_rate": 5e-05, |
| "loss": 3.907, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.12815091884859325, |
| "grad_norm": 7.014523983001709, |
| "learning_rate": 5e-05, |
| "loss": 3.7242, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.1284761749878029, |
| "grad_norm": 7.254335403442383, |
| "learning_rate": 5e-05, |
| "loss": 3.8927, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.12880143112701253, |
| "grad_norm": 7.555474281311035, |
| "learning_rate": 5e-05, |
| "loss": 4.3168, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.12912668726622215, |
| "grad_norm": 11.899949073791504, |
| "learning_rate": 5e-05, |
| "loss": 3.6753, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.12945194340543179, |
| "grad_norm": 11.901144027709961, |
| "learning_rate": 5e-05, |
| "loss": 3.7524, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.1297771995446414, |
| "grad_norm": 9.584845542907715, |
| "learning_rate": 5e-05, |
| "loss": 3.4015, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.13010245568385104, |
| "grad_norm": 12.348978042602539, |
| "learning_rate": 5e-05, |
| "loss": 3.6966, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.13042771182306065, |
| "grad_norm": 12.886831283569336, |
| "learning_rate": 5e-05, |
| "loss": 4.5981, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.1307529679622703, |
| "grad_norm": 7.066255569458008, |
| "learning_rate": 5e-05, |
| "loss": 3.6408, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.13107822410147993, |
| "grad_norm": 7.1310014724731445, |
| "learning_rate": 5e-05, |
| "loss": 3.7652, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.13140348024068954, |
| "grad_norm": 7.658654689788818, |
| "learning_rate": 5e-05, |
| "loss": 3.9908, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.13172873637989918, |
| "grad_norm": 9.139669418334961, |
| "learning_rate": 5e-05, |
| "loss": 3.7055, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.1320539925191088, |
| "grad_norm": 7.406591892242432, |
| "learning_rate": 5e-05, |
| "loss": 3.9435, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.13237924865831843, |
| "grad_norm": 7.888886451721191, |
| "learning_rate": 5e-05, |
| "loss": 3.7047, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.13270450479752804, |
| "grad_norm": 6.58457088470459, |
| "learning_rate": 5e-05, |
| "loss": 3.5882, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.13302976093673768, |
| "grad_norm": 6.361485958099365, |
| "learning_rate": 5e-05, |
| "loss": 3.6571, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.1333550170759473, |
| "grad_norm": 10.977415084838867, |
| "learning_rate": 5e-05, |
| "loss": 3.9192, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.13368027321515694, |
| "grad_norm": 8.509581565856934, |
| "learning_rate": 5e-05, |
| "loss": 3.7566, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.13400552935436658, |
| "grad_norm": 7.781307220458984, |
| "learning_rate": 5e-05, |
| "loss": 4.1816, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.1343307854935762, |
| "grad_norm": 7.275979518890381, |
| "learning_rate": 5e-05, |
| "loss": 3.2031, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.13465604163278583, |
| "grad_norm": 7.543152332305908, |
| "learning_rate": 5e-05, |
| "loss": 3.9525, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.13498129777199544, |
| "grad_norm": 9.093851089477539, |
| "learning_rate": 5e-05, |
| "loss": 3.9126, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.13530655391120508, |
| "grad_norm": 12.760071754455566, |
| "learning_rate": 5e-05, |
| "loss": 3.7383, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.1356318100504147, |
| "grad_norm": 8.025795936584473, |
| "learning_rate": 5e-05, |
| "loss": 3.5959, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.13595706618962433, |
| "grad_norm": 7.363426685333252, |
| "learning_rate": 5e-05, |
| "loss": 3.7238, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.13628232232883394, |
| "grad_norm": 8.554017066955566, |
| "learning_rate": 5e-05, |
| "loss": 3.3142, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.13660757846804358, |
| "grad_norm": 7.679929256439209, |
| "learning_rate": 5e-05, |
| "loss": 3.6632, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.13693283460725322, |
| "grad_norm": 7.977594375610352, |
| "learning_rate": 5e-05, |
| "loss": 3.3469, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.13725809074646284, |
| "grad_norm": 7.612384796142578, |
| "learning_rate": 5e-05, |
| "loss": 3.6385, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.13758334688567248, |
| "grad_norm": 9.067974090576172, |
| "learning_rate": 5e-05, |
| "loss": 3.7935, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.1379086030248821, |
| "grad_norm": 9.513571739196777, |
| "learning_rate": 5e-05, |
| "loss": 4.0498, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.13823385916409173, |
| "grad_norm": 10.234723091125488, |
| "learning_rate": 5e-05, |
| "loss": 3.9196, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.13855911530330134, |
| "grad_norm": 8.635189056396484, |
| "learning_rate": 5e-05, |
| "loss": 3.8482, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.13888437144251098, |
| "grad_norm": 10.970733642578125, |
| "learning_rate": 5e-05, |
| "loss": 3.765, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.13920962758172062, |
| "grad_norm": 9.9661226272583, |
| "learning_rate": 5e-05, |
| "loss": 3.939, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.13953488372093023, |
| "grad_norm": 9.381548881530762, |
| "learning_rate": 5e-05, |
| "loss": 3.749, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.13986013986013987, |
| "grad_norm": 8.287264823913574, |
| "learning_rate": 5e-05, |
| "loss": 3.7818, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.14018539599934948, |
| "grad_norm": 8.97390365600586, |
| "learning_rate": 5e-05, |
| "loss": 4.0317, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.14051065213855912, |
| "grad_norm": 7.877195835113525, |
| "learning_rate": 5e-05, |
| "loss": 3.5584, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.14083590827776873, |
| "grad_norm": 9.581697463989258, |
| "learning_rate": 5e-05, |
| "loss": 3.4095, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.14116116441697837, |
| "grad_norm": 17.695627212524414, |
| "learning_rate": 5e-05, |
| "loss": 4.3531, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.141486420556188, |
| "grad_norm": 8.333785057067871, |
| "learning_rate": 5e-05, |
| "loss": 3.3827, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.14181167669539763, |
| "grad_norm": 7.970407009124756, |
| "learning_rate": 5e-05, |
| "loss": 3.5483, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.14213693283460727, |
| "grad_norm": 9.061053276062012, |
| "learning_rate": 5e-05, |
| "loss": 3.7439, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.14246218897381688, |
| "grad_norm": 6.50039005279541, |
| "learning_rate": 5e-05, |
| "loss": 3.9181, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.14278744511302652, |
| "grad_norm": 9.928549766540527, |
| "learning_rate": 5e-05, |
| "loss": 3.5832, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.14311270125223613, |
| "grad_norm": 12.250447273254395, |
| "learning_rate": 5e-05, |
| "loss": 3.4639, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.14343795739144577, |
| "grad_norm": 8.427464485168457, |
| "learning_rate": 5e-05, |
| "loss": 3.4289, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.14376321353065538, |
| "grad_norm": 12.150249481201172, |
| "learning_rate": 5e-05, |
| "loss": 3.8382, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.14408846966986502, |
| "grad_norm": 7.406175136566162, |
| "learning_rate": 5e-05, |
| "loss": 3.9814, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.14441372580907463, |
| "grad_norm": 6.988471031188965, |
| "learning_rate": 5e-05, |
| "loss": 4.0547, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.14473898194828427, |
| "grad_norm": 12.318148612976074, |
| "learning_rate": 5e-05, |
| "loss": 3.9433, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.1450642380874939, |
| "grad_norm": 10.347890853881836, |
| "learning_rate": 5e-05, |
| "loss": 3.7052, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.14538949422670353, |
| "grad_norm": 9.213937759399414, |
| "learning_rate": 5e-05, |
| "loss": 3.9906, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.14571475036591316, |
| "grad_norm": 10.614618301391602, |
| "learning_rate": 5e-05, |
| "loss": 3.276, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.14604000650512278, |
| "grad_norm": 11.268338203430176, |
| "learning_rate": 5e-05, |
| "loss": 3.5563, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.14636526264433242, |
| "grad_norm": 8.418384552001953, |
| "learning_rate": 5e-05, |
| "loss": 3.4222, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.14669051878354203, |
| "grad_norm": 12.904454231262207, |
| "learning_rate": 5e-05, |
| "loss": 3.7737, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.14701577492275167, |
| "grad_norm": 9.767146110534668, |
| "learning_rate": 5e-05, |
| "loss": 4.0424, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.1473410310619613, |
| "grad_norm": 8.333344459533691, |
| "learning_rate": 5e-05, |
| "loss": 3.7574, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.14766628720117092, |
| "grad_norm": 11.907800674438477, |
| "learning_rate": 5e-05, |
| "loss": 3.7266, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.14799154334038056, |
| "grad_norm": 9.306441307067871, |
| "learning_rate": 5e-05, |
| "loss": 3.8885, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.14831679947959017, |
| "grad_norm": 10.486589431762695, |
| "learning_rate": 5e-05, |
| "loss": 3.7777, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.1486420556187998, |
| "grad_norm": 12.291946411132812, |
| "learning_rate": 5e-05, |
| "loss": 3.6718, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.14896731175800942, |
| "grad_norm": 12.743392944335938, |
| "learning_rate": 5e-05, |
| "loss": 3.6533, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.14929256789721906, |
| "grad_norm": 9.82790470123291, |
| "learning_rate": 5e-05, |
| "loss": 3.8719, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.14961782403642868, |
| "grad_norm": 11.749984741210938, |
| "learning_rate": 5e-05, |
| "loss": 3.495, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.14994308017563832, |
| "grad_norm": 11.35322380065918, |
| "learning_rate": 5e-05, |
| "loss": 4.0119, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.15026833631484796, |
| "grad_norm": 7.1181159019470215, |
| "learning_rate": 5e-05, |
| "loss": 3.6675, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.15059359245405757, |
| "grad_norm": 11.3037109375, |
| "learning_rate": 5e-05, |
| "loss": 3.5125, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.1509188485932672, |
| "grad_norm": 10.003439903259277, |
| "learning_rate": 5e-05, |
| "loss": 3.2109, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.15124410473247682, |
| "grad_norm": 7.561882019042969, |
| "learning_rate": 5e-05, |
| "loss": 3.566, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.15156936087168646, |
| "grad_norm": 7.8044586181640625, |
| "learning_rate": 5e-05, |
| "loss": 3.5393, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.15189461701089607, |
| "grad_norm": 11.766450881958008, |
| "learning_rate": 5e-05, |
| "loss": 3.7168, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.1522198731501057, |
| "grad_norm": 7.827965259552002, |
| "learning_rate": 5e-05, |
| "loss": 3.7292, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.15254512928931532, |
| "grad_norm": 9.4542236328125, |
| "learning_rate": 5e-05, |
| "loss": 3.4709, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.15287038542852496, |
| "grad_norm": 8.595137596130371, |
| "learning_rate": 5e-05, |
| "loss": 4.3444, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.1531956415677346, |
| "grad_norm": 8.952139854431152, |
| "learning_rate": 5e-05, |
| "loss": 4.3924, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.15352089770694421, |
| "grad_norm": 9.41843318939209, |
| "learning_rate": 5e-05, |
| "loss": 3.7794, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.15384615384615385, |
| "grad_norm": 7.201237201690674, |
| "learning_rate": 5e-05, |
| "loss": 3.3527, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.15417140998536347, |
| "grad_norm": 9.496088981628418, |
| "learning_rate": 5e-05, |
| "loss": 3.3554, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.1544966661245731, |
| "grad_norm": 8.370413780212402, |
| "learning_rate": 5e-05, |
| "loss": 3.8943, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.15482192226378272, |
| "grad_norm": 8.910099029541016, |
| "learning_rate": 5e-05, |
| "loss": 3.5867, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.15514717840299236, |
| "grad_norm": 7.085155963897705, |
| "learning_rate": 5e-05, |
| "loss": 3.3267, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.15547243454220197, |
| "grad_norm": 8.49357795715332, |
| "learning_rate": 5e-05, |
| "loss": 3.5925, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.1557976906814116, |
| "grad_norm": 7.811654567718506, |
| "learning_rate": 5e-05, |
| "loss": 3.9464, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.15612294682062125, |
| "grad_norm": 9.111326217651367, |
| "learning_rate": 5e-05, |
| "loss": 3.5547, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.15644820295983086, |
| "grad_norm": 7.44144344329834, |
| "learning_rate": 5e-05, |
| "loss": 3.9226, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.1567734590990405, |
| "grad_norm": 9.64633846282959, |
| "learning_rate": 5e-05, |
| "loss": 3.887, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.15709871523825011, |
| "grad_norm": 11.60362720489502, |
| "learning_rate": 5e-05, |
| "loss": 3.7125, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.15742397137745975, |
| "grad_norm": 7.746142864227295, |
| "learning_rate": 5e-05, |
| "loss": 3.9722, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.15774922751666937, |
| "grad_norm": 7.080160140991211, |
| "learning_rate": 5e-05, |
| "loss": 3.1951, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.158074483655879, |
| "grad_norm": 17.081741333007812, |
| "learning_rate": 5e-05, |
| "loss": 3.7901, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.15839973979508865, |
| "grad_norm": 7.917695999145508, |
| "learning_rate": 5e-05, |
| "loss": 3.5357, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.15872499593429826, |
| "grad_norm": 8.854647636413574, |
| "learning_rate": 5e-05, |
| "loss": 3.7515, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.1590502520735079, |
| "grad_norm": 9.028191566467285, |
| "learning_rate": 5e-05, |
| "loss": 3.5516, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.1593755082127175, |
| "grad_norm": 10.008574485778809, |
| "learning_rate": 5e-05, |
| "loss": 3.6189, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.15970076435192715, |
| "grad_norm": 10.596325874328613, |
| "learning_rate": 5e-05, |
| "loss": 3.4306, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.16002602049113676, |
| "grad_norm": 12.258661270141602, |
| "learning_rate": 5e-05, |
| "loss": 3.5577, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.1603512766303464, |
| "grad_norm": 11.506673812866211, |
| "learning_rate": 5e-05, |
| "loss": 3.9079, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.160676532769556, |
| "grad_norm": 7.967488765716553, |
| "learning_rate": 5e-05, |
| "loss": 3.6066, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.16100178890876565, |
| "grad_norm": 11.729570388793945, |
| "learning_rate": 5e-05, |
| "loss": 3.9043, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.1613270450479753, |
| "grad_norm": 8.846243858337402, |
| "learning_rate": 5e-05, |
| "loss": 3.8228, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.1616523011871849, |
| "grad_norm": 12.016153335571289, |
| "learning_rate": 5e-05, |
| "loss": 3.9159, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.16197755732639454, |
| "grad_norm": 8.154533386230469, |
| "learning_rate": 5e-05, |
| "loss": 3.7158, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.16230281346560416, |
| "grad_norm": 11.766820907592773, |
| "learning_rate": 5e-05, |
| "loss": 3.5189, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.1626280696048138, |
| "grad_norm": 11.467671394348145, |
| "learning_rate": 5e-05, |
| "loss": 3.8887, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1629533257440234, |
| "grad_norm": 7.477685451507568, |
| "learning_rate": 5e-05, |
| "loss": 4.0159, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.16327858188323305, |
| "grad_norm": 6.95028018951416, |
| "learning_rate": 5e-05, |
| "loss": 3.7446, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.16360383802244266, |
| "grad_norm": 11.298054695129395, |
| "learning_rate": 5e-05, |
| "loss": 3.692, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.1639290941616523, |
| "grad_norm": 7.807789325714111, |
| "learning_rate": 5e-05, |
| "loss": 3.6819, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.16425435030086194, |
| "grad_norm": 7.94120454788208, |
| "learning_rate": 5e-05, |
| "loss": 3.6853, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.16457960644007155, |
| "grad_norm": 8.74360466003418, |
| "learning_rate": 5e-05, |
| "loss": 3.5674, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.1649048625792812, |
| "grad_norm": 8.877306938171387, |
| "learning_rate": 5e-05, |
| "loss": 3.5393, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.1652301187184908, |
| "grad_norm": 7.502864837646484, |
| "learning_rate": 5e-05, |
| "loss": 3.6376, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.16555537485770044, |
| "grad_norm": 11.325250625610352, |
| "learning_rate": 5e-05, |
| "loss": 3.7114, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.16588063099691006, |
| "grad_norm": 9.918580055236816, |
| "learning_rate": 5e-05, |
| "loss": 3.783, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.1662058871361197, |
| "grad_norm": 8.940899848937988, |
| "learning_rate": 5e-05, |
| "loss": 3.707, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.16653114327532934, |
| "grad_norm": 17.020418167114258, |
| "learning_rate": 5e-05, |
| "loss": 4.319, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.16685639941453895, |
| "grad_norm": 10.722935676574707, |
| "learning_rate": 5e-05, |
| "loss": 3.4215, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.1671816555537486, |
| "grad_norm": 9.579489707946777, |
| "learning_rate": 5e-05, |
| "loss": 2.835, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.1675069116929582, |
| "grad_norm": 8.158295631408691, |
| "learning_rate": 5e-05, |
| "loss": 2.78, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.16783216783216784, |
| "grad_norm": 8.238765716552734, |
| "learning_rate": 5e-05, |
| "loss": 3.4628, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.16815742397137745, |
| "grad_norm": 8.580034255981445, |
| "learning_rate": 5e-05, |
| "loss": 3.9686, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.1684826801105871, |
| "grad_norm": 8.966435432434082, |
| "learning_rate": 5e-05, |
| "loss": 3.4936, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.1688079362497967, |
| "grad_norm": 6.970677375793457, |
| "learning_rate": 5e-05, |
| "loss": 3.4287, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.16913319238900634, |
| "grad_norm": 7.750168323516846, |
| "learning_rate": 5e-05, |
| "loss": 3.3396, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.16945844852821598, |
| "grad_norm": 7.394572734832764, |
| "learning_rate": 5e-05, |
| "loss": 3.6622, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.1697837046674256, |
| "grad_norm": 6.781547546386719, |
| "learning_rate": 5e-05, |
| "loss": 3.5704, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.17010896080663523, |
| "grad_norm": 11.887150764465332, |
| "learning_rate": 5e-05, |
| "loss": 3.4968, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.17043421694584485, |
| "grad_norm": 7.665548324584961, |
| "learning_rate": 5e-05, |
| "loss": 3.6475, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.1707594730850545, |
| "grad_norm": 8.977829933166504, |
| "learning_rate": 5e-05, |
| "loss": 2.6397, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.1710847292242641, |
| "grad_norm": 9.76028060913086, |
| "learning_rate": 5e-05, |
| "loss": 3.6625, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.17140998536347374, |
| "grad_norm": 9.248437881469727, |
| "learning_rate": 5e-05, |
| "loss": 3.5199, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.17173524150268335, |
| "grad_norm": 9.724522590637207, |
| "learning_rate": 5e-05, |
| "loss": 3.7095, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.172060497641893, |
| "grad_norm": 8.21666431427002, |
| "learning_rate": 5e-05, |
| "loss": 3.6802, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.17238575378110263, |
| "grad_norm": 7.362802505493164, |
| "learning_rate": 5e-05, |
| "loss": 3.6511, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.17271100992031224, |
| "grad_norm": 7.2599334716796875, |
| "learning_rate": 5e-05, |
| "loss": 3.1097, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.17303626605952188, |
| "grad_norm": 9.250597953796387, |
| "learning_rate": 5e-05, |
| "loss": 3.608, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.1733615221987315, |
| "grad_norm": 8.88681411743164, |
| "learning_rate": 5e-05, |
| "loss": 3.8212, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.17368677833794113, |
| "grad_norm": 11.290247917175293, |
| "learning_rate": 5e-05, |
| "loss": 3.3286, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.17401203447715075, |
| "grad_norm": 10.88404369354248, |
| "learning_rate": 5e-05, |
| "loss": 3.5241, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.17433729061636039, |
| "grad_norm": 9.20648193359375, |
| "learning_rate": 5e-05, |
| "loss": 3.6005, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.17466254675557003, |
| "grad_norm": 8.10119342803955, |
| "learning_rate": 5e-05, |
| "loss": 3.5739, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.17498780289477964, |
| "grad_norm": 10.661712646484375, |
| "learning_rate": 5e-05, |
| "loss": 3.4837, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.17531305903398928, |
| "grad_norm": 10.739490509033203, |
| "learning_rate": 5e-05, |
| "loss": 3.6787, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.1756383151731989, |
| "grad_norm": 9.183284759521484, |
| "learning_rate": 5e-05, |
| "loss": 3.6174, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.17596357131240853, |
| "grad_norm": 8.37816047668457, |
| "learning_rate": 5e-05, |
| "loss": 3.5964, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.17628882745161814, |
| "grad_norm": 10.69176959991455, |
| "learning_rate": 5e-05, |
| "loss": 3.8804, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.17661408359082778, |
| "grad_norm": 8.007463455200195, |
| "learning_rate": 5e-05, |
| "loss": 3.8485, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.1769393397300374, |
| "grad_norm": 8.839487075805664, |
| "learning_rate": 5e-05, |
| "loss": 3.6356, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.17726459586924703, |
| "grad_norm": 11.773893356323242, |
| "learning_rate": 5e-05, |
| "loss": 3.8846, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.17758985200845667, |
| "grad_norm": 11.021324157714844, |
| "learning_rate": 5e-05, |
| "loss": 3.8369, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.17791510814766628, |
| "grad_norm": 10.33259391784668, |
| "learning_rate": 5e-05, |
| "loss": 3.392, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.17824036428687592, |
| "grad_norm": 8.52408504486084, |
| "learning_rate": 5e-05, |
| "loss": 3.8102, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.17856562042608554, |
| "grad_norm": 9.792526245117188, |
| "learning_rate": 5e-05, |
| "loss": 3.9087, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.17889087656529518, |
| "grad_norm": 9.989365577697754, |
| "learning_rate": 5e-05, |
| "loss": 3.8268, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.1792161327045048, |
| "grad_norm": 9.42458438873291, |
| "learning_rate": 5e-05, |
| "loss": 3.7863, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.17954138884371443, |
| "grad_norm": 8.195592880249023, |
| "learning_rate": 5e-05, |
| "loss": 3.7507, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.17986664498292404, |
| "grad_norm": 9.999628067016602, |
| "learning_rate": 5e-05, |
| "loss": 3.742, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.18019190112213368, |
| "grad_norm": 8.54041576385498, |
| "learning_rate": 5e-05, |
| "loss": 4.4209, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.18051715726134332, |
| "grad_norm": 8.870122909545898, |
| "learning_rate": 5e-05, |
| "loss": 3.2489, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.18084241340055293, |
| "grad_norm": 10.103729248046875, |
| "learning_rate": 5e-05, |
| "loss": 3.5182, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.18116766953976257, |
| "grad_norm": 9.698139190673828, |
| "learning_rate": 5e-05, |
| "loss": 3.7934, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.18149292567897218, |
| "grad_norm": 9.132758140563965, |
| "learning_rate": 5e-05, |
| "loss": 3.3561, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.18181818181818182, |
| "grad_norm": 8.661752700805664, |
| "learning_rate": 5e-05, |
| "loss": 3.5775, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.18214343795739144, |
| "grad_norm": 9.332806587219238, |
| "learning_rate": 5e-05, |
| "loss": 3.0548, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.18246869409660108, |
| "grad_norm": 9.24280071258545, |
| "learning_rate": 5e-05, |
| "loss": 4.0208, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.1827939502358107, |
| "grad_norm": 9.924572944641113, |
| "learning_rate": 5e-05, |
| "loss": 3.6914, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.18311920637502033, |
| "grad_norm": 9.362936973571777, |
| "learning_rate": 5e-05, |
| "loss": 3.9219, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.18344446251422997, |
| "grad_norm": 12.603287696838379, |
| "learning_rate": 5e-05, |
| "loss": 3.4585, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.18376971865343958, |
| "grad_norm": 10.904036521911621, |
| "learning_rate": 5e-05, |
| "loss": 3.3266, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.18409497479264922, |
| "grad_norm": 9.598895072937012, |
| "learning_rate": 5e-05, |
| "loss": 3.6607, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.18442023093185883, |
| "grad_norm": 8.842279434204102, |
| "learning_rate": 5e-05, |
| "loss": 3.5674, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.18474548707106847, |
| "grad_norm": 9.379899024963379, |
| "learning_rate": 5e-05, |
| "loss": 3.6743, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.18507074321027808, |
| "grad_norm": 9.745834350585938, |
| "learning_rate": 5e-05, |
| "loss": 4.0652, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.18539599934948772, |
| "grad_norm": 8.990086555480957, |
| "learning_rate": 5e-05, |
| "loss": 3.637, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.18572125548869736, |
| "grad_norm": 8.382301330566406, |
| "learning_rate": 5e-05, |
| "loss": 3.967, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.18604651162790697, |
| "grad_norm": 8.533965110778809, |
| "learning_rate": 5e-05, |
| "loss": 3.4941, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.18637176776711661, |
| "grad_norm": 9.823786735534668, |
| "learning_rate": 5e-05, |
| "loss": 3.7345, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.18669702390632623, |
| "grad_norm": 7.766260147094727, |
| "learning_rate": 5e-05, |
| "loss": 3.6831, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.18702228004553587, |
| "grad_norm": 8.095032691955566, |
| "learning_rate": 5e-05, |
| "loss": 3.4701, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.18734753618474548, |
| "grad_norm": 11.641885757446289, |
| "learning_rate": 5e-05, |
| "loss": 4.0934, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.18767279232395512, |
| "grad_norm": 9.155062675476074, |
| "learning_rate": 5e-05, |
| "loss": 3.5356, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.18799804846316473, |
| "grad_norm": 8.703105926513672, |
| "learning_rate": 5e-05, |
| "loss": 3.8144, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.18832330460237437, |
| "grad_norm": 9.528350830078125, |
| "learning_rate": 5e-05, |
| "loss": 3.2073, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.188648560741584, |
| "grad_norm": 9.156220436096191, |
| "learning_rate": 5e-05, |
| "loss": 3.777, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.18897381688079362, |
| "grad_norm": 8.443305015563965, |
| "learning_rate": 5e-05, |
| "loss": 3.7239, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.18929907302000326, |
| "grad_norm": 7.838225841522217, |
| "learning_rate": 5e-05, |
| "loss": 3.4467, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.18962432915921287, |
| "grad_norm": 7.3834757804870605, |
| "learning_rate": 5e-05, |
| "loss": 3.5151, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.1899495852984225, |
| "grad_norm": 9.460673332214355, |
| "learning_rate": 5e-05, |
| "loss": 3.4287, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.19027484143763213, |
| "grad_norm": 8.232035636901855, |
| "learning_rate": 5e-05, |
| "loss": 4.2533, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.19060009757684176, |
| "grad_norm": 12.586129188537598, |
| "learning_rate": 5e-05, |
| "loss": 3.7747, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.19092535371605138, |
| "grad_norm": 8.150300979614258, |
| "learning_rate": 5e-05, |
| "loss": 3.9724, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.19125060985526102, |
| "grad_norm": 8.529426574707031, |
| "learning_rate": 5e-05, |
| "loss": 4.6377, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.19157586599447066, |
| "grad_norm": 7.794090747833252, |
| "learning_rate": 5e-05, |
| "loss": 3.2597, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.19190112213368027, |
| "grad_norm": 8.038799285888672, |
| "learning_rate": 5e-05, |
| "loss": 3.9826, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.1922263782728899, |
| "grad_norm": 7.855754852294922, |
| "learning_rate": 5e-05, |
| "loss": 3.7777, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.19255163441209952, |
| "grad_norm": 8.140380859375, |
| "learning_rate": 5e-05, |
| "loss": 3.443, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.19287689055130916, |
| "grad_norm": 9.79352855682373, |
| "learning_rate": 5e-05, |
| "loss": 3.7066, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.19320214669051877, |
| "grad_norm": 13.119344711303711, |
| "learning_rate": 5e-05, |
| "loss": 3.9336, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.1935274028297284, |
| "grad_norm": 8.955309867858887, |
| "learning_rate": 5e-05, |
| "loss": 3.3643, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.19385265896893805, |
| "grad_norm": 8.016314506530762, |
| "learning_rate": 5e-05, |
| "loss": 3.7039, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.19417791510814766, |
| "grad_norm": 8.888792991638184, |
| "learning_rate": 5e-05, |
| "loss": 3.6356, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.1945031712473573, |
| "grad_norm": 9.017245292663574, |
| "learning_rate": 5e-05, |
| "loss": 3.6173, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.19482842738656692, |
| "grad_norm": 9.05187702178955, |
| "learning_rate": 5e-05, |
| "loss": 3.7156, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.19515368352577656, |
| "grad_norm": 8.980157852172852, |
| "learning_rate": 5e-05, |
| "loss": 3.5519, |
| "step": 600 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2792420391095808.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|