| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 12.698412698412698, |
| "eval_steps": 500, |
| "global_step": 800, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.15873015873015872, |
| "grad_norm": 0.42645490169525146, |
| "learning_rate": 4.999720254525684e-05, |
| "loss": 1.3067, |
| "num_input_tokens_seen": 269280, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.31746031746031744, |
| "grad_norm": 0.10797163844108582, |
| "learning_rate": 4.9987533135093934e-05, |
| "loss": 0.2064, |
| "num_input_tokens_seen": 536656, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.47619047619047616, |
| "grad_norm": 0.10832954943180084, |
| "learning_rate": 4.997095990396411e-05, |
| "loss": 0.2025, |
| "num_input_tokens_seen": 804720, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.6349206349206349, |
| "grad_norm": 0.11103782057762146, |
| "learning_rate": 4.994748743089566e-05, |
| "loss": 0.2011, |
| "num_input_tokens_seen": 1073520, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.7936507936507936, |
| "grad_norm": 0.09958792477846146, |
| "learning_rate": 4.9917122201112656e-05, |
| "loss": 0.2028, |
| "num_input_tokens_seen": 1341184, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.9523809523809523, |
| "grad_norm": 0.6581681370735168, |
| "learning_rate": 4.9879872604243184e-05, |
| "loss": 0.1993, |
| "num_input_tokens_seen": 1609968, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 0.2799030542373657, |
| "learning_rate": 4.983574893200139e-05, |
| "loss": 0.1979, |
| "num_input_tokens_seen": 1878240, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.2698412698412698, |
| "grad_norm": 0.17586013674736023, |
| "learning_rate": 4.978476337534393e-05, |
| "loss": 0.1931, |
| "num_input_tokens_seen": 2146528, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.24899278581142426, |
| "learning_rate": 4.972693002110176e-05, |
| "loss": 0.1931, |
| "num_input_tokens_seen": 2415696, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.5873015873015874, |
| "grad_norm": 0.16181747615337372, |
| "learning_rate": 4.9662264848088034e-05, |
| "loss": 0.192, |
| "num_input_tokens_seen": 2683600, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.746031746031746, |
| "grad_norm": 0.18402352929115295, |
| "learning_rate": 4.959078572268337e-05, |
| "loss": 0.1874, |
| "num_input_tokens_seen": 2950720, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.9047619047619047, |
| "grad_norm": 0.2943824827671051, |
| "learning_rate": 4.951251239389948e-05, |
| "loss": 0.1871, |
| "num_input_tokens_seen": 3219792, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.0634920634920633, |
| "grad_norm": 0.18450967967510223, |
| "learning_rate": 4.942746648792274e-05, |
| "loss": 0.1887, |
| "num_input_tokens_seen": 3488400, |
| "step": 130 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 0.2516356408596039, |
| "learning_rate": 4.9335671502139024e-05, |
| "loss": 0.1876, |
| "num_input_tokens_seen": 3757952, |
| "step": 140 |
| }, |
| { |
| "epoch": 2.380952380952381, |
| "grad_norm": 0.23607608675956726, |
| "learning_rate": 4.9237152798641696e-05, |
| "loss": 0.1843, |
| "num_input_tokens_seen": 4025536, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.5396825396825395, |
| "grad_norm": 0.1812293380498886, |
| "learning_rate": 4.9131937597224185e-05, |
| "loss": 0.1791, |
| "num_input_tokens_seen": 4294240, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.6984126984126986, |
| "grad_norm": 0.1874535083770752, |
| "learning_rate": 4.902005496785951e-05, |
| "loss": 0.1851, |
| "num_input_tokens_seen": 4563376, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 0.25721630454063416, |
| "learning_rate": 4.8901535822668446e-05, |
| "loss": 0.1836, |
| "num_input_tokens_seen": 4831168, |
| "step": 180 |
| }, |
| { |
| "epoch": 3.015873015873016, |
| "grad_norm": 0.22797122597694397, |
| "learning_rate": 4.877641290737884e-05, |
| "loss": 0.1834, |
| "num_input_tokens_seen": 5098496, |
| "step": 190 |
| }, |
| { |
| "epoch": 3.1746031746031744, |
| "grad_norm": 0.16337507963180542, |
| "learning_rate": 4.8644720792278264e-05, |
| "loss": 0.186, |
| "num_input_tokens_seen": 5368864, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 0.17769697308540344, |
| "learning_rate": 4.850649586266255e-05, |
| "loss": 0.1803, |
| "num_input_tokens_seen": 5637456, |
| "step": 210 |
| }, |
| { |
| "epoch": 3.492063492063492, |
| "grad_norm": 0.2481444925069809, |
| "learning_rate": 4.836177630878289e-05, |
| "loss": 0.1798, |
| "num_input_tokens_seen": 5905104, |
| "step": 220 |
| }, |
| { |
| "epoch": 3.6507936507936507, |
| "grad_norm": 0.22745923697948456, |
| "learning_rate": 4.821060211529424e-05, |
| "loss": 0.1815, |
| "num_input_tokens_seen": 6174032, |
| "step": 230 |
| }, |
| { |
| "epoch": 3.8095238095238093, |
| "grad_norm": 0.16727988421916962, |
| "learning_rate": 4.8053015050207915e-05, |
| "loss": 0.1811, |
| "num_input_tokens_seen": 6442896, |
| "step": 240 |
| }, |
| { |
| "epoch": 3.9682539682539684, |
| "grad_norm": 0.3471706807613373, |
| "learning_rate": 4.7889058653351485e-05, |
| "loss": 0.1795, |
| "num_input_tokens_seen": 6710352, |
| "step": 250 |
| }, |
| { |
| "epoch": 4.1269841269841265, |
| "grad_norm": 0.23989547789096832, |
| "learning_rate": 4.771877822433911e-05, |
| "loss": 0.1769, |
| "num_input_tokens_seen": 6977744, |
| "step": 260 |
| }, |
| { |
| "epoch": 4.285714285714286, |
| "grad_norm": 0.23704519867897034, |
| "learning_rate": 4.754222081005574e-05, |
| "loss": 0.174, |
| "num_input_tokens_seen": 7246272, |
| "step": 270 |
| }, |
| { |
| "epoch": 4.444444444444445, |
| "grad_norm": 0.2272966355085373, |
| "learning_rate": 4.7359435191658425e-05, |
| "loss": 0.1716, |
| "num_input_tokens_seen": 7512592, |
| "step": 280 |
| }, |
| { |
| "epoch": 4.603174603174603, |
| "grad_norm": 0.23121878504753113, |
| "learning_rate": 4.717047187109861e-05, |
| "loss": 0.1804, |
| "num_input_tokens_seen": 7780144, |
| "step": 290 |
| }, |
| { |
| "epoch": 4.761904761904762, |
| "grad_norm": 0.31674066185951233, |
| "learning_rate": 4.697538305716885e-05, |
| "loss": 0.1784, |
| "num_input_tokens_seen": 8049392, |
| "step": 300 |
| }, |
| { |
| "epoch": 4.920634920634921, |
| "grad_norm": 0.2399132400751114, |
| "learning_rate": 4.6774222651078106e-05, |
| "loss": 0.1796, |
| "num_input_tokens_seen": 8316912, |
| "step": 310 |
| }, |
| { |
| "epoch": 5.079365079365079, |
| "grad_norm": 0.2677905261516571, |
| "learning_rate": 4.656704623155922e-05, |
| "loss": 0.1736, |
| "num_input_tokens_seen": 8586544, |
| "step": 320 |
| }, |
| { |
| "epoch": 5.238095238095238, |
| "grad_norm": 0.33959662914276123, |
| "learning_rate": 4.6353911039513145e-05, |
| "loss": 0.1766, |
| "num_input_tokens_seen": 8855680, |
| "step": 330 |
| }, |
| { |
| "epoch": 5.396825396825397, |
| "grad_norm": 0.26891693472862244, |
| "learning_rate": 4.613487596219376e-05, |
| "loss": 0.1724, |
| "num_input_tokens_seen": 9123808, |
| "step": 340 |
| }, |
| { |
| "epoch": 5.555555555555555, |
| "grad_norm": 0.2796987295150757, |
| "learning_rate": 4.591000151693789e-05, |
| "loss": 0.1721, |
| "num_input_tokens_seen": 9392560, |
| "step": 350 |
| }, |
| { |
| "epoch": 5.714285714285714, |
| "grad_norm": 0.257348895072937, |
| "learning_rate": 4.567934983444495e-05, |
| "loss": 0.1718, |
| "num_input_tokens_seen": 9660480, |
| "step": 360 |
| }, |
| { |
| "epoch": 5.8730158730158735, |
| "grad_norm": 0.2910774052143097, |
| "learning_rate": 4.544298464161079e-05, |
| "loss": 0.1718, |
| "num_input_tokens_seen": 9927936, |
| "step": 370 |
| }, |
| { |
| "epoch": 6.031746031746032, |
| "grad_norm": 0.3452795445919037, |
| "learning_rate": 4.520097124392055e-05, |
| "loss": 0.1711, |
| "num_input_tokens_seen": 10197520, |
| "step": 380 |
| }, |
| { |
| "epoch": 6.190476190476191, |
| "grad_norm": 0.46368861198425293, |
| "learning_rate": 4.49533765074054e-05, |
| "loss": 0.1652, |
| "num_input_tokens_seen": 10466240, |
| "step": 390 |
| }, |
| { |
| "epoch": 6.349206349206349, |
| "grad_norm": 0.42205390334129333, |
| "learning_rate": 4.4700268840168045e-05, |
| "loss": 0.1677, |
| "num_input_tokens_seen": 10734496, |
| "step": 400 |
| }, |
| { |
| "epoch": 6.507936507936508, |
| "grad_norm": 0.25223520398139954, |
| "learning_rate": 4.444171817348225e-05, |
| "loss": 0.1684, |
| "num_input_tokens_seen": 11004416, |
| "step": 410 |
| }, |
| { |
| "epoch": 6.666666666666667, |
| "grad_norm": 0.4380488991737366, |
| "learning_rate": 4.417779594247143e-05, |
| "loss": 0.1655, |
| "num_input_tokens_seen": 11272656, |
| "step": 420 |
| }, |
| { |
| "epoch": 6.825396825396825, |
| "grad_norm": 0.2701490819454193, |
| "learning_rate": 4.3908575066371835e-05, |
| "loss": 0.1722, |
| "num_input_tokens_seen": 11540112, |
| "step": 430 |
| }, |
| { |
| "epoch": 6.984126984126984, |
| "grad_norm": 0.3422671854496002, |
| "learning_rate": 4.363412992838566e-05, |
| "loss": 0.1676, |
| "num_input_tokens_seen": 11808816, |
| "step": 440 |
| }, |
| { |
| "epoch": 7.142857142857143, |
| "grad_norm": 0.6143015623092651, |
| "learning_rate": 4.335453635512961e-05, |
| "loss": 0.1538, |
| "num_input_tokens_seen": 12077648, |
| "step": 450 |
| }, |
| { |
| "epoch": 7.301587301587301, |
| "grad_norm": 0.44244784116744995, |
| "learning_rate": 4.306987159568479e-05, |
| "loss": 0.1572, |
| "num_input_tokens_seen": 12346240, |
| "step": 460 |
| }, |
| { |
| "epoch": 7.4603174603174605, |
| "grad_norm": 0.441853404045105, |
| "learning_rate": 4.278021430025343e-05, |
| "loss": 0.1587, |
| "num_input_tokens_seen": 12614864, |
| "step": 470 |
| }, |
| { |
| "epoch": 7.619047619047619, |
| "grad_norm": 0.520702600479126, |
| "learning_rate": 4.248564449842864e-05, |
| "loss": 0.1616, |
| "num_input_tokens_seen": 12883088, |
| "step": 480 |
| }, |
| { |
| "epoch": 7.777777777777778, |
| "grad_norm": 0.473958283662796, |
| "learning_rate": 4.2186243577082954e-05, |
| "loss": 0.1602, |
| "num_input_tokens_seen": 13151264, |
| "step": 490 |
| }, |
| { |
| "epoch": 7.936507936507937, |
| "grad_norm": 0.4550235867500305, |
| "learning_rate": 4.1882094257881885e-05, |
| "loss": 0.1597, |
| "num_input_tokens_seen": 13419344, |
| "step": 500 |
| }, |
| { |
| "epoch": 8.095238095238095, |
| "grad_norm": 0.7338590025901794, |
| "learning_rate": 4.157328057442874e-05, |
| "loss": 0.1473, |
| "num_input_tokens_seen": 13686752, |
| "step": 510 |
| }, |
| { |
| "epoch": 8.253968253968253, |
| "grad_norm": 0.6510297060012817, |
| "learning_rate": 4.1259887849046906e-05, |
| "loss": 0.1363, |
| "num_input_tokens_seen": 13954352, |
| "step": 520 |
| }, |
| { |
| "epoch": 8.412698412698413, |
| "grad_norm": 0.767859160900116, |
| "learning_rate": 4.0942002669206085e-05, |
| "loss": 0.1408, |
| "num_input_tokens_seen": 14222352, |
| "step": 530 |
| }, |
| { |
| "epoch": 8.571428571428571, |
| "grad_norm": 0.7285030484199524, |
| "learning_rate": 4.0619712863599e-05, |
| "loss": 0.1422, |
| "num_input_tokens_seen": 14491920, |
| "step": 540 |
| }, |
| { |
| "epoch": 8.73015873015873, |
| "grad_norm": 0.6987579464912415, |
| "learning_rate": 4.029310747787516e-05, |
| "loss": 0.1483, |
| "num_input_tokens_seen": 14760400, |
| "step": 550 |
| }, |
| { |
| "epoch": 8.88888888888889, |
| "grad_norm": 0.7618018984794617, |
| "learning_rate": 3.996227675003834e-05, |
| "loss": 0.1437, |
| "num_input_tokens_seen": 15029280, |
| "step": 560 |
| }, |
| { |
| "epoch": 9.047619047619047, |
| "grad_norm": 0.7082319855690002, |
| "learning_rate": 3.962731208551474e-05, |
| "loss": 0.1386, |
| "num_input_tokens_seen": 15298416, |
| "step": 570 |
| }, |
| { |
| "epoch": 9.206349206349206, |
| "grad_norm": 0.9523563385009766, |
| "learning_rate": 3.928830603189844e-05, |
| "loss": 0.1034, |
| "num_input_tokens_seen": 15567104, |
| "step": 580 |
| }, |
| { |
| "epoch": 9.365079365079366, |
| "grad_norm": 1.1607928276062012, |
| "learning_rate": 3.894535225338143e-05, |
| "loss": 0.1073, |
| "num_input_tokens_seen": 15835952, |
| "step": 590 |
| }, |
| { |
| "epoch": 9.523809523809524, |
| "grad_norm": 1.0483174324035645, |
| "learning_rate": 3.859854550487506e-05, |
| "loss": 0.1124, |
| "num_input_tokens_seen": 16103648, |
| "step": 600 |
| }, |
| { |
| "epoch": 9.682539682539682, |
| "grad_norm": 0.9111513495445251, |
| "learning_rate": 3.824798160583012e-05, |
| "loss": 0.1202, |
| "num_input_tokens_seen": 16373888, |
| "step": 610 |
| }, |
| { |
| "epoch": 9.841269841269842, |
| "grad_norm": 1.031439185142517, |
| "learning_rate": 3.789375741376286e-05, |
| "loss": 0.1194, |
| "num_input_tokens_seen": 16642320, |
| "step": 620 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.9815431237220764, |
| "learning_rate": 3.7535970797494136e-05, |
| "loss": 0.117, |
| "num_input_tokens_seen": 16910032, |
| "step": 630 |
| }, |
| { |
| "epoch": 10.158730158730158, |
| "grad_norm": 1.4907585382461548, |
| "learning_rate": 3.717472061010918e-05, |
| "loss": 0.0739, |
| "num_input_tokens_seen": 17178576, |
| "step": 640 |
| }, |
| { |
| "epoch": 10.317460317460318, |
| "grad_norm": 1.1762831211090088, |
| "learning_rate": 3.681010666164546e-05, |
| "loss": 0.0704, |
| "num_input_tokens_seen": 17448288, |
| "step": 650 |
| }, |
| { |
| "epoch": 10.476190476190476, |
| "grad_norm": 1.2105902433395386, |
| "learning_rate": 3.644222969151605e-05, |
| "loss": 0.0735, |
| "num_input_tokens_seen": 17716784, |
| "step": 660 |
| }, |
| { |
| "epoch": 10.634920634920634, |
| "grad_norm": 1.1394544839859009, |
| "learning_rate": 3.607119134067629e-05, |
| "loss": 0.077, |
| "num_input_tokens_seen": 17984944, |
| "step": 670 |
| }, |
| { |
| "epoch": 10.793650793650794, |
| "grad_norm": 1.2243598699569702, |
| "learning_rate": 3.569709412354136e-05, |
| "loss": 0.0763, |
| "num_input_tokens_seen": 18252080, |
| "step": 680 |
| }, |
| { |
| "epoch": 10.952380952380953, |
| "grad_norm": 1.0364540815353394, |
| "learning_rate": 3.5320041399662494e-05, |
| "loss": 0.0762, |
| "num_input_tokens_seen": 18520464, |
| "step": 690 |
| }, |
| { |
| "epoch": 11.11111111111111, |
| "grad_norm": 1.0455269813537598, |
| "learning_rate": 3.494013734516971e-05, |
| "loss": 0.0514, |
| "num_input_tokens_seen": 18786528, |
| "step": 700 |
| }, |
| { |
| "epoch": 11.26984126984127, |
| "grad_norm": 1.2155787944793701, |
| "learning_rate": 3.4557486923988924e-05, |
| "loss": 0.0375, |
| "num_input_tokens_seen": 19055536, |
| "step": 710 |
| }, |
| { |
| "epoch": 11.428571428571429, |
| "grad_norm": 1.1954303979873657, |
| "learning_rate": 3.4172195858841404e-05, |
| "loss": 0.0389, |
| "num_input_tokens_seen": 19324304, |
| "step": 720 |
| }, |
| { |
| "epoch": 11.587301587301587, |
| "grad_norm": 1.1928291320800781, |
| "learning_rate": 3.378437060203357e-05, |
| "loss": 0.0374, |
| "num_input_tokens_seen": 19593552, |
| "step": 730 |
| }, |
| { |
| "epoch": 11.746031746031747, |
| "grad_norm": 1.192438006401062, |
| "learning_rate": 3.3394118306045217e-05, |
| "loss": 0.0426, |
| "num_input_tokens_seen": 19862784, |
| "step": 740 |
| }, |
| { |
| "epoch": 11.904761904761905, |
| "grad_norm": 1.1554771661758423, |
| "learning_rate": 3.3001546793924285e-05, |
| "loss": 0.0432, |
| "num_input_tokens_seen": 20131584, |
| "step": 750 |
| }, |
| { |
| "epoch": 12.063492063492063, |
| "grad_norm": 0.7850580215454102, |
| "learning_rate": 3.260676452949641e-05, |
| "loss": 0.0348, |
| "num_input_tokens_seen": 20401120, |
| "step": 760 |
| }, |
| { |
| "epoch": 12.222222222222221, |
| "grad_norm": 0.6133368611335754, |
| "learning_rate": 3.22098805873973e-05, |
| "loss": 0.0165, |
| "num_input_tokens_seen": 20670080, |
| "step": 770 |
| }, |
| { |
| "epoch": 12.380952380952381, |
| "grad_norm": 0.9954155087471008, |
| "learning_rate": 3.1811004622936525e-05, |
| "loss": 0.0192, |
| "num_input_tokens_seen": 20938000, |
| "step": 780 |
| }, |
| { |
| "epoch": 12.53968253968254, |
| "grad_norm": 0.9651346206665039, |
| "learning_rate": 3.141024684180071e-05, |
| "loss": 0.0212, |
| "num_input_tokens_seen": 21206432, |
| "step": 790 |
| }, |
| { |
| "epoch": 12.698412698412698, |
| "grad_norm": 1.0618289709091187, |
| "learning_rate": 3.10077179696048e-05, |
| "loss": 0.0231, |
| "num_input_tokens_seen": 21476960, |
| "step": 800 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1890, |
| "num_input_tokens_seen": 21476960, |
| "num_train_epochs": 30, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.216970364477768e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|