{ "best_metric": 0.43360278, "best_model_checkpoint": "/data/liuzihang/haobin/pangkaiyu/output/output_step_audio2_mini-encoder+align+llm-whole0130_signal_new1_dpdc-lora-1gpu-bs16_4_gckF_2e6_all/v2-20260215-150801/checkpoint-1200", "epoch": 1.0548523206751055, "eval_steps": 200, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002109704641350211, "grad_norm": 0.76171875, "learning_rate": 5.263157894736842e-09, "loss": 0.2671268880367279, "step": 1, "token_acc": 0.9268792044457443 }, { "epoch": 0.0004219409282700422, "grad_norm": 0.6640625, "learning_rate": 1.0526315789473684e-08, "loss": 0.25691983103752136, "step": 2, "token_acc": 0.9315326633165829 }, { "epoch": 0.0006329113924050633, "grad_norm": 0.58984375, "learning_rate": 1.5789473684210525e-08, "loss": 0.20553478598594666, "step": 3, "token_acc": 0.9399263247378861 }, { "epoch": 0.0008438818565400844, "grad_norm": 0.7578125, "learning_rate": 2.1052631578947368e-08, "loss": 0.2874465584754944, "step": 4, "token_acc": 0.9190901238122661 }, { "epoch": 0.0010548523206751054, "grad_norm": 0.99609375, "learning_rate": 2.6315789473684208e-08, "loss": 0.23955097794532776, "step": 5, "token_acc": 0.9297171186934966 }, { "epoch": 0.0012658227848101266, "grad_norm": 0.6484375, "learning_rate": 3.157894736842105e-08, "loss": 0.22314852476119995, "step": 6, "token_acc": 0.9365693865396069 }, { "epoch": 0.0014767932489451476, "grad_norm": 0.74609375, "learning_rate": 3.684210526315789e-08, "loss": 0.23751771450042725, "step": 7, "token_acc": 0.9336440910337264 }, { "epoch": 0.0016877637130801688, "grad_norm": 0.86328125, "learning_rate": 4.2105263157894737e-08, "loss": 0.22823776304721832, "step": 8, "token_acc": 0.9342301943198804 }, { "epoch": 0.0018987341772151898, "grad_norm": 0.6015625, "learning_rate": 4.736842105263158e-08, "loss": 0.22278322279453278, "step": 9, "token_acc": 0.9367531331973186 }, { "epoch": 0.002109704641350211, "grad_norm": 0.72265625, "learning_rate": 5.2631578947368416e-08, "loss": 0.25690096616744995, "step": 10, "token_acc": 0.9291217257318952 }, { "epoch": 0.002320675105485232, "grad_norm": 1.046875, "learning_rate": 5.789473684210526e-08, "loss": 0.3066456913948059, "step": 11, "token_acc": 0.9201006605850897 }, { "epoch": 0.002531645569620253, "grad_norm": 0.7890625, "learning_rate": 6.31578947368421e-08, "loss": 0.2525354027748108, "step": 12, "token_acc": 0.9276785714285715 }, { "epoch": 0.0027426160337552744, "grad_norm": 0.76953125, "learning_rate": 6.842105263157895e-08, "loss": 0.30059731006622314, "step": 13, "token_acc": 0.9238062986793092 }, { "epoch": 0.002953586497890295, "grad_norm": 0.9921875, "learning_rate": 7.368421052631577e-08, "loss": 0.250629723072052, "step": 14, "token_acc": 0.9291187739463601 }, { "epoch": 0.0031645569620253164, "grad_norm": 0.6796875, "learning_rate": 7.894736842105262e-08, "loss": 0.25745123624801636, "step": 15, "token_acc": 0.9317230273752013 }, { "epoch": 0.0033755274261603376, "grad_norm": 0.85546875, "learning_rate": 8.421052631578947e-08, "loss": 0.3376100957393646, "step": 16, "token_acc": 0.9077240566037735 }, { "epoch": 0.003586497890295359, "grad_norm": 0.703125, "learning_rate": 8.947368421052631e-08, "loss": 0.24760955572128296, "step": 17, "token_acc": 0.9222253760999148 }, { "epoch": 0.0037974683544303796, "grad_norm": 0.703125, "learning_rate": 9.473684210526316e-08, "loss": 0.2046602964401245, "step": 18, "token_acc": 0.9403993855606759 }, { "epoch": 0.004008438818565401, "grad_norm": 0.83203125, "learning_rate": 1e-07, "loss": 0.26379868388175964, "step": 19, "token_acc": 0.9294478527607362 }, { "epoch": 0.004219409282700422, "grad_norm": 0.72265625, "learning_rate": 1.0526315789473683e-07, "loss": 0.2657994031906128, "step": 20, "token_acc": 0.9254714157437893 }, { "epoch": 0.004430379746835443, "grad_norm": 0.6640625, "learning_rate": 1.1052631578947368e-07, "loss": 0.28728997707366943, "step": 21, "token_acc": 0.9262518968133535 }, { "epoch": 0.004641350210970464, "grad_norm": 0.77734375, "learning_rate": 1.1578947368421052e-07, "loss": 0.27618926763534546, "step": 22, "token_acc": 0.9308072487644151 }, { "epoch": 0.004852320675105486, "grad_norm": 0.7109375, "learning_rate": 1.2105263157894737e-07, "loss": 0.2314767688512802, "step": 23, "token_acc": 0.936124911284599 }, { "epoch": 0.005063291139240506, "grad_norm": 0.73828125, "learning_rate": 1.263157894736842e-07, "loss": 0.24274200201034546, "step": 24, "token_acc": 0.9345039018952063 }, { "epoch": 0.005274261603375527, "grad_norm": 0.64453125, "learning_rate": 1.3157894736842104e-07, "loss": 0.2632070481777191, "step": 25, "token_acc": 0.9223796033994335 }, { "epoch": 0.005485232067510549, "grad_norm": 0.7421875, "learning_rate": 1.368421052631579e-07, "loss": 0.2736364006996155, "step": 26, "token_acc": 0.9269776876267748 }, { "epoch": 0.00569620253164557, "grad_norm": 0.96875, "learning_rate": 1.4210526315789474e-07, "loss": 0.29377132654190063, "step": 27, "token_acc": 0.9181763285024155 }, { "epoch": 0.00590717299578059, "grad_norm": 0.7734375, "learning_rate": 1.4736842105263155e-07, "loss": 0.25689125061035156, "step": 28, "token_acc": 0.9299856527977044 }, { "epoch": 0.006118143459915612, "grad_norm": 0.9609375, "learning_rate": 1.526315789473684e-07, "loss": 0.24775874614715576, "step": 29, "token_acc": 0.9330016583747927 }, { "epoch": 0.006329113924050633, "grad_norm": 0.703125, "learning_rate": 1.5789473684210525e-07, "loss": 0.25338542461395264, "step": 30, "token_acc": 0.925512104283054 }, { "epoch": 0.006540084388185654, "grad_norm": 0.6328125, "learning_rate": 1.631578947368421e-07, "loss": 0.25087809562683105, "step": 31, "token_acc": 0.9313120472229676 }, { "epoch": 0.006751054852320675, "grad_norm": 0.67578125, "learning_rate": 1.6842105263157895e-07, "loss": 0.2502059042453766, "step": 32, "token_acc": 0.9279077218840115 }, { "epoch": 0.006962025316455696, "grad_norm": 0.578125, "learning_rate": 1.7368421052631578e-07, "loss": 0.18295930325984955, "step": 33, "token_acc": 0.9424480628860191 }, { "epoch": 0.007172995780590718, "grad_norm": 0.7265625, "learning_rate": 1.7894736842105262e-07, "loss": 0.2690507471561432, "step": 34, "token_acc": 0.9241155819605725 }, { "epoch": 0.007383966244725738, "grad_norm": 0.90234375, "learning_rate": 1.8421052631578946e-07, "loss": 0.2535433769226074, "step": 35, "token_acc": 0.9346446700507615 }, { "epoch": 0.007594936708860759, "grad_norm": 0.73046875, "learning_rate": 1.8947368421052632e-07, "loss": 0.26006314158439636, "step": 36, "token_acc": 0.9291425420457678 }, { "epoch": 0.007805907172995781, "grad_norm": 0.86328125, "learning_rate": 1.9473684210526315e-07, "loss": 0.2664929926395416, "step": 37, "token_acc": 0.9286128845037724 }, { "epoch": 0.008016877637130802, "grad_norm": 0.65234375, "learning_rate": 2e-07, "loss": 0.2170935869216919, "step": 38, "token_acc": 0.9359218028780885 }, { "epoch": 0.008227848101265823, "grad_norm": 0.75390625, "learning_rate": 2.0526315789473683e-07, "loss": 0.31706634163856506, "step": 39, "token_acc": 0.9133278822567457 }, { "epoch": 0.008438818565400843, "grad_norm": 0.6953125, "learning_rate": 2.1052631578947366e-07, "loss": 0.23433184623718262, "step": 40, "token_acc": 0.9313227829202747 }, { "epoch": 0.008649789029535865, "grad_norm": 0.65234375, "learning_rate": 2.1578947368421053e-07, "loss": 0.19157642126083374, "step": 41, "token_acc": 0.9416859122401847 }, { "epoch": 0.008860759493670886, "grad_norm": 0.75390625, "learning_rate": 2.2105263157894736e-07, "loss": 0.26239246129989624, "step": 42, "token_acc": 0.9240352476450927 }, { "epoch": 0.009071729957805906, "grad_norm": 0.97265625, "learning_rate": 2.263157894736842e-07, "loss": 0.27333155274391174, "step": 43, "token_acc": 0.9244935543278084 }, { "epoch": 0.009282700421940928, "grad_norm": 0.671875, "learning_rate": 2.3157894736842104e-07, "loss": 0.22739389538764954, "step": 44, "token_acc": 0.9345622119815669 }, { "epoch": 0.00949367088607595, "grad_norm": 1.359375, "learning_rate": 2.3684210526315787e-07, "loss": 0.2970912754535675, "step": 45, "token_acc": 0.918966119455117 }, { "epoch": 0.009704641350210971, "grad_norm": 0.98046875, "learning_rate": 2.4210526315789473e-07, "loss": 0.24367359280586243, "step": 46, "token_acc": 0.9307627357162961 }, { "epoch": 0.009915611814345991, "grad_norm": 0.69140625, "learning_rate": 2.4736842105263157e-07, "loss": 0.24166589975357056, "step": 47, "token_acc": 0.925096985974336 }, { "epoch": 0.010126582278481013, "grad_norm": 0.8203125, "learning_rate": 2.526315789473684e-07, "loss": 0.2498053014278412, "step": 48, "token_acc": 0.9309855154785572 }, { "epoch": 0.010337552742616034, "grad_norm": 0.81640625, "learning_rate": 2.578947368421053e-07, "loss": 0.27882808446884155, "step": 49, "token_acc": 0.9232902033271719 }, { "epoch": 0.010548523206751054, "grad_norm": 0.7890625, "learning_rate": 2.631578947368421e-07, "loss": 0.2516263723373413, "step": 50, "token_acc": 0.9283572142619126 }, { "epoch": 0.010759493670886076, "grad_norm": 0.65625, "learning_rate": 2.684210526315789e-07, "loss": 0.22138270735740662, "step": 51, "token_acc": 0.9392942583732058 }, { "epoch": 0.010970464135021098, "grad_norm": 1.015625, "learning_rate": 2.736842105263158e-07, "loss": 0.25101763010025024, "step": 52, "token_acc": 0.9323397913561848 }, { "epoch": 0.011181434599156118, "grad_norm": 0.77734375, "learning_rate": 2.789473684210526e-07, "loss": 0.2590043842792511, "step": 53, "token_acc": 0.9248989023685731 }, { "epoch": 0.01139240506329114, "grad_norm": 1.03125, "learning_rate": 2.842105263157895e-07, "loss": 0.22629833221435547, "step": 54, "token_acc": 0.9388133498145859 }, { "epoch": 0.011603375527426161, "grad_norm": 1.2421875, "learning_rate": 2.894736842105263e-07, "loss": 0.26315873861312866, "step": 55, "token_acc": 0.9215632686526374 }, { "epoch": 0.01181434599156118, "grad_norm": 1.390625, "learning_rate": 2.947368421052631e-07, "loss": 0.3269142806529999, "step": 56, "token_acc": 0.9163541967118546 }, { "epoch": 0.012025316455696202, "grad_norm": 0.8125, "learning_rate": 3e-07, "loss": 0.2740277945995331, "step": 57, "token_acc": 0.9223241590214067 }, { "epoch": 0.012236286919831224, "grad_norm": 0.7109375, "learning_rate": 3.052631578947368e-07, "loss": 0.2807028889656067, "step": 58, "token_acc": 0.9231628946633138 }, { "epoch": 0.012447257383966244, "grad_norm": 0.83984375, "learning_rate": 3.105263157894737e-07, "loss": 0.3021116256713867, "step": 59, "token_acc": 0.9209953343701399 }, { "epoch": 0.012658227848101266, "grad_norm": 0.73828125, "learning_rate": 3.157894736842105e-07, "loss": 0.2364785373210907, "step": 60, "token_acc": 0.9329750237116662 }, { "epoch": 0.012869198312236287, "grad_norm": 0.7109375, "learning_rate": 3.2105263157894733e-07, "loss": 0.2884541153907776, "step": 61, "token_acc": 0.9274515831540117 }, { "epoch": 0.013080168776371307, "grad_norm": 0.76953125, "learning_rate": 3.263157894736842e-07, "loss": 0.25490057468414307, "step": 62, "token_acc": 0.9336579427875837 }, { "epoch": 0.013291139240506329, "grad_norm": 0.859375, "learning_rate": 3.31578947368421e-07, "loss": 0.27591922879219055, "step": 63, "token_acc": 0.9201367308887508 }, { "epoch": 0.01350210970464135, "grad_norm": 0.83203125, "learning_rate": 3.368421052631579e-07, "loss": 0.2646903693675995, "step": 64, "token_acc": 0.9261189454322502 }, { "epoch": 0.013713080168776372, "grad_norm": 0.7265625, "learning_rate": 3.4210526315789473e-07, "loss": 0.2481774091720581, "step": 65, "token_acc": 0.9300921512551636 }, { "epoch": 0.013924050632911392, "grad_norm": 0.671875, "learning_rate": 3.4736842105263157e-07, "loss": 0.2667776644229889, "step": 66, "token_acc": 0.9202678027997565 }, { "epoch": 0.014135021097046414, "grad_norm": 0.77734375, "learning_rate": 3.526315789473684e-07, "loss": 0.2720962464809418, "step": 67, "token_acc": 0.9237356168049238 }, { "epoch": 0.014345991561181435, "grad_norm": 0.6171875, "learning_rate": 3.5789473684210524e-07, "loss": 0.25555452704429626, "step": 68, "token_acc": 0.9306469298245614 }, { "epoch": 0.014556962025316455, "grad_norm": 0.640625, "learning_rate": 3.6315789473684213e-07, "loss": 0.22453869879245758, "step": 69, "token_acc": 0.9388444990780578 }, { "epoch": 0.014767932489451477, "grad_norm": 0.75, "learning_rate": 3.684210526315789e-07, "loss": 0.28728553652763367, "step": 70, "token_acc": 0.9182989690721649 }, { "epoch": 0.014978902953586498, "grad_norm": 0.95703125, "learning_rate": 3.7368421052631575e-07, "loss": 0.2622889578342438, "step": 71, "token_acc": 0.9239098624524437 }, { "epoch": 0.015189873417721518, "grad_norm": 0.78125, "learning_rate": 3.7894736842105264e-07, "loss": 0.2780531346797943, "step": 72, "token_acc": 0.9266362252663622 }, { "epoch": 0.01540084388185654, "grad_norm": 0.703125, "learning_rate": 3.842105263157894e-07, "loss": 0.2625043988227844, "step": 73, "token_acc": 0.9331468531468532 }, { "epoch": 0.015611814345991562, "grad_norm": 0.703125, "learning_rate": 3.894736842105263e-07, "loss": 0.25795984268188477, "step": 74, "token_acc": 0.9242160278745645 }, { "epoch": 0.015822784810126583, "grad_norm": 0.71484375, "learning_rate": 3.9473684210526315e-07, "loss": 0.2481173276901245, "step": 75, "token_acc": 0.9310846176214016 }, { "epoch": 0.016033755274261603, "grad_norm": 0.640625, "learning_rate": 4e-07, "loss": 0.23631326854228973, "step": 76, "token_acc": 0.9369342184671092 }, { "epoch": 0.016244725738396623, "grad_norm": 0.69140625, "learning_rate": 4.052631578947368e-07, "loss": 0.24659401178359985, "step": 77, "token_acc": 0.930279458369346 }, { "epoch": 0.016455696202531647, "grad_norm": 0.77734375, "learning_rate": 4.1052631578947365e-07, "loss": 0.28330034017562866, "step": 78, "token_acc": 0.9203966005665722 }, { "epoch": 0.016666666666666666, "grad_norm": 0.96484375, "learning_rate": 4.1578947368421054e-07, "loss": 0.2582593560218811, "step": 79, "token_acc": 0.9296250768285187 }, { "epoch": 0.016877637130801686, "grad_norm": 0.734375, "learning_rate": 4.2105263157894733e-07, "loss": 0.2518593370914459, "step": 80, "token_acc": 0.9288208434058555 }, { "epoch": 0.01708860759493671, "grad_norm": 0.859375, "learning_rate": 4.2631578947368416e-07, "loss": 0.30441492795944214, "step": 81, "token_acc": 0.9242614707730987 }, { "epoch": 0.01729957805907173, "grad_norm": 0.7578125, "learning_rate": 4.3157894736842105e-07, "loss": 0.30916911363601685, "step": 82, "token_acc": 0.9124253625248792 }, { "epoch": 0.01751054852320675, "grad_norm": 0.609375, "learning_rate": 4.368421052631579e-07, "loss": 0.28638702630996704, "step": 83, "token_acc": 0.9245689655172413 }, { "epoch": 0.017721518987341773, "grad_norm": 0.80859375, "learning_rate": 4.421052631578947e-07, "loss": 0.2646373510360718, "step": 84, "token_acc": 0.9279176201372997 }, { "epoch": 0.017932489451476793, "grad_norm": 0.97265625, "learning_rate": 4.4736842105263156e-07, "loss": 0.27530571818351746, "step": 85, "token_acc": 0.9217687074829932 }, { "epoch": 0.018143459915611813, "grad_norm": 0.73828125, "learning_rate": 4.526315789473684e-07, "loss": 0.30989915132522583, "step": 86, "token_acc": 0.9133137062479555 }, { "epoch": 0.018354430379746836, "grad_norm": 0.91015625, "learning_rate": 4.5789473684210523e-07, "loss": 0.2850973308086395, "step": 87, "token_acc": 0.9211218229623137 }, { "epoch": 0.018565400843881856, "grad_norm": 0.72265625, "learning_rate": 4.6315789473684207e-07, "loss": 0.2523067593574524, "step": 88, "token_acc": 0.9337899543378996 }, { "epoch": 0.018776371308016876, "grad_norm": 0.734375, "learning_rate": 4.6842105263157896e-07, "loss": 0.2510542869567871, "step": 89, "token_acc": 0.9345845983991168 }, { "epoch": 0.0189873417721519, "grad_norm": 0.65625, "learning_rate": 4.7368421052631574e-07, "loss": 0.27831587195396423, "step": 90, "token_acc": 0.9237072619384007 }, { "epoch": 0.01919831223628692, "grad_norm": 0.67578125, "learning_rate": 4.789473684210526e-07, "loss": 0.2806670665740967, "step": 91, "token_acc": 0.9245553643144004 }, { "epoch": 0.019409282700421943, "grad_norm": 0.8046875, "learning_rate": 4.842105263157895e-07, "loss": 0.2630135416984558, "step": 92, "token_acc": 0.9274447949526814 }, { "epoch": 0.019620253164556962, "grad_norm": 0.71875, "learning_rate": 4.894736842105263e-07, "loss": 0.2662945091724396, "step": 93, "token_acc": 0.9303838646714379 }, { "epoch": 0.019831223628691982, "grad_norm": 0.703125, "learning_rate": 4.947368421052631e-07, "loss": 0.2430751472711563, "step": 94, "token_acc": 0.9333737129012719 }, { "epoch": 0.020042194092827006, "grad_norm": 0.80078125, "learning_rate": 5e-07, "loss": 0.2676389515399933, "step": 95, "token_acc": 0.9231901118304885 }, { "epoch": 0.020253164556962026, "grad_norm": 0.765625, "learning_rate": 5.052631578947368e-07, "loss": 0.24123789370059967, "step": 96, "token_acc": 0.9292089873807325 }, { "epoch": 0.020464135021097046, "grad_norm": 0.80078125, "learning_rate": 5.105263157894736e-07, "loss": 0.24845993518829346, "step": 97, "token_acc": 0.9311657879320445 }, { "epoch": 0.02067510548523207, "grad_norm": 0.6953125, "learning_rate": 5.157894736842106e-07, "loss": 0.2502240836620331, "step": 98, "token_acc": 0.9316136772645471 }, { "epoch": 0.02088607594936709, "grad_norm": 0.64453125, "learning_rate": 5.210526315789473e-07, "loss": 0.2715577483177185, "step": 99, "token_acc": 0.9284542172628816 }, { "epoch": 0.02109704641350211, "grad_norm": 0.640625, "learning_rate": 5.263157894736842e-07, "loss": 0.25573915243148804, "step": 100, "token_acc": 0.9303937007874016 }, { "epoch": 0.021308016877637132, "grad_norm": 0.5859375, "learning_rate": 5.31578947368421e-07, "loss": 0.23634707927703857, "step": 101, "token_acc": 0.9278404618210443 }, { "epoch": 0.021518987341772152, "grad_norm": 0.79296875, "learning_rate": 5.368421052631578e-07, "loss": 0.24541448056697845, "step": 102, "token_acc": 0.9269794721407625 }, { "epoch": 0.021729957805907172, "grad_norm": 0.8046875, "learning_rate": 5.421052631578948e-07, "loss": 0.29984721541404724, "step": 103, "token_acc": 0.9175257731958762 }, { "epoch": 0.021940928270042195, "grad_norm": 0.703125, "learning_rate": 5.473684210526316e-07, "loss": 0.23752300441265106, "step": 104, "token_acc": 0.9307146753955264 }, { "epoch": 0.022151898734177215, "grad_norm": 0.7578125, "learning_rate": 5.526315789473684e-07, "loss": 0.26209786534309387, "step": 105, "token_acc": 0.9331191588785047 }, { "epoch": 0.022362869198312235, "grad_norm": 0.6015625, "learning_rate": 5.578947368421052e-07, "loss": 0.26135504245758057, "step": 106, "token_acc": 0.9239098624524437 }, { "epoch": 0.02257383966244726, "grad_norm": 0.76171875, "learning_rate": 5.63157894736842e-07, "loss": 0.2286645919084549, "step": 107, "token_acc": 0.9346016646848989 }, { "epoch": 0.02278481012658228, "grad_norm": 0.94921875, "learning_rate": 5.68421052631579e-07, "loss": 0.2228844314813614, "step": 108, "token_acc": 0.9335610058987892 }, { "epoch": 0.0229957805907173, "grad_norm": 0.7265625, "learning_rate": 5.736842105263158e-07, "loss": 0.23979443311691284, "step": 109, "token_acc": 0.9332460732984293 }, { "epoch": 0.023206751054852322, "grad_norm": 0.7734375, "learning_rate": 5.789473684210526e-07, "loss": 0.2925586402416229, "step": 110, "token_acc": 0.9247163973874184 }, { "epoch": 0.02341772151898734, "grad_norm": 0.89453125, "learning_rate": 5.842105263157895e-07, "loss": 0.26546645164489746, "step": 111, "token_acc": 0.9265167364016736 }, { "epoch": 0.02362869198312236, "grad_norm": 0.71875, "learning_rate": 5.894736842105262e-07, "loss": 0.23430243134498596, "step": 112, "token_acc": 0.9325294286534597 }, { "epoch": 0.023839662447257385, "grad_norm": 0.6875, "learning_rate": 5.947368421052631e-07, "loss": 0.2555156946182251, "step": 113, "token_acc": 0.9277472527472528 }, { "epoch": 0.024050632911392405, "grad_norm": 0.625, "learning_rate": 6e-07, "loss": 0.27733132243156433, "step": 114, "token_acc": 0.9281454979129398 }, { "epoch": 0.024261603375527425, "grad_norm": 0.6328125, "learning_rate": 6.052631578947368e-07, "loss": 0.2438090741634369, "step": 115, "token_acc": 0.9304691916336914 }, { "epoch": 0.024472573839662448, "grad_norm": 0.70703125, "learning_rate": 6.105263157894736e-07, "loss": 0.2810766100883484, "step": 116, "token_acc": 0.9260921603830042 }, { "epoch": 0.024683544303797468, "grad_norm": 0.6171875, "learning_rate": 6.157894736842105e-07, "loss": 0.21388083696365356, "step": 117, "token_acc": 0.9422394320748628 }, { "epoch": 0.024894514767932488, "grad_norm": 0.8828125, "learning_rate": 6.210526315789474e-07, "loss": 0.304085373878479, "step": 118, "token_acc": 0.9148753224419605 }, { "epoch": 0.02510548523206751, "grad_norm": 0.8125, "learning_rate": 6.263157894736842e-07, "loss": 0.24785375595092773, "step": 119, "token_acc": 0.9345686160972785 }, { "epoch": 0.02531645569620253, "grad_norm": 0.6015625, "learning_rate": 6.31578947368421e-07, "loss": 0.23662757873535156, "step": 120, "token_acc": 0.9339464882943144 }, { "epoch": 0.02552742616033755, "grad_norm": 0.7890625, "learning_rate": 6.368421052631578e-07, "loss": 0.30741703510284424, "step": 121, "token_acc": 0.9190517490604221 }, { "epoch": 0.025738396624472575, "grad_norm": 0.58984375, "learning_rate": 6.421052631578947e-07, "loss": 0.2324860692024231, "step": 122, "token_acc": 0.9350512753089666 }, { "epoch": 0.025949367088607594, "grad_norm": 0.71875, "learning_rate": 6.473684210526316e-07, "loss": 0.22093364596366882, "step": 123, "token_acc": 0.9382284382284383 }, { "epoch": 0.026160337552742614, "grad_norm": 0.58203125, "learning_rate": 6.526315789473684e-07, "loss": 0.26719486713409424, "step": 124, "token_acc": 0.9285504263451926 }, { "epoch": 0.026371308016877638, "grad_norm": 0.69921875, "learning_rate": 6.578947368421053e-07, "loss": 0.26403629779815674, "step": 125, "token_acc": 0.9270715096481271 }, { "epoch": 0.026582278481012658, "grad_norm": 1.046875, "learning_rate": 6.63157894736842e-07, "loss": 0.24764738976955414, "step": 126, "token_acc": 0.9328358208955224 }, { "epoch": 0.02679324894514768, "grad_norm": 0.80078125, "learning_rate": 6.684210526315788e-07, "loss": 0.2644532322883606, "step": 127, "token_acc": 0.9267714201008005 }, { "epoch": 0.0270042194092827, "grad_norm": 0.80859375, "learning_rate": 6.736842105263158e-07, "loss": 0.2985777258872986, "step": 128, "token_acc": 0.9214629997164729 }, { "epoch": 0.02721518987341772, "grad_norm": 0.62109375, "learning_rate": 6.789473684210526e-07, "loss": 0.23605869710445404, "step": 129, "token_acc": 0.9335576114381834 }, { "epoch": 0.027426160337552744, "grad_norm": 0.72265625, "learning_rate": 6.842105263157895e-07, "loss": 0.254613995552063, "step": 130, "token_acc": 0.9289265867212635 }, { "epoch": 0.027637130801687764, "grad_norm": 0.91015625, "learning_rate": 6.894736842105263e-07, "loss": 0.32649004459381104, "step": 131, "token_acc": 0.9107457428068115 }, { "epoch": 0.027848101265822784, "grad_norm": 0.66796875, "learning_rate": 6.947368421052631e-07, "loss": 0.2345716655254364, "step": 132, "token_acc": 0.9308671922377199 }, { "epoch": 0.028059071729957807, "grad_norm": 0.796875, "learning_rate": 7e-07, "loss": 0.2658767104148865, "step": 133, "token_acc": 0.9226377390807879 }, { "epoch": 0.028270042194092827, "grad_norm": 0.70703125, "learning_rate": 7.052631578947368e-07, "loss": 0.2791082561016083, "step": 134, "token_acc": 0.9298836497244336 }, { "epoch": 0.028481012658227847, "grad_norm": 0.75390625, "learning_rate": 7.105263157894736e-07, "loss": 0.28342780470848083, "step": 135, "token_acc": 0.9173340961098398 }, { "epoch": 0.02869198312236287, "grad_norm": 0.53125, "learning_rate": 7.157894736842105e-07, "loss": 0.19060048460960388, "step": 136, "token_acc": 0.9426000620539869 }, { "epoch": 0.02890295358649789, "grad_norm": 0.76171875, "learning_rate": 7.210526315789473e-07, "loss": 0.24044275283813477, "step": 137, "token_acc": 0.9315665883931566 }, { "epoch": 0.02911392405063291, "grad_norm": 0.69921875, "learning_rate": 7.263157894736843e-07, "loss": 0.22750994563102722, "step": 138, "token_acc": 0.9367798193709125 }, { "epoch": 0.029324894514767934, "grad_norm": 0.65234375, "learning_rate": 7.315789473684211e-07, "loss": 0.2409280240535736, "step": 139, "token_acc": 0.9357879234167894 }, { "epoch": 0.029535864978902954, "grad_norm": 0.74609375, "learning_rate": 7.368421052631578e-07, "loss": 0.24030432105064392, "step": 140, "token_acc": 0.928141912206855 }, { "epoch": 0.029746835443037974, "grad_norm": 0.703125, "learning_rate": 7.421052631578947e-07, "loss": 0.255402147769928, "step": 141, "token_acc": 0.9247853124074622 }, { "epoch": 0.029957805907172997, "grad_norm": 0.74609375, "learning_rate": 7.473684210526315e-07, "loss": 0.21290147304534912, "step": 142, "token_acc": 0.9362466327446872 }, { "epoch": 0.030168776371308017, "grad_norm": 0.88671875, "learning_rate": 7.526315789473684e-07, "loss": 0.2536450922489166, "step": 143, "token_acc": 0.9314112291350531 }, { "epoch": 0.030379746835443037, "grad_norm": 0.6015625, "learning_rate": 7.578947368421053e-07, "loss": 0.20192307233810425, "step": 144, "token_acc": 0.9473519272955186 }, { "epoch": 0.03059071729957806, "grad_norm": 0.7734375, "learning_rate": 7.631578947368421e-07, "loss": 0.27158498764038086, "step": 145, "token_acc": 0.9229805886036319 }, { "epoch": 0.03080168776371308, "grad_norm": 0.93359375, "learning_rate": 7.684210526315788e-07, "loss": 0.2483355551958084, "step": 146, "token_acc": 0.9249113760876571 }, { "epoch": 0.0310126582278481, "grad_norm": 0.546875, "learning_rate": 7.736842105263157e-07, "loss": 0.2523292303085327, "step": 147, "token_acc": 0.9309432853364679 }, { "epoch": 0.031223628691983123, "grad_norm": 0.9375, "learning_rate": 7.789473684210526e-07, "loss": 0.28833281993865967, "step": 148, "token_acc": 0.9170937594211637 }, { "epoch": 0.03143459915611815, "grad_norm": 0.7421875, "learning_rate": 7.842105263157895e-07, "loss": 0.250460147857666, "step": 149, "token_acc": 0.9285714285714286 }, { "epoch": 0.03164556962025317, "grad_norm": 0.9453125, "learning_rate": 7.894736842105263e-07, "loss": 0.2840534746646881, "step": 150, "token_acc": 0.9216349108789182 }, { "epoch": 0.03185654008438819, "grad_norm": 0.74609375, "learning_rate": 7.947368421052631e-07, "loss": 0.2967279851436615, "step": 151, "token_acc": 0.9164603960396039 }, { "epoch": 0.032067510548523206, "grad_norm": 0.83203125, "learning_rate": 8e-07, "loss": 0.25097131729125977, "step": 152, "token_acc": 0.929299572509043 }, { "epoch": 0.032278481012658226, "grad_norm": 0.6953125, "learning_rate": 8.052631578947368e-07, "loss": 0.23201878368854523, "step": 153, "token_acc": 0.9383025367992484 }, { "epoch": 0.032489451476793246, "grad_norm": 0.65625, "learning_rate": 8.105263157894736e-07, "loss": 0.2245524525642395, "step": 154, "token_acc": 0.9406554472984943 }, { "epoch": 0.03270042194092827, "grad_norm": 0.75, "learning_rate": 8.157894736842105e-07, "loss": 0.22958879172801971, "step": 155, "token_acc": 0.9288455860643637 }, { "epoch": 0.03291139240506329, "grad_norm": 0.70703125, "learning_rate": 8.210526315789473e-07, "loss": 0.2539287805557251, "step": 156, "token_acc": 0.9255386565272496 }, { "epoch": 0.03312236286919831, "grad_norm": 0.6875, "learning_rate": 8.263157894736841e-07, "loss": 0.245978444814682, "step": 157, "token_acc": 0.9341576506955178 }, { "epoch": 0.03333333333333333, "grad_norm": 0.66796875, "learning_rate": 8.315789473684211e-07, "loss": 0.26281115412712097, "step": 158, "token_acc": 0.927784222737819 }, { "epoch": 0.03354430379746835, "grad_norm": 1.0, "learning_rate": 8.368421052631579e-07, "loss": 0.229181706905365, "step": 159, "token_acc": 0.9329073482428115 }, { "epoch": 0.03375527426160337, "grad_norm": 0.73046875, "learning_rate": 8.421052631578947e-07, "loss": 0.2598130702972412, "step": 160, "token_acc": 0.9302388707926167 }, { "epoch": 0.0339662447257384, "grad_norm": 0.75, "learning_rate": 8.473684210526315e-07, "loss": 0.25113117694854736, "step": 161, "token_acc": 0.9281177829099307 }, { "epoch": 0.03417721518987342, "grad_norm": 0.6015625, "learning_rate": 8.526315789473683e-07, "loss": 0.20510195195674896, "step": 162, "token_acc": 0.936978417266187 }, { "epoch": 0.03438818565400844, "grad_norm": 0.75, "learning_rate": 8.578947368421053e-07, "loss": 0.25259485840797424, "step": 163, "token_acc": 0.9289311695579183 }, { "epoch": 0.03459915611814346, "grad_norm": 0.6484375, "learning_rate": 8.631578947368421e-07, "loss": 0.2823118567466736, "step": 164, "token_acc": 0.9225122349102773 }, { "epoch": 0.03481012658227848, "grad_norm": 0.84375, "learning_rate": 8.684210526315789e-07, "loss": 0.2576562762260437, "step": 165, "token_acc": 0.931045050566963 }, { "epoch": 0.0350210970464135, "grad_norm": 0.6796875, "learning_rate": 8.736842105263158e-07, "loss": 0.2349683940410614, "step": 166, "token_acc": 0.9357770372614359 }, { "epoch": 0.035232067510548526, "grad_norm": 0.89453125, "learning_rate": 8.789473684210525e-07, "loss": 0.19951120018959045, "step": 167, "token_acc": 0.9381918819188192 }, { "epoch": 0.035443037974683546, "grad_norm": 0.58984375, "learning_rate": 8.842105263157895e-07, "loss": 0.24796079099178314, "step": 168, "token_acc": 0.9311111111111111 }, { "epoch": 0.035654008438818566, "grad_norm": 0.73828125, "learning_rate": 8.894736842105263e-07, "loss": 0.2532733082771301, "step": 169, "token_acc": 0.9297912713472486 }, { "epoch": 0.035864978902953586, "grad_norm": 0.98046875, "learning_rate": 8.947368421052631e-07, "loss": 0.25062763690948486, "step": 170, "token_acc": 0.9289099526066351 }, { "epoch": 0.036075949367088606, "grad_norm": 0.6484375, "learning_rate": 9e-07, "loss": 0.2228512465953827, "step": 171, "token_acc": 0.9302030456852792 }, { "epoch": 0.036286919831223625, "grad_norm": 0.55859375, "learning_rate": 9.052631578947368e-07, "loss": 0.20684444904327393, "step": 172, "token_acc": 0.9388583019414662 }, { "epoch": 0.03649789029535865, "grad_norm": 0.68359375, "learning_rate": 9.105263157894737e-07, "loss": 0.2119678407907486, "step": 173, "token_acc": 0.938135593220339 }, { "epoch": 0.03670886075949367, "grad_norm": 0.828125, "learning_rate": 9.157894736842105e-07, "loss": 0.25168293714523315, "step": 174, "token_acc": 0.9275818639798489 }, { "epoch": 0.03691983122362869, "grad_norm": 2.046875, "learning_rate": 9.210526315789473e-07, "loss": 0.2518053650856018, "step": 175, "token_acc": 0.9275232105420784 }, { "epoch": 0.03713080168776371, "grad_norm": 0.6171875, "learning_rate": 9.263157894736841e-07, "loss": 0.25257354974746704, "step": 176, "token_acc": 0.9272777932571747 }, { "epoch": 0.03734177215189873, "grad_norm": 1.0390625, "learning_rate": 9.31578947368421e-07, "loss": 0.24908028542995453, "step": 177, "token_acc": 0.9316982303632412 }, { "epoch": 0.03755274261603375, "grad_norm": 0.796875, "learning_rate": 9.368421052631579e-07, "loss": 0.2594815194606781, "step": 178, "token_acc": 0.9222846441947565 }, { "epoch": 0.03776371308016878, "grad_norm": 1.734375, "learning_rate": 9.421052631578948e-07, "loss": 0.301219642162323, "step": 179, "token_acc": 0.9261637239165329 }, { "epoch": 0.0379746835443038, "grad_norm": 0.6171875, "learning_rate": 9.473684210526315e-07, "loss": 0.2224687933921814, "step": 180, "token_acc": 0.9298196166854565 }, { "epoch": 0.03818565400843882, "grad_norm": 0.76953125, "learning_rate": 9.526315789473683e-07, "loss": 0.2755109667778015, "step": 181, "token_acc": 0.9245460237946149 }, { "epoch": 0.03839662447257384, "grad_norm": 0.6796875, "learning_rate": 9.578947368421053e-07, "loss": 0.24350810050964355, "step": 182, "token_acc": 0.9347500748278958 }, { "epoch": 0.03860759493670886, "grad_norm": 0.70703125, "learning_rate": 9.63157894736842e-07, "loss": 0.26835542917251587, "step": 183, "token_acc": 0.9246247205365697 }, { "epoch": 0.038818565400843885, "grad_norm": 0.703125, "learning_rate": 9.68421052631579e-07, "loss": 0.25252771377563477, "step": 184, "token_acc": 0.9330130016958734 }, { "epoch": 0.039029535864978905, "grad_norm": 1.21875, "learning_rate": 9.736842105263158e-07, "loss": 0.27294090390205383, "step": 185, "token_acc": 0.9220917822838848 }, { "epoch": 0.039240506329113925, "grad_norm": 0.9140625, "learning_rate": 9.789473684210526e-07, "loss": 0.257973313331604, "step": 186, "token_acc": 0.9298401420959147 }, { "epoch": 0.039451476793248945, "grad_norm": 0.71875, "learning_rate": 9.842105263157894e-07, "loss": 0.20286661386489868, "step": 187, "token_acc": 0.9379893517068587 }, { "epoch": 0.039662447257383965, "grad_norm": 1.0859375, "learning_rate": 9.894736842105263e-07, "loss": 0.30547526478767395, "step": 188, "token_acc": 0.9224360815857512 }, { "epoch": 0.039873417721518985, "grad_norm": 0.6796875, "learning_rate": 9.947368421052631e-07, "loss": 0.2581551671028137, "step": 189, "token_acc": 0.9263346257083209 }, { "epoch": 0.04008438818565401, "grad_norm": 0.99609375, "learning_rate": 1e-06, "loss": 0.267391562461853, "step": 190, "token_acc": 0.9272947591638897 }, { "epoch": 0.04029535864978903, "grad_norm": 0.7265625, "learning_rate": 9.99999971410384e-07, "loss": 0.22360718250274658, "step": 191, "token_acc": 0.9367160775370581 }, { "epoch": 0.04050632911392405, "grad_norm": 0.71875, "learning_rate": 9.999998856415392e-07, "loss": 0.2589290738105774, "step": 192, "token_acc": 0.9256432004523607 }, { "epoch": 0.04071729957805907, "grad_norm": 0.65234375, "learning_rate": 9.999997426934757e-07, "loss": 0.2469128668308258, "step": 193, "token_acc": 0.9290098745663197 }, { "epoch": 0.04092827004219409, "grad_norm": 1.203125, "learning_rate": 9.999995425662095e-07, "loss": 0.2602100968360901, "step": 194, "token_acc": 0.9284731774415406 }, { "epoch": 0.04113924050632911, "grad_norm": 0.61328125, "learning_rate": 9.999992852597638e-07, "loss": 0.2579442858695984, "step": 195, "token_acc": 0.9280116110304789 }, { "epoch": 0.04135021097046414, "grad_norm": 0.75, "learning_rate": 9.999989707741678e-07, "loss": 0.266757071018219, "step": 196, "token_acc": 0.923786841321822 }, { "epoch": 0.04156118143459916, "grad_norm": 0.59765625, "learning_rate": 9.999985991094577e-07, "loss": 0.22912907600402832, "step": 197, "token_acc": 0.9336933693369337 }, { "epoch": 0.04177215189873418, "grad_norm": 0.73046875, "learning_rate": 9.999981702656756e-07, "loss": 0.23296543955802917, "step": 198, "token_acc": 0.931110498759989 }, { "epoch": 0.0419831223628692, "grad_norm": 0.75390625, "learning_rate": 9.999976842428708e-07, "loss": 0.27944594621658325, "step": 199, "token_acc": 0.9281785829828535 }, { "epoch": 0.04219409282700422, "grad_norm": 0.6328125, "learning_rate": 9.99997141041099e-07, "loss": 0.2449186146259308, "step": 200, "token_acc": 0.9309120699071546 }, { "epoch": 0.04219409282700422, "eval_loss": 0.43372446298599243, "eval_runtime": 245.8313, "eval_samples_per_second": 137.106, "eval_steps_per_second": 2.144, "eval_token_acc": 0.8990801399982051, "step": 200 }, { "epoch": 0.04240506329113924, "grad_norm": 0.765625, "learning_rate": 9.99996540660422e-07, "loss": 0.2534567713737488, "step": 201, "token_acc": 0.9282550930026572 }, { "epoch": 0.042616033755274264, "grad_norm": 1.0, "learning_rate": 9.999958831009087e-07, "loss": 0.2861325144767761, "step": 202, "token_acc": 0.9250295159386068 }, { "epoch": 0.042827004219409284, "grad_norm": 0.671875, "learning_rate": 9.999951683626345e-07, "loss": 0.24760206043720245, "step": 203, "token_acc": 0.9320360151031077 }, { "epoch": 0.043037974683544304, "grad_norm": 0.62109375, "learning_rate": 9.999943964456805e-07, "loss": 0.2488883137702942, "step": 204, "token_acc": 0.9300783604581073 }, { "epoch": 0.043248945147679324, "grad_norm": 0.80078125, "learning_rate": 9.999935673501355e-07, "loss": 0.257844477891922, "step": 205, "token_acc": 0.9278959810874704 }, { "epoch": 0.043459915611814344, "grad_norm": 0.71875, "learning_rate": 9.99992681076094e-07, "loss": 0.21238219738006592, "step": 206, "token_acc": 0.937776467118844 }, { "epoch": 0.043670886075949364, "grad_norm": 0.67578125, "learning_rate": 9.999917376236578e-07, "loss": 0.21476256847381592, "step": 207, "token_acc": 0.9357326478149101 }, { "epoch": 0.04388185654008439, "grad_norm": 0.97265625, "learning_rate": 9.999907369929344e-07, "loss": 0.24194155633449554, "step": 208, "token_acc": 0.9311714096624751 }, { "epoch": 0.04409282700421941, "grad_norm": 0.765625, "learning_rate": 9.999896791840383e-07, "loss": 0.2757856249809265, "step": 209, "token_acc": 0.9243792325056434 }, { "epoch": 0.04430379746835443, "grad_norm": 0.66015625, "learning_rate": 9.999885641970906e-07, "loss": 0.2318935990333557, "step": 210, "token_acc": 0.9357142857142857 }, { "epoch": 0.04451476793248945, "grad_norm": 0.64453125, "learning_rate": 9.999873920322186e-07, "loss": 0.2802179157733917, "step": 211, "token_acc": 0.9227716727716728 }, { "epoch": 0.04472573839662447, "grad_norm": 0.6796875, "learning_rate": 9.999861626895565e-07, "loss": 0.2558714747428894, "step": 212, "token_acc": 0.9255022321428571 }, { "epoch": 0.04493670886075949, "grad_norm": 0.890625, "learning_rate": 9.99984876169245e-07, "loss": 0.29882046580314636, "step": 213, "token_acc": 0.9213449414590213 }, { "epoch": 0.04514767932489452, "grad_norm": 0.76171875, "learning_rate": 9.999835324714307e-07, "loss": 0.24097202718257904, "step": 214, "token_acc": 0.9359388774610637 }, { "epoch": 0.04535864978902954, "grad_norm": 1.703125, "learning_rate": 9.99982131596268e-07, "loss": 0.28899186849594116, "step": 215, "token_acc": 0.9218701937865272 }, { "epoch": 0.04556962025316456, "grad_norm": 0.68359375, "learning_rate": 9.999806735439165e-07, "loss": 0.2710872292518616, "step": 216, "token_acc": 0.9248780487804878 }, { "epoch": 0.04578059071729958, "grad_norm": 0.6484375, "learning_rate": 9.999791583145433e-07, "loss": 0.2295331209897995, "step": 217, "token_acc": 0.9357304643261608 }, { "epoch": 0.0459915611814346, "grad_norm": 0.9921875, "learning_rate": 9.999775859083216e-07, "loss": 0.2171935886144638, "step": 218, "token_acc": 0.9356940509915014 }, { "epoch": 0.046202531645569624, "grad_norm": 0.94921875, "learning_rate": 9.99975956325431e-07, "loss": 0.2726757526397705, "step": 219, "token_acc": 0.9209691375829248 }, { "epoch": 0.046413502109704644, "grad_norm": 0.703125, "learning_rate": 9.99974269566058e-07, "loss": 0.27028149366378784, "step": 220, "token_acc": 0.9284016636957814 }, { "epoch": 0.04662447257383966, "grad_norm": 0.56640625, "learning_rate": 9.999725256303957e-07, "loss": 0.20975014567375183, "step": 221, "token_acc": 0.9344503233392122 }, { "epoch": 0.04683544303797468, "grad_norm": 0.80078125, "learning_rate": 9.999707245186434e-07, "loss": 0.3065168261528015, "step": 222, "token_acc": 0.9186879823594267 }, { "epoch": 0.0470464135021097, "grad_norm": 0.66796875, "learning_rate": 9.999688662310072e-07, "loss": 0.20764990150928497, "step": 223, "token_acc": 0.9452255418863503 }, { "epoch": 0.04725738396624472, "grad_norm": 0.89453125, "learning_rate": 9.99966950767699e-07, "loss": 0.2654411196708679, "step": 224, "token_acc": 0.9302244039270687 }, { "epoch": 0.04746835443037975, "grad_norm": 0.6953125, "learning_rate": 9.999649781289385e-07, "loss": 0.2514041066169739, "step": 225, "token_acc": 0.933082271147161 }, { "epoch": 0.04767932489451477, "grad_norm": 0.6328125, "learning_rate": 9.99962948314951e-07, "loss": 0.21037127077579498, "step": 226, "token_acc": 0.9351134846461949 }, { "epoch": 0.04789029535864979, "grad_norm": 0.59765625, "learning_rate": 9.99960861325969e-07, "loss": 0.21236909925937653, "step": 227, "token_acc": 0.940097449125824 }, { "epoch": 0.04810126582278481, "grad_norm": 0.6640625, "learning_rate": 9.999587171622305e-07, "loss": 0.21992863714694977, "step": 228, "token_acc": 0.9344711978055471 }, { "epoch": 0.04831223628691983, "grad_norm": 0.95703125, "learning_rate": 9.999565158239812e-07, "loss": 0.26401764154434204, "step": 229, "token_acc": 0.9244654262704805 }, { "epoch": 0.04852320675105485, "grad_norm": 0.73828125, "learning_rate": 9.999542573114728e-07, "loss": 0.24087585508823395, "step": 230, "token_acc": 0.926786751888437 }, { "epoch": 0.048734177215189876, "grad_norm": 0.703125, "learning_rate": 9.999519416249634e-07, "loss": 0.2533552646636963, "step": 231, "token_acc": 0.9275784028451342 }, { "epoch": 0.048945147679324896, "grad_norm": 1.4296875, "learning_rate": 9.999495687647178e-07, "loss": 0.2529897689819336, "step": 232, "token_acc": 0.9269195189639223 }, { "epoch": 0.049156118143459916, "grad_norm": 0.81640625, "learning_rate": 9.999471387310077e-07, "loss": 0.2788076400756836, "step": 233, "token_acc": 0.9202168861347793 }, { "epoch": 0.049367088607594936, "grad_norm": 0.68359375, "learning_rate": 9.999446515241108e-07, "loss": 0.2300492525100708, "step": 234, "token_acc": 0.9325668116842759 }, { "epoch": 0.049578059071729956, "grad_norm": 0.85546875, "learning_rate": 9.999421071443115e-07, "loss": 0.2711006700992584, "step": 235, "token_acc": 0.9220738900962434 }, { "epoch": 0.049789029535864976, "grad_norm": 0.66796875, "learning_rate": 9.999395055919007e-07, "loss": 0.24382656812667847, "step": 236, "token_acc": 0.9297777777777778 }, { "epoch": 0.05, "grad_norm": 0.76953125, "learning_rate": 9.999368468671758e-07, "loss": 0.2818126380443573, "step": 237, "token_acc": 0.9211531781868705 }, { "epoch": 0.05021097046413502, "grad_norm": 0.79296875, "learning_rate": 9.999341309704413e-07, "loss": 0.29420921206474304, "step": 238, "token_acc": 0.9187301587301587 }, { "epoch": 0.05042194092827004, "grad_norm": 0.6796875, "learning_rate": 9.999313579020074e-07, "loss": 0.24233081936836243, "step": 239, "token_acc": 0.9322516367776829 }, { "epoch": 0.05063291139240506, "grad_norm": 0.93359375, "learning_rate": 9.999285276621913e-07, "loss": 0.22199922800064087, "step": 240, "token_acc": 0.9347892956013534 }, { "epoch": 0.05084388185654008, "grad_norm": 0.6796875, "learning_rate": 9.999256402513168e-07, "loss": 0.2756049931049347, "step": 241, "token_acc": 0.9229754682141915 }, { "epoch": 0.0510548523206751, "grad_norm": 1.7421875, "learning_rate": 9.999226956697138e-07, "loss": 0.2459474354982376, "step": 242, "token_acc": 0.9287122207621551 }, { "epoch": 0.05126582278481013, "grad_norm": 0.7265625, "learning_rate": 9.999196939177195e-07, "loss": 0.26543667912483215, "step": 243, "token_acc": 0.9251445086705202 }, { "epoch": 0.05147679324894515, "grad_norm": 0.69921875, "learning_rate": 9.999166349956768e-07, "loss": 0.29306668043136597, "step": 244, "token_acc": 0.922089552238806 }, { "epoch": 0.05168776371308017, "grad_norm": 0.78515625, "learning_rate": 9.999135189039356e-07, "loss": 0.232993021607399, "step": 245, "token_acc": 0.933374460209747 }, { "epoch": 0.05189873417721519, "grad_norm": 0.703125, "learning_rate": 9.999103456428522e-07, "loss": 0.29452502727508545, "step": 246, "token_acc": 0.9255251432208784 }, { "epoch": 0.05210970464135021, "grad_norm": 0.75, "learning_rate": 9.999071152127897e-07, "loss": 0.2289431095123291, "step": 247, "token_acc": 0.9372047791053071 }, { "epoch": 0.05232067510548523, "grad_norm": 0.8046875, "learning_rate": 9.999038276141175e-07, "loss": 0.3194141983985901, "step": 248, "token_acc": 0.914375 }, { "epoch": 0.052531645569620256, "grad_norm": 0.68359375, "learning_rate": 9.999004828472112e-07, "loss": 0.24136003851890564, "step": 249, "token_acc": 0.9315025252525253 }, { "epoch": 0.052742616033755275, "grad_norm": 0.74609375, "learning_rate": 9.998970809124537e-07, "loss": 0.31663718819618225, "step": 250, "token_acc": 0.9186367823150138 }, { "epoch": 0.052953586497890295, "grad_norm": 0.765625, "learning_rate": 9.998936218102338e-07, "loss": 0.2638603448867798, "step": 251, "token_acc": 0.9242610837438424 }, { "epoch": 0.053164556962025315, "grad_norm": 0.62109375, "learning_rate": 9.998901055409474e-07, "loss": 0.26234734058380127, "step": 252, "token_acc": 0.9283480238839921 }, { "epoch": 0.053375527426160335, "grad_norm": 0.78125, "learning_rate": 9.99886532104996e-07, "loss": 0.27683377265930176, "step": 253, "token_acc": 0.9228208232445521 }, { "epoch": 0.05358649789029536, "grad_norm": 0.65234375, "learning_rate": 9.99882901502789e-07, "loss": 0.20958667993545532, "step": 254, "token_acc": 0.9367622259696459 }, { "epoch": 0.05379746835443038, "grad_norm": 0.765625, "learning_rate": 9.998792137347412e-07, "loss": 0.2769642174243927, "step": 255, "token_acc": 0.9259877573734001 }, { "epoch": 0.0540084388185654, "grad_norm": 0.7890625, "learning_rate": 9.998754688012744e-07, "loss": 0.291420578956604, "step": 256, "token_acc": 0.9195469067673541 }, { "epoch": 0.05421940928270042, "grad_norm": 0.7109375, "learning_rate": 9.998716667028166e-07, "loss": 0.2671175003051758, "step": 257, "token_acc": 0.9248520710059172 }, { "epoch": 0.05443037974683544, "grad_norm": 0.640625, "learning_rate": 9.99867807439803e-07, "loss": 0.2104148268699646, "step": 258, "token_acc": 0.9412735070933685 }, { "epoch": 0.05464135021097046, "grad_norm": 0.8125, "learning_rate": 9.99863891012675e-07, "loss": 0.25562331080436707, "step": 259, "token_acc": 0.9300422386483632 }, { "epoch": 0.05485232067510549, "grad_norm": 0.93359375, "learning_rate": 9.998599174218797e-07, "loss": 0.25945645570755005, "step": 260, "token_acc": 0.9278557114228457 }, { "epoch": 0.05506329113924051, "grad_norm": 0.76953125, "learning_rate": 9.998558866678726e-07, "loss": 0.2575325667858124, "step": 261, "token_acc": 0.9237336368810473 }, { "epoch": 0.05527426160337553, "grad_norm": 0.79296875, "learning_rate": 9.998517987511139e-07, "loss": 0.21312668919563293, "step": 262, "token_acc": 0.9391352244560727 }, { "epoch": 0.05548523206751055, "grad_norm": 1.5390625, "learning_rate": 9.998476536720712e-07, "loss": 0.27397406101226807, "step": 263, "token_acc": 0.9302109181141439 }, { "epoch": 0.05569620253164557, "grad_norm": 0.7265625, "learning_rate": 9.998434514312187e-07, "loss": 0.27095240354537964, "step": 264, "token_acc": 0.9266853059956508 }, { "epoch": 0.05590717299578059, "grad_norm": 0.76953125, "learning_rate": 9.99839192029037e-07, "loss": 0.237601175904274, "step": 265, "token_acc": 0.9364719228587635 }, { "epoch": 0.056118143459915615, "grad_norm": 0.7421875, "learning_rate": 9.998348754660129e-07, "loss": 0.2851409316062927, "step": 266, "token_acc": 0.9176470588235294 }, { "epoch": 0.056329113924050635, "grad_norm": 0.796875, "learning_rate": 9.998305017426403e-07, "loss": 0.26605701446533203, "step": 267, "token_acc": 0.9261460101867572 }, { "epoch": 0.056540084388185655, "grad_norm": 0.67578125, "learning_rate": 9.998260708594192e-07, "loss": 0.26237568259239197, "step": 268, "token_acc": 0.9257142857142857 }, { "epoch": 0.056751054852320675, "grad_norm": 0.859375, "learning_rate": 9.998215828168566e-07, "loss": 0.2315206527709961, "step": 269, "token_acc": 0.9332755632582322 }, { "epoch": 0.056962025316455694, "grad_norm": 0.70703125, "learning_rate": 9.998170376154654e-07, "loss": 0.26748204231262207, "step": 270, "token_acc": 0.9308067757680161 }, { "epoch": 0.057172995780590714, "grad_norm": 0.9609375, "learning_rate": 9.998124352557655e-07, "loss": 0.33397209644317627, "step": 271, "token_acc": 0.9161147902869757 }, { "epoch": 0.05738396624472574, "grad_norm": 0.71484375, "learning_rate": 9.998077757382835e-07, "loss": 0.2637864351272583, "step": 272, "token_acc": 0.9291949563530553 }, { "epoch": 0.05759493670886076, "grad_norm": 0.734375, "learning_rate": 9.998030590635517e-07, "loss": 0.2878430485725403, "step": 273, "token_acc": 0.919882100750268 }, { "epoch": 0.05780590717299578, "grad_norm": 0.65625, "learning_rate": 9.997982852321099e-07, "loss": 0.2438146024942398, "step": 274, "token_acc": 0.9312955692652832 }, { "epoch": 0.0580168776371308, "grad_norm": 0.74609375, "learning_rate": 9.99793454244504e-07, "loss": 0.2523839771747589, "step": 275, "token_acc": 0.9254349627174814 }, { "epoch": 0.05822784810126582, "grad_norm": 0.609375, "learning_rate": 9.997885661012865e-07, "loss": 0.23295487463474274, "step": 276, "token_acc": 0.937351934719663 }, { "epoch": 0.05843881856540084, "grad_norm": 0.77734375, "learning_rate": 9.99783620803016e-07, "loss": 0.287087619304657, "step": 277, "token_acc": 0.9255798969072165 }, { "epoch": 0.05864978902953587, "grad_norm": 0.65234375, "learning_rate": 9.997786183502584e-07, "loss": 0.23424138128757477, "step": 278, "token_acc": 0.924812030075188 }, { "epoch": 0.05886075949367089, "grad_norm": 0.9296875, "learning_rate": 9.997735587435858e-07, "loss": 0.25225332379341125, "step": 279, "token_acc": 0.9257325210327821 }, { "epoch": 0.05907172995780591, "grad_norm": 0.62890625, "learning_rate": 9.997684419835767e-07, "loss": 0.24867427349090576, "step": 280, "token_acc": 0.9290465631929047 }, { "epoch": 0.05928270042194093, "grad_norm": 1.265625, "learning_rate": 9.997632680708163e-07, "loss": 0.2555754482746124, "step": 281, "token_acc": 0.9300212056952438 }, { "epoch": 0.05949367088607595, "grad_norm": 0.6796875, "learning_rate": 9.99758037005896e-07, "loss": 0.25050830841064453, "step": 282, "token_acc": 0.9328039095907147 }, { "epoch": 0.05970464135021097, "grad_norm": 0.77734375, "learning_rate": 9.997527487894144e-07, "loss": 0.264704167842865, "step": 283, "token_acc": 0.9269878805793674 }, { "epoch": 0.059915611814345994, "grad_norm": 0.765625, "learning_rate": 9.997474034219762e-07, "loss": 0.29550492763519287, "step": 284, "token_acc": 0.9211438474870017 }, { "epoch": 0.060126582278481014, "grad_norm": 0.7109375, "learning_rate": 9.997420009041927e-07, "loss": 0.264403373003006, "step": 285, "token_acc": 0.9260048721071864 }, { "epoch": 0.060337552742616034, "grad_norm": 0.78125, "learning_rate": 9.997365412366812e-07, "loss": 0.2595897316932678, "step": 286, "token_acc": 0.9286173633440514 }, { "epoch": 0.060548523206751054, "grad_norm": 0.70703125, "learning_rate": 9.997310244200667e-07, "loss": 0.23976776003837585, "step": 287, "token_acc": 0.9318894271872328 }, { "epoch": 0.060759493670886074, "grad_norm": 0.9140625, "learning_rate": 9.997254504549799e-07, "loss": 0.26183438301086426, "step": 288, "token_acc": 0.9322949777495232 }, { "epoch": 0.0609704641350211, "grad_norm": 1.0078125, "learning_rate": 9.99719819342058e-07, "loss": 0.24600914120674133, "step": 289, "token_acc": 0.9311287236949987 }, { "epoch": 0.06118143459915612, "grad_norm": 0.83984375, "learning_rate": 9.997141310819454e-07, "loss": 0.3296029567718506, "step": 290, "token_acc": 0.9126184834123223 }, { "epoch": 0.06139240506329114, "grad_norm": 0.85546875, "learning_rate": 9.997083856752923e-07, "loss": 0.2794192433357239, "step": 291, "token_acc": 0.9190751445086706 }, { "epoch": 0.06160337552742616, "grad_norm": 0.6953125, "learning_rate": 9.997025831227557e-07, "loss": 0.23178298771381378, "step": 292, "token_acc": 0.9380645161290323 }, { "epoch": 0.06181434599156118, "grad_norm": 0.87109375, "learning_rate": 9.996967234249994e-07, "loss": 0.2989250123500824, "step": 293, "token_acc": 0.9201101928374655 }, { "epoch": 0.0620253164556962, "grad_norm": 0.625, "learning_rate": 9.996908065826935e-07, "loss": 0.20801636576652527, "step": 294, "token_acc": 0.9374828626268166 }, { "epoch": 0.06223628691983123, "grad_norm": 0.68359375, "learning_rate": 9.996848325965142e-07, "loss": 0.2513968050479889, "step": 295, "token_acc": 0.9286151960784313 }, { "epoch": 0.06244725738396625, "grad_norm": 0.77734375, "learning_rate": 9.99678801467145e-07, "loss": 0.23670442402362823, "step": 296, "token_acc": 0.9297945205479452 }, { "epoch": 0.06265822784810127, "grad_norm": 0.71875, "learning_rate": 9.99672713195276e-07, "loss": 0.3005760908126831, "step": 297, "token_acc": 0.9181008902077151 }, { "epoch": 0.0628691983122363, "grad_norm": 0.64453125, "learning_rate": 9.996665677816027e-07, "loss": 0.2198331356048584, "step": 298, "token_acc": 0.934411226357535 }, { "epoch": 0.0630801687763713, "grad_norm": 0.7421875, "learning_rate": 9.996603652268283e-07, "loss": 0.22930385172367096, "step": 299, "token_acc": 0.9343891402714932 }, { "epoch": 0.06329113924050633, "grad_norm": 0.6953125, "learning_rate": 9.99654105531662e-07, "loss": 0.26769182085990906, "step": 300, "token_acc": 0.9295731707317073 }, { "epoch": 0.06350210970464135, "grad_norm": 0.58203125, "learning_rate": 9.9964778869682e-07, "loss": 0.2113886922597885, "step": 301, "token_acc": 0.9383966244725739 }, { "epoch": 0.06371308016877637, "grad_norm": 0.78515625, "learning_rate": 9.996414147230242e-07, "loss": 0.2549387812614441, "step": 302, "token_acc": 0.9245056920311564 }, { "epoch": 0.06392405063291139, "grad_norm": 0.83203125, "learning_rate": 9.996349836110035e-07, "loss": 0.24877741932868958, "step": 303, "token_acc": 0.9278890600924499 }, { "epoch": 0.06413502109704641, "grad_norm": 0.85546875, "learning_rate": 9.996284953614938e-07, "loss": 0.2965357303619385, "step": 304, "token_acc": 0.9167351410572446 }, { "epoch": 0.06434599156118144, "grad_norm": 0.6484375, "learning_rate": 9.996219499752365e-07, "loss": 0.21444806456565857, "step": 305, "token_acc": 0.938101788170564 }, { "epoch": 0.06455696202531645, "grad_norm": 0.671875, "learning_rate": 9.996153474529807e-07, "loss": 0.24650560319423676, "step": 306, "token_acc": 0.928284854563691 }, { "epoch": 0.06476793248945148, "grad_norm": 0.73046875, "learning_rate": 9.996086877954812e-07, "loss": 0.26447594165802, "step": 307, "token_acc": 0.9272459499263623 }, { "epoch": 0.06497890295358649, "grad_norm": 0.6484375, "learning_rate": 9.996019710034997e-07, "loss": 0.22312676906585693, "step": 308, "token_acc": 0.9304399524375743 }, { "epoch": 0.06518987341772152, "grad_norm": 0.75, "learning_rate": 9.99595197077804e-07, "loss": 0.3124806582927704, "step": 309, "token_acc": 0.9134506242905789 }, { "epoch": 0.06540084388185655, "grad_norm": 0.6640625, "learning_rate": 9.99588366019169e-07, "loss": 0.21681739389896393, "step": 310, "token_acc": 0.9393859879296772 }, { "epoch": 0.06561181434599156, "grad_norm": 0.7734375, "learning_rate": 9.99581477828376e-07, "loss": 0.262542188167572, "step": 311, "token_acc": 0.9339063426200356 }, { "epoch": 0.06582278481012659, "grad_norm": 0.77734375, "learning_rate": 9.995745325062126e-07, "loss": 0.24062800407409668, "step": 312, "token_acc": 0.9331594391913922 }, { "epoch": 0.0660337552742616, "grad_norm": 0.71484375, "learning_rate": 9.995675300534729e-07, "loss": 0.26486438512802124, "step": 313, "token_acc": 0.9250824093497153 }, { "epoch": 0.06624472573839663, "grad_norm": 0.54296875, "learning_rate": 9.995604704709578e-07, "loss": 0.18465927243232727, "step": 314, "token_acc": 0.9464985994397759 }, { "epoch": 0.06645569620253164, "grad_norm": 0.91015625, "learning_rate": 9.99553353759475e-07, "loss": 0.2520803213119507, "step": 315, "token_acc": 0.9257213014119091 }, { "epoch": 0.06666666666666667, "grad_norm": 0.65625, "learning_rate": 9.995461799198378e-07, "loss": 0.29753151535987854, "step": 316, "token_acc": 0.9256516587677726 }, { "epoch": 0.06687763713080169, "grad_norm": 0.71875, "learning_rate": 9.995389489528667e-07, "loss": 0.2546613812446594, "step": 317, "token_acc": 0.9310240048617442 }, { "epoch": 0.0670886075949367, "grad_norm": 1.4140625, "learning_rate": 9.995316608593886e-07, "loss": 0.24808946251869202, "step": 318, "token_acc": 0.9316290130796671 }, { "epoch": 0.06729957805907173, "grad_norm": 0.73046875, "learning_rate": 9.995243156402374e-07, "loss": 0.2512444853782654, "step": 319, "token_acc": 0.9294703723125328 }, { "epoch": 0.06751054852320675, "grad_norm": 0.921875, "learning_rate": 9.995169132962527e-07, "loss": 0.2597760260105133, "step": 320, "token_acc": 0.9277310924369748 }, { "epoch": 0.06772151898734177, "grad_norm": 0.65234375, "learning_rate": 9.99509453828281e-07, "loss": 0.2552398443222046, "step": 321, "token_acc": 0.9278263321116437 }, { "epoch": 0.0679324894514768, "grad_norm": 0.75390625, "learning_rate": 9.995019372371754e-07, "loss": 0.29060834646224976, "step": 322, "token_acc": 0.9247988807275271 }, { "epoch": 0.06814345991561181, "grad_norm": 0.57421875, "learning_rate": 9.994943635237955e-07, "loss": 0.21358612179756165, "step": 323, "token_acc": 0.9360210341805434 }, { "epoch": 0.06835443037974684, "grad_norm": 0.8671875, "learning_rate": 9.994867326890078e-07, "loss": 0.2634425759315491, "step": 324, "token_acc": 0.9219944937289691 }, { "epoch": 0.06856540084388185, "grad_norm": 0.82421875, "learning_rate": 9.994790447336842e-07, "loss": 0.3185754120349884, "step": 325, "token_acc": 0.9133514986376022 }, { "epoch": 0.06877637130801688, "grad_norm": 0.73828125, "learning_rate": 9.994712996587044e-07, "loss": 0.29746031761169434, "step": 326, "token_acc": 0.9256002705444707 }, { "epoch": 0.0689873417721519, "grad_norm": 0.76953125, "learning_rate": 9.994634974649541e-07, "loss": 0.29588472843170166, "step": 327, "token_acc": 0.9272846380609236 }, { "epoch": 0.06919831223628692, "grad_norm": 0.875, "learning_rate": 9.994556381533252e-07, "loss": 0.277068167924881, "step": 328, "token_acc": 0.922756981580511 }, { "epoch": 0.06940928270042195, "grad_norm": 0.73046875, "learning_rate": 9.994477217247168e-07, "loss": 0.27129507064819336, "step": 329, "token_acc": 0.9270741068792442 }, { "epoch": 0.06962025316455696, "grad_norm": 0.8671875, "learning_rate": 9.994397481800342e-07, "loss": 0.24473360180854797, "step": 330, "token_acc": 0.9313361611876988 }, { "epoch": 0.06983122362869199, "grad_norm": 0.69140625, "learning_rate": 9.994317175201893e-07, "loss": 0.22818127274513245, "step": 331, "token_acc": 0.9380833851897946 }, { "epoch": 0.070042194092827, "grad_norm": 0.6953125, "learning_rate": 9.994236297461003e-07, "loss": 0.262783944606781, "step": 332, "token_acc": 0.9235968263297091 }, { "epoch": 0.07025316455696203, "grad_norm": 0.63671875, "learning_rate": 9.994154848586919e-07, "loss": 0.24861930310726166, "step": 333, "token_acc": 0.930621342992477 }, { "epoch": 0.07046413502109705, "grad_norm": 0.62109375, "learning_rate": 9.99407282858896e-07, "loss": 0.26165515184402466, "step": 334, "token_acc": 0.9247956403269755 }, { "epoch": 0.07067510548523206, "grad_norm": 0.64453125, "learning_rate": 9.993990237476504e-07, "loss": 0.23681169748306274, "step": 335, "token_acc": 0.9285078611687927 }, { "epoch": 0.07088607594936709, "grad_norm": 1.65625, "learning_rate": 9.993907075258994e-07, "loss": 0.2824210524559021, "step": 336, "token_acc": 0.925273390036452 }, { "epoch": 0.0710970464135021, "grad_norm": 0.6796875, "learning_rate": 9.993823341945942e-07, "loss": 0.2578677535057068, "step": 337, "token_acc": 0.9232230059685296 }, { "epoch": 0.07130801687763713, "grad_norm": 0.69140625, "learning_rate": 9.993739037546924e-07, "loss": 0.25358960032463074, "step": 338, "token_acc": 0.9312054539820267 }, { "epoch": 0.07151898734177216, "grad_norm": 0.83984375, "learning_rate": 9.99365416207158e-07, "loss": 0.2980523705482483, "step": 339, "token_acc": 0.9155807365439094 }, { "epoch": 0.07172995780590717, "grad_norm": 0.9296875, "learning_rate": 9.993568715529616e-07, "loss": 0.29448622465133667, "step": 340, "token_acc": 0.9224402207234825 }, { "epoch": 0.0719409282700422, "grad_norm": 0.6328125, "learning_rate": 9.993482697930805e-07, "loss": 0.2686302065849304, "step": 341, "token_acc": 0.9284134881149807 }, { "epoch": 0.07215189873417721, "grad_norm": 0.75, "learning_rate": 9.993396109284985e-07, "loss": 0.2800794839859009, "step": 342, "token_acc": 0.9271758436944938 }, { "epoch": 0.07236286919831224, "grad_norm": 0.765625, "learning_rate": 9.993308949602054e-07, "loss": 0.2576884329319, "step": 343, "token_acc": 0.9227618490345231 }, { "epoch": 0.07257383966244725, "grad_norm": 0.65625, "learning_rate": 9.993221218891982e-07, "loss": 0.24451857805252075, "step": 344, "token_acc": 0.933461117196057 }, { "epoch": 0.07278481012658228, "grad_norm": 0.95703125, "learning_rate": 9.993132917164801e-07, "loss": 0.2957763075828552, "step": 345, "token_acc": 0.9123943661971831 }, { "epoch": 0.0729957805907173, "grad_norm": 1.6875, "learning_rate": 9.99304404443061e-07, "loss": 0.253467321395874, "step": 346, "token_acc": 0.9318112633181126 }, { "epoch": 0.07320675105485232, "grad_norm": 0.8046875, "learning_rate": 9.99295460069957e-07, "loss": 0.2847754955291748, "step": 347, "token_acc": 0.9224688355123137 }, { "epoch": 0.07341772151898734, "grad_norm": 0.73046875, "learning_rate": 9.992864585981913e-07, "loss": 0.25176408886909485, "step": 348, "token_acc": 0.9317745035233824 }, { "epoch": 0.07362869198312236, "grad_norm": 0.96484375, "learning_rate": 9.99277400028793e-07, "loss": 0.282976359128952, "step": 349, "token_acc": 0.9220568335588634 }, { "epoch": 0.07383966244725738, "grad_norm": 0.64453125, "learning_rate": 9.992682843627984e-07, "loss": 0.2807369530200958, "step": 350, "token_acc": 0.9215148188803512 }, { "epoch": 0.07405063291139241, "grad_norm": 0.7265625, "learning_rate": 9.992591116012495e-07, "loss": 0.2882058322429657, "step": 351, "token_acc": 0.9216602528862012 }, { "epoch": 0.07426160337552742, "grad_norm": 0.73828125, "learning_rate": 9.992498817451955e-07, "loss": 0.27112358808517456, "step": 352, "token_acc": 0.9312214611872146 }, { "epoch": 0.07447257383966245, "grad_norm": 0.77734375, "learning_rate": 9.99240594795692e-07, "loss": 0.25564056634902954, "step": 353, "token_acc": 0.9308086560364465 }, { "epoch": 0.07468354430379746, "grad_norm": 0.66796875, "learning_rate": 9.99231250753801e-07, "loss": 0.21060852706432343, "step": 354, "token_acc": 0.9366489046773239 }, { "epoch": 0.07489451476793249, "grad_norm": 0.5546875, "learning_rate": 9.992218496205908e-07, "loss": 0.23291520774364471, "step": 355, "token_acc": 0.9379619852164731 }, { "epoch": 0.0751054852320675, "grad_norm": 0.703125, "learning_rate": 9.99212391397137e-07, "loss": 0.23014740645885468, "step": 356, "token_acc": 0.930849478390462 }, { "epoch": 0.07531645569620253, "grad_norm": 0.73046875, "learning_rate": 9.992028760845207e-07, "loss": 0.2653324604034424, "step": 357, "token_acc": 0.9264833574529667 }, { "epoch": 0.07552742616033756, "grad_norm": 0.76171875, "learning_rate": 9.991933036838303e-07, "loss": 0.23712849617004395, "step": 358, "token_acc": 0.9348139601961349 }, { "epoch": 0.07573839662447257, "grad_norm": 0.61328125, "learning_rate": 9.991836741961605e-07, "loss": 0.24832651019096375, "step": 359, "token_acc": 0.9297736506094022 }, { "epoch": 0.0759493670886076, "grad_norm": 2.21875, "learning_rate": 9.991739876226127e-07, "loss": 0.30170413851737976, "step": 360, "token_acc": 0.9175753688261706 }, { "epoch": 0.07616033755274261, "grad_norm": 0.6875, "learning_rate": 9.991642439642944e-07, "loss": 0.2096886932849884, "step": 361, "token_acc": 0.9416713404374649 }, { "epoch": 0.07637130801687764, "grad_norm": 0.609375, "learning_rate": 9.991544432223198e-07, "loss": 0.24230161309242249, "step": 362, "token_acc": 0.9317358595709111 }, { "epoch": 0.07658227848101266, "grad_norm": 0.79296875, "learning_rate": 9.991445853978098e-07, "loss": 0.2464846670627594, "step": 363, "token_acc": 0.9277708592777086 }, { "epoch": 0.07679324894514768, "grad_norm": 0.99609375, "learning_rate": 9.991346704918918e-07, "loss": 0.25032496452331543, "step": 364, "token_acc": 0.931261207411835 }, { "epoch": 0.0770042194092827, "grad_norm": 1.0, "learning_rate": 9.991246985056995e-07, "loss": 0.3197912871837616, "step": 365, "token_acc": 0.9187062937062938 }, { "epoch": 0.07721518987341772, "grad_norm": 0.6953125, "learning_rate": 9.991146694403733e-07, "loss": 0.2740510404109955, "step": 366, "token_acc": 0.9192671056398511 }, { "epoch": 0.07742616033755274, "grad_norm": 0.875, "learning_rate": 9.991045832970603e-07, "loss": 0.29503384232521057, "step": 367, "token_acc": 0.919302394324564 }, { "epoch": 0.07763713080168777, "grad_norm": 0.796875, "learning_rate": 9.990944400769138e-07, "loss": 0.27579015493392944, "step": 368, "token_acc": 0.9176502882239912 }, { "epoch": 0.07784810126582278, "grad_norm": 0.8828125, "learning_rate": 9.99084239781094e-07, "loss": 0.2694048583507538, "step": 369, "token_acc": 0.925148762918885 }, { "epoch": 0.07805907172995781, "grad_norm": 0.7578125, "learning_rate": 9.990739824107669e-07, "loss": 0.2885046601295471, "step": 370, "token_acc": 0.9219858156028369 }, { "epoch": 0.07827004219409282, "grad_norm": 1.109375, "learning_rate": 9.99063667967106e-07, "loss": 0.2373015433549881, "step": 371, "token_acc": 0.9304477611940298 }, { "epoch": 0.07848101265822785, "grad_norm": 0.93359375, "learning_rate": 9.990532964512901e-07, "loss": 0.29645416140556335, "step": 372, "token_acc": 0.918646080760095 }, { "epoch": 0.07869198312236286, "grad_norm": 0.66796875, "learning_rate": 9.990428678645062e-07, "loss": 0.24266409873962402, "step": 373, "token_acc": 0.9363528715216104 }, { "epoch": 0.07890295358649789, "grad_norm": 0.8046875, "learning_rate": 9.990323822079464e-07, "loss": 0.2219400256872177, "step": 374, "token_acc": 0.9366262814538676 }, { "epoch": 0.07911392405063292, "grad_norm": 0.609375, "learning_rate": 9.9902183948281e-07, "loss": 0.2171144187450409, "step": 375, "token_acc": 0.937206572769953 }, { "epoch": 0.07932489451476793, "grad_norm": 0.61328125, "learning_rate": 9.990112396903027e-07, "loss": 0.23284628987312317, "step": 376, "token_acc": 0.9356833642547928 }, { "epoch": 0.07953586497890296, "grad_norm": 0.765625, "learning_rate": 9.990005828316363e-07, "loss": 0.26610440015792847, "step": 377, "token_acc": 0.9276832460732984 }, { "epoch": 0.07974683544303797, "grad_norm": 0.55859375, "learning_rate": 9.989898689080299e-07, "loss": 0.19865182042121887, "step": 378, "token_acc": 0.946978672985782 }, { "epoch": 0.079957805907173, "grad_norm": 0.73046875, "learning_rate": 9.989790979207085e-07, "loss": 0.2547116279602051, "step": 379, "token_acc": 0.9301753306674869 }, { "epoch": 0.08016877637130802, "grad_norm": 0.71484375, "learning_rate": 9.98968269870904e-07, "loss": 0.2702917456626892, "step": 380, "token_acc": 0.9255110613273593 }, { "epoch": 0.08037974683544304, "grad_norm": 0.67578125, "learning_rate": 9.989573847598545e-07, "loss": 0.24545586109161377, "step": 381, "token_acc": 0.9353233830845771 }, { "epoch": 0.08059071729957806, "grad_norm": 0.70703125, "learning_rate": 9.98946442588805e-07, "loss": 0.25997835397720337, "step": 382, "token_acc": 0.9318181818181818 }, { "epoch": 0.08080168776371308, "grad_norm": 0.921875, "learning_rate": 9.989354433590067e-07, "loss": 0.2865683436393738, "step": 383, "token_acc": 0.9229352164568622 }, { "epoch": 0.0810126582278481, "grad_norm": 0.93359375, "learning_rate": 9.989243870717174e-07, "loss": 0.25773969292640686, "step": 384, "token_acc": 0.9284097340124505 }, { "epoch": 0.08122362869198312, "grad_norm": 0.87890625, "learning_rate": 9.989132737282015e-07, "loss": 0.2665586471557617, "step": 385, "token_acc": 0.92599672310213 }, { "epoch": 0.08143459915611814, "grad_norm": 0.7890625, "learning_rate": 9.989021033297302e-07, "loss": 0.29251331090927124, "step": 386, "token_acc": 0.9255349500713267 }, { "epoch": 0.08164556962025317, "grad_norm": 0.90625, "learning_rate": 9.988908758775807e-07, "loss": 0.31350889801979065, "step": 387, "token_acc": 0.9161179501860864 }, { "epoch": 0.08185654008438818, "grad_norm": 0.77734375, "learning_rate": 9.98879591373037e-07, "loss": 0.2779198884963989, "step": 388, "token_acc": 0.9238483234095894 }, { "epoch": 0.08206751054852321, "grad_norm": 0.60546875, "learning_rate": 9.988682498173895e-07, "loss": 0.22718225419521332, "step": 389, "token_acc": 0.9420247204237787 }, { "epoch": 0.08227848101265822, "grad_norm": 0.609375, "learning_rate": 9.98856851211935e-07, "loss": 0.22414159774780273, "step": 390, "token_acc": 0.9357366771159875 }, { "epoch": 0.08248945147679325, "grad_norm": 0.74609375, "learning_rate": 9.988453955579776e-07, "loss": 0.2700081467628479, "step": 391, "token_acc": 0.9235555555555556 }, { "epoch": 0.08270042194092828, "grad_norm": 0.66796875, "learning_rate": 9.98833882856827e-07, "loss": 0.24140852689743042, "step": 392, "token_acc": 0.9277988101676582 }, { "epoch": 0.08291139240506329, "grad_norm": 0.8359375, "learning_rate": 9.988223131097996e-07, "loss": 0.28851526975631714, "step": 393, "token_acc": 0.9173528514791095 }, { "epoch": 0.08312236286919832, "grad_norm": 0.6796875, "learning_rate": 9.98810686318219e-07, "loss": 0.2673947215080261, "step": 394, "token_acc": 0.9220706930141943 }, { "epoch": 0.08333333333333333, "grad_norm": 0.66796875, "learning_rate": 9.98799002483414e-07, "loss": 0.27388495206832886, "step": 395, "token_acc": 0.9260257562144355 }, { "epoch": 0.08354430379746836, "grad_norm": 0.6484375, "learning_rate": 9.987872616067216e-07, "loss": 0.2556672692298889, "step": 396, "token_acc": 0.9255903349807798 }, { "epoch": 0.08375527426160338, "grad_norm": 0.96875, "learning_rate": 9.987754636894843e-07, "loss": 0.32614314556121826, "step": 397, "token_acc": 0.9155054847316929 }, { "epoch": 0.0839662447257384, "grad_norm": 0.60546875, "learning_rate": 9.987636087330509e-07, "loss": 0.23008616268634796, "step": 398, "token_acc": 0.9367875647668393 }, { "epoch": 0.08417721518987342, "grad_norm": 0.73828125, "learning_rate": 9.987516967387775e-07, "loss": 0.2754250764846802, "step": 399, "token_acc": 0.9202546998180715 }, { "epoch": 0.08438818565400844, "grad_norm": 0.671875, "learning_rate": 9.98739727708026e-07, "loss": 0.23332414031028748, "step": 400, "token_acc": 0.9356028368794326 }, { "epoch": 0.08438818565400844, "eval_loss": 0.43364420533180237, "eval_runtime": 245.8014, "eval_samples_per_second": 137.123, "eval_steps_per_second": 2.144, "eval_token_acc": 0.8990711657542853, "step": 400 }, { "epoch": 0.08459915611814346, "grad_norm": 0.7578125, "learning_rate": 9.987277016421654e-07, "loss": 0.2699899673461914, "step": 401, "token_acc": 0.9288135593220339 }, { "epoch": 0.08481012658227848, "grad_norm": 0.78515625, "learning_rate": 9.98715618542571e-07, "loss": 0.25560492277145386, "step": 402, "token_acc": 0.9252772913018097 }, { "epoch": 0.0850210970464135, "grad_norm": 0.7890625, "learning_rate": 9.987034784106244e-07, "loss": 0.3024590015411377, "step": 403, "token_acc": 0.9186206896551724 }, { "epoch": 0.08523206751054853, "grad_norm": 0.703125, "learning_rate": 9.98691281247714e-07, "loss": 0.2880774438381195, "step": 404, "token_acc": 0.9188865609099072 }, { "epoch": 0.08544303797468354, "grad_norm": 0.859375, "learning_rate": 9.986790270552347e-07, "loss": 0.2641194760799408, "step": 405, "token_acc": 0.9306480920654149 }, { "epoch": 0.08565400843881857, "grad_norm": 0.72265625, "learning_rate": 9.98666715834588e-07, "loss": 0.25727787613868713, "step": 406, "token_acc": 0.9288548752834467 }, { "epoch": 0.08586497890295358, "grad_norm": 0.61328125, "learning_rate": 9.986543475871818e-07, "loss": 0.2398534119129181, "step": 407, "token_acc": 0.9377962085308057 }, { "epoch": 0.08607594936708861, "grad_norm": 0.671875, "learning_rate": 9.986419223144302e-07, "loss": 0.25430333614349365, "step": 408, "token_acc": 0.9305912596401028 }, { "epoch": 0.08628691983122364, "grad_norm": 0.60546875, "learning_rate": 9.986294400177544e-07, "loss": 0.20513233542442322, "step": 409, "token_acc": 0.9459538416593631 }, { "epoch": 0.08649789029535865, "grad_norm": 0.5625, "learning_rate": 9.986169006985817e-07, "loss": 0.20912200212478638, "step": 410, "token_acc": 0.9390818128310771 }, { "epoch": 0.08670886075949367, "grad_norm": 0.67578125, "learning_rate": 9.986043043583462e-07, "loss": 0.2466573864221573, "step": 411, "token_acc": 0.9298196948682386 }, { "epoch": 0.08691983122362869, "grad_norm": 0.7578125, "learning_rate": 9.98591650998488e-07, "loss": 0.23664775490760803, "step": 412, "token_acc": 0.9329004329004329 }, { "epoch": 0.08713080168776371, "grad_norm": 0.640625, "learning_rate": 9.985789406204547e-07, "loss": 0.23415768146514893, "step": 413, "token_acc": 0.9317912218268091 }, { "epoch": 0.08734177215189873, "grad_norm": 0.75, "learning_rate": 9.985661732256998e-07, "loss": 0.2954852283000946, "step": 414, "token_acc": 0.9218203033838973 }, { "epoch": 0.08755274261603375, "grad_norm": 0.703125, "learning_rate": 9.98553348815683e-07, "loss": 0.2565650939941406, "step": 415, "token_acc": 0.9298298906439855 }, { "epoch": 0.08776371308016878, "grad_norm": 0.7578125, "learning_rate": 9.98540467391871e-07, "loss": 0.22425369918346405, "step": 416, "token_acc": 0.9359098228663446 }, { "epoch": 0.0879746835443038, "grad_norm": 0.73046875, "learning_rate": 9.98527528955737e-07, "loss": 0.2637927532196045, "step": 417, "token_acc": 0.9261443414771132 }, { "epoch": 0.08818565400843882, "grad_norm": 0.70703125, "learning_rate": 9.985145335087605e-07, "loss": 0.27013063430786133, "step": 418, "token_acc": 0.9248719408081958 }, { "epoch": 0.08839662447257383, "grad_norm": 0.62890625, "learning_rate": 9.985014810524278e-07, "loss": 0.25381606817245483, "step": 419, "token_acc": 0.9342265529841657 }, { "epoch": 0.08860759493670886, "grad_norm": 1.59375, "learning_rate": 9.984883715882315e-07, "loss": 0.2093265801668167, "step": 420, "token_acc": 0.936447410231967 }, { "epoch": 0.08881856540084389, "grad_norm": 0.65625, "learning_rate": 9.984752051176707e-07, "loss": 0.2633010447025299, "step": 421, "token_acc": 0.9257308401369502 }, { "epoch": 0.0890295358649789, "grad_norm": 0.6796875, "learning_rate": 9.98461981642251e-07, "loss": 0.28037169575691223, "step": 422, "token_acc": 0.9264617239300783 }, { "epoch": 0.08924050632911393, "grad_norm": 0.640625, "learning_rate": 9.984487011634848e-07, "loss": 0.23874756693840027, "step": 423, "token_acc": 0.9345043167609407 }, { "epoch": 0.08945147679324894, "grad_norm": 4.9375, "learning_rate": 9.984353636828908e-07, "loss": 0.2935020923614502, "step": 424, "token_acc": 0.9228658536585366 }, { "epoch": 0.08966244725738397, "grad_norm": 0.62890625, "learning_rate": 9.984219692019943e-07, "loss": 0.2578403949737549, "step": 425, "token_acc": 0.9282193468884782 }, { "epoch": 0.08987341772151898, "grad_norm": 0.73828125, "learning_rate": 9.98408517722327e-07, "loss": 0.24426788091659546, "step": 426, "token_acc": 0.9371653987038603 }, { "epoch": 0.09008438818565401, "grad_norm": 0.80859375, "learning_rate": 9.983950092454272e-07, "loss": 0.2677040994167328, "step": 427, "token_acc": 0.9215219976218787 }, { "epoch": 0.09029535864978903, "grad_norm": 0.73828125, "learning_rate": 9.983814437728396e-07, "loss": 0.2604065239429474, "step": 428, "token_acc": 0.9289383561643836 }, { "epoch": 0.09050632911392405, "grad_norm": 1.3125, "learning_rate": 9.983678213061157e-07, "loss": 0.24889585375785828, "step": 429, "token_acc": 0.9251644736842105 }, { "epoch": 0.09071729957805907, "grad_norm": 0.796875, "learning_rate": 9.983541418468134e-07, "loss": 0.2805905342102051, "step": 430, "token_acc": 0.9248591108328115 }, { "epoch": 0.09092827004219409, "grad_norm": 0.7734375, "learning_rate": 9.983404053964967e-07, "loss": 0.2725668251514435, "step": 431, "token_acc": 0.9280293116985082 }, { "epoch": 0.09113924050632911, "grad_norm": 0.84375, "learning_rate": 9.98326611956737e-07, "loss": 0.3275222182273865, "step": 432, "token_acc": 0.9095477386934674 }, { "epoch": 0.09135021097046414, "grad_norm": 0.91015625, "learning_rate": 9.98312761529111e-07, "loss": 0.27813225984573364, "step": 433, "token_acc": 0.9216018048505358 }, { "epoch": 0.09156118143459915, "grad_norm": 0.74609375, "learning_rate": 9.982988541152036e-07, "loss": 0.2637915015220642, "step": 434, "token_acc": 0.9244929797191888 }, { "epoch": 0.09177215189873418, "grad_norm": 1.0703125, "learning_rate": 9.982848897166042e-07, "loss": 0.2686794102191925, "step": 435, "token_acc": 0.9231661351116266 }, { "epoch": 0.0919831223628692, "grad_norm": 0.62890625, "learning_rate": 9.982708683349105e-07, "loss": 0.24819687008857727, "step": 436, "token_acc": 0.9376739009460211 }, { "epoch": 0.09219409282700422, "grad_norm": 0.75, "learning_rate": 9.982567899717256e-07, "loss": 0.25789859890937805, "step": 437, "token_acc": 0.9243027888446215 }, { "epoch": 0.09240506329113925, "grad_norm": 0.65625, "learning_rate": 9.982426546286596e-07, "loss": 0.2573246359825134, "step": 438, "token_acc": 0.9290590679726922 }, { "epoch": 0.09261603375527426, "grad_norm": 0.7578125, "learning_rate": 9.98228462307329e-07, "loss": 0.2979215383529663, "step": 439, "token_acc": 0.9225908372827805 }, { "epoch": 0.09282700421940929, "grad_norm": 0.703125, "learning_rate": 9.982142130093566e-07, "loss": 0.2403128445148468, "step": 440, "token_acc": 0.9290377519159807 }, { "epoch": 0.0930379746835443, "grad_norm": 0.7890625, "learning_rate": 9.98199906736372e-07, "loss": 0.2767883241176605, "step": 441, "token_acc": 0.9238754325259516 }, { "epoch": 0.09324894514767933, "grad_norm": 0.609375, "learning_rate": 9.981855434900115e-07, "loss": 0.25662270188331604, "step": 442, "token_acc": 0.9294367050272562 }, { "epoch": 0.09345991561181434, "grad_norm": 0.7109375, "learning_rate": 9.981711232719175e-07, "loss": 0.24665901064872742, "step": 443, "token_acc": 0.9237830319888735 }, { "epoch": 0.09367088607594937, "grad_norm": 0.7890625, "learning_rate": 9.98156646083739e-07, "loss": 0.23571115732192993, "step": 444, "token_acc": 0.9281145293938471 }, { "epoch": 0.0938818565400844, "grad_norm": 1.21875, "learning_rate": 9.981421119271316e-07, "loss": 0.2607622742652893, "step": 445, "token_acc": 0.9253941441441441 }, { "epoch": 0.0940928270042194, "grad_norm": 0.73046875, "learning_rate": 9.981275208037575e-07, "loss": 0.2898206114768982, "step": 446, "token_acc": 0.918967587034814 }, { "epoch": 0.09430379746835443, "grad_norm": 0.6640625, "learning_rate": 9.981128727152854e-07, "loss": 0.2372782677412033, "step": 447, "token_acc": 0.9295408605255558 }, { "epoch": 0.09451476793248945, "grad_norm": 0.65234375, "learning_rate": 9.980981676633903e-07, "loss": 0.22987963259220123, "step": 448, "token_acc": 0.9354383986467437 }, { "epoch": 0.09472573839662447, "grad_norm": 0.95703125, "learning_rate": 9.980834056497538e-07, "loss": 0.26702481508255005, "step": 449, "token_acc": 0.9252548131370328 }, { "epoch": 0.0949367088607595, "grad_norm": 0.76171875, "learning_rate": 9.98068586676064e-07, "loss": 0.27268826961517334, "step": 450, "token_acc": 0.9244391971664699 }, { "epoch": 0.09514767932489451, "grad_norm": 0.93359375, "learning_rate": 9.98053710744016e-07, "loss": 0.22254578769207, "step": 451, "token_acc": 0.9397944199706314 }, { "epoch": 0.09535864978902954, "grad_norm": 0.6484375, "learning_rate": 9.980387778553103e-07, "loss": 0.2529526948928833, "step": 452, "token_acc": 0.9301044083526682 }, { "epoch": 0.09556962025316455, "grad_norm": 0.93359375, "learning_rate": 9.980237880116553e-07, "loss": 0.2600526809692383, "step": 453, "token_acc": 0.9255893212155638 }, { "epoch": 0.09578059071729958, "grad_norm": 0.74609375, "learning_rate": 9.980087412147648e-07, "loss": 0.2552299499511719, "step": 454, "token_acc": 0.9276672694394213 }, { "epoch": 0.09599156118143459, "grad_norm": 0.984375, "learning_rate": 9.979936374663595e-07, "loss": 0.28409841656684875, "step": 455, "token_acc": 0.9230544177881802 }, { "epoch": 0.09620253164556962, "grad_norm": 0.83984375, "learning_rate": 9.979784767681668e-07, "loss": 0.256397545337677, "step": 456, "token_acc": 0.9331357048748353 }, { "epoch": 0.09641350210970465, "grad_norm": 0.69140625, "learning_rate": 9.979632591219207e-07, "loss": 0.2313995659351349, "step": 457, "token_acc": 0.9336188436830836 }, { "epoch": 0.09662447257383966, "grad_norm": 0.87890625, "learning_rate": 9.97947984529361e-07, "loss": 0.2967644929885864, "step": 458, "token_acc": 0.9195630585898709 }, { "epoch": 0.09683544303797469, "grad_norm": 0.76953125, "learning_rate": 9.979326529922348e-07, "loss": 0.30269140005111694, "step": 459, "token_acc": 0.9189985272459499 }, { "epoch": 0.0970464135021097, "grad_norm": 0.68359375, "learning_rate": 9.97917264512295e-07, "loss": 0.2591363787651062, "step": 460, "token_acc": 0.9290578887627696 }, { "epoch": 0.09725738396624473, "grad_norm": 0.84375, "learning_rate": 9.979018190913018e-07, "loss": 0.32560282945632935, "step": 461, "token_acc": 0.9178757980266976 }, { "epoch": 0.09746835443037975, "grad_norm": 0.83984375, "learning_rate": 9.978863167310213e-07, "loss": 0.2893942892551422, "step": 462, "token_acc": 0.924191063174114 }, { "epoch": 0.09767932489451477, "grad_norm": 0.625, "learning_rate": 9.978707574332266e-07, "loss": 0.2492993026971817, "step": 463, "token_acc": 0.9310970081595649 }, { "epoch": 0.09789029535864979, "grad_norm": 0.734375, "learning_rate": 9.978551411996967e-07, "loss": 0.27646076679229736, "step": 464, "token_acc": 0.9283480238839921 }, { "epoch": 0.0981012658227848, "grad_norm": 0.87890625, "learning_rate": 9.978394680322176e-07, "loss": 0.22209137678146362, "step": 465, "token_acc": 0.9360902255639098 }, { "epoch": 0.09831223628691983, "grad_norm": 0.84375, "learning_rate": 9.978237379325818e-07, "loss": 0.2588399648666382, "step": 466, "token_acc": 0.9257776408992916 }, { "epoch": 0.09852320675105486, "grad_norm": 0.88671875, "learning_rate": 9.978079509025878e-07, "loss": 0.3038383722305298, "step": 467, "token_acc": 0.9133605600933489 }, { "epoch": 0.09873417721518987, "grad_norm": 0.8125, "learning_rate": 9.977921069440415e-07, "loss": 0.24923110008239746, "step": 468, "token_acc": 0.9317915690866511 }, { "epoch": 0.0989451476793249, "grad_norm": 0.8515625, "learning_rate": 9.97776206058754e-07, "loss": 0.23833706974983215, "step": 469, "token_acc": 0.9329593267882188 }, { "epoch": 0.09915611814345991, "grad_norm": 0.66015625, "learning_rate": 9.977602482485445e-07, "loss": 0.25747478008270264, "step": 470, "token_acc": 0.9295946357817738 }, { "epoch": 0.09936708860759494, "grad_norm": 0.68359375, "learning_rate": 9.977442335152377e-07, "loss": 0.2688140571117401, "step": 471, "token_acc": 0.9248041775456919 }, { "epoch": 0.09957805907172995, "grad_norm": 0.65625, "learning_rate": 9.977281618606649e-07, "loss": 0.19290462136268616, "step": 472, "token_acc": 0.9412288512911843 }, { "epoch": 0.09978902953586498, "grad_norm": 0.66015625, "learning_rate": 9.977120332866638e-07, "loss": 0.24847334623336792, "step": 473, "token_acc": 0.9335578689528475 }, { "epoch": 0.1, "grad_norm": 0.73828125, "learning_rate": 9.976958477950794e-07, "loss": 0.24599069356918335, "step": 474, "token_acc": 0.9284507042253521 }, { "epoch": 0.10021097046413502, "grad_norm": 0.83203125, "learning_rate": 9.976796053877622e-07, "loss": 0.2468043714761734, "step": 475, "token_acc": 0.9286099137931034 }, { "epoch": 0.10042194092827005, "grad_norm": 0.73828125, "learning_rate": 9.976633060665697e-07, "loss": 0.2741178572177887, "step": 476, "token_acc": 0.9224250325945241 }, { "epoch": 0.10063291139240506, "grad_norm": 0.6484375, "learning_rate": 9.97646949833366e-07, "loss": 0.23784510791301727, "step": 477, "token_acc": 0.9328483491885842 }, { "epoch": 0.10084388185654009, "grad_norm": 0.66015625, "learning_rate": 9.976305366900216e-07, "loss": 0.23838309943675995, "step": 478, "token_acc": 0.9320939839917377 }, { "epoch": 0.10105485232067511, "grad_norm": 0.86328125, "learning_rate": 9.976140666384134e-07, "loss": 0.2787632346153259, "step": 479, "token_acc": 0.9210836277974087 }, { "epoch": 0.10126582278481013, "grad_norm": 0.7265625, "learning_rate": 9.97597539680425e-07, "loss": 0.2557927370071411, "step": 480, "token_acc": 0.9304747320061256 }, { "epoch": 0.10147679324894515, "grad_norm": 0.8359375, "learning_rate": 9.975809558179463e-07, "loss": 0.2617788314819336, "step": 481, "token_acc": 0.9297163995067818 }, { "epoch": 0.10168776371308016, "grad_norm": 0.77734375, "learning_rate": 9.975643150528737e-07, "loss": 0.24790287017822266, "step": 482, "token_acc": 0.9321890827236916 }, { "epoch": 0.10189873417721519, "grad_norm": 0.62109375, "learning_rate": 9.975476173871102e-07, "loss": 0.22530625760555267, "step": 483, "token_acc": 0.9375520399666945 }, { "epoch": 0.1021097046413502, "grad_norm": 0.68359375, "learning_rate": 9.975308628225657e-07, "loss": 0.23794007301330566, "step": 484, "token_acc": 0.9279202279202279 }, { "epoch": 0.10232067510548523, "grad_norm": 0.71875, "learning_rate": 9.975140513611558e-07, "loss": 0.2270554155111313, "step": 485, "token_acc": 0.9365750528541226 }, { "epoch": 0.10253164556962026, "grad_norm": 0.77734375, "learning_rate": 9.974971830048033e-07, "loss": 0.23995614051818848, "step": 486, "token_acc": 0.9316101238556812 }, { "epoch": 0.10274261603375527, "grad_norm": 0.58203125, "learning_rate": 9.974802577554372e-07, "loss": 0.2599806487560272, "step": 487, "token_acc": 0.9297071129707113 }, { "epoch": 0.1029535864978903, "grad_norm": 0.703125, "learning_rate": 9.974632756149928e-07, "loss": 0.2610231935977936, "step": 488, "token_acc": 0.9277639922801213 }, { "epoch": 0.10316455696202531, "grad_norm": 0.69921875, "learning_rate": 9.974462365854124e-07, "loss": 0.2433297038078308, "step": 489, "token_acc": 0.9279082468596396 }, { "epoch": 0.10337552742616034, "grad_norm": 0.765625, "learning_rate": 9.974291406686446e-07, "loss": 0.21656793355941772, "step": 490, "token_acc": 0.9369074861065708 }, { "epoch": 0.10358649789029536, "grad_norm": 0.77734375, "learning_rate": 9.974119878666442e-07, "loss": 0.2721899151802063, "step": 491, "token_acc": 0.9287462605384824 }, { "epoch": 0.10379746835443038, "grad_norm": 0.87890625, "learning_rate": 9.973947781813731e-07, "loss": 0.25939926505088806, "step": 492, "token_acc": 0.9284844796104686 }, { "epoch": 0.1040084388185654, "grad_norm": 0.73828125, "learning_rate": 9.973775116147992e-07, "loss": 0.2712750732898712, "step": 493, "token_acc": 0.9242995689655172 }, { "epoch": 0.10421940928270042, "grad_norm": 0.8203125, "learning_rate": 9.97360188168897e-07, "loss": 0.2513953447341919, "step": 494, "token_acc": 0.929803328290469 }, { "epoch": 0.10443037974683544, "grad_norm": 0.62890625, "learning_rate": 9.973428078456475e-07, "loss": 0.2344273030757904, "step": 495, "token_acc": 0.9309220278683664 }, { "epoch": 0.10464135021097046, "grad_norm": 0.6796875, "learning_rate": 9.973253706470388e-07, "loss": 0.24709591269493103, "step": 496, "token_acc": 0.9282845668387837 }, { "epoch": 0.10485232067510548, "grad_norm": 0.734375, "learning_rate": 9.973078765750644e-07, "loss": 0.26154980063438416, "step": 497, "token_acc": 0.9249655172413793 }, { "epoch": 0.10506329113924051, "grad_norm": 0.671875, "learning_rate": 9.972903256317251e-07, "loss": 0.2260134369134903, "step": 498, "token_acc": 0.9395458566794456 }, { "epoch": 0.10527426160337552, "grad_norm": 0.80859375, "learning_rate": 9.972727178190281e-07, "loss": 0.33081650733947754, "step": 499, "token_acc": 0.9097568121886903 }, { "epoch": 0.10548523206751055, "grad_norm": 0.7109375, "learning_rate": 9.97255053138987e-07, "loss": 0.23815643787384033, "step": 500, "token_acc": 0.929639889196676 }, { "epoch": 0.10569620253164556, "grad_norm": 0.73046875, "learning_rate": 9.972373315936218e-07, "loss": 0.2648988962173462, "step": 501, "token_acc": 0.9245283018867925 }, { "epoch": 0.10590717299578059, "grad_norm": 0.765625, "learning_rate": 9.972195531849592e-07, "loss": 0.2421625256538391, "step": 502, "token_acc": 0.9339531901250401 }, { "epoch": 0.10611814345991562, "grad_norm": 0.53125, "learning_rate": 9.97201717915032e-07, "loss": 0.2174941599369049, "step": 503, "token_acc": 0.9379822806516147 }, { "epoch": 0.10632911392405063, "grad_norm": 0.9375, "learning_rate": 9.971838257858804e-07, "loss": 0.24187928438186646, "step": 504, "token_acc": 0.924503742271396 }, { "epoch": 0.10654008438818566, "grad_norm": 0.6640625, "learning_rate": 9.9716587679955e-07, "loss": 0.2514258921146393, "step": 505, "token_acc": 0.9230990783410138 }, { "epoch": 0.10675105485232067, "grad_norm": 0.69921875, "learning_rate": 9.971478709580937e-07, "loss": 0.282311350107193, "step": 506, "token_acc": 0.9201725997842503 }, { "epoch": 0.1069620253164557, "grad_norm": 0.66015625, "learning_rate": 9.971298082635705e-07, "loss": 0.2298332005739212, "step": 507, "token_acc": 0.936689779921616 }, { "epoch": 0.10717299578059072, "grad_norm": 0.7109375, "learning_rate": 9.971116887180461e-07, "loss": 0.26396387815475464, "step": 508, "token_acc": 0.9267986176562991 }, { "epoch": 0.10738396624472574, "grad_norm": 1.3046875, "learning_rate": 9.970935123235926e-07, "loss": 0.2639835476875305, "step": 509, "token_acc": 0.9236704326260677 }, { "epoch": 0.10759493670886076, "grad_norm": 0.75390625, "learning_rate": 9.970752790822886e-07, "loss": 0.27394697070121765, "step": 510, "token_acc": 0.9261565836298933 }, { "epoch": 0.10780590717299578, "grad_norm": 0.73828125, "learning_rate": 9.97056988996219e-07, "loss": 0.2229761779308319, "step": 511, "token_acc": 0.9351882160392798 }, { "epoch": 0.1080168776371308, "grad_norm": 0.73828125, "learning_rate": 9.970386420674758e-07, "loss": 0.26045358180999756, "step": 512, "token_acc": 0.9279547484370348 }, { "epoch": 0.10822784810126582, "grad_norm": 0.66015625, "learning_rate": 9.97020238298157e-07, "loss": 0.23026743531227112, "step": 513, "token_acc": 0.9363579080025205 }, { "epoch": 0.10843881856540084, "grad_norm": 0.76171875, "learning_rate": 9.970017776903671e-07, "loss": 0.2587951421737671, "step": 514, "token_acc": 0.9307073030477285 }, { "epoch": 0.10864978902953587, "grad_norm": 0.87109375, "learning_rate": 9.969832602462174e-07, "loss": 0.22050908207893372, "step": 515, "token_acc": 0.9343434343434344 }, { "epoch": 0.10886075949367088, "grad_norm": 0.7421875, "learning_rate": 9.969646859678256e-07, "loss": 0.25485992431640625, "step": 516, "token_acc": 0.9255828808687321 }, { "epoch": 0.10907172995780591, "grad_norm": 0.671875, "learning_rate": 9.969460548573156e-07, "loss": 0.24492983520030975, "step": 517, "token_acc": 0.9348314606741573 }, { "epoch": 0.10928270042194092, "grad_norm": 0.7265625, "learning_rate": 9.96927366916818e-07, "loss": 0.28373780846595764, "step": 518, "token_acc": 0.9258015267175572 }, { "epoch": 0.10949367088607595, "grad_norm": 0.75, "learning_rate": 9.969086221484701e-07, "loss": 0.2899026870727539, "step": 519, "token_acc": 0.9206824304100568 }, { "epoch": 0.10970464135021098, "grad_norm": 0.80078125, "learning_rate": 9.968898205544153e-07, "loss": 0.2812725305557251, "step": 520, "token_acc": 0.9226502311248074 }, { "epoch": 0.10991561181434599, "grad_norm": 0.6171875, "learning_rate": 9.968709621368041e-07, "loss": 0.24981510639190674, "step": 521, "token_acc": 0.9326704545454545 }, { "epoch": 0.11012658227848102, "grad_norm": 0.79296875, "learning_rate": 9.96852046897793e-07, "loss": 0.3039143681526184, "step": 522, "token_acc": 0.9203347799132052 }, { "epoch": 0.11033755274261603, "grad_norm": 0.92578125, "learning_rate": 9.968330748395448e-07, "loss": 0.2633418142795563, "step": 523, "token_acc": 0.9283835135925168 }, { "epoch": 0.11054852320675106, "grad_norm": 1.4609375, "learning_rate": 9.968140459642294e-07, "loss": 0.24586576223373413, "step": 524, "token_acc": 0.9281601316150261 }, { "epoch": 0.11075949367088607, "grad_norm": 0.8515625, "learning_rate": 9.967949602740228e-07, "loss": 0.2739730477333069, "step": 525, "token_acc": 0.9166417687481326 }, { "epoch": 0.1109704641350211, "grad_norm": 0.796875, "learning_rate": 9.967758177711076e-07, "loss": 0.2627703845500946, "step": 526, "token_acc": 0.9227409227409228 }, { "epoch": 0.11118143459915612, "grad_norm": 0.80859375, "learning_rate": 9.967566184576732e-07, "loss": 0.26023009419441223, "step": 527, "token_acc": 0.927381745502998 }, { "epoch": 0.11139240506329114, "grad_norm": 0.78125, "learning_rate": 9.967373623359148e-07, "loss": 0.24462240934371948, "step": 528, "token_acc": 0.9283416203568294 }, { "epoch": 0.11160337552742616, "grad_norm": 0.72265625, "learning_rate": 9.967180494080347e-07, "loss": 0.24981698393821716, "step": 529, "token_acc": 0.9291265153870065 }, { "epoch": 0.11181434599156118, "grad_norm": 0.78125, "learning_rate": 9.966986796762414e-07, "loss": 0.2446298450231552, "step": 530, "token_acc": 0.9370728929384966 }, { "epoch": 0.1120253164556962, "grad_norm": 2.421875, "learning_rate": 9.9667925314275e-07, "loss": 0.24656617641448975, "step": 531, "token_acc": 0.9356594110115237 }, { "epoch": 0.11223628691983123, "grad_norm": 0.8203125, "learning_rate": 9.966597698097823e-07, "loss": 0.2559359073638916, "step": 532, "token_acc": 0.9327158812312721 }, { "epoch": 0.11244725738396624, "grad_norm": 0.6640625, "learning_rate": 9.966402296795661e-07, "loss": 0.2284064143896103, "step": 533, "token_acc": 0.9354838709677419 }, { "epoch": 0.11265822784810127, "grad_norm": 0.77734375, "learning_rate": 9.966206327543362e-07, "loss": 0.2628895938396454, "step": 534, "token_acc": 0.923578751164958 }, { "epoch": 0.11286919831223628, "grad_norm": 0.6796875, "learning_rate": 9.966009790363337e-07, "loss": 0.2363075464963913, "step": 535, "token_acc": 0.9275167785234899 }, { "epoch": 0.11308016877637131, "grad_norm": 0.90625, "learning_rate": 9.965812685278059e-07, "loss": 0.2766547203063965, "step": 536, "token_acc": 0.9212160836874795 }, { "epoch": 0.11329113924050632, "grad_norm": 0.6875, "learning_rate": 9.96561501231007e-07, "loss": 0.24981704354286194, "step": 537, "token_acc": 0.9287794545935928 }, { "epoch": 0.11350210970464135, "grad_norm": 0.72265625, "learning_rate": 9.965416771481975e-07, "loss": 0.2477213591337204, "step": 538, "token_acc": 0.9247878255779924 }, { "epoch": 0.11371308016877638, "grad_norm": 0.97265625, "learning_rate": 9.965217962816446e-07, "loss": 0.2585391104221344, "step": 539, "token_acc": 0.9276463963963963 }, { "epoch": 0.11392405063291139, "grad_norm": 0.703125, "learning_rate": 9.965018586336218e-07, "loss": 0.24559935927391052, "step": 540, "token_acc": 0.9349939246658566 }, { "epoch": 0.11413502109704642, "grad_norm": 0.72265625, "learning_rate": 9.96481864206409e-07, "loss": 0.22781570255756378, "step": 541, "token_acc": 0.9306022623051055 }, { "epoch": 0.11434599156118143, "grad_norm": 1.0390625, "learning_rate": 9.964618130022931e-07, "loss": 0.2166275829076767, "step": 542, "token_acc": 0.9374454466104161 }, { "epoch": 0.11455696202531646, "grad_norm": 0.703125, "learning_rate": 9.964417050235665e-07, "loss": 0.267704039812088, "step": 543, "token_acc": 0.9261410788381743 }, { "epoch": 0.11476793248945148, "grad_norm": 0.8203125, "learning_rate": 9.964215402725294e-07, "loss": 0.23303918540477753, "step": 544, "token_acc": 0.9341683658607631 }, { "epoch": 0.1149789029535865, "grad_norm": 0.921875, "learning_rate": 9.964013187514872e-07, "loss": 0.33097875118255615, "step": 545, "token_acc": 0.9122987324426174 }, { "epoch": 0.11518987341772152, "grad_norm": 1.9453125, "learning_rate": 9.963810404627529e-07, "loss": 0.2524172067642212, "step": 546, "token_acc": 0.9373441396508728 }, { "epoch": 0.11540084388185654, "grad_norm": 0.60546875, "learning_rate": 9.963607054086453e-07, "loss": 0.25729498267173767, "step": 547, "token_acc": 0.9215870040612308 }, { "epoch": 0.11561181434599156, "grad_norm": 0.75, "learning_rate": 9.963403135914898e-07, "loss": 0.2928774356842041, "step": 548, "token_acc": 0.9192731605600238 }, { "epoch": 0.11582278481012659, "grad_norm": 0.6953125, "learning_rate": 9.963198650136184e-07, "loss": 0.25337544083595276, "step": 549, "token_acc": 0.9240650870682272 }, { "epoch": 0.1160337552742616, "grad_norm": 0.7109375, "learning_rate": 9.962993596773697e-07, "loss": 0.27310362458229065, "step": 550, "token_acc": 0.9247853124074622 }, { "epoch": 0.11624472573839663, "grad_norm": 0.640625, "learning_rate": 9.962787975850886e-07, "loss": 0.22571566700935364, "step": 551, "token_acc": 0.9384902143522833 }, { "epoch": 0.11645569620253164, "grad_norm": 0.8671875, "learning_rate": 9.962581787391265e-07, "loss": 0.25049251317977905, "step": 552, "token_acc": 0.9287239722370528 }, { "epoch": 0.11666666666666667, "grad_norm": 1.2734375, "learning_rate": 9.962375031418413e-07, "loss": 0.24676430225372314, "step": 553, "token_acc": 0.9325946445060018 }, { "epoch": 0.11687763713080168, "grad_norm": 0.75, "learning_rate": 9.962167707955977e-07, "loss": 0.22018642723560333, "step": 554, "token_acc": 0.9440233236151604 }, { "epoch": 0.11708860759493671, "grad_norm": 0.7421875, "learning_rate": 9.96195981702766e-07, "loss": 0.2333768904209137, "step": 555, "token_acc": 0.9352249928346231 }, { "epoch": 0.11729957805907174, "grad_norm": 0.703125, "learning_rate": 9.961751358657244e-07, "loss": 0.2830660939216614, "step": 556, "token_acc": 0.9188869153345175 }, { "epoch": 0.11751054852320675, "grad_norm": 0.81640625, "learning_rate": 9.961542332868564e-07, "loss": 0.26290833950042725, "step": 557, "token_acc": 0.9261704681872749 }, { "epoch": 0.11772151898734177, "grad_norm": 0.7421875, "learning_rate": 9.961332739685523e-07, "loss": 0.2768633961677551, "step": 558, "token_acc": 0.9245508982035928 }, { "epoch": 0.11793248945147679, "grad_norm": 0.80078125, "learning_rate": 9.96112257913209e-07, "loss": 0.2084287852048874, "step": 559, "token_acc": 0.9381261048909841 }, { "epoch": 0.11814345991561181, "grad_norm": 0.796875, "learning_rate": 9.960911851232301e-07, "loss": 0.2791953682899475, "step": 560, "token_acc": 0.924936025021325 }, { "epoch": 0.11835443037974684, "grad_norm": 1.34375, "learning_rate": 9.960700556010253e-07, "loss": 0.319602370262146, "step": 561, "token_acc": 0.9191286183228887 }, { "epoch": 0.11856540084388185, "grad_norm": 0.8515625, "learning_rate": 9.960488693490108e-07, "loss": 0.21053284406661987, "step": 562, "token_acc": 0.9407194244604317 }, { "epoch": 0.11877637130801688, "grad_norm": 0.7421875, "learning_rate": 9.960276263696097e-07, "loss": 0.27438345551490784, "step": 563, "token_acc": 0.9290875033449291 }, { "epoch": 0.1189873417721519, "grad_norm": 0.6328125, "learning_rate": 9.960063266652512e-07, "loss": 0.2969055771827698, "step": 564, "token_acc": 0.918200408997955 }, { "epoch": 0.11919831223628692, "grad_norm": 1.0234375, "learning_rate": 9.95984970238371e-07, "loss": 0.3027820587158203, "step": 565, "token_acc": 0.924516531503431 }, { "epoch": 0.11940928270042193, "grad_norm": 0.69921875, "learning_rate": 9.959635570914115e-07, "loss": 0.26206478476524353, "step": 566, "token_acc": 0.9267332727823191 }, { "epoch": 0.11962025316455696, "grad_norm": 0.8359375, "learning_rate": 9.959420872268214e-07, "loss": 0.22268003225326538, "step": 567, "token_acc": 0.9377475947934352 }, { "epoch": 0.11983122362869199, "grad_norm": 0.72265625, "learning_rate": 9.95920560647056e-07, "loss": 0.2821509838104248, "step": 568, "token_acc": 0.9257203277821835 }, { "epoch": 0.120042194092827, "grad_norm": 0.8125, "learning_rate": 9.958989773545772e-07, "loss": 0.2774399518966675, "step": 569, "token_acc": 0.9208851167020309 }, { "epoch": 0.12025316455696203, "grad_norm": 0.73046875, "learning_rate": 9.95877337351853e-07, "loss": 0.20950725674629211, "step": 570, "token_acc": 0.9383966244725739 }, { "epoch": 0.12046413502109704, "grad_norm": 2.375, "learning_rate": 9.95855640641358e-07, "loss": 0.24889153242111206, "step": 571, "token_acc": 0.9338555265448216 }, { "epoch": 0.12067510548523207, "grad_norm": 0.7265625, "learning_rate": 9.958338872255738e-07, "loss": 0.27347537875175476, "step": 572, "token_acc": 0.9261669024045261 }, { "epoch": 0.1208860759493671, "grad_norm": 0.625, "learning_rate": 9.958120771069878e-07, "loss": 0.2640995383262634, "step": 573, "token_acc": 0.9278178789561354 }, { "epoch": 0.12109704641350211, "grad_norm": 0.609375, "learning_rate": 9.957902102880945e-07, "loss": 0.23652713000774384, "step": 574, "token_acc": 0.9364988558352403 }, { "epoch": 0.12130801687763713, "grad_norm": 0.8359375, "learning_rate": 9.957682867713942e-07, "loss": 0.291990727186203, "step": 575, "token_acc": 0.9223826714801444 }, { "epoch": 0.12151898734177215, "grad_norm": 0.6328125, "learning_rate": 9.95746306559394e-07, "loss": 0.21727707982063293, "step": 576, "token_acc": 0.9308996088657105 }, { "epoch": 0.12172995780590717, "grad_norm": 0.66796875, "learning_rate": 9.957242696546077e-07, "loss": 0.2906607985496521, "step": 577, "token_acc": 0.9158725837190308 }, { "epoch": 0.1219409282700422, "grad_norm": 0.7265625, "learning_rate": 9.957021760595556e-07, "loss": 0.226593479514122, "step": 578, "token_acc": 0.9271303824149353 }, { "epoch": 0.12215189873417721, "grad_norm": 0.7265625, "learning_rate": 9.956800257767639e-07, "loss": 0.26656001806259155, "step": 579, "token_acc": 0.930952380952381 }, { "epoch": 0.12236286919831224, "grad_norm": 0.6875, "learning_rate": 9.956578188087658e-07, "loss": 0.2880259156227112, "step": 580, "token_acc": 0.9256572982774252 }, { "epoch": 0.12257383966244725, "grad_norm": 0.83984375, "learning_rate": 9.95635555158101e-07, "loss": 0.24349641799926758, "step": 581, "token_acc": 0.9316569954867827 }, { "epoch": 0.12278481012658228, "grad_norm": 0.7109375, "learning_rate": 9.956132348273157e-07, "loss": 0.24496349692344666, "step": 582, "token_acc": 0.9309711286089238 }, { "epoch": 0.1229957805907173, "grad_norm": 0.75390625, "learning_rate": 9.955908578189619e-07, "loss": 0.2652456760406494, "step": 583, "token_acc": 0.9269628727936701 }, { "epoch": 0.12320675105485232, "grad_norm": 0.87109375, "learning_rate": 9.955684241355988e-07, "loss": 0.2777090072631836, "step": 584, "token_acc": 0.9167887489012599 }, { "epoch": 0.12341772151898735, "grad_norm": 1.2109375, "learning_rate": 9.95545933779792e-07, "loss": 0.3164791464805603, "step": 585, "token_acc": 0.919442072302875 }, { "epoch": 0.12362869198312236, "grad_norm": 0.67578125, "learning_rate": 9.955233867541134e-07, "loss": 0.26809951663017273, "step": 586, "token_acc": 0.9227027027027027 }, { "epoch": 0.12383966244725739, "grad_norm": 0.70703125, "learning_rate": 9.955007830611414e-07, "loss": 0.25988298654556274, "step": 587, "token_acc": 0.9277679100059206 }, { "epoch": 0.1240506329113924, "grad_norm": 1.453125, "learning_rate": 9.954781227034612e-07, "loss": 0.22518092393875122, "step": 588, "token_acc": 0.935474701534963 }, { "epoch": 0.12426160337552743, "grad_norm": 0.62890625, "learning_rate": 9.954554056836637e-07, "loss": 0.23757173120975494, "step": 589, "token_acc": 0.929593589009731 }, { "epoch": 0.12447257383966245, "grad_norm": 0.7734375, "learning_rate": 9.954326320043472e-07, "loss": 0.2483949363231659, "step": 590, "token_acc": 0.9315320847405588 }, { "epoch": 0.12468354430379747, "grad_norm": 0.82421875, "learning_rate": 9.95409801668116e-07, "loss": 0.26530349254608154, "step": 591, "token_acc": 0.9214407260351674 }, { "epoch": 0.1248945147679325, "grad_norm": 0.703125, "learning_rate": 9.953869146775806e-07, "loss": 0.2826001048088074, "step": 592, "token_acc": 0.9162200282087447 }, { "epoch": 0.12510548523206752, "grad_norm": 0.59375, "learning_rate": 9.953639710353589e-07, "loss": 0.1961941421031952, "step": 593, "token_acc": 0.9426091825307951 }, { "epoch": 0.12531645569620253, "grad_norm": 0.82421875, "learning_rate": 9.953409707440742e-07, "loss": 0.26104363799095154, "step": 594, "token_acc": 0.9215813350615684 }, { "epoch": 0.12552742616033755, "grad_norm": 0.828125, "learning_rate": 9.95317913806357e-07, "loss": 0.30334994196891785, "step": 595, "token_acc": 0.9229497354497355 }, { "epoch": 0.1257383966244726, "grad_norm": 0.8125, "learning_rate": 9.95294800224844e-07, "loss": 0.27462461590766907, "step": 596, "token_acc": 0.9222963177732676 }, { "epoch": 0.1259493670886076, "grad_norm": 0.68359375, "learning_rate": 9.952716300021784e-07, "loss": 0.25919121503829956, "step": 597, "token_acc": 0.9274289099526066 }, { "epoch": 0.1261603375527426, "grad_norm": 0.6328125, "learning_rate": 9.952484031410102e-07, "loss": 0.24202126264572144, "step": 598, "token_acc": 0.9379543094496365 }, { "epoch": 0.12637130801687763, "grad_norm": 0.703125, "learning_rate": 9.95225119643995e-07, "loss": 0.22879423201084137, "step": 599, "token_acc": 0.9303405572755418 }, { "epoch": 0.12658227848101267, "grad_norm": 0.66015625, "learning_rate": 9.952017795137962e-07, "loss": 0.2557697892189026, "step": 600, "token_acc": 0.9307598039215687 }, { "epoch": 0.12658227848101267, "eval_loss": 0.4336538016796112, "eval_runtime": 246.0329, "eval_samples_per_second": 136.994, "eval_steps_per_second": 2.142, "eval_token_acc": 0.899098088486045, "step": 600 }, { "epoch": 0.12679324894514768, "grad_norm": 0.62109375, "learning_rate": 9.951783827530821e-07, "loss": 0.24460609257221222, "step": 601, "token_acc": 0.935226264418811 }, { "epoch": 0.1270042194092827, "grad_norm": 0.65625, "learning_rate": 9.951549293645292e-07, "loss": 0.24832656979560852, "step": 602, "token_acc": 0.9309408926417371 }, { "epoch": 0.12721518987341773, "grad_norm": 0.69140625, "learning_rate": 9.95131419350819e-07, "loss": 0.23030316829681396, "step": 603, "token_acc": 0.9332206255283179 }, { "epoch": 0.12742616033755275, "grad_norm": 0.73046875, "learning_rate": 9.951078527146403e-07, "loss": 0.2880566418170929, "step": 604, "token_acc": 0.9192907367777438 }, { "epoch": 0.12763713080168776, "grad_norm": 0.6796875, "learning_rate": 9.95084229458688e-07, "loss": 0.24888081848621368, "step": 605, "token_acc": 0.929957805907173 }, { "epoch": 0.12784810126582277, "grad_norm": 0.609375, "learning_rate": 9.950605495856637e-07, "loss": 0.2833850681781769, "step": 606, "token_acc": 0.9213352685050799 }, { "epoch": 0.1280590717299578, "grad_norm": 0.875, "learning_rate": 9.950368130982755e-07, "loss": 0.26693737506866455, "step": 607, "token_acc": 0.9250070482097548 }, { "epoch": 0.12827004219409283, "grad_norm": 0.62890625, "learning_rate": 9.950130199992377e-07, "loss": 0.23543164134025574, "step": 608, "token_acc": 0.9348515422311905 }, { "epoch": 0.12848101265822784, "grad_norm": 0.7265625, "learning_rate": 9.949891702912712e-07, "loss": 0.22989103198051453, "step": 609, "token_acc": 0.9318626082099972 }, { "epoch": 0.12869198312236288, "grad_norm": 0.74609375, "learning_rate": 9.949652639771036e-07, "loss": 0.24984115362167358, "step": 610, "token_acc": 0.9261559696342305 }, { "epoch": 0.1289029535864979, "grad_norm": 0.70703125, "learning_rate": 9.94941301059469e-07, "loss": 0.2549693286418915, "step": 611, "token_acc": 0.9259363559560687 }, { "epoch": 0.1291139240506329, "grad_norm": 0.66015625, "learning_rate": 9.94917281541107e-07, "loss": 0.2592216432094574, "step": 612, "token_acc": 0.9255747126436782 }, { "epoch": 0.12932489451476795, "grad_norm": 0.8046875, "learning_rate": 9.948932054247652e-07, "loss": 0.2784273624420166, "step": 613, "token_acc": 0.9198324022346369 }, { "epoch": 0.12953586497890296, "grad_norm": 1.359375, "learning_rate": 9.948690727131965e-07, "loss": 0.2754824161529541, "step": 614, "token_acc": 0.9211413748378728 }, { "epoch": 0.12974683544303797, "grad_norm": 0.62890625, "learning_rate": 9.948448834091608e-07, "loss": 0.22421778738498688, "step": 615, "token_acc": 0.9337539432176656 }, { "epoch": 0.12995780590717299, "grad_norm": 0.6640625, "learning_rate": 9.948206375154244e-07, "loss": 0.22916918992996216, "step": 616, "token_acc": 0.933944374209861 }, { "epoch": 0.13016877637130803, "grad_norm": 0.640625, "learning_rate": 9.947963350347598e-07, "loss": 0.23158694803714752, "step": 617, "token_acc": 0.9291338582677166 }, { "epoch": 0.13037974683544304, "grad_norm": 0.85546875, "learning_rate": 9.947719759699466e-07, "loss": 0.2788570523262024, "step": 618, "token_acc": 0.9231003039513678 }, { "epoch": 0.13059071729957805, "grad_norm": 0.69140625, "learning_rate": 9.947475603237702e-07, "loss": 0.28004133701324463, "step": 619, "token_acc": 0.9180544541369283 }, { "epoch": 0.1308016877637131, "grad_norm": 0.71484375, "learning_rate": 9.947230880990227e-07, "loss": 0.2773160934448242, "step": 620, "token_acc": 0.9248013620885358 }, { "epoch": 0.1310126582278481, "grad_norm": 0.71484375, "learning_rate": 9.946985592985028e-07, "loss": 0.2508021593093872, "step": 621, "token_acc": 0.9334818586887333 }, { "epoch": 0.13122362869198312, "grad_norm": 0.8046875, "learning_rate": 9.946739739250156e-07, "loss": 0.23769596219062805, "step": 622, "token_acc": 0.9302249755461363 }, { "epoch": 0.13143459915611813, "grad_norm": 1.2109375, "learning_rate": 9.946493319813725e-07, "loss": 0.21937592327594757, "step": 623, "token_acc": 0.9373860182370821 }, { "epoch": 0.13164556962025317, "grad_norm": 0.62109375, "learning_rate": 9.946246334703916e-07, "loss": 0.27754154801368713, "step": 624, "token_acc": 0.9261245159368484 }, { "epoch": 0.13185654008438819, "grad_norm": 0.78515625, "learning_rate": 9.945998783948975e-07, "loss": 0.2924942672252655, "step": 625, "token_acc": 0.9139688249400479 }, { "epoch": 0.1320675105485232, "grad_norm": 0.64453125, "learning_rate": 9.945750667577209e-07, "loss": 0.2303755283355713, "step": 626, "token_acc": 0.9341597796143251 }, { "epoch": 0.13227848101265824, "grad_norm": 0.7734375, "learning_rate": 9.945501985616995e-07, "loss": 0.20424559712409973, "step": 627, "token_acc": 0.9430594900849858 }, { "epoch": 0.13248945147679325, "grad_norm": 0.64453125, "learning_rate": 9.94525273809677e-07, "loss": 0.2620590031147003, "step": 628, "token_acc": 0.9274099883855982 }, { "epoch": 0.13270042194092826, "grad_norm": 0.76171875, "learning_rate": 9.945002925045038e-07, "loss": 0.2684752643108368, "step": 629, "token_acc": 0.9270194986072423 }, { "epoch": 0.13291139240506328, "grad_norm": 0.70703125, "learning_rate": 9.944752546490367e-07, "loss": 0.23374760150909424, "step": 630, "token_acc": 0.9325842696629213 }, { "epoch": 0.13312236286919832, "grad_norm": 1.1953125, "learning_rate": 9.94450160246139e-07, "loss": 0.22228139638900757, "step": 631, "token_acc": 0.9354534005037783 }, { "epoch": 0.13333333333333333, "grad_norm": 0.73046875, "learning_rate": 9.944250092986807e-07, "loss": 0.2018851488828659, "step": 632, "token_acc": 0.9405840886203424 }, { "epoch": 0.13354430379746834, "grad_norm": 1.1796875, "learning_rate": 9.943998018095377e-07, "loss": 0.26342812180519104, "step": 633, "token_acc": 0.9279495646952867 }, { "epoch": 0.13375527426160339, "grad_norm": 0.734375, "learning_rate": 9.943745377815927e-07, "loss": 0.24731256067752838, "step": 634, "token_acc": 0.9329608938547486 }, { "epoch": 0.1339662447257384, "grad_norm": 0.72265625, "learning_rate": 9.94349217217735e-07, "loss": 0.22763285040855408, "step": 635, "token_acc": 0.9298531810766721 }, { "epoch": 0.1341772151898734, "grad_norm": 0.72265625, "learning_rate": 9.943238401208602e-07, "loss": 0.25396978855133057, "step": 636, "token_acc": 0.9292196007259528 }, { "epoch": 0.13438818565400845, "grad_norm": 0.7421875, "learning_rate": 9.942984064938705e-07, "loss": 0.30096304416656494, "step": 637, "token_acc": 0.9151027703306523 }, { "epoch": 0.13459915611814346, "grad_norm": 0.6484375, "learning_rate": 9.942729163396741e-07, "loss": 0.29584643244743347, "step": 638, "token_acc": 0.919442072302875 }, { "epoch": 0.13481012658227848, "grad_norm": 0.65625, "learning_rate": 9.942473696611862e-07, "loss": 0.2260875701904297, "step": 639, "token_acc": 0.934560327198364 }, { "epoch": 0.1350210970464135, "grad_norm": 0.703125, "learning_rate": 9.942217664613284e-07, "loss": 0.25592464208602905, "step": 640, "token_acc": 0.926865671641791 }, { "epoch": 0.13523206751054853, "grad_norm": 0.6171875, "learning_rate": 9.941961067430285e-07, "loss": 0.24965469539165497, "step": 641, "token_acc": 0.92850705917693 }, { "epoch": 0.13544303797468354, "grad_norm": 0.7109375, "learning_rate": 9.94170390509221e-07, "loss": 0.2692350149154663, "step": 642, "token_acc": 0.9312614259597807 }, { "epoch": 0.13565400843881856, "grad_norm": 0.78125, "learning_rate": 9.941446177628467e-07, "loss": 0.2497376799583435, "step": 643, "token_acc": 0.925979262672811 }, { "epoch": 0.1358649789029536, "grad_norm": 1.046875, "learning_rate": 9.94118788506853e-07, "loss": 0.27541112899780273, "step": 644, "token_acc": 0.9211567732115677 }, { "epoch": 0.1360759493670886, "grad_norm": 0.6953125, "learning_rate": 9.940929027441936e-07, "loss": 0.2414344996213913, "step": 645, "token_acc": 0.9313432835820895 }, { "epoch": 0.13628691983122362, "grad_norm": 0.62109375, "learning_rate": 9.940669604778288e-07, "loss": 0.2977514863014221, "step": 646, "token_acc": 0.922911547911548 }, { "epoch": 0.13649789029535864, "grad_norm": 0.71484375, "learning_rate": 9.940409617107252e-07, "loss": 0.2718917727470398, "step": 647, "token_acc": 0.9186241610738255 }, { "epoch": 0.13670886075949368, "grad_norm": 0.765625, "learning_rate": 9.940149064458563e-07, "loss": 0.2234637290239334, "step": 648, "token_acc": 0.9408866995073891 }, { "epoch": 0.1369198312236287, "grad_norm": 0.640625, "learning_rate": 9.939887946862017e-07, "loss": 0.2774735689163208, "step": 649, "token_acc": 0.9290869155946031 }, { "epoch": 0.1371308016877637, "grad_norm": 0.90625, "learning_rate": 9.93962626434747e-07, "loss": 0.2615464925765991, "step": 650, "token_acc": 0.9272910881090634 }, { "epoch": 0.13734177215189874, "grad_norm": 0.65625, "learning_rate": 9.939364016944852e-07, "loss": 0.23573726415634155, "step": 651, "token_acc": 0.9329399141630901 }, { "epoch": 0.13755274261603376, "grad_norm": 0.6328125, "learning_rate": 9.939101204684151e-07, "loss": 0.2487039864063263, "step": 652, "token_acc": 0.932972972972973 }, { "epoch": 0.13776371308016877, "grad_norm": 0.6875, "learning_rate": 9.938837827595424e-07, "loss": 0.26214438676834106, "step": 653, "token_acc": 0.9278832116788321 }, { "epoch": 0.1379746835443038, "grad_norm": 0.69921875, "learning_rate": 9.938573885708792e-07, "loss": 0.24997225403785706, "step": 654, "token_acc": 0.9322553666016169 }, { "epoch": 0.13818565400843882, "grad_norm": 0.76953125, "learning_rate": 9.938309379054433e-07, "loss": 0.28863316774368286, "step": 655, "token_acc": 0.9217944831767202 }, { "epoch": 0.13839662447257384, "grad_norm": 0.625, "learning_rate": 9.9380443076626e-07, "loss": 0.23938237130641937, "step": 656, "token_acc": 0.9307647740440325 }, { "epoch": 0.13860759493670885, "grad_norm": 0.703125, "learning_rate": 9.937778671563606e-07, "loss": 0.26946017146110535, "step": 657, "token_acc": 0.9250278706800446 }, { "epoch": 0.1388185654008439, "grad_norm": 0.64453125, "learning_rate": 9.937512470787827e-07, "loss": 0.25879329442977905, "step": 658, "token_acc": 0.9263622974963182 }, { "epoch": 0.1390295358649789, "grad_norm": 0.75390625, "learning_rate": 9.937245705365707e-07, "loss": 0.26367712020874023, "step": 659, "token_acc": 0.9273255813953488 }, { "epoch": 0.13924050632911392, "grad_norm": 0.73828125, "learning_rate": 9.93697837532775e-07, "loss": 0.27003130316734314, "step": 660, "token_acc": 0.9255381035485748 }, { "epoch": 0.13945147679324896, "grad_norm": 0.77734375, "learning_rate": 9.936710480704531e-07, "loss": 0.3241864740848541, "step": 661, "token_acc": 0.9117466174661747 }, { "epoch": 0.13966244725738397, "grad_norm": 0.765625, "learning_rate": 9.936442021526685e-07, "loss": 0.254525363445282, "step": 662, "token_acc": 0.9274787535410765 }, { "epoch": 0.13987341772151898, "grad_norm": 0.85546875, "learning_rate": 9.936172997824912e-07, "loss": 0.22039127349853516, "step": 663, "token_acc": 0.9337885985748219 }, { "epoch": 0.140084388185654, "grad_norm": 0.71484375, "learning_rate": 9.935903409629977e-07, "loss": 0.26330018043518066, "step": 664, "token_acc": 0.9245566576495341 }, { "epoch": 0.14029535864978904, "grad_norm": 0.79296875, "learning_rate": 9.93563325697271e-07, "loss": 0.25053250789642334, "step": 665, "token_acc": 0.9321644150617994 }, { "epoch": 0.14050632911392405, "grad_norm": 1.015625, "learning_rate": 9.935362539884004e-07, "loss": 0.23359492421150208, "step": 666, "token_acc": 0.9295649600473513 }, { "epoch": 0.14071729957805906, "grad_norm": 0.80078125, "learning_rate": 9.935091258394821e-07, "loss": 0.3050011098384857, "step": 667, "token_acc": 0.9222160044767768 }, { "epoch": 0.1409282700421941, "grad_norm": 0.65625, "learning_rate": 9.93481941253618e-07, "loss": 0.24552345275878906, "step": 668, "token_acc": 0.9327153110047847 }, { "epoch": 0.14113924050632912, "grad_norm": 0.6328125, "learning_rate": 9.934547002339174e-07, "loss": 0.2593832015991211, "step": 669, "token_acc": 0.9236835410836938 }, { "epoch": 0.14135021097046413, "grad_norm": 0.765625, "learning_rate": 9.93427402783495e-07, "loss": 0.2546125054359436, "step": 670, "token_acc": 0.9320175438596491 }, { "epoch": 0.14156118143459914, "grad_norm": 0.84765625, "learning_rate": 9.93400048905473e-07, "loss": 0.27444595098495483, "step": 671, "token_acc": 0.9244940321743643 }, { "epoch": 0.14177215189873418, "grad_norm": 0.79296875, "learning_rate": 9.93372638602979e-07, "loss": 0.2675279378890991, "step": 672, "token_acc": 0.9254424136930665 }, { "epoch": 0.1419831223628692, "grad_norm": 0.63671875, "learning_rate": 9.933451718791481e-07, "loss": 0.22922030091285706, "step": 673, "token_acc": 0.9329147389292796 }, { "epoch": 0.1421940928270042, "grad_norm": 1.015625, "learning_rate": 9.933176487371213e-07, "loss": 0.3030126094818115, "step": 674, "token_acc": 0.9166399487015069 }, { "epoch": 0.14240506329113925, "grad_norm": 0.76171875, "learning_rate": 9.932900691800457e-07, "loss": 0.2756281793117523, "step": 675, "token_acc": 0.921146953405018 }, { "epoch": 0.14261603375527426, "grad_norm": 0.6875, "learning_rate": 9.932624332110758e-07, "loss": 0.23098278045654297, "step": 676, "token_acc": 0.937677859988617 }, { "epoch": 0.14282700421940928, "grad_norm": 0.58203125, "learning_rate": 9.932347408333715e-07, "loss": 0.22887524962425232, "step": 677, "token_acc": 0.9341611319665031 }, { "epoch": 0.14303797468354432, "grad_norm": 0.98046875, "learning_rate": 9.932069920501e-07, "loss": 0.2759955823421478, "step": 678, "token_acc": 0.9240579710144927 }, { "epoch": 0.14324894514767933, "grad_norm": 0.59375, "learning_rate": 9.931791868644341e-07, "loss": 0.2028590440750122, "step": 679, "token_acc": 0.9378352792679079 }, { "epoch": 0.14345991561181434, "grad_norm": 0.8203125, "learning_rate": 9.931513252795543e-07, "loss": 0.30346542596817017, "step": 680, "token_acc": 0.9129104062326099 }, { "epoch": 0.14367088607594936, "grad_norm": 0.98828125, "learning_rate": 9.931234072986466e-07, "loss": 0.27435851097106934, "step": 681, "token_acc": 0.9276353276353276 }, { "epoch": 0.1438818565400844, "grad_norm": 0.83203125, "learning_rate": 9.930954329249032e-07, "loss": 0.2799455523490906, "step": 682, "token_acc": 0.9241399588356366 }, { "epoch": 0.1440928270042194, "grad_norm": 0.82421875, "learning_rate": 9.930674021615237e-07, "loss": 0.28436923027038574, "step": 683, "token_acc": 0.9250146455770357 }, { "epoch": 0.14430379746835442, "grad_norm": 0.69921875, "learning_rate": 9.930393150117133e-07, "loss": 0.29506832361221313, "step": 684, "token_acc": 0.9232 }, { "epoch": 0.14451476793248946, "grad_norm": 0.66796875, "learning_rate": 9.930111714786844e-07, "loss": 0.27069365978240967, "step": 685, "token_acc": 0.926836079307456 }, { "epoch": 0.14472573839662448, "grad_norm": 0.78125, "learning_rate": 9.92982971565655e-07, "loss": 0.21447613835334778, "step": 686, "token_acc": 0.9363425925925926 }, { "epoch": 0.1449367088607595, "grad_norm": 0.66796875, "learning_rate": 9.929547152758505e-07, "loss": 0.2686905264854431, "step": 687, "token_acc": 0.9303255282695603 }, { "epoch": 0.1451476793248945, "grad_norm": 0.71484375, "learning_rate": 9.929264026125017e-07, "loss": 0.27938973903656006, "step": 688, "token_acc": 0.92536881689326 }, { "epoch": 0.14535864978902954, "grad_norm": 0.6953125, "learning_rate": 9.928980335788469e-07, "loss": 0.2390938103199005, "step": 689, "token_acc": 0.9279638490164805 }, { "epoch": 0.14556962025316456, "grad_norm": 0.69921875, "learning_rate": 9.928696081781299e-07, "loss": 0.2756063640117645, "step": 690, "token_acc": 0.920317553660688 }, { "epoch": 0.14578059071729957, "grad_norm": 0.76171875, "learning_rate": 9.928411264136017e-07, "loss": 0.23743261396884918, "step": 691, "token_acc": 0.9318757921419518 }, { "epoch": 0.1459915611814346, "grad_norm": 0.70703125, "learning_rate": 9.928125882885193e-07, "loss": 0.2753446102142334, "step": 692, "token_acc": 0.9266131265577402 }, { "epoch": 0.14620253164556962, "grad_norm": 0.8359375, "learning_rate": 9.927839938061461e-07, "loss": 0.24559064209461212, "step": 693, "token_acc": 0.9276517473942366 }, { "epoch": 0.14641350210970464, "grad_norm": 0.6640625, "learning_rate": 9.927553429697526e-07, "loss": 0.24906222522258759, "step": 694, "token_acc": 0.9354838709677419 }, { "epoch": 0.14662447257383968, "grad_norm": 0.73828125, "learning_rate": 9.92726635782615e-07, "loss": 0.26196521520614624, "step": 695, "token_acc": 0.9294971487817522 }, { "epoch": 0.1468354430379747, "grad_norm": 0.69921875, "learning_rate": 9.92697872248016e-07, "loss": 0.28846031427383423, "step": 696, "token_acc": 0.9216428779493154 }, { "epoch": 0.1470464135021097, "grad_norm": 0.71875, "learning_rate": 9.926690523692454e-07, "loss": 0.2781599164009094, "step": 697, "token_acc": 0.9191949534394713 }, { "epoch": 0.14725738396624471, "grad_norm": 0.6953125, "learning_rate": 9.926401761495986e-07, "loss": 0.24464154243469238, "step": 698, "token_acc": 0.9295774647887324 }, { "epoch": 0.14746835443037976, "grad_norm": 0.6328125, "learning_rate": 9.926112435923778e-07, "loss": 0.24627582728862762, "step": 699, "token_acc": 0.9308590242442383 }, { "epoch": 0.14767932489451477, "grad_norm": 0.80078125, "learning_rate": 9.92582254700892e-07, "loss": 0.27795839309692383, "step": 700, "token_acc": 0.9238838084991932 }, { "epoch": 0.14789029535864978, "grad_norm": 0.62890625, "learning_rate": 9.925532094784563e-07, "loss": 0.24588271975517273, "step": 701, "token_acc": 0.9284253578732107 }, { "epoch": 0.14810126582278482, "grad_norm": 0.66796875, "learning_rate": 9.92524107928392e-07, "loss": 0.24630197882652283, "step": 702, "token_acc": 0.9327267714364489 }, { "epoch": 0.14831223628691984, "grad_norm": 0.7421875, "learning_rate": 9.924949500540275e-07, "loss": 0.2578659653663635, "step": 703, "token_acc": 0.9267192784667418 }, { "epoch": 0.14852320675105485, "grad_norm": 0.671875, "learning_rate": 9.924657358586967e-07, "loss": 0.25091686844825745, "step": 704, "token_acc": 0.9329545454545455 }, { "epoch": 0.14873417721518986, "grad_norm": 0.71484375, "learning_rate": 9.924364653457411e-07, "loss": 0.2511135935783386, "step": 705, "token_acc": 0.9301768055139347 }, { "epoch": 0.1489451476793249, "grad_norm": 0.78125, "learning_rate": 9.924071385185075e-07, "loss": 0.2616545259952545, "step": 706, "token_acc": 0.927741935483871 }, { "epoch": 0.14915611814345991, "grad_norm": 0.68359375, "learning_rate": 9.9237775538035e-07, "loss": 0.2902517318725586, "step": 707, "token_acc": 0.9226774379688402 }, { "epoch": 0.14936708860759493, "grad_norm": 0.7421875, "learning_rate": 9.92348315934629e-07, "loss": 0.27046293020248413, "step": 708, "token_acc": 0.9296824368114064 }, { "epoch": 0.14957805907172997, "grad_norm": 0.62109375, "learning_rate": 9.923188201847107e-07, "loss": 0.20588457584381104, "step": 709, "token_acc": 0.9350493864112541 }, { "epoch": 0.14978902953586498, "grad_norm": 0.83984375, "learning_rate": 9.92289268133968e-07, "loss": 0.25359445810317993, "step": 710, "token_acc": 0.9285503395335105 }, { "epoch": 0.15, "grad_norm": 0.77734375, "learning_rate": 9.922596597857811e-07, "loss": 0.267612099647522, "step": 711, "token_acc": 0.9265569917743831 }, { "epoch": 0.150210970464135, "grad_norm": 0.9140625, "learning_rate": 9.922299951435357e-07, "loss": 0.2501794993877411, "step": 712, "token_acc": 0.9300184162062615 }, { "epoch": 0.15042194092827005, "grad_norm": 0.63671875, "learning_rate": 9.922002742106242e-07, "loss": 0.2614431381225586, "step": 713, "token_acc": 0.9250471825289835 }, { "epoch": 0.15063291139240506, "grad_norm": 0.61328125, "learning_rate": 9.921704969904453e-07, "loss": 0.2227068841457367, "step": 714, "token_acc": 0.934162192709805 }, { "epoch": 0.15084388185654007, "grad_norm": 0.95703125, "learning_rate": 9.92140663486404e-07, "loss": 0.2870650887489319, "step": 715, "token_acc": 0.9198347107438016 }, { "epoch": 0.15105485232067511, "grad_norm": 0.59765625, "learning_rate": 9.92110773701913e-07, "loss": 0.24414213001728058, "step": 716, "token_acc": 0.9290909090909091 }, { "epoch": 0.15126582278481013, "grad_norm": 0.6328125, "learning_rate": 9.920808276403893e-07, "loss": 0.27001482248306274, "step": 717, "token_acc": 0.9276070094800345 }, { "epoch": 0.15147679324894514, "grad_norm": 0.7265625, "learning_rate": 9.920508253052584e-07, "loss": 0.24048057198524475, "step": 718, "token_acc": 0.9305245535714286 }, { "epoch": 0.15168776371308018, "grad_norm": 0.9921875, "learning_rate": 9.92020766699951e-07, "loss": 0.267702579498291, "step": 719, "token_acc": 0.9254603916983338 }, { "epoch": 0.1518987341772152, "grad_norm": 0.66015625, "learning_rate": 9.919906518279043e-07, "loss": 0.23744544386863708, "step": 720, "token_acc": 0.9312857886517438 }, { "epoch": 0.1521097046413502, "grad_norm": 0.85546875, "learning_rate": 9.919604806925623e-07, "loss": 0.2514658570289612, "step": 721, "token_acc": 0.9293015332197615 }, { "epoch": 0.15232067510548522, "grad_norm": 0.7109375, "learning_rate": 9.919302532973754e-07, "loss": 0.2536316215991974, "step": 722, "token_acc": 0.9287620064034151 }, { "epoch": 0.15253164556962026, "grad_norm": 0.8359375, "learning_rate": 9.918999696458006e-07, "loss": 0.23538361489772797, "step": 723, "token_acc": 0.9311200744647844 }, { "epoch": 0.15274261603375527, "grad_norm": 0.64453125, "learning_rate": 9.918696297413008e-07, "loss": 0.2112676054239273, "step": 724, "token_acc": 0.9377406931964056 }, { "epoch": 0.1529535864978903, "grad_norm": 0.6171875, "learning_rate": 9.918392335873457e-07, "loss": 0.22141136229038239, "step": 725, "token_acc": 0.9383989145183175 }, { "epoch": 0.15316455696202533, "grad_norm": 0.8046875, "learning_rate": 9.91808781187411e-07, "loss": 0.27773499488830566, "step": 726, "token_acc": 0.9278820375335121 }, { "epoch": 0.15337552742616034, "grad_norm": 0.94921875, "learning_rate": 9.917782725449799e-07, "loss": 0.32096052169799805, "step": 727, "token_acc": 0.9175288205734555 }, { "epoch": 0.15358649789029535, "grad_norm": 0.71875, "learning_rate": 9.91747707663541e-07, "loss": 0.2451501190662384, "step": 728, "token_acc": 0.9271503803393798 }, { "epoch": 0.15379746835443037, "grad_norm": 1.546875, "learning_rate": 9.917170865465894e-07, "loss": 0.29911404848098755, "step": 729, "token_acc": 0.9180237372343362 }, { "epoch": 0.1540084388185654, "grad_norm": 0.953125, "learning_rate": 9.91686409197627e-07, "loss": 0.3019820749759674, "step": 730, "token_acc": 0.9172510518934082 }, { "epoch": 0.15421940928270042, "grad_norm": 1.1484375, "learning_rate": 9.916556756201624e-07, "loss": 0.281706839799881, "step": 731, "token_acc": 0.9272495213784301 }, { "epoch": 0.15443037974683543, "grad_norm": 0.93359375, "learning_rate": 9.916248858177099e-07, "loss": 0.27722233533859253, "step": 732, "token_acc": 0.9146039603960396 }, { "epoch": 0.15464135021097047, "grad_norm": 0.73046875, "learning_rate": 9.915940397937906e-07, "loss": 0.29295605421066284, "step": 733, "token_acc": 0.9225071225071225 }, { "epoch": 0.1548523206751055, "grad_norm": 0.80078125, "learning_rate": 9.91563137551932e-07, "loss": 0.24334131181240082, "step": 734, "token_acc": 0.9294417682062908 }, { "epoch": 0.1550632911392405, "grad_norm": 0.81640625, "learning_rate": 9.91532179095668e-07, "loss": 0.2572481334209442, "step": 735, "token_acc": 0.9311695579182988 }, { "epoch": 0.15527426160337554, "grad_norm": 0.75, "learning_rate": 9.915011644285391e-07, "loss": 0.26280131936073303, "step": 736, "token_acc": 0.9256795835743205 }, { "epoch": 0.15548523206751055, "grad_norm": 0.62890625, "learning_rate": 9.91470093554092e-07, "loss": 0.23156148195266724, "step": 737, "token_acc": 0.9377058999700509 }, { "epoch": 0.15569620253164557, "grad_norm": 0.60546875, "learning_rate": 9.914389664758799e-07, "loss": 0.24967870116233826, "step": 738, "token_acc": 0.9284906726964387 }, { "epoch": 0.15590717299578058, "grad_norm": 0.6484375, "learning_rate": 9.914077831974626e-07, "loss": 0.24829944968223572, "step": 739, "token_acc": 0.931044267877412 }, { "epoch": 0.15611814345991562, "grad_norm": 0.61328125, "learning_rate": 9.91376543722406e-07, "loss": 0.24180346727371216, "step": 740, "token_acc": 0.9371293001186239 }, { "epoch": 0.15632911392405063, "grad_norm": 0.84765625, "learning_rate": 9.913452480542825e-07, "loss": 0.26637858152389526, "step": 741, "token_acc": 0.9162153552086651 }, { "epoch": 0.15654008438818565, "grad_norm": 0.5625, "learning_rate": 9.913138961966715e-07, "loss": 0.22019389271736145, "step": 742, "token_acc": 0.9336415556159913 }, { "epoch": 0.1567510548523207, "grad_norm": 0.83203125, "learning_rate": 9.912824881531577e-07, "loss": 0.2972027361392975, "step": 743, "token_acc": 0.9233983286908078 }, { "epoch": 0.1569620253164557, "grad_norm": 0.6953125, "learning_rate": 9.912510239273332e-07, "loss": 0.26124250888824463, "step": 744, "token_acc": 0.9267277268942548 }, { "epoch": 0.1571729957805907, "grad_norm": 0.95703125, "learning_rate": 9.912195035227964e-07, "loss": 0.32723483443260193, "step": 745, "token_acc": 0.9195718654434251 }, { "epoch": 0.15738396624472573, "grad_norm": 0.71484375, "learning_rate": 9.911879269431517e-07, "loss": 0.23630690574645996, "step": 746, "token_acc": 0.9347326049453709 }, { "epoch": 0.15759493670886077, "grad_norm": 0.6796875, "learning_rate": 9.911562941920099e-07, "loss": 0.21784129738807678, "step": 747, "token_acc": 0.9337892446378614 }, { "epoch": 0.15780590717299578, "grad_norm": 0.76171875, "learning_rate": 9.911246052729891e-07, "loss": 0.26233282685279846, "step": 748, "token_acc": 0.9323260937991816 }, { "epoch": 0.1580168776371308, "grad_norm": 0.6328125, "learning_rate": 9.910928601897126e-07, "loss": 0.2327466756105423, "step": 749, "token_acc": 0.9362054681027341 }, { "epoch": 0.15822784810126583, "grad_norm": 0.7578125, "learning_rate": 9.91061058945811e-07, "loss": 0.27062827348709106, "step": 750, "token_acc": 0.918719909374115 }, { "epoch": 0.15843881856540085, "grad_norm": 0.671875, "learning_rate": 9.910292015449211e-07, "loss": 0.20303724706172943, "step": 751, "token_acc": 0.9412310547479121 }, { "epoch": 0.15864978902953586, "grad_norm": 0.65234375, "learning_rate": 9.909972879906858e-07, "loss": 0.24677664041519165, "step": 752, "token_acc": 0.925 }, { "epoch": 0.15886075949367087, "grad_norm": 0.7578125, "learning_rate": 9.90965318286755e-07, "loss": 0.2709593176841736, "step": 753, "token_acc": 0.9230769230769231 }, { "epoch": 0.1590717299578059, "grad_norm": 0.99609375, "learning_rate": 9.909332924367846e-07, "loss": 0.265384703874588, "step": 754, "token_acc": 0.9230769230769231 }, { "epoch": 0.15928270042194093, "grad_norm": 0.80078125, "learning_rate": 9.909012104444368e-07, "loss": 0.2868095636367798, "step": 755, "token_acc": 0.920038228735266 }, { "epoch": 0.15949367088607594, "grad_norm": 0.81640625, "learning_rate": 9.908690723133807e-07, "loss": 0.24986404180526733, "step": 756, "token_acc": 0.9256695756846224 }, { "epoch": 0.15970464135021098, "grad_norm": 0.62109375, "learning_rate": 9.908368780472916e-07, "loss": 0.20347082614898682, "step": 757, "token_acc": 0.9389263902282224 }, { "epoch": 0.159915611814346, "grad_norm": 0.59375, "learning_rate": 9.908046276498511e-07, "loss": 0.2612215578556061, "step": 758, "token_acc": 0.9279416235937975 }, { "epoch": 0.160126582278481, "grad_norm": 0.68359375, "learning_rate": 9.907723211247472e-07, "loss": 0.23647598922252655, "step": 759, "token_acc": 0.9354395604395604 }, { "epoch": 0.16033755274261605, "grad_norm": 0.72265625, "learning_rate": 9.907399584756744e-07, "loss": 0.28146815299987793, "step": 760, "token_acc": 0.92171219374824 }, { "epoch": 0.16054852320675106, "grad_norm": 0.72265625, "learning_rate": 9.90707539706334e-07, "loss": 0.2895510494709015, "step": 761, "token_acc": 0.9175170068027211 }, { "epoch": 0.16075949367088607, "grad_norm": 0.64453125, "learning_rate": 9.90675064820433e-07, "loss": 0.25281795859336853, "step": 762, "token_acc": 0.9276477832512315 }, { "epoch": 0.16097046413502109, "grad_norm": 0.67578125, "learning_rate": 9.906425338216852e-07, "loss": 0.2702397108078003, "step": 763, "token_acc": 0.9337220006136852 }, { "epoch": 0.16118143459915613, "grad_norm": 0.82421875, "learning_rate": 9.906099467138111e-07, "loss": 0.3201596736907959, "step": 764, "token_acc": 0.9149093599704032 }, { "epoch": 0.16139240506329114, "grad_norm": 0.8359375, "learning_rate": 9.90577303500537e-07, "loss": 0.26031017303466797, "step": 765, "token_acc": 0.922656699252444 }, { "epoch": 0.16160337552742615, "grad_norm": 0.84765625, "learning_rate": 9.90544604185596e-07, "loss": 0.2320261150598526, "step": 766, "token_acc": 0.933118216485773 }, { "epoch": 0.1618143459915612, "grad_norm": 0.69921875, "learning_rate": 9.905118487727277e-07, "loss": 0.2794190049171448, "step": 767, "token_acc": 0.9201467268623025 }, { "epoch": 0.1620253164556962, "grad_norm": 0.84375, "learning_rate": 9.904790372656778e-07, "loss": 0.2765384018421173, "step": 768, "token_acc": 0.9225014961101137 }, { "epoch": 0.16223628691983122, "grad_norm": 0.84765625, "learning_rate": 9.904461696681984e-07, "loss": 0.3068510890007019, "step": 769, "token_acc": 0.9177502267916541 }, { "epoch": 0.16244725738396623, "grad_norm": 0.71875, "learning_rate": 9.904132459840485e-07, "loss": 0.28465330600738525, "step": 770, "token_acc": 0.9240544629349471 }, { "epoch": 0.16265822784810127, "grad_norm": 0.75, "learning_rate": 9.903802662169932e-07, "loss": 0.2329617142677307, "step": 771, "token_acc": 0.9319875776397516 }, { "epoch": 0.16286919831223629, "grad_norm": 0.64453125, "learning_rate": 9.903472303708038e-07, "loss": 0.2284744679927826, "step": 772, "token_acc": 0.931237721021611 }, { "epoch": 0.1630801687763713, "grad_norm": 1.140625, "learning_rate": 9.903141384492583e-07, "loss": 0.23831237852573395, "step": 773, "token_acc": 0.9291455790413814 }, { "epoch": 0.16329113924050634, "grad_norm": 0.6796875, "learning_rate": 9.902809904561414e-07, "loss": 0.23870491981506348, "step": 774, "token_acc": 0.9348308374930671 }, { "epoch": 0.16350210970464135, "grad_norm": 0.75, "learning_rate": 9.902477863952431e-07, "loss": 0.27838945388793945, "step": 775, "token_acc": 0.9261146496815287 }, { "epoch": 0.16371308016877636, "grad_norm": 0.68359375, "learning_rate": 9.902145262703613e-07, "loss": 0.2492181956768036, "step": 776, "token_acc": 0.9293759512937595 }, { "epoch": 0.1639240506329114, "grad_norm": 0.58984375, "learning_rate": 9.901812100852993e-07, "loss": 0.2085292637348175, "step": 777, "token_acc": 0.9389517569982132 }, { "epoch": 0.16413502109704642, "grad_norm": 0.953125, "learning_rate": 9.90147837843867e-07, "loss": 0.2694481909275055, "step": 778, "token_acc": 0.9280432309442548 }, { "epoch": 0.16434599156118143, "grad_norm": 0.80859375, "learning_rate": 9.901144095498808e-07, "loss": 0.25209715962409973, "step": 779, "token_acc": 0.9302030456852792 }, { "epoch": 0.16455696202531644, "grad_norm": 0.88671875, "learning_rate": 9.900809252071635e-07, "loss": 0.31358861923217773, "step": 780, "token_acc": 0.9151069518716578 }, { "epoch": 0.16476793248945149, "grad_norm": 0.734375, "learning_rate": 9.900473848195446e-07, "loss": 0.23959940671920776, "step": 781, "token_acc": 0.9326456310679612 }, { "epoch": 0.1649789029535865, "grad_norm": 0.80078125, "learning_rate": 9.900137883908592e-07, "loss": 0.29789382219314575, "step": 782, "token_acc": 0.9166666666666666 }, { "epoch": 0.1651898734177215, "grad_norm": 0.796875, "learning_rate": 9.8998013592495e-07, "loss": 0.22469905018806458, "step": 783, "token_acc": 0.933932193567082 }, { "epoch": 0.16540084388185655, "grad_norm": 0.6875, "learning_rate": 9.89946427425665e-07, "loss": 0.28561171889305115, "step": 784, "token_acc": 0.9251565167899829 }, { "epoch": 0.16561181434599156, "grad_norm": 0.90625, "learning_rate": 9.89912662896859e-07, "loss": 0.28935301303863525, "step": 785, "token_acc": 0.9215164615896242 }, { "epoch": 0.16582278481012658, "grad_norm": 0.73046875, "learning_rate": 9.898788423423935e-07, "loss": 0.2708919048309326, "step": 786, "token_acc": 0.927591706539075 }, { "epoch": 0.1660337552742616, "grad_norm": 0.66015625, "learning_rate": 9.898449657661362e-07, "loss": 0.2672666311264038, "step": 787, "token_acc": 0.9263598326359833 }, { "epoch": 0.16624472573839663, "grad_norm": 0.8046875, "learning_rate": 9.89811033171961e-07, "loss": 0.2862321734428406, "step": 788, "token_acc": 0.9180470793374019 }, { "epoch": 0.16645569620253164, "grad_norm": 0.8046875, "learning_rate": 9.897770445637483e-07, "loss": 0.2871711850166321, "step": 789, "token_acc": 0.9249183895538629 }, { "epoch": 0.16666666666666666, "grad_norm": 0.80859375, "learning_rate": 9.897429999453852e-07, "loss": 0.2397966980934143, "step": 790, "token_acc": 0.9377081945369754 }, { "epoch": 0.1668776371308017, "grad_norm": 0.80859375, "learning_rate": 9.89708899320765e-07, "loss": 0.2998412847518921, "step": 791, "token_acc": 0.9202363367799113 }, { "epoch": 0.1670886075949367, "grad_norm": 0.6640625, "learning_rate": 9.89674742693787e-07, "loss": 0.27035748958587646, "step": 792, "token_acc": 0.9229891614375356 }, { "epoch": 0.16729957805907172, "grad_norm": 0.76953125, "learning_rate": 9.89640530068358e-07, "loss": 0.17933598160743713, "step": 793, "token_acc": 0.9483188044831881 }, { "epoch": 0.16751054852320676, "grad_norm": 0.91796875, "learning_rate": 9.896062614483898e-07, "loss": 0.2540227472782135, "step": 794, "token_acc": 0.929927414852038 }, { "epoch": 0.16772151898734178, "grad_norm": 0.68359375, "learning_rate": 9.895719368378016e-07, "loss": 0.24861711263656616, "step": 795, "token_acc": 0.9333129397369226 }, { "epoch": 0.1679324894514768, "grad_norm": 0.69140625, "learning_rate": 9.89537556240519e-07, "loss": 0.2633477449417114, "step": 796, "token_acc": 0.9196560924992588 }, { "epoch": 0.1681434599156118, "grad_norm": 0.8671875, "learning_rate": 9.89503119660473e-07, "loss": 0.2524856925010681, "step": 797, "token_acc": 0.9313725490196079 }, { "epoch": 0.16835443037974684, "grad_norm": 0.91796875, "learning_rate": 9.894686271016027e-07, "loss": 0.30388563871383667, "step": 798, "token_acc": 0.9198352344740177 }, { "epoch": 0.16856540084388186, "grad_norm": 0.6328125, "learning_rate": 9.894340785678517e-07, "loss": 0.2910333573818207, "step": 799, "token_acc": 0.9234449760765551 }, { "epoch": 0.16877637130801687, "grad_norm": 0.859375, "learning_rate": 9.893994740631713e-07, "loss": 0.25983309745788574, "step": 800, "token_acc": 0.9273416982783775 }, { "epoch": 0.16877637130801687, "eval_loss": 0.43369975686073303, "eval_runtime": 245.7245, "eval_samples_per_second": 137.166, "eval_steps_per_second": 2.145, "eval_token_acc": 0.8991631517544647, "step": 800 }, { "epoch": 0.1689873417721519, "grad_norm": 0.640625, "learning_rate": 9.893648135915188e-07, "loss": 0.26705414056777954, "step": 801, "token_acc": 0.9297355062783863 }, { "epoch": 0.16919831223628692, "grad_norm": 0.68359375, "learning_rate": 9.893300971568578e-07, "loss": 0.2386769950389862, "step": 802, "token_acc": 0.935367545076283 }, { "epoch": 0.16940928270042194, "grad_norm": 0.71875, "learning_rate": 9.892953247631589e-07, "loss": 0.2654857337474823, "step": 803, "token_acc": 0.9260304912478825 }, { "epoch": 0.16962025316455695, "grad_norm": 0.71484375, "learning_rate": 9.89260496414398e-07, "loss": 0.2585882842540741, "step": 804, "token_acc": 0.9277403551745255 }, { "epoch": 0.169831223628692, "grad_norm": 0.76953125, "learning_rate": 9.892256121145584e-07, "loss": 0.25679513812065125, "step": 805, "token_acc": 0.928305133352452 }, { "epoch": 0.170042194092827, "grad_norm": 0.671875, "learning_rate": 9.891906718676291e-07, "loss": 0.269248902797699, "step": 806, "token_acc": 0.9328318108543794 }, { "epoch": 0.17025316455696202, "grad_norm": 0.69140625, "learning_rate": 9.89155675677606e-07, "loss": 0.2557290196418762, "step": 807, "token_acc": 0.9275456919060052 }, { "epoch": 0.17046413502109706, "grad_norm": 0.63671875, "learning_rate": 9.891206235484913e-07, "loss": 0.23980513215065002, "step": 808, "token_acc": 0.9353355807539074 }, { "epoch": 0.17067510548523207, "grad_norm": 0.65625, "learning_rate": 9.890855154842935e-07, "loss": 0.2392064481973648, "step": 809, "token_acc": 0.9303870595031773 }, { "epoch": 0.17088607594936708, "grad_norm": 0.7890625, "learning_rate": 9.890503514890275e-07, "loss": 0.23739401996135712, "step": 810, "token_acc": 0.9328014728444308 }, { "epoch": 0.1710970464135021, "grad_norm": 0.7890625, "learning_rate": 9.89015131566714e-07, "loss": 0.2900955080986023, "step": 811, "token_acc": 0.9233797698364627 }, { "epoch": 0.17130801687763714, "grad_norm": 0.69921875, "learning_rate": 9.889798557213818e-07, "loss": 0.27924585342407227, "step": 812, "token_acc": 0.9214012363850457 }, { "epoch": 0.17151898734177215, "grad_norm": 0.7421875, "learning_rate": 9.88944523957064e-07, "loss": 0.2851244807243347, "step": 813, "token_acc": 0.9264660254421346 }, { "epoch": 0.17172995780590716, "grad_norm": 1.1875, "learning_rate": 9.889091362778017e-07, "loss": 0.23967793583869934, "step": 814, "token_acc": 0.9278113316077078 }, { "epoch": 0.1719409282700422, "grad_norm": 0.6796875, "learning_rate": 9.888736926876415e-07, "loss": 0.23629070818424225, "step": 815, "token_acc": 0.9284259528658714 }, { "epoch": 0.17215189873417722, "grad_norm": 0.93359375, "learning_rate": 9.88838193190637e-07, "loss": 0.3324328064918518, "step": 816, "token_acc": 0.9166411277965063 }, { "epoch": 0.17236286919831223, "grad_norm": 0.73046875, "learning_rate": 9.888026377908472e-07, "loss": 0.2603840231895447, "step": 817, "token_acc": 0.9286516853932584 }, { "epoch": 0.17257383966244727, "grad_norm": 0.7421875, "learning_rate": 9.887670264923387e-07, "loss": 0.25500792264938354, "step": 818, "token_acc": 0.9334559950935296 }, { "epoch": 0.17278481012658228, "grad_norm": 0.69921875, "learning_rate": 9.88731359299184e-07, "loss": 0.25971531867980957, "step": 819, "token_acc": 0.928698752228164 }, { "epoch": 0.1729957805907173, "grad_norm": 0.65625, "learning_rate": 9.886956362154617e-07, "loss": 0.2521659731864929, "step": 820, "token_acc": 0.9252336448598131 }, { "epoch": 0.1732067510548523, "grad_norm": 1.0078125, "learning_rate": 9.88659857245257e-07, "loss": 0.23598764836788177, "step": 821, "token_acc": 0.9364791288566243 }, { "epoch": 0.17341772151898735, "grad_norm": 2.28125, "learning_rate": 9.886240223926617e-07, "loss": 0.2466164529323578, "step": 822, "token_acc": 0.9289433384379786 }, { "epoch": 0.17362869198312236, "grad_norm": 0.609375, "learning_rate": 9.88588131661774e-07, "loss": 0.2287391722202301, "step": 823, "token_acc": 0.9339788732394366 }, { "epoch": 0.17383966244725738, "grad_norm": 0.9921875, "learning_rate": 9.885521850566977e-07, "loss": 0.3011782765388489, "step": 824, "token_acc": 0.9158669225847729 }, { "epoch": 0.17405063291139242, "grad_norm": 0.88671875, "learning_rate": 9.88516182581544e-07, "loss": 0.24732713401317596, "step": 825, "token_acc": 0.9297990096125838 }, { "epoch": 0.17426160337552743, "grad_norm": 0.8046875, "learning_rate": 9.884801242404303e-07, "loss": 0.2557525038719177, "step": 826, "token_acc": 0.9272430668841762 }, { "epoch": 0.17447257383966244, "grad_norm": 0.58203125, "learning_rate": 9.884440100374798e-07, "loss": 0.2030971348285675, "step": 827, "token_acc": 0.9397905759162304 }, { "epoch": 0.17468354430379746, "grad_norm": 0.72265625, "learning_rate": 9.884078399768226e-07, "loss": 0.23457200825214386, "step": 828, "token_acc": 0.9320594479830149 }, { "epoch": 0.1748945147679325, "grad_norm": 0.75, "learning_rate": 9.88371614062595e-07, "loss": 0.27027198672294617, "step": 829, "token_acc": 0.9281524926686217 }, { "epoch": 0.1751054852320675, "grad_norm": 0.578125, "learning_rate": 9.8833533229894e-07, "loss": 0.2117438018321991, "step": 830, "token_acc": 0.9373803664205633 }, { "epoch": 0.17531645569620252, "grad_norm": 0.71484375, "learning_rate": 9.882989946900063e-07, "loss": 0.21879255771636963, "step": 831, "token_acc": 0.9381270903010034 }, { "epoch": 0.17552742616033756, "grad_norm": 0.75, "learning_rate": 9.882626012399495e-07, "loss": 0.27527743577957153, "step": 832, "token_acc": 0.9288235294117647 }, { "epoch": 0.17573839662447258, "grad_norm": 0.74609375, "learning_rate": 9.882261519529318e-07, "loss": 0.2788648307323456, "step": 833, "token_acc": 0.9233587786259542 }, { "epoch": 0.1759493670886076, "grad_norm": 0.75, "learning_rate": 9.881896468331215e-07, "loss": 0.26945775747299194, "step": 834, "token_acc": 0.9267376330619912 }, { "epoch": 0.17616033755274263, "grad_norm": 0.89453125, "learning_rate": 9.881530858846928e-07, "loss": 0.30408790707588196, "step": 835, "token_acc": 0.9146719234018587 }, { "epoch": 0.17637130801687764, "grad_norm": 0.8125, "learning_rate": 9.88116469111827e-07, "loss": 0.2584350109100342, "step": 836, "token_acc": 0.9246458923512748 }, { "epoch": 0.17658227848101266, "grad_norm": 0.8125, "learning_rate": 9.880797965187119e-07, "loss": 0.3014131784439087, "step": 837, "token_acc": 0.9151281344723065 }, { "epoch": 0.17679324894514767, "grad_norm": 0.59375, "learning_rate": 9.880430681095407e-07, "loss": 0.21773123741149902, "step": 838, "token_acc": 0.9367166004280036 }, { "epoch": 0.1770042194092827, "grad_norm": 1.9609375, "learning_rate": 9.88006283888514e-07, "loss": 0.26315808296203613, "step": 839, "token_acc": 0.9259569712210115 }, { "epoch": 0.17721518987341772, "grad_norm": 0.93359375, "learning_rate": 9.879694438598383e-07, "loss": 0.2646620571613312, "step": 840, "token_acc": 0.9318522966076254 }, { "epoch": 0.17742616033755274, "grad_norm": 0.78125, "learning_rate": 9.879325480277266e-07, "loss": 0.2713755965232849, "step": 841, "token_acc": 0.9206214689265537 }, { "epoch": 0.17763713080168778, "grad_norm": 0.859375, "learning_rate": 9.878955963963979e-07, "loss": 0.26667535305023193, "step": 842, "token_acc": 0.9262686567164179 }, { "epoch": 0.1778481012658228, "grad_norm": 0.734375, "learning_rate": 9.878585889700785e-07, "loss": 0.24986431002616882, "step": 843, "token_acc": 0.9370695053224797 }, { "epoch": 0.1780590717299578, "grad_norm": 1.4140625, "learning_rate": 9.878215257530004e-07, "loss": 0.2651556730270386, "step": 844, "token_acc": 0.9286946520989074 }, { "epoch": 0.17827004219409281, "grad_norm": 0.828125, "learning_rate": 9.877844067494017e-07, "loss": 0.2608075737953186, "step": 845, "token_acc": 0.9263001485884101 }, { "epoch": 0.17848101265822786, "grad_norm": 0.75390625, "learning_rate": 9.877472319635275e-07, "loss": 0.28958860039711, "step": 846, "token_acc": 0.9233128834355828 }, { "epoch": 0.17869198312236287, "grad_norm": 1.0234375, "learning_rate": 9.877100013996291e-07, "loss": 0.2941049039363861, "step": 847, "token_acc": 0.9213372664700098 }, { "epoch": 0.17890295358649788, "grad_norm": 0.84375, "learning_rate": 9.876727150619642e-07, "loss": 0.2620714604854584, "step": 848, "token_acc": 0.9288433382137629 }, { "epoch": 0.17911392405063292, "grad_norm": 0.60546875, "learning_rate": 9.876353729547968e-07, "loss": 0.2020449936389923, "step": 849, "token_acc": 0.9392366412213741 }, { "epoch": 0.17932489451476794, "grad_norm": 0.953125, "learning_rate": 9.875979750823969e-07, "loss": 0.2892880439758301, "step": 850, "token_acc": 0.9246448424953675 }, { "epoch": 0.17953586497890295, "grad_norm": 0.70703125, "learning_rate": 9.875605214490417e-07, "loss": 0.2778629660606384, "step": 851, "token_acc": 0.923582580115037 }, { "epoch": 0.17974683544303796, "grad_norm": 0.5859375, "learning_rate": 9.875230120590142e-07, "loss": 0.23370903730392456, "step": 852, "token_acc": 0.9249401117913228 }, { "epoch": 0.179957805907173, "grad_norm": 0.75390625, "learning_rate": 9.874854469166038e-07, "loss": 0.28334856033325195, "step": 853, "token_acc": 0.9264705882352942 }, { "epoch": 0.18016877637130801, "grad_norm": 0.55078125, "learning_rate": 9.874478260261067e-07, "loss": 0.2282511293888092, "step": 854, "token_acc": 0.933620159803319 }, { "epoch": 0.18037974683544303, "grad_norm": 1.1015625, "learning_rate": 9.874101493918249e-07, "loss": 0.27366286516189575, "step": 855, "token_acc": 0.9260089686098655 }, { "epoch": 0.18059071729957807, "grad_norm": 0.6875, "learning_rate": 9.87372417018067e-07, "loss": 0.25619056820869446, "step": 856, "token_acc": 0.9255610290093049 }, { "epoch": 0.18080168776371308, "grad_norm": 0.828125, "learning_rate": 9.873346289091483e-07, "loss": 0.270757257938385, "step": 857, "token_acc": 0.923998738568275 }, { "epoch": 0.1810126582278481, "grad_norm": 0.66796875, "learning_rate": 9.8729678506939e-07, "loss": 0.28628918528556824, "step": 858, "token_acc": 0.9234957020057306 }, { "epoch": 0.18122362869198314, "grad_norm": 0.7265625, "learning_rate": 9.872588855031197e-07, "loss": 0.2525092661380768, "step": 859, "token_acc": 0.9317073170731708 }, { "epoch": 0.18143459915611815, "grad_norm": 0.83984375, "learning_rate": 9.872209302146718e-07, "loss": 0.28244319558143616, "step": 860, "token_acc": 0.9267202859696158 }, { "epoch": 0.18164556962025316, "grad_norm": 0.828125, "learning_rate": 9.871829192083867e-07, "loss": 0.254133403301239, "step": 861, "token_acc": 0.928436911487759 }, { "epoch": 0.18185654008438817, "grad_norm": 0.75, "learning_rate": 9.871448524886113e-07, "loss": 0.2619815468788147, "step": 862, "token_acc": 0.9243888573052871 }, { "epoch": 0.18206751054852321, "grad_norm": 0.91796875, "learning_rate": 9.87106730059699e-07, "loss": 0.2682092487812042, "step": 863, "token_acc": 0.9261158021712907 }, { "epoch": 0.18227848101265823, "grad_norm": 0.6015625, "learning_rate": 9.870685519260092e-07, "loss": 0.245108962059021, "step": 864, "token_acc": 0.9272880404267265 }, { "epoch": 0.18248945147679324, "grad_norm": 0.9609375, "learning_rate": 9.870303180919078e-07, "loss": 0.2907876670360565, "step": 865, "token_acc": 0.9245337159253946 }, { "epoch": 0.18270042194092828, "grad_norm": 0.60546875, "learning_rate": 9.869920285617676e-07, "loss": 0.24249601364135742, "step": 866, "token_acc": 0.931304347826087 }, { "epoch": 0.1829113924050633, "grad_norm": 0.83203125, "learning_rate": 9.869536833399669e-07, "loss": 0.2370653748512268, "step": 867, "token_acc": 0.9323520200438459 }, { "epoch": 0.1831223628691983, "grad_norm": 0.859375, "learning_rate": 9.869152824308912e-07, "loss": 0.3008883595466614, "step": 868, "token_acc": 0.9213197969543148 }, { "epoch": 0.18333333333333332, "grad_norm": 0.8125, "learning_rate": 9.868768258389314e-07, "loss": 0.2317754030227661, "step": 869, "token_acc": 0.9351635514018691 }, { "epoch": 0.18354430379746836, "grad_norm": 0.71484375, "learning_rate": 9.868383135684857e-07, "loss": 0.2313736081123352, "step": 870, "token_acc": 0.9373202990224266 }, { "epoch": 0.18375527426160337, "grad_norm": 0.9296875, "learning_rate": 9.867997456239586e-07, "loss": 0.28026607632637024, "step": 871, "token_acc": 0.9176308539944904 }, { "epoch": 0.1839662447257384, "grad_norm": 0.6875, "learning_rate": 9.8676112200976e-07, "loss": 0.254774272441864, "step": 872, "token_acc": 0.9306763962952568 }, { "epoch": 0.18417721518987343, "grad_norm": 0.75, "learning_rate": 9.867224427303073e-07, "loss": 0.24183842539787292, "step": 873, "token_acc": 0.9341463414634147 }, { "epoch": 0.18438818565400844, "grad_norm": 0.86328125, "learning_rate": 9.86683707790024e-07, "loss": 0.23453059792518616, "step": 874, "token_acc": 0.9323812299621101 }, { "epoch": 0.18459915611814345, "grad_norm": 0.734375, "learning_rate": 9.86644917193339e-07, "loss": 0.24839141964912415, "step": 875, "token_acc": 0.9287037037037037 }, { "epoch": 0.1848101265822785, "grad_norm": 0.671875, "learning_rate": 9.86606070944689e-07, "loss": 0.2521136403083801, "step": 876, "token_acc": 0.9300189993666877 }, { "epoch": 0.1850210970464135, "grad_norm": 1.046875, "learning_rate": 9.865671690485162e-07, "loss": 0.3050832748413086, "step": 877, "token_acc": 0.9174647887323943 }, { "epoch": 0.18523206751054852, "grad_norm": 0.74609375, "learning_rate": 9.865282115092692e-07, "loss": 0.2835577726364136, "step": 878, "token_acc": 0.9225286643941741 }, { "epoch": 0.18544303797468353, "grad_norm": 0.7890625, "learning_rate": 9.864891983314033e-07, "loss": 0.29184651374816895, "step": 879, "token_acc": 0.9218231210383339 }, { "epoch": 0.18565400843881857, "grad_norm": 2.859375, "learning_rate": 9.8645012951938e-07, "loss": 0.2807004451751709, "step": 880, "token_acc": 0.9238008500303583 }, { "epoch": 0.1858649789029536, "grad_norm": 0.62109375, "learning_rate": 9.864110050776672e-07, "loss": 0.25495046377182007, "step": 881, "token_acc": 0.9281183932346723 }, { "epoch": 0.1860759493670886, "grad_norm": 0.62890625, "learning_rate": 9.86371825010739e-07, "loss": 0.26357853412628174, "step": 882, "token_acc": 0.9274457329765091 }, { "epoch": 0.18628691983122364, "grad_norm": 0.703125, "learning_rate": 9.86332589323076e-07, "loss": 0.2856602966785431, "step": 883, "token_acc": 0.9247496423462088 }, { "epoch": 0.18649789029535865, "grad_norm": 0.63671875, "learning_rate": 9.862932980191652e-07, "loss": 0.26217591762542725, "step": 884, "token_acc": 0.9308156140907649 }, { "epoch": 0.18670886075949367, "grad_norm": 0.859375, "learning_rate": 9.862539511034997e-07, "loss": 0.2957126498222351, "step": 885, "token_acc": 0.9175007582650895 }, { "epoch": 0.18691983122362868, "grad_norm": 0.71875, "learning_rate": 9.862145485805793e-07, "loss": 0.2381889373064041, "step": 886, "token_acc": 0.9338040600176523 }, { "epoch": 0.18713080168776372, "grad_norm": 0.61328125, "learning_rate": 9.861750904549099e-07, "loss": 0.23038305342197418, "step": 887, "token_acc": 0.933588010578901 }, { "epoch": 0.18734177215189873, "grad_norm": 0.7578125, "learning_rate": 9.86135576731004e-07, "loss": 0.2670343518257141, "step": 888, "token_acc": 0.9209346991037132 }, { "epoch": 0.18755274261603375, "grad_norm": 0.875, "learning_rate": 9.860960074133802e-07, "loss": 0.3037135899066925, "step": 889, "token_acc": 0.9173450219160927 }, { "epoch": 0.1877637130801688, "grad_norm": 0.6953125, "learning_rate": 9.860563825065637e-07, "loss": 0.23587052524089813, "step": 890, "token_acc": 0.9326950971859588 }, { "epoch": 0.1879746835443038, "grad_norm": 1.046875, "learning_rate": 9.86016702015086e-07, "loss": 0.2837037444114685, "step": 891, "token_acc": 0.9186681222707423 }, { "epoch": 0.1881856540084388, "grad_norm": 0.80078125, "learning_rate": 9.85976965943485e-07, "loss": 0.273685485124588, "step": 892, "token_acc": 0.9198871650211565 }, { "epoch": 0.18839662447257383, "grad_norm": 0.74609375, "learning_rate": 9.859371742963043e-07, "loss": 0.24621078372001648, "step": 893, "token_acc": 0.9370564640543042 }, { "epoch": 0.18860759493670887, "grad_norm": 0.63671875, "learning_rate": 9.85897327078095e-07, "loss": 0.2355155646800995, "step": 894, "token_acc": 0.933295647258338 }, { "epoch": 0.18881856540084388, "grad_norm": 0.6875, "learning_rate": 9.858574242934136e-07, "loss": 0.29725679755210876, "step": 895, "token_acc": 0.9205632306057385 }, { "epoch": 0.1890295358649789, "grad_norm": 0.71484375, "learning_rate": 9.858174659468237e-07, "loss": 0.23919257521629333, "step": 896, "token_acc": 0.9367167919799498 }, { "epoch": 0.18924050632911393, "grad_norm": 0.6640625, "learning_rate": 9.857774520428945e-07, "loss": 0.2421645075082779, "step": 897, "token_acc": 0.928311057108141 }, { "epoch": 0.18945147679324895, "grad_norm": 0.90625, "learning_rate": 9.85737382586202e-07, "loss": 0.22805655002593994, "step": 898, "token_acc": 0.9353342428376534 }, { "epoch": 0.18966244725738396, "grad_norm": 1.125, "learning_rate": 9.856972575813285e-07, "loss": 0.2736568748950958, "step": 899, "token_acc": 0.9199507389162561 }, { "epoch": 0.189873417721519, "grad_norm": 0.71875, "learning_rate": 9.85657077032863e-07, "loss": 0.251034677028656, "step": 900, "token_acc": 0.933903806432576 }, { "epoch": 0.190084388185654, "grad_norm": 0.671875, "learning_rate": 9.856168409454e-07, "loss": 0.2377174347639084, "step": 901, "token_acc": 0.9313658201784488 }, { "epoch": 0.19029535864978903, "grad_norm": 0.625, "learning_rate": 9.855765493235408e-07, "loss": 0.27164188027381897, "step": 902, "token_acc": 0.9263128176171654 }, { "epoch": 0.19050632911392404, "grad_norm": 0.84765625, "learning_rate": 9.855362021718936e-07, "loss": 0.250331312417984, "step": 903, "token_acc": 0.9330877839165131 }, { "epoch": 0.19071729957805908, "grad_norm": 0.60546875, "learning_rate": 9.85495799495072e-07, "loss": 0.20965948700904846, "step": 904, "token_acc": 0.936726272352132 }, { "epoch": 0.1909282700421941, "grad_norm": 0.6640625, "learning_rate": 9.854553412976965e-07, "loss": 0.24447084963321686, "step": 905, "token_acc": 0.9254159495123351 }, { "epoch": 0.1911392405063291, "grad_norm": 0.6875, "learning_rate": 9.854148275843939e-07, "loss": 0.2490314543247223, "step": 906, "token_acc": 0.9285930408472012 }, { "epoch": 0.19135021097046415, "grad_norm": 0.61328125, "learning_rate": 9.853742583597973e-07, "loss": 0.21816563606262207, "step": 907, "token_acc": 0.9387067116150781 }, { "epoch": 0.19156118143459916, "grad_norm": 0.66015625, "learning_rate": 9.853336336285461e-07, "loss": 0.24077807366847992, "step": 908, "token_acc": 0.9278679026651216 }, { "epoch": 0.19177215189873417, "grad_norm": 0.82421875, "learning_rate": 9.852929533952858e-07, "loss": 0.2617112696170807, "step": 909, "token_acc": 0.9296264118158123 }, { "epoch": 0.19198312236286919, "grad_norm": 0.75, "learning_rate": 9.852522176646692e-07, "loss": 0.22484534978866577, "step": 910, "token_acc": 0.9368040926873308 }, { "epoch": 0.19219409282700423, "grad_norm": 0.671875, "learning_rate": 9.85211426441354e-07, "loss": 0.2532415986061096, "step": 911, "token_acc": 0.9334488734835356 }, { "epoch": 0.19240506329113924, "grad_norm": 0.87890625, "learning_rate": 9.851705797300056e-07, "loss": 0.31424853205680847, "step": 912, "token_acc": 0.9197608558842039 }, { "epoch": 0.19261603375527425, "grad_norm": 0.70703125, "learning_rate": 9.851296775352948e-07, "loss": 0.29285135865211487, "step": 913, "token_acc": 0.9182986536107711 }, { "epoch": 0.1928270042194093, "grad_norm": 0.59765625, "learning_rate": 9.850887198618996e-07, "loss": 0.21450576186180115, "step": 914, "token_acc": 0.9373626373626374 }, { "epoch": 0.1930379746835443, "grad_norm": 0.78515625, "learning_rate": 9.850477067145031e-07, "loss": 0.2844701111316681, "step": 915, "token_acc": 0.919885094158953 }, { "epoch": 0.19324894514767932, "grad_norm": 0.68359375, "learning_rate": 9.850066380977961e-07, "loss": 0.26549211144447327, "step": 916, "token_acc": 0.9245901639344263 }, { "epoch": 0.19345991561181436, "grad_norm": 0.765625, "learning_rate": 9.849655140164752e-07, "loss": 0.258350133895874, "step": 917, "token_acc": 0.918646080760095 }, { "epoch": 0.19367088607594937, "grad_norm": 0.7421875, "learning_rate": 9.849243344752427e-07, "loss": 0.2719504237174988, "step": 918, "token_acc": 0.9267654751525719 }, { "epoch": 0.19388185654008439, "grad_norm": 0.79296875, "learning_rate": 9.848830994788083e-07, "loss": 0.27195435762405396, "step": 919, "token_acc": 0.9220445459737293 }, { "epoch": 0.1940928270042194, "grad_norm": 0.71484375, "learning_rate": 9.848418090318876e-07, "loss": 0.24952857196331024, "step": 920, "token_acc": 0.9340033500837521 }, { "epoch": 0.19430379746835444, "grad_norm": 0.7421875, "learning_rate": 9.848004631392022e-07, "loss": 0.22502082586288452, "step": 921, "token_acc": 0.935454267360049 }, { "epoch": 0.19451476793248945, "grad_norm": 0.76171875, "learning_rate": 9.847590618054806e-07, "loss": 0.30236607789993286, "step": 922, "token_acc": 0.9160954208938854 }, { "epoch": 0.19472573839662446, "grad_norm": 0.7578125, "learning_rate": 9.847176050354573e-07, "loss": 0.26875466108322144, "step": 923, "token_acc": 0.9231622746185852 }, { "epoch": 0.1949367088607595, "grad_norm": 0.79296875, "learning_rate": 9.846760928338734e-07, "loss": 0.21099932491779327, "step": 924, "token_acc": 0.9381474710542352 }, { "epoch": 0.19514767932489452, "grad_norm": 2.921875, "learning_rate": 9.846345252054758e-07, "loss": 0.24902689456939697, "step": 925, "token_acc": 0.9338211899459116 }, { "epoch": 0.19535864978902953, "grad_norm": 0.80859375, "learning_rate": 9.845929021550184e-07, "loss": 0.22670647501945496, "step": 926, "token_acc": 0.9382314694408322 }, { "epoch": 0.19556962025316454, "grad_norm": 0.70703125, "learning_rate": 9.84551223687261e-07, "loss": 0.24977506697177887, "step": 927, "token_acc": 0.927700089259149 }, { "epoch": 0.19578059071729959, "grad_norm": 0.7109375, "learning_rate": 9.8450948980697e-07, "loss": 0.2701714038848877, "step": 928, "token_acc": 0.9202069716775599 }, { "epoch": 0.1959915611814346, "grad_norm": 0.82421875, "learning_rate": 9.844677005189182e-07, "loss": 0.2738378643989563, "step": 929, "token_acc": 0.9230093676814989 }, { "epoch": 0.1962025316455696, "grad_norm": 0.7890625, "learning_rate": 9.844258558278842e-07, "loss": 0.2802038788795471, "step": 930, "token_acc": 0.9218846869187849 }, { "epoch": 0.19641350210970465, "grad_norm": 1.125, "learning_rate": 9.843839557386534e-07, "loss": 0.28460338711738586, "step": 931, "token_acc": 0.9176701204144497 }, { "epoch": 0.19662447257383966, "grad_norm": 0.70703125, "learning_rate": 9.843420002560173e-07, "loss": 0.2364983856678009, "step": 932, "token_acc": 0.9339788732394366 }, { "epoch": 0.19683544303797468, "grad_norm": 0.69921875, "learning_rate": 9.842999893847744e-07, "loss": 0.24972565472126007, "step": 933, "token_acc": 0.9282414536495226 }, { "epoch": 0.19704641350210972, "grad_norm": 0.62890625, "learning_rate": 9.842579231297284e-07, "loss": 0.23772844672203064, "step": 934, "token_acc": 0.9301578024547048 }, { "epoch": 0.19725738396624473, "grad_norm": 0.86328125, "learning_rate": 9.842158014956901e-07, "loss": 0.2724204659461975, "step": 935, "token_acc": 0.920952380952381 }, { "epoch": 0.19746835443037974, "grad_norm": 0.67578125, "learning_rate": 9.841736244874769e-07, "loss": 0.20951035618782043, "step": 936, "token_acc": 0.9367752622860298 }, { "epoch": 0.19767932489451476, "grad_norm": 1.84375, "learning_rate": 9.841313921099112e-07, "loss": 0.2654408812522888, "step": 937, "token_acc": 0.9252709640616087 }, { "epoch": 0.1978902953586498, "grad_norm": 0.609375, "learning_rate": 9.840891043678235e-07, "loss": 0.20615626871585846, "step": 938, "token_acc": 0.9382829208677055 }, { "epoch": 0.1981012658227848, "grad_norm": 0.78515625, "learning_rate": 9.840467612660494e-07, "loss": 0.24997380375862122, "step": 939, "token_acc": 0.9305555555555556 }, { "epoch": 0.19831223628691982, "grad_norm": 0.62890625, "learning_rate": 9.84004362809431e-07, "loss": 0.2462070733308792, "step": 940, "token_acc": 0.9301426872770512 }, { "epoch": 0.19852320675105486, "grad_norm": 0.81640625, "learning_rate": 9.839619090028173e-07, "loss": 0.28827589750289917, "step": 941, "token_acc": 0.9256148770245951 }, { "epoch": 0.19873417721518988, "grad_norm": 0.7265625, "learning_rate": 9.83919399851063e-07, "loss": 0.27797433733940125, "step": 942, "token_acc": 0.9241207421766824 }, { "epoch": 0.1989451476793249, "grad_norm": 0.84765625, "learning_rate": 9.838768353590297e-07, "loss": 0.3198699951171875, "step": 943, "token_acc": 0.909556313993174 }, { "epoch": 0.1991561181434599, "grad_norm": 0.625, "learning_rate": 9.838342155315847e-07, "loss": 0.23603345453739166, "step": 944, "token_acc": 0.9322977725674091 }, { "epoch": 0.19936708860759494, "grad_norm": 0.63671875, "learning_rate": 9.837915403736017e-07, "loss": 0.1939564049243927, "step": 945, "token_acc": 0.9405116002379535 }, { "epoch": 0.19957805907172996, "grad_norm": 0.6953125, "learning_rate": 9.837488098899616e-07, "loss": 0.2682676911354065, "step": 946, "token_acc": 0.9287606711804534 }, { "epoch": 0.19978902953586497, "grad_norm": 0.94921875, "learning_rate": 9.837060240855506e-07, "loss": 0.264107882976532, "step": 947, "token_acc": 0.9270292429625581 }, { "epoch": 0.2, "grad_norm": 0.78515625, "learning_rate": 9.836631829652617e-07, "loss": 0.31936952471733093, "step": 948, "token_acc": 0.9171994884910486 }, { "epoch": 0.20021097046413502, "grad_norm": 0.76953125, "learning_rate": 9.83620286533994e-07, "loss": 0.2610703706741333, "step": 949, "token_acc": 0.9247813411078717 }, { "epoch": 0.20042194092827004, "grad_norm": 0.76953125, "learning_rate": 9.835773347966535e-07, "loss": 0.27383172512054443, "step": 950, "token_acc": 0.9266730707652898 }, { "epoch": 0.20063291139240505, "grad_norm": 0.7578125, "learning_rate": 9.835343277581513e-07, "loss": 0.253266841173172, "step": 951, "token_acc": 0.9326241134751773 }, { "epoch": 0.2008438818565401, "grad_norm": 0.6875, "learning_rate": 9.834912654234065e-07, "loss": 0.24679061770439148, "step": 952, "token_acc": 0.9246597024374802 }, { "epoch": 0.2010548523206751, "grad_norm": 0.92578125, "learning_rate": 9.834481477973433e-07, "loss": 0.25128480792045593, "step": 953, "token_acc": 0.9299694189602447 }, { "epoch": 0.20126582278481012, "grad_norm": 0.7578125, "learning_rate": 9.834049748848924e-07, "loss": 0.26062366366386414, "step": 954, "token_acc": 0.9283121597096189 }, { "epoch": 0.20147679324894516, "grad_norm": 0.765625, "learning_rate": 9.833617466909912e-07, "loss": 0.2557450234889984, "step": 955, "token_acc": 0.92808867261422 }, { "epoch": 0.20168776371308017, "grad_norm": 0.7265625, "learning_rate": 9.83318463220583e-07, "loss": 0.28644537925720215, "step": 956, "token_acc": 0.9155513065646909 }, { "epoch": 0.20189873417721518, "grad_norm": 0.9375, "learning_rate": 9.832751244786178e-07, "loss": 0.27308011054992676, "step": 957, "token_acc": 0.9252837326607818 }, { "epoch": 0.20210970464135022, "grad_norm": 0.6640625, "learning_rate": 9.832317304700517e-07, "loss": 0.2365753948688507, "step": 958, "token_acc": 0.9307948860478044 }, { "epoch": 0.20232067510548524, "grad_norm": 1.2265625, "learning_rate": 9.831882811998472e-07, "loss": 0.25882843136787415, "step": 959, "token_acc": 0.9280114041339986 }, { "epoch": 0.20253164556962025, "grad_norm": 0.6171875, "learning_rate": 9.83144776672973e-07, "loss": 0.24906384944915771, "step": 960, "token_acc": 0.9268585131894485 }, { "epoch": 0.20274261603375526, "grad_norm": 1.421875, "learning_rate": 9.831012168944045e-07, "loss": 0.25317683815956116, "step": 961, "token_acc": 0.9272997032640949 }, { "epoch": 0.2029535864978903, "grad_norm": 1.40625, "learning_rate": 9.830576018691227e-07, "loss": 0.2348695993423462, "step": 962, "token_acc": 0.9317293233082706 }, { "epoch": 0.20316455696202532, "grad_norm": 0.65625, "learning_rate": 9.830139316021155e-07, "loss": 0.22149190306663513, "step": 963, "token_acc": 0.9351633986928105 }, { "epoch": 0.20337552742616033, "grad_norm": 0.734375, "learning_rate": 9.829702060983772e-07, "loss": 0.2570660710334778, "step": 964, "token_acc": 0.9295127183573398 }, { "epoch": 0.20358649789029537, "grad_norm": 1.0234375, "learning_rate": 9.829264253629079e-07, "loss": 0.2847985625267029, "step": 965, "token_acc": 0.9199036434808793 }, { "epoch": 0.20379746835443038, "grad_norm": 0.72265625, "learning_rate": 9.828825894007146e-07, "loss": 0.267423540353775, "step": 966, "token_acc": 0.9270031365839749 }, { "epoch": 0.2040084388185654, "grad_norm": 6.0625, "learning_rate": 9.8283869821681e-07, "loss": 0.2526509165763855, "step": 967, "token_acc": 0.9267187106522287 }, { "epoch": 0.2042194092827004, "grad_norm": 0.66796875, "learning_rate": 9.827947518162135e-07, "loss": 0.22644475102424622, "step": 968, "token_acc": 0.9335453100158982 }, { "epoch": 0.20443037974683545, "grad_norm": 0.859375, "learning_rate": 9.827507502039507e-07, "loss": 0.313146710395813, "step": 969, "token_acc": 0.9218163195629908 }, { "epoch": 0.20464135021097046, "grad_norm": 0.72265625, "learning_rate": 9.82706693385054e-07, "loss": 0.2515157163143158, "step": 970, "token_acc": 0.9318849089841457 }, { "epoch": 0.20485232067510548, "grad_norm": 0.7890625, "learning_rate": 9.82662581364561e-07, "loss": 0.33044931292533875, "step": 971, "token_acc": 0.911062906724512 }, { "epoch": 0.20506329113924052, "grad_norm": 1.09375, "learning_rate": 9.826184141475165e-07, "loss": 0.3272978961467743, "step": 972, "token_acc": 0.9140117537890504 }, { "epoch": 0.20527426160337553, "grad_norm": 0.59375, "learning_rate": 9.825741917389717e-07, "loss": 0.21767356991767883, "step": 973, "token_acc": 0.9358974358974359 }, { "epoch": 0.20548523206751054, "grad_norm": 0.734375, "learning_rate": 9.825299141439835e-07, "loss": 0.28333914279937744, "step": 974, "token_acc": 0.9232728430436167 }, { "epoch": 0.20569620253164558, "grad_norm": 0.66796875, "learning_rate": 9.824855813676157e-07, "loss": 0.23762467503547668, "step": 975, "token_acc": 0.9316065192083819 }, { "epoch": 0.2059071729957806, "grad_norm": 0.80859375, "learning_rate": 9.824411934149377e-07, "loss": 0.2822648882865906, "step": 976, "token_acc": 0.9237147595356551 }, { "epoch": 0.2061181434599156, "grad_norm": 0.63671875, "learning_rate": 9.823967502910259e-07, "loss": 0.2508828043937683, "step": 977, "token_acc": 0.9297841726618705 }, { "epoch": 0.20632911392405062, "grad_norm": 0.625, "learning_rate": 9.82352252000963e-07, "loss": 0.2554951608181, "step": 978, "token_acc": 0.9255730872283418 }, { "epoch": 0.20654008438818566, "grad_norm": 0.74609375, "learning_rate": 9.823076985498373e-07, "loss": 0.2603085935115814, "step": 979, "token_acc": 0.927246790299572 }, { "epoch": 0.20675105485232068, "grad_norm": 0.75390625, "learning_rate": 9.82263089942744e-07, "loss": 0.27707576751708984, "step": 980, "token_acc": 0.9242250287026407 }, { "epoch": 0.2069620253164557, "grad_norm": 0.74609375, "learning_rate": 9.822184261847847e-07, "loss": 0.23693615198135376, "step": 981, "token_acc": 0.9334923948702655 }, { "epoch": 0.20717299578059073, "grad_norm": 0.69921875, "learning_rate": 9.821737072810668e-07, "loss": 0.2479907125234604, "step": 982, "token_acc": 0.9293939393939394 }, { "epoch": 0.20738396624472574, "grad_norm": 0.72265625, "learning_rate": 9.821289332367043e-07, "loss": 0.25571757555007935, "step": 983, "token_acc": 0.9304549405969285 }, { "epoch": 0.20759493670886076, "grad_norm": 0.7421875, "learning_rate": 9.820841040568177e-07, "loss": 0.2608758807182312, "step": 984, "token_acc": 0.9285266457680251 }, { "epoch": 0.20780590717299577, "grad_norm": 0.7578125, "learning_rate": 9.820392197465335e-07, "loss": 0.28490036725997925, "step": 985, "token_acc": 0.920461445051609 }, { "epoch": 0.2080168776371308, "grad_norm": 0.70703125, "learning_rate": 9.819942803109844e-07, "loss": 0.2503746449947357, "step": 986, "token_acc": 0.9277822689302075 }, { "epoch": 0.20822784810126582, "grad_norm": 0.6953125, "learning_rate": 9.8194928575531e-07, "loss": 0.2538071870803833, "step": 987, "token_acc": 0.9264251614714968 }, { "epoch": 0.20843881856540084, "grad_norm": 0.6875, "learning_rate": 9.819042360846554e-07, "loss": 0.2641909718513489, "step": 988, "token_acc": 0.9284467713787086 }, { "epoch": 0.20864978902953588, "grad_norm": 0.75, "learning_rate": 9.818591313041727e-07, "loss": 0.2759447395801544, "step": 989, "token_acc": 0.9222654081066074 }, { "epoch": 0.2088607594936709, "grad_norm": 0.8125, "learning_rate": 9.818139714190198e-07, "loss": 0.23161228001117706, "step": 990, "token_acc": 0.9333521604066648 }, { "epoch": 0.2090717299578059, "grad_norm": 0.765625, "learning_rate": 9.817687564343615e-07, "loss": 0.2939218580722809, "step": 991, "token_acc": 0.9156313204276221 }, { "epoch": 0.20928270042194091, "grad_norm": 0.7265625, "learning_rate": 9.817234863553681e-07, "loss": 0.259197473526001, "step": 992, "token_acc": 0.9242243436754176 }, { "epoch": 0.20949367088607596, "grad_norm": 0.76171875, "learning_rate": 9.816781611872167e-07, "loss": 0.27298709750175476, "step": 993, "token_acc": 0.9176136363636364 }, { "epoch": 0.20970464135021097, "grad_norm": 0.6328125, "learning_rate": 9.816327809350907e-07, "loss": 0.2868914008140564, "step": 994, "token_acc": 0.9294675216057987 }, { "epoch": 0.20991561181434598, "grad_norm": 0.6171875, "learning_rate": 9.815873456041797e-07, "loss": 0.26026803255081177, "step": 995, "token_acc": 0.9263676432460461 }, { "epoch": 0.21012658227848102, "grad_norm": 0.8359375, "learning_rate": 9.815418551996795e-07, "loss": 0.2792215049266815, "step": 996, "token_acc": 0.9241547365214743 }, { "epoch": 0.21033755274261604, "grad_norm": 0.6953125, "learning_rate": 9.814963097267925e-07, "loss": 0.23070243000984192, "step": 997, "token_acc": 0.9354066985645934 }, { "epoch": 0.21054852320675105, "grad_norm": 0.640625, "learning_rate": 9.814507091907271e-07, "loss": 0.2509482502937317, "step": 998, "token_acc": 0.9299403078856425 }, { "epoch": 0.2107594936708861, "grad_norm": 0.73828125, "learning_rate": 9.814050535966981e-07, "loss": 0.24006497859954834, "step": 999, "token_acc": 0.9315068493150684 }, { "epoch": 0.2109704641350211, "grad_norm": 0.83984375, "learning_rate": 9.813593429499268e-07, "loss": 0.28949546813964844, "step": 1000, "token_acc": 0.9210992907801419 }, { "epoch": 0.2109704641350211, "eval_loss": 0.4336377680301666, "eval_runtime": 245.5659, "eval_samples_per_second": 137.254, "eval_steps_per_second": 2.146, "eval_token_acc": 0.8990386341200753, "step": 1000 }, { "epoch": 0.21118143459915611, "grad_norm": 0.78125, "learning_rate": 9.813135772556405e-07, "loss": 0.28296542167663574, "step": 1001, "token_acc": 0.9212338198843294 }, { "epoch": 0.21139240506329113, "grad_norm": 0.74609375, "learning_rate": 9.812677565190728e-07, "loss": 0.2738898694515228, "step": 1002, "token_acc": 0.926865671641791 }, { "epoch": 0.21160337552742617, "grad_norm": 0.73828125, "learning_rate": 9.812218807454635e-07, "loss": 0.24410274624824524, "step": 1003, "token_acc": 0.9221090473337328 }, { "epoch": 0.21181434599156118, "grad_norm": 0.734375, "learning_rate": 9.811759499400593e-07, "loss": 0.2736046314239502, "step": 1004, "token_acc": 0.9195751138088012 }, { "epoch": 0.2120253164556962, "grad_norm": 0.6171875, "learning_rate": 9.811299641081126e-07, "loss": 0.24416279792785645, "step": 1005, "token_acc": 0.9294429708222812 }, { "epoch": 0.21223628691983124, "grad_norm": 0.734375, "learning_rate": 9.81083923254882e-07, "loss": 0.26535558700561523, "step": 1006, "token_acc": 0.9219015280135824 }, { "epoch": 0.21244725738396625, "grad_norm": 0.7734375, "learning_rate": 9.81037827385633e-07, "loss": 0.25298815965652466, "step": 1007, "token_acc": 0.9291294642857143 }, { "epoch": 0.21265822784810126, "grad_norm": 0.60546875, "learning_rate": 9.809916765056373e-07, "loss": 0.18925216794013977, "step": 1008, "token_acc": 0.9450386215092097 }, { "epoch": 0.21286919831223627, "grad_norm": 0.6171875, "learning_rate": 9.809454706201719e-07, "loss": 0.262004017829895, "step": 1009, "token_acc": 0.9288862768145754 }, { "epoch": 0.21308016877637131, "grad_norm": 0.71484375, "learning_rate": 9.808992097345216e-07, "loss": 0.289574533700943, "step": 1010, "token_acc": 0.9239130434782609 }, { "epoch": 0.21329113924050633, "grad_norm": 0.67578125, "learning_rate": 9.80852893853976e-07, "loss": 0.19858276844024658, "step": 1011, "token_acc": 0.9515328467153285 }, { "epoch": 0.21350210970464134, "grad_norm": 0.58984375, "learning_rate": 9.808065229838323e-07, "loss": 0.24258631467819214, "step": 1012, "token_acc": 0.9321187187775493 }, { "epoch": 0.21371308016877638, "grad_norm": 0.7578125, "learning_rate": 9.807600971293932e-07, "loss": 0.27060335874557495, "step": 1013, "token_acc": 0.9243027888446215 }, { "epoch": 0.2139240506329114, "grad_norm": 0.625, "learning_rate": 9.807136162959678e-07, "loss": 0.27900418639183044, "step": 1014, "token_acc": 0.9236888626988804 }, { "epoch": 0.2141350210970464, "grad_norm": 0.6875, "learning_rate": 9.806670804888716e-07, "loss": 0.2552199959754944, "step": 1015, "token_acc": 0.9293139293139293 }, { "epoch": 0.21434599156118145, "grad_norm": 0.76171875, "learning_rate": 9.806204897134265e-07, "loss": 0.2198866307735443, "step": 1016, "token_acc": 0.933115823817292 }, { "epoch": 0.21455696202531646, "grad_norm": 0.94140625, "learning_rate": 9.805738439749604e-07, "loss": 0.2905495762825012, "step": 1017, "token_acc": 0.9233921815889029 }, { "epoch": 0.21476793248945147, "grad_norm": 0.7421875, "learning_rate": 9.805271432788077e-07, "loss": 0.23594287037849426, "step": 1018, "token_acc": 0.9323369565217391 }, { "epoch": 0.2149789029535865, "grad_norm": 0.73828125, "learning_rate": 9.80480387630309e-07, "loss": 0.2431170493364334, "step": 1019, "token_acc": 0.9300763358778626 }, { "epoch": 0.21518987341772153, "grad_norm": 0.78515625, "learning_rate": 9.804335770348115e-07, "loss": 0.32254183292388916, "step": 1020, "token_acc": 0.9199491740787802 }, { "epoch": 0.21540084388185654, "grad_norm": 0.65234375, "learning_rate": 9.803867114976678e-07, "loss": 0.2706320285797119, "step": 1021, "token_acc": 0.9207768975210836 }, { "epoch": 0.21561181434599155, "grad_norm": 0.671875, "learning_rate": 9.803397910242378e-07, "loss": 0.26340216398239136, "step": 1022, "token_acc": 0.9282065834279228 }, { "epoch": 0.2158227848101266, "grad_norm": 0.859375, "learning_rate": 9.802928156198871e-07, "loss": 0.27076318860054016, "step": 1023, "token_acc": 0.9234196602617655 }, { "epoch": 0.2160337552742616, "grad_norm": 0.68359375, "learning_rate": 9.802457852899878e-07, "loss": 0.24231645464897156, "step": 1024, "token_acc": 0.9345052452509215 }, { "epoch": 0.21624472573839662, "grad_norm": 0.6484375, "learning_rate": 9.80198700039918e-07, "loss": 0.26031437516212463, "step": 1025, "token_acc": 0.9236091092533871 }, { "epoch": 0.21645569620253163, "grad_norm": 0.69921875, "learning_rate": 9.801515598750626e-07, "loss": 0.29187077283859253, "step": 1026, "token_acc": 0.9215414471860272 }, { "epoch": 0.21666666666666667, "grad_norm": 0.578125, "learning_rate": 9.801043648008126e-07, "loss": 0.21546629071235657, "step": 1027, "token_acc": 0.9334701055099648 }, { "epoch": 0.2168776371308017, "grad_norm": 0.69921875, "learning_rate": 9.800571148225647e-07, "loss": 0.2522656321525574, "step": 1028, "token_acc": 0.930021868166198 }, { "epoch": 0.2170886075949367, "grad_norm": 0.74609375, "learning_rate": 9.800098099457225e-07, "loss": 0.293454647064209, "step": 1029, "token_acc": 0.9204295154185022 }, { "epoch": 0.21729957805907174, "grad_norm": 0.53125, "learning_rate": 9.799624501756957e-07, "loss": 0.22426486015319824, "step": 1030, "token_acc": 0.943021582733813 }, { "epoch": 0.21751054852320675, "grad_norm": 0.7109375, "learning_rate": 9.799150355179007e-07, "loss": 0.25977182388305664, "step": 1031, "token_acc": 0.9289996844430419 }, { "epoch": 0.21772151898734177, "grad_norm": 0.734375, "learning_rate": 9.79867565977759e-07, "loss": 0.23819683492183685, "step": 1032, "token_acc": 0.932983323038913 }, { "epoch": 0.21793248945147678, "grad_norm": 0.65234375, "learning_rate": 9.798200415607e-07, "loss": 0.22482289373874664, "step": 1033, "token_acc": 0.9358479358479358 }, { "epoch": 0.21814345991561182, "grad_norm": 1.015625, "learning_rate": 9.797724622721578e-07, "loss": 0.2759067416191101, "step": 1034, "token_acc": 0.9251207729468599 }, { "epoch": 0.21835443037974683, "grad_norm": 0.671875, "learning_rate": 9.797248281175737e-07, "loss": 0.28211140632629395, "step": 1035, "token_acc": 0.9303056269637247 }, { "epoch": 0.21856540084388185, "grad_norm": 0.7421875, "learning_rate": 9.796771391023952e-07, "loss": 0.26458674669265747, "step": 1036, "token_acc": 0.9243227326266196 }, { "epoch": 0.2187763713080169, "grad_norm": 0.85546875, "learning_rate": 9.79629395232076e-07, "loss": 0.24054330587387085, "step": 1037, "token_acc": 0.9339233038348083 }, { "epoch": 0.2189873417721519, "grad_norm": 0.6171875, "learning_rate": 9.795815965120757e-07, "loss": 0.21638503670692444, "step": 1038, "token_acc": 0.9390750484630296 }, { "epoch": 0.2191983122362869, "grad_norm": 0.8515625, "learning_rate": 9.79533742947861e-07, "loss": 0.247116357088089, "step": 1039, "token_acc": 0.9303322615219721 }, { "epoch": 0.21940928270042195, "grad_norm": 0.71484375, "learning_rate": 9.794858345449039e-07, "loss": 0.24935264885425568, "step": 1040, "token_acc": 0.9318113547611896 }, { "epoch": 0.21962025316455697, "grad_norm": 0.5546875, "learning_rate": 9.794378713086833e-07, "loss": 0.2215247005224228, "step": 1041, "token_acc": 0.9337332969730024 }, { "epoch": 0.21983122362869198, "grad_norm": 0.86328125, "learning_rate": 9.793898532446841e-07, "loss": 0.2293887734413147, "step": 1042, "token_acc": 0.9368061485909479 }, { "epoch": 0.220042194092827, "grad_norm": 0.75390625, "learning_rate": 9.793417803583979e-07, "loss": 0.24102169275283813, "step": 1043, "token_acc": 0.9339817270851754 }, { "epoch": 0.22025316455696203, "grad_norm": 0.6484375, "learning_rate": 9.792936526553218e-07, "loss": 0.23523153364658356, "step": 1044, "token_acc": 0.9332579185520362 }, { "epoch": 0.22046413502109705, "grad_norm": 2.34375, "learning_rate": 9.7924547014096e-07, "loss": 0.2483244389295578, "step": 1045, "token_acc": 0.9263482280431433 }, { "epoch": 0.22067510548523206, "grad_norm": 0.7265625, "learning_rate": 9.79197232820822e-07, "loss": 0.2850404679775238, "step": 1046, "token_acc": 0.9232827832292596 }, { "epoch": 0.2208860759493671, "grad_norm": 0.56640625, "learning_rate": 9.791489407004248e-07, "loss": 0.22615236043930054, "step": 1047, "token_acc": 0.9381818181818182 }, { "epoch": 0.2210970464135021, "grad_norm": 1.203125, "learning_rate": 9.791005937852906e-07, "loss": 0.2923763692378998, "step": 1048, "token_acc": 0.9200850805226375 }, { "epoch": 0.22130801687763713, "grad_norm": 0.67578125, "learning_rate": 9.790521920809485e-07, "loss": 0.24023482203483582, "step": 1049, "token_acc": 0.9306590257879657 }, { "epoch": 0.22151898734177214, "grad_norm": 0.7265625, "learning_rate": 9.790037355929336e-07, "loss": 0.24101434648036957, "step": 1050, "token_acc": 0.9310859188544153 }, { "epoch": 0.22172995780590718, "grad_norm": 0.8359375, "learning_rate": 9.789552243267873e-07, "loss": 0.24817001819610596, "step": 1051, "token_acc": 0.9296261388627082 }, { "epoch": 0.2219409282700422, "grad_norm": 0.65625, "learning_rate": 9.789066582880573e-07, "loss": 0.24397243559360504, "step": 1052, "token_acc": 0.9321930360415394 }, { "epoch": 0.2221518987341772, "grad_norm": 0.62890625, "learning_rate": 9.788580374822974e-07, "loss": 0.2505425810813904, "step": 1053, "token_acc": 0.9343989343989344 }, { "epoch": 0.22236286919831225, "grad_norm": 0.859375, "learning_rate": 9.78809361915068e-07, "loss": 0.2832062244415283, "step": 1054, "token_acc": 0.9229067930489732 }, { "epoch": 0.22257383966244726, "grad_norm": 0.6875, "learning_rate": 9.787606315919353e-07, "loss": 0.2667236328125, "step": 1055, "token_acc": 0.9272619751626257 }, { "epoch": 0.22278481012658227, "grad_norm": 0.59765625, "learning_rate": 9.787118465184723e-07, "loss": 0.2523917853832245, "step": 1056, "token_acc": 0.930327868852459 }, { "epoch": 0.2229957805907173, "grad_norm": 0.9921875, "learning_rate": 9.78663006700258e-07, "loss": 0.2004603147506714, "step": 1057, "token_acc": 0.9434229137199435 }, { "epoch": 0.22320675105485233, "grad_norm": 0.7890625, "learning_rate": 9.786141121428773e-07, "loss": 0.2887023687362671, "step": 1058, "token_acc": 0.9227068633739577 }, { "epoch": 0.22341772151898734, "grad_norm": 0.66796875, "learning_rate": 9.78565162851922e-07, "loss": 0.2264334261417389, "step": 1059, "token_acc": 0.9340407226842558 }, { "epoch": 0.22362869198312235, "grad_norm": 0.66015625, "learning_rate": 9.785161588329896e-07, "loss": 0.25807666778564453, "step": 1060, "token_acc": 0.9267456359102244 }, { "epoch": 0.2238396624472574, "grad_norm": 0.73828125, "learning_rate": 9.784671000916844e-07, "loss": 0.2947354018688202, "step": 1061, "token_acc": 0.919414969888156 }, { "epoch": 0.2240506329113924, "grad_norm": 0.70703125, "learning_rate": 9.784179866336167e-07, "loss": 0.25580474734306335, "step": 1062, "token_acc": 0.9270348837209302 }, { "epoch": 0.22426160337552742, "grad_norm": 0.5390625, "learning_rate": 9.783688184644027e-07, "loss": 0.23662078380584717, "step": 1063, "token_acc": 0.9339968569931901 }, { "epoch": 0.22447257383966246, "grad_norm": 0.765625, "learning_rate": 9.783195955896656e-07, "loss": 0.24781087040901184, "step": 1064, "token_acc": 0.9331036462819409 }, { "epoch": 0.22468354430379747, "grad_norm": 0.63671875, "learning_rate": 9.782703180150345e-07, "loss": 0.25100642442703247, "step": 1065, "token_acc": 0.9296745725317154 }, { "epoch": 0.22489451476793249, "grad_norm": 0.61328125, "learning_rate": 9.782209857461441e-07, "loss": 0.25573456287384033, "step": 1066, "token_acc": 0.9306306306306307 }, { "epoch": 0.2251054852320675, "grad_norm": 0.75, "learning_rate": 9.781715987886365e-07, "loss": 0.28791964054107666, "step": 1067, "token_acc": 0.9302730970366067 }, { "epoch": 0.22531645569620254, "grad_norm": 0.67578125, "learning_rate": 9.781221571481594e-07, "loss": 0.2471371442079544, "step": 1068, "token_acc": 0.9301613800779076 }, { "epoch": 0.22552742616033755, "grad_norm": 0.6796875, "learning_rate": 9.780726608303669e-07, "loss": 0.27067384123802185, "step": 1069, "token_acc": 0.9273190045248869 }, { "epoch": 0.22573839662447256, "grad_norm": 0.734375, "learning_rate": 9.780231098409191e-07, "loss": 0.28875893354415894, "step": 1070, "token_acc": 0.927710843373494 }, { "epoch": 0.2259493670886076, "grad_norm": 0.609375, "learning_rate": 9.779735041854829e-07, "loss": 0.2351369857788086, "step": 1071, "token_acc": 0.9268510258697591 }, { "epoch": 0.22616033755274262, "grad_norm": 0.69921875, "learning_rate": 9.779238438697309e-07, "loss": 0.2744210958480835, "step": 1072, "token_acc": 0.9206251915415262 }, { "epoch": 0.22637130801687763, "grad_norm": 0.84375, "learning_rate": 9.778741288993423e-07, "loss": 0.3181830942630768, "step": 1073, "token_acc": 0.9197530864197531 }, { "epoch": 0.22658227848101264, "grad_norm": 0.79296875, "learning_rate": 9.778243592800021e-07, "loss": 0.26819008588790894, "step": 1074, "token_acc": 0.9223744292237442 }, { "epoch": 0.22679324894514769, "grad_norm": 0.765625, "learning_rate": 9.777745350174023e-07, "loss": 0.2756550908088684, "step": 1075, "token_acc": 0.9224232456140351 }, { "epoch": 0.2270042194092827, "grad_norm": 0.75390625, "learning_rate": 9.777246561172408e-07, "loss": 0.23100243508815765, "step": 1076, "token_acc": 0.9374826340650181 }, { "epoch": 0.2272151898734177, "grad_norm": 0.8984375, "learning_rate": 9.776747225852212e-07, "loss": 0.2505825161933899, "step": 1077, "token_acc": 0.9271615234917987 }, { "epoch": 0.22742616033755275, "grad_norm": 1.828125, "learning_rate": 9.77624734427054e-07, "loss": 0.2634645998477936, "step": 1078, "token_acc": 0.9258319232938522 }, { "epoch": 0.22763713080168776, "grad_norm": 0.71484375, "learning_rate": 9.77574691648456e-07, "loss": 0.2142142951488495, "step": 1079, "token_acc": 0.9364801864801865 }, { "epoch": 0.22784810126582278, "grad_norm": 1.34375, "learning_rate": 9.775245942551499e-07, "loss": 0.3275116980075836, "step": 1080, "token_acc": 0.9079022171688459 }, { "epoch": 0.22805907172995782, "grad_norm": 0.82421875, "learning_rate": 9.774744422528645e-07, "loss": 0.2572461664676666, "step": 1081, "token_acc": 0.9303560274828232 }, { "epoch": 0.22827004219409283, "grad_norm": 0.921875, "learning_rate": 9.774242356473355e-07, "loss": 0.26090332865715027, "step": 1082, "token_acc": 0.9227313566936208 }, { "epoch": 0.22848101265822784, "grad_norm": 0.6953125, "learning_rate": 9.773739744443041e-07, "loss": 0.25752192735671997, "step": 1083, "token_acc": 0.9295694325634445 }, { "epoch": 0.22869198312236286, "grad_norm": 0.77734375, "learning_rate": 9.773236586495184e-07, "loss": 0.26559850573539734, "step": 1084, "token_acc": 0.9298945568538045 }, { "epoch": 0.2289029535864979, "grad_norm": 0.92578125, "learning_rate": 9.772732882687322e-07, "loss": 0.27687737345695496, "step": 1085, "token_acc": 0.928654970760234 }, { "epoch": 0.2291139240506329, "grad_norm": 0.8125, "learning_rate": 9.772228633077059e-07, "loss": 0.23229250311851501, "step": 1086, "token_acc": 0.9331412103746398 }, { "epoch": 0.22932489451476792, "grad_norm": 0.76171875, "learning_rate": 9.77172383772206e-07, "loss": 0.2462151050567627, "step": 1087, "token_acc": 0.9313640312771503 }, { "epoch": 0.22953586497890296, "grad_norm": 0.80859375, "learning_rate": 9.771218496680052e-07, "loss": 0.2657579183578491, "step": 1088, "token_acc": 0.9255605381165919 }, { "epoch": 0.22974683544303798, "grad_norm": 0.703125, "learning_rate": 9.770712610008826e-07, "loss": 0.22832798957824707, "step": 1089, "token_acc": 0.9297372060857538 }, { "epoch": 0.229957805907173, "grad_norm": 0.71484375, "learning_rate": 9.770206177766236e-07, "loss": 0.23893365263938904, "step": 1090, "token_acc": 0.9337880079568059 }, { "epoch": 0.230168776371308, "grad_norm": 0.734375, "learning_rate": 9.769699200010193e-07, "loss": 0.25824564695358276, "step": 1091, "token_acc": 0.9248587570621469 }, { "epoch": 0.23037974683544304, "grad_norm": 0.58203125, "learning_rate": 9.769191676798677e-07, "loss": 0.24014432728290558, "step": 1092, "token_acc": 0.9332155477031802 }, { "epoch": 0.23059071729957806, "grad_norm": 0.75390625, "learning_rate": 9.768683608189726e-07, "loss": 0.22704805433750153, "step": 1093, "token_acc": 0.9340954942837929 }, { "epoch": 0.23080168776371307, "grad_norm": 0.796875, "learning_rate": 9.768174994241443e-07, "loss": 0.32337048649787903, "step": 1094, "token_acc": 0.9150032195750161 }, { "epoch": 0.2310126582278481, "grad_norm": 0.59765625, "learning_rate": 9.76766583501199e-07, "loss": 0.23895105719566345, "step": 1095, "token_acc": 0.9323377019201463 }, { "epoch": 0.23122362869198312, "grad_norm": 0.8125, "learning_rate": 9.767156130559598e-07, "loss": 0.3005239963531494, "step": 1096, "token_acc": 0.9185533666568656 }, { "epoch": 0.23143459915611814, "grad_norm": 0.765625, "learning_rate": 9.766645880942553e-07, "loss": 0.2561134099960327, "step": 1097, "token_acc": 0.9301909307875895 }, { "epoch": 0.23164556962025318, "grad_norm": 1.0703125, "learning_rate": 9.766135086219208e-07, "loss": 0.273573100566864, "step": 1098, "token_acc": 0.9235312402859808 }, { "epoch": 0.2318565400843882, "grad_norm": 0.73828125, "learning_rate": 9.765623746447973e-07, "loss": 0.23628413677215576, "step": 1099, "token_acc": 0.9250535331905781 }, { "epoch": 0.2320675105485232, "grad_norm": 1.0703125, "learning_rate": 9.765111861687328e-07, "loss": 0.3046630620956421, "step": 1100, "token_acc": 0.9216944801026957 }, { "epoch": 0.23227848101265822, "grad_norm": 0.79296875, "learning_rate": 9.76459943199581e-07, "loss": 0.24554237723350525, "step": 1101, "token_acc": 0.9288537549407114 }, { "epoch": 0.23248945147679326, "grad_norm": 0.796875, "learning_rate": 9.76408645743202e-07, "loss": 0.2889293432235718, "step": 1102, "token_acc": 0.9233661075766338 }, { "epoch": 0.23270042194092827, "grad_norm": 0.70703125, "learning_rate": 9.763572938054621e-07, "loss": 0.25873494148254395, "step": 1103, "token_acc": 0.9205917874396136 }, { "epoch": 0.23291139240506328, "grad_norm": 0.61328125, "learning_rate": 9.763058873922336e-07, "loss": 0.23966625332832336, "step": 1104, "token_acc": 0.9342347879532883 }, { "epoch": 0.23312236286919832, "grad_norm": 0.703125, "learning_rate": 9.762544265093958e-07, "loss": 0.25258344411849976, "step": 1105, "token_acc": 0.9244996967859309 }, { "epoch": 0.23333333333333334, "grad_norm": 0.80078125, "learning_rate": 9.76202911162833e-07, "loss": 0.2642197608947754, "step": 1106, "token_acc": 0.9265809217577706 }, { "epoch": 0.23354430379746835, "grad_norm": 0.77734375, "learning_rate": 9.761513413584369e-07, "loss": 0.27011817693710327, "step": 1107, "token_acc": 0.927093282394142 }, { "epoch": 0.23375527426160336, "grad_norm": 0.83203125, "learning_rate": 9.760997171021047e-07, "loss": 0.2859058380126953, "step": 1108, "token_acc": 0.9265121537591859 }, { "epoch": 0.2339662447257384, "grad_norm": 0.7265625, "learning_rate": 9.760480383997403e-07, "loss": 0.2449111044406891, "step": 1109, "token_acc": 0.9347949886104784 }, { "epoch": 0.23417721518987342, "grad_norm": 0.78515625, "learning_rate": 9.759963052572535e-07, "loss": 0.2845684289932251, "step": 1110, "token_acc": 0.9204577169969299 }, { "epoch": 0.23438818565400843, "grad_norm": 0.72265625, "learning_rate": 9.759445176805603e-07, "loss": 0.25201043486595154, "step": 1111, "token_acc": 0.9282962962962963 }, { "epoch": 0.23459915611814347, "grad_norm": 0.55859375, "learning_rate": 9.758926756755832e-07, "loss": 0.22519253194332123, "step": 1112, "token_acc": 0.9384904646790223 }, { "epoch": 0.23481012658227848, "grad_norm": 0.65234375, "learning_rate": 9.758407792482508e-07, "loss": 0.21577298641204834, "step": 1113, "token_acc": 0.9366028708133971 }, { "epoch": 0.2350210970464135, "grad_norm": 1.0234375, "learning_rate": 9.757888284044978e-07, "loss": 0.2565447688102722, "step": 1114, "token_acc": 0.9269433080070134 }, { "epoch": 0.23523206751054854, "grad_norm": 0.60546875, "learning_rate": 9.75736823150265e-07, "loss": 0.2519682049751282, "step": 1115, "token_acc": 0.9242471282210494 }, { "epoch": 0.23544303797468355, "grad_norm": 0.76171875, "learning_rate": 9.756847634915e-07, "loss": 0.2238023430109024, "step": 1116, "token_acc": 0.9373587342589603 }, { "epoch": 0.23565400843881856, "grad_norm": 0.68359375, "learning_rate": 9.75632649434156e-07, "loss": 0.23814505338668823, "step": 1117, "token_acc": 0.9302662037037037 }, { "epoch": 0.23586497890295358, "grad_norm": 0.65234375, "learning_rate": 9.755804809841932e-07, "loss": 0.2618996500968933, "step": 1118, "token_acc": 0.9262518968133535 }, { "epoch": 0.23607594936708862, "grad_norm": 0.77734375, "learning_rate": 9.755282581475767e-07, "loss": 0.2755908966064453, "step": 1119, "token_acc": 0.9285714285714286 }, { "epoch": 0.23628691983122363, "grad_norm": 0.58984375, "learning_rate": 9.754759809302793e-07, "loss": 0.22718246281147003, "step": 1120, "token_acc": 0.936778449697636 }, { "epoch": 0.23649789029535864, "grad_norm": 1.109375, "learning_rate": 9.75423649338279e-07, "loss": 0.25046756863594055, "step": 1121, "token_acc": 0.9300994275384152 }, { "epoch": 0.23670886075949368, "grad_norm": 3.6875, "learning_rate": 9.753712633775603e-07, "loss": 0.2738564610481262, "step": 1122, "token_acc": 0.9289681379713534 }, { "epoch": 0.2369198312236287, "grad_norm": 0.59765625, "learning_rate": 9.753188230541144e-07, "loss": 0.2159212827682495, "step": 1123, "token_acc": 0.9403917116094238 }, { "epoch": 0.2371308016877637, "grad_norm": 0.7890625, "learning_rate": 9.752663283739378e-07, "loss": 0.27135610580444336, "step": 1124, "token_acc": 0.9201945080091534 }, { "epoch": 0.23734177215189872, "grad_norm": 0.63671875, "learning_rate": 9.752137793430338e-07, "loss": 0.20492365956306458, "step": 1125, "token_acc": 0.9434542102028273 }, { "epoch": 0.23755274261603376, "grad_norm": 0.75, "learning_rate": 9.751611759674123e-07, "loss": 0.24978362023830414, "step": 1126, "token_acc": 0.9274985557481225 }, { "epoch": 0.23776371308016878, "grad_norm": 0.640625, "learning_rate": 9.751085182530885e-07, "loss": 0.2612837553024292, "step": 1127, "token_acc": 0.9279176201372997 }, { "epoch": 0.2379746835443038, "grad_norm": 0.6328125, "learning_rate": 9.750558062060844e-07, "loss": 0.22179251909255981, "step": 1128, "token_acc": 0.9389685688129387 }, { "epoch": 0.23818565400843883, "grad_norm": 0.890625, "learning_rate": 9.750030398324279e-07, "loss": 0.2751111388206482, "step": 1129, "token_acc": 0.9258064516129032 }, { "epoch": 0.23839662447257384, "grad_norm": 0.7265625, "learning_rate": 9.749502191381533e-07, "loss": 0.259870707988739, "step": 1130, "token_acc": 0.9296420923829918 }, { "epoch": 0.23860759493670886, "grad_norm": 0.69921875, "learning_rate": 9.748973441293014e-07, "loss": 0.24654024839401245, "step": 1131, "token_acc": 0.933527696793003 }, { "epoch": 0.23881856540084387, "grad_norm": 0.734375, "learning_rate": 9.748444148119185e-07, "loss": 0.2358575463294983, "step": 1132, "token_acc": 0.9305291723202171 }, { "epoch": 0.2390295358649789, "grad_norm": 1.0, "learning_rate": 9.74791431192058e-07, "loss": 0.2531391978263855, "step": 1133, "token_acc": 0.9241610738255034 }, { "epoch": 0.23924050632911392, "grad_norm": 0.890625, "learning_rate": 9.747383932757787e-07, "loss": 0.26321518421173096, "step": 1134, "token_acc": 0.9286318758815233 }, { "epoch": 0.23945147679324894, "grad_norm": 0.5625, "learning_rate": 9.746853010691457e-07, "loss": 0.2001633495092392, "step": 1135, "token_acc": 0.9387040280210157 }, { "epoch": 0.23966244725738398, "grad_norm": 1.0703125, "learning_rate": 9.74632154578231e-07, "loss": 0.2789958715438843, "step": 1136, "token_acc": 0.9239491150442478 }, { "epoch": 0.239873417721519, "grad_norm": 0.7734375, "learning_rate": 9.745789538091123e-07, "loss": 0.2799132168292999, "step": 1137, "token_acc": 0.919815668202765 }, { "epoch": 0.240084388185654, "grad_norm": 0.703125, "learning_rate": 9.745256987678733e-07, "loss": 0.3027660846710205, "step": 1138, "token_acc": 0.9214015151515151 }, { "epoch": 0.24029535864978904, "grad_norm": 1.453125, "learning_rate": 9.74472389460604e-07, "loss": 0.2762015461921692, "step": 1139, "token_acc": 0.9253100338218715 }, { "epoch": 0.24050632911392406, "grad_norm": 0.6640625, "learning_rate": 9.744190258934015e-07, "loss": 0.2636283040046692, "step": 1140, "token_acc": 0.9310656231186033 }, { "epoch": 0.24071729957805907, "grad_norm": 0.69921875, "learning_rate": 9.743656080723676e-07, "loss": 0.24154981970787048, "step": 1141, "token_acc": 0.9304322084073416 }, { "epoch": 0.24092827004219408, "grad_norm": 0.7890625, "learning_rate": 9.743121360036117e-07, "loss": 0.26985490322113037, "step": 1142, "token_acc": 0.9234859675036927 }, { "epoch": 0.24113924050632912, "grad_norm": 0.7109375, "learning_rate": 9.742586096932484e-07, "loss": 0.26405149698257446, "step": 1143, "token_acc": 0.925842060571752 }, { "epoch": 0.24135021097046414, "grad_norm": 0.75390625, "learning_rate": 9.74205029147399e-07, "loss": 0.2645023465156555, "step": 1144, "token_acc": 0.9224408326204734 }, { "epoch": 0.24156118143459915, "grad_norm": 0.59375, "learning_rate": 9.74151394372191e-07, "loss": 0.24868255853652954, "step": 1145, "token_acc": 0.9265844565529108 }, { "epoch": 0.2417721518987342, "grad_norm": 0.78125, "learning_rate": 9.740977053737575e-07, "loss": 0.2382085621356964, "step": 1146, "token_acc": 0.9304396215915415 }, { "epoch": 0.2419831223628692, "grad_norm": 0.70703125, "learning_rate": 9.74043962158239e-07, "loss": 0.27882516384124756, "step": 1147, "token_acc": 0.9233477789815818 }, { "epoch": 0.24219409282700421, "grad_norm": 0.640625, "learning_rate": 9.73990164731781e-07, "loss": 0.2696080207824707, "step": 1148, "token_acc": 0.9303629389200354 }, { "epoch": 0.24240506329113923, "grad_norm": 0.84375, "learning_rate": 9.739363131005358e-07, "loss": 0.2302226722240448, "step": 1149, "token_acc": 0.9319037960011591 }, { "epoch": 0.24261603375527427, "grad_norm": 0.78125, "learning_rate": 9.738824072706619e-07, "loss": 0.300628125667572, "step": 1150, "token_acc": 0.9245404708158659 }, { "epoch": 0.24282700421940928, "grad_norm": 0.74609375, "learning_rate": 9.738284472483239e-07, "loss": 0.28427547216415405, "step": 1151, "token_acc": 0.9175200664267922 }, { "epoch": 0.2430379746835443, "grad_norm": 0.69140625, "learning_rate": 9.737744330396924e-07, "loss": 0.25487983226776123, "step": 1152, "token_acc": 0.9297150610583447 }, { "epoch": 0.24324894514767934, "grad_norm": 0.76953125, "learning_rate": 9.737203646509445e-07, "loss": 0.23934927582740784, "step": 1153, "token_acc": 0.9319465081723626 }, { "epoch": 0.24345991561181435, "grad_norm": 0.72265625, "learning_rate": 9.736662420882636e-07, "loss": 0.24117323756217957, "step": 1154, "token_acc": 0.9246076233183856 }, { "epoch": 0.24367088607594936, "grad_norm": 0.64453125, "learning_rate": 9.736120653578385e-07, "loss": 0.21268248558044434, "step": 1155, "token_acc": 0.9357798165137615 }, { "epoch": 0.2438818565400844, "grad_norm": 0.65234375, "learning_rate": 9.735578344658652e-07, "loss": 0.20961320400238037, "step": 1156, "token_acc": 0.936529933481153 }, { "epoch": 0.24409282700421941, "grad_norm": 0.75, "learning_rate": 9.735035494185454e-07, "loss": 0.23409229516983032, "step": 1157, "token_acc": 0.9367359413202934 }, { "epoch": 0.24430379746835443, "grad_norm": 0.68359375, "learning_rate": 9.73449210222087e-07, "loss": 0.2215413749217987, "step": 1158, "token_acc": 0.9337132573485303 }, { "epoch": 0.24451476793248944, "grad_norm": 0.5078125, "learning_rate": 9.733948168827042e-07, "loss": 0.20670649409294128, "step": 1159, "token_acc": 0.9404427814156533 }, { "epoch": 0.24472573839662448, "grad_norm": 0.65625, "learning_rate": 9.733403694066174e-07, "loss": 0.22479060292243958, "step": 1160, "token_acc": 0.9324430479183032 }, { "epoch": 0.2449367088607595, "grad_norm": 0.77734375, "learning_rate": 9.732858678000528e-07, "loss": 0.26787155866622925, "step": 1161, "token_acc": 0.9296438033559022 }, { "epoch": 0.2451476793248945, "grad_norm": 0.65234375, "learning_rate": 9.732313120692436e-07, "loss": 0.23902547359466553, "step": 1162, "token_acc": 0.9335614485315085 }, { "epoch": 0.24535864978902955, "grad_norm": 0.6796875, "learning_rate": 9.731767022204283e-07, "loss": 0.25326600670814514, "step": 1163, "token_acc": 0.9322571346209282 }, { "epoch": 0.24556962025316456, "grad_norm": 0.7421875, "learning_rate": 9.73122038259852e-07, "loss": 0.25212883949279785, "step": 1164, "token_acc": 0.928149300155521 }, { "epoch": 0.24578059071729957, "grad_norm": 0.61328125, "learning_rate": 9.730673201937667e-07, "loss": 0.22139108180999756, "step": 1165, "token_acc": 0.9381711682395054 }, { "epoch": 0.2459915611814346, "grad_norm": 0.71875, "learning_rate": 9.73012548028429e-07, "loss": 0.2780754566192627, "step": 1166, "token_acc": 0.9247701309556979 }, { "epoch": 0.24620253164556963, "grad_norm": 0.72265625, "learning_rate": 9.729577217701028e-07, "loss": 0.25920483469963074, "step": 1167, "token_acc": 0.9288939051918735 }, { "epoch": 0.24641350210970464, "grad_norm": 0.91796875, "learning_rate": 9.729028414250581e-07, "loss": 0.2644927501678467, "step": 1168, "token_acc": 0.9281803542673108 }, { "epoch": 0.24662447257383965, "grad_norm": 0.6796875, "learning_rate": 9.72847906999571e-07, "loss": 0.2646119296550751, "step": 1169, "token_acc": 0.9231905465288035 }, { "epoch": 0.2468354430379747, "grad_norm": 0.73828125, "learning_rate": 9.727929184999235e-07, "loss": 0.28313902020454407, "step": 1170, "token_acc": 0.9207523897625656 }, { "epoch": 0.2470464135021097, "grad_norm": 0.5625, "learning_rate": 9.72737875932404e-07, "loss": 0.2552646994590759, "step": 1171, "token_acc": 0.9269677419354839 }, { "epoch": 0.24725738396624472, "grad_norm": 0.8046875, "learning_rate": 9.726827793033072e-07, "loss": 0.31298452615737915, "step": 1172, "token_acc": 0.9097605893186004 }, { "epoch": 0.24746835443037973, "grad_norm": 0.6796875, "learning_rate": 9.726276286189338e-07, "loss": 0.262067973613739, "step": 1173, "token_acc": 0.927277716794731 }, { "epoch": 0.24767932489451477, "grad_norm": 0.78125, "learning_rate": 9.72572423885591e-07, "loss": 0.2698643207550049, "step": 1174, "token_acc": 0.9250425894378195 }, { "epoch": 0.2478902953586498, "grad_norm": 0.5390625, "learning_rate": 9.725171651095914e-07, "loss": 0.2563115954399109, "step": 1175, "token_acc": 0.9273853081902618 }, { "epoch": 0.2481012658227848, "grad_norm": 0.76953125, "learning_rate": 9.724618522972547e-07, "loss": 0.2558833658695221, "step": 1176, "token_acc": 0.9279216235129462 }, { "epoch": 0.24831223628691984, "grad_norm": 0.92578125, "learning_rate": 9.724064854549066e-07, "loss": 0.2242472767829895, "step": 1177, "token_acc": 0.9351190476190476 }, { "epoch": 0.24852320675105485, "grad_norm": 0.66015625, "learning_rate": 9.723510645888782e-07, "loss": 0.24171394109725952, "step": 1178, "token_acc": 0.9324016899577511 }, { "epoch": 0.24873417721518987, "grad_norm": 0.98046875, "learning_rate": 9.722955897055077e-07, "loss": 0.2775258719921112, "step": 1179, "token_acc": 0.9166889900884008 }, { "epoch": 0.2489451476793249, "grad_norm": 0.6484375, "learning_rate": 9.72240060811139e-07, "loss": 0.2454729527235031, "step": 1180, "token_acc": 0.9307559145989613 }, { "epoch": 0.24915611814345992, "grad_norm": 0.91796875, "learning_rate": 9.721844779121222e-07, "loss": 0.3090250790119171, "step": 1181, "token_acc": 0.9210233592880979 }, { "epoch": 0.24936708860759493, "grad_norm": 0.77734375, "learning_rate": 9.721288410148139e-07, "loss": 0.2278854250907898, "step": 1182, "token_acc": 0.9349336702463676 }, { "epoch": 0.24957805907172995, "grad_norm": 0.82421875, "learning_rate": 9.720731501255766e-07, "loss": 0.28732359409332275, "step": 1183, "token_acc": 0.9273584905660377 }, { "epoch": 0.249789029535865, "grad_norm": 0.79296875, "learning_rate": 9.72017405250779e-07, "loss": 0.25907230377197266, "step": 1184, "token_acc": 0.9236153377967133 }, { "epoch": 0.25, "grad_norm": 0.78125, "learning_rate": 9.71961606396796e-07, "loss": 0.25944262742996216, "step": 1185, "token_acc": 0.9268510258697591 }, { "epoch": 0.25021097046413504, "grad_norm": 0.703125, "learning_rate": 9.719057535700087e-07, "loss": 0.22414150834083557, "step": 1186, "token_acc": 0.9399524375743162 }, { "epoch": 0.25042194092827, "grad_norm": 0.7109375, "learning_rate": 9.71849846776804e-07, "loss": 0.22336113452911377, "step": 1187, "token_acc": 0.9373601789709173 }, { "epoch": 0.25063291139240507, "grad_norm": 0.91015625, "learning_rate": 9.717938860235761e-07, "loss": 0.266832172870636, "step": 1188, "token_acc": 0.9330294530154277 }, { "epoch": 0.2508438818565401, "grad_norm": 0.68359375, "learning_rate": 9.717378713167238e-07, "loss": 0.2418878674507141, "step": 1189, "token_acc": 0.9303391384051329 }, { "epoch": 0.2510548523206751, "grad_norm": 1.359375, "learning_rate": 9.716818026626531e-07, "loss": 0.2523130774497986, "step": 1190, "token_acc": 0.9264534883720931 }, { "epoch": 0.25126582278481013, "grad_norm": 0.765625, "learning_rate": 9.716256800677763e-07, "loss": 0.24182310700416565, "step": 1191, "token_acc": 0.9301447451227187 }, { "epoch": 0.2514767932489452, "grad_norm": 0.6875, "learning_rate": 9.715695035385109e-07, "loss": 0.2770576477050781, "step": 1192, "token_acc": 0.9195145320983711 }, { "epoch": 0.25168776371308016, "grad_norm": 0.6953125, "learning_rate": 9.715132730812816e-07, "loss": 0.23406967520713806, "step": 1193, "token_acc": 0.9375539568345324 }, { "epoch": 0.2518987341772152, "grad_norm": 0.734375, "learning_rate": 9.714569887025185e-07, "loss": 0.24692702293395996, "step": 1194, "token_acc": 0.9298245614035088 }, { "epoch": 0.2521097046413502, "grad_norm": 0.7578125, "learning_rate": 9.714006504086584e-07, "loss": 0.30922287702560425, "step": 1195, "token_acc": 0.9170774137431139 }, { "epoch": 0.2523206751054852, "grad_norm": 1.078125, "learning_rate": 9.71344258206144e-07, "loss": 0.25276121497154236, "step": 1196, "token_acc": 0.9233000322268772 }, { "epoch": 0.25253164556962027, "grad_norm": 0.72265625, "learning_rate": 9.712878121014243e-07, "loss": 0.24556684494018555, "step": 1197, "token_acc": 0.9253149370125975 }, { "epoch": 0.25274261603375525, "grad_norm": 0.89453125, "learning_rate": 9.712313121009545e-07, "loss": 0.24084654450416565, "step": 1198, "token_acc": 0.9290652239254583 }, { "epoch": 0.2529535864978903, "grad_norm": 0.86328125, "learning_rate": 9.711747582111956e-07, "loss": 0.28878283500671387, "step": 1199, "token_acc": 0.9240226986128626 }, { "epoch": 0.25316455696202533, "grad_norm": 0.81640625, "learning_rate": 9.71118150438615e-07, "loss": 0.2748972177505493, "step": 1200, "token_acc": 0.9292035398230089 }, { "epoch": 0.25316455696202533, "eval_loss": 0.43360278010368347, "eval_runtime": 245.4913, "eval_samples_per_second": 137.296, "eval_steps_per_second": 2.147, "eval_token_acc": 0.8991687606569146, "step": 1200 }, { "epoch": 0.2533755274261603, "grad_norm": 0.69921875, "learning_rate": 9.710614887896864e-07, "loss": 0.2842106521129608, "step": 1201, "token_acc": 0.9268008165645961 }, { "epoch": 0.25358649789029536, "grad_norm": 0.6640625, "learning_rate": 9.710047732708896e-07, "loss": 0.2714993357658386, "step": 1202, "token_acc": 0.9253355704697986 }, { "epoch": 0.2537974683544304, "grad_norm": 0.7265625, "learning_rate": 9.709480038887104e-07, "loss": 0.2943306863307953, "step": 1203, "token_acc": 0.9169639961076873 }, { "epoch": 0.2540084388185654, "grad_norm": 0.6796875, "learning_rate": 9.708911806496409e-07, "loss": 0.2546621859073639, "step": 1204, "token_acc": 0.925904145839459 }, { "epoch": 0.2542194092827004, "grad_norm": 0.8828125, "learning_rate": 9.708343035601792e-07, "loss": 0.28815045952796936, "step": 1205, "token_acc": 0.9163055254604551 }, { "epoch": 0.25443037974683547, "grad_norm": 0.78125, "learning_rate": 9.707773726268297e-07, "loss": 0.2761947214603424, "step": 1206, "token_acc": 0.9241419094317854 }, { "epoch": 0.25464135021097045, "grad_norm": 0.671875, "learning_rate": 9.707203878561032e-07, "loss": 0.2541855573654175, "step": 1207, "token_acc": 0.9287749287749287 }, { "epoch": 0.2548523206751055, "grad_norm": 0.73828125, "learning_rate": 9.706633492545163e-07, "loss": 0.24647629261016846, "step": 1208, "token_acc": 0.9260204081632653 }, { "epoch": 0.25506329113924053, "grad_norm": 0.77734375, "learning_rate": 9.706062568285915e-07, "loss": 0.26219797134399414, "step": 1209, "token_acc": 0.9297507283910651 }, { "epoch": 0.2552742616033755, "grad_norm": 0.8671875, "learning_rate": 9.705491105848582e-07, "loss": 0.33000296354293823, "step": 1210, "token_acc": 0.9137880129908473 }, { "epoch": 0.25548523206751056, "grad_norm": 0.94140625, "learning_rate": 9.704919105298515e-07, "loss": 0.2628134489059448, "step": 1211, "token_acc": 0.9242511520737328 }, { "epoch": 0.25569620253164554, "grad_norm": 0.74609375, "learning_rate": 9.704346566701123e-07, "loss": 0.2608003616333008, "step": 1212, "token_acc": 0.9264790350373349 }, { "epoch": 0.2559071729957806, "grad_norm": 0.65234375, "learning_rate": 9.703773490121888e-07, "loss": 0.2128331959247589, "step": 1213, "token_acc": 0.9373394966615306 }, { "epoch": 0.2561181434599156, "grad_norm": 0.7421875, "learning_rate": 9.703199875626338e-07, "loss": 0.2637559771537781, "step": 1214, "token_acc": 0.9277810133954572 }, { "epoch": 0.2563291139240506, "grad_norm": 0.65625, "learning_rate": 9.702625723280076e-07, "loss": 0.24556344747543335, "step": 1215, "token_acc": 0.9323801012809055 }, { "epoch": 0.25654008438818565, "grad_norm": 0.61328125, "learning_rate": 9.70205103314876e-07, "loss": 0.25457581877708435, "step": 1216, "token_acc": 0.9285921625544267 }, { "epoch": 0.2567510548523207, "grad_norm": 0.78515625, "learning_rate": 9.701475805298111e-07, "loss": 0.24435514211654663, "step": 1217, "token_acc": 0.9288 }, { "epoch": 0.2569620253164557, "grad_norm": 0.734375, "learning_rate": 9.70090003979391e-07, "loss": 0.27876341342926025, "step": 1218, "token_acc": 0.9308375634517766 }, { "epoch": 0.2571729957805907, "grad_norm": 0.890625, "learning_rate": 9.700323736702003e-07, "loss": 0.2775731682777405, "step": 1219, "token_acc": 0.9191780821917809 }, { "epoch": 0.25738396624472576, "grad_norm": 0.77734375, "learning_rate": 9.699746896088293e-07, "loss": 0.28725969791412354, "step": 1220, "token_acc": 0.9266795865633075 }, { "epoch": 0.25759493670886074, "grad_norm": 0.62109375, "learning_rate": 9.699169518018747e-07, "loss": 0.2132992148399353, "step": 1221, "token_acc": 0.9392470051340559 }, { "epoch": 0.2578059071729958, "grad_norm": 0.69140625, "learning_rate": 9.698591602559392e-07, "loss": 0.22622618079185486, "step": 1222, "token_acc": 0.9333721268548153 }, { "epoch": 0.2580168776371308, "grad_norm": 0.71484375, "learning_rate": 9.698013149776318e-07, "loss": 0.260597288608551, "step": 1223, "token_acc": 0.9280405405405405 }, { "epoch": 0.2582278481012658, "grad_norm": 0.57421875, "learning_rate": 9.697434159735679e-07, "loss": 0.24745848774909973, "step": 1224, "token_acc": 0.9323770491803278 }, { "epoch": 0.25843881856540085, "grad_norm": 0.66015625, "learning_rate": 9.696854632503684e-07, "loss": 0.2582542896270752, "step": 1225, "token_acc": 0.9207642596234897 }, { "epoch": 0.2586497890295359, "grad_norm": 0.765625, "learning_rate": 9.696274568146607e-07, "loss": 0.23025202751159668, "step": 1226, "token_acc": 0.9355661881977672 }, { "epoch": 0.2588607594936709, "grad_norm": 0.70703125, "learning_rate": 9.695693966730786e-07, "loss": 0.2466697096824646, "step": 1227, "token_acc": 0.9332688588007737 }, { "epoch": 0.2590717299578059, "grad_norm": 0.5859375, "learning_rate": 9.695112828322614e-07, "loss": 0.24845033884048462, "step": 1228, "token_acc": 0.9291573452647278 }, { "epoch": 0.2592827004219409, "grad_norm": 0.87890625, "learning_rate": 9.694531152988553e-07, "loss": 0.27860498428344727, "step": 1229, "token_acc": 0.921844342707652 }, { "epoch": 0.25949367088607594, "grad_norm": 0.62109375, "learning_rate": 9.69394894079512e-07, "loss": 0.2347266525030136, "step": 1230, "token_acc": 0.9367695534911598 }, { "epoch": 0.259704641350211, "grad_norm": 1.0234375, "learning_rate": 9.693366191808895e-07, "loss": 0.2848036289215088, "step": 1231, "token_acc": 0.9186012342051131 }, { "epoch": 0.25991561181434597, "grad_norm": 0.71875, "learning_rate": 9.692782906096522e-07, "loss": 0.271634578704834, "step": 1232, "token_acc": 0.9254937163375224 }, { "epoch": 0.260126582278481, "grad_norm": 0.703125, "learning_rate": 9.692199083724704e-07, "loss": 0.3552762567996979, "step": 1233, "token_acc": 0.9169062679700978 }, { "epoch": 0.26033755274261605, "grad_norm": 0.69140625, "learning_rate": 9.691614724760208e-07, "loss": 0.2829202115535736, "step": 1234, "token_acc": 0.92018779342723 }, { "epoch": 0.26054852320675104, "grad_norm": 0.91015625, "learning_rate": 9.691029829269856e-07, "loss": 0.24786871671676636, "step": 1235, "token_acc": 0.9295311214697011 }, { "epoch": 0.2607594936708861, "grad_norm": 0.69921875, "learning_rate": 9.690444397320543e-07, "loss": 0.23441259562969208, "step": 1236, "token_acc": 0.9349985807550383 }, { "epoch": 0.2609704641350211, "grad_norm": 0.67578125, "learning_rate": 9.68985842897921e-07, "loss": 0.21858146786689758, "step": 1237, "token_acc": 0.9393670511682934 }, { "epoch": 0.2611814345991561, "grad_norm": 0.7890625, "learning_rate": 9.689271924312873e-07, "loss": 0.28490114212036133, "step": 1238, "token_acc": 0.9234604105571848 }, { "epoch": 0.26139240506329114, "grad_norm": 0.7421875, "learning_rate": 9.688684883388598e-07, "loss": 0.26761138439178467, "step": 1239, "token_acc": 0.9277440706012134 }, { "epoch": 0.2616033755274262, "grad_norm": 0.70703125, "learning_rate": 9.688097306273525e-07, "loss": 0.2610659599304199, "step": 1240, "token_acc": 0.9297409943435546 }, { "epoch": 0.26181434599156117, "grad_norm": 0.59765625, "learning_rate": 9.687509193034844e-07, "loss": 0.22601675987243652, "step": 1241, "token_acc": 0.9416980118216013 }, { "epoch": 0.2620253164556962, "grad_norm": 0.78515625, "learning_rate": 9.686920543739815e-07, "loss": 0.248170405626297, "step": 1242, "token_acc": 0.9330609679446888 }, { "epoch": 0.2622362869198312, "grad_norm": 0.68359375, "learning_rate": 9.686331358455747e-07, "loss": 0.23889891803264618, "step": 1243, "token_acc": 0.9359560841720037 }, { "epoch": 0.26244725738396624, "grad_norm": 0.640625, "learning_rate": 9.685741637250027e-07, "loss": 0.2646980285644531, "step": 1244, "token_acc": 0.9301587301587302 }, { "epoch": 0.2626582278481013, "grad_norm": 0.70703125, "learning_rate": 9.68515138019009e-07, "loss": 0.23318523168563843, "step": 1245, "token_acc": 0.9326036866359447 }, { "epoch": 0.26286919831223626, "grad_norm": 0.7265625, "learning_rate": 9.684560587343439e-07, "loss": 0.25974488258361816, "step": 1246, "token_acc": 0.9271044258027191 }, { "epoch": 0.2630801687763713, "grad_norm": 0.78125, "learning_rate": 9.683969258777634e-07, "loss": 0.2580402195453644, "step": 1247, "token_acc": 0.9221480775341595 }, { "epoch": 0.26329113924050634, "grad_norm": 0.65234375, "learning_rate": 9.6833773945603e-07, "loss": 0.26659655570983887, "step": 1248, "token_acc": 0.9241719930273097 }, { "epoch": 0.26350210970464133, "grad_norm": 0.6875, "learning_rate": 9.68278499475912e-07, "loss": 0.24891090393066406, "step": 1249, "token_acc": 0.9277293695540748 }, { "epoch": 0.26371308016877637, "grad_norm": 0.80078125, "learning_rate": 9.68219205944184e-07, "loss": 0.23200133442878723, "step": 1250, "token_acc": 0.9329558998808105 }, { "epoch": 0.2639240506329114, "grad_norm": 0.8359375, "learning_rate": 9.68159858867627e-07, "loss": 0.2909882664680481, "step": 1251, "token_acc": 0.9168533034714446 }, { "epoch": 0.2641350210970464, "grad_norm": 0.703125, "learning_rate": 9.681004582530279e-07, "loss": 0.2549628019332886, "step": 1252, "token_acc": 0.924122926298613 }, { "epoch": 0.26434599156118144, "grad_norm": 0.8984375, "learning_rate": 9.68041004107179e-07, "loss": 0.24394650757312775, "step": 1253, "token_acc": 0.9308855291576674 }, { "epoch": 0.2645569620253165, "grad_norm": 0.609375, "learning_rate": 9.6798149643688e-07, "loss": 0.2381609082221985, "step": 1254, "token_acc": 0.9395761741122566 }, { "epoch": 0.26476793248945146, "grad_norm": 0.8515625, "learning_rate": 9.67921935248936e-07, "loss": 0.2466859370470047, "step": 1255, "token_acc": 0.9292594822396147 }, { "epoch": 0.2649789029535865, "grad_norm": 0.73828125, "learning_rate": 9.67862320550158e-07, "loss": 0.2523610591888428, "step": 1256, "token_acc": 0.9285103958986044 }, { "epoch": 0.26518987341772154, "grad_norm": 0.75, "learning_rate": 9.67802652347364e-07, "loss": 0.25371021032333374, "step": 1257, "token_acc": 0.9257921067259589 }, { "epoch": 0.26540084388185653, "grad_norm": 0.84765625, "learning_rate": 9.67742930647377e-07, "loss": 0.26243138313293457, "step": 1258, "token_acc": 0.9317498496692724 }, { "epoch": 0.26561181434599157, "grad_norm": 0.72265625, "learning_rate": 9.67683155457027e-07, "loss": 0.26387542486190796, "step": 1259, "token_acc": 0.9278320023661638 }, { "epoch": 0.26582278481012656, "grad_norm": 0.71484375, "learning_rate": 9.6762332678315e-07, "loss": 0.2729257047176361, "step": 1260, "token_acc": 0.9249352890422778 }, { "epoch": 0.2660337552742616, "grad_norm": 0.79296875, "learning_rate": 9.675634446325873e-07, "loss": 0.3026050329208374, "step": 1261, "token_acc": 0.9185979142526072 }, { "epoch": 0.26624472573839664, "grad_norm": 0.71484375, "learning_rate": 9.675035090121875e-07, "loss": 0.2565396726131439, "step": 1262, "token_acc": 0.9267410310521556 }, { "epoch": 0.2664556962025316, "grad_norm": 0.67578125, "learning_rate": 9.674435199288045e-07, "loss": 0.25251418352127075, "step": 1263, "token_acc": 0.9283837056504599 }, { "epoch": 0.26666666666666666, "grad_norm": 0.63671875, "learning_rate": 9.673834773892984e-07, "loss": 0.24896693229675293, "step": 1264, "token_acc": 0.930945558739255 }, { "epoch": 0.2668776371308017, "grad_norm": 1.203125, "learning_rate": 9.673233814005359e-07, "loss": 0.23103883862495422, "step": 1265, "token_acc": 0.9342560553633218 }, { "epoch": 0.2670886075949367, "grad_norm": 1.0703125, "learning_rate": 9.672632319693894e-07, "loss": 0.24049502611160278, "step": 1266, "token_acc": 0.9309718437783833 }, { "epoch": 0.26729957805907173, "grad_norm": 0.72265625, "learning_rate": 9.672030291027374e-07, "loss": 0.26001042127609253, "step": 1267, "token_acc": 0.9262520638414969 }, { "epoch": 0.26751054852320677, "grad_norm": 0.72265625, "learning_rate": 9.671427728074644e-07, "loss": 0.28741368651390076, "step": 1268, "token_acc": 0.9232505643340858 }, { "epoch": 0.26772151898734176, "grad_norm": 0.84765625, "learning_rate": 9.670824630904617e-07, "loss": 0.27266865968704224, "step": 1269, "token_acc": 0.9303104077906269 }, { "epoch": 0.2679324894514768, "grad_norm": 0.6953125, "learning_rate": 9.67022099958626e-07, "loss": 0.23278406262397766, "step": 1270, "token_acc": 0.9318857822724569 }, { "epoch": 0.26814345991561184, "grad_norm": 0.6640625, "learning_rate": 9.669616834188604e-07, "loss": 0.24085257947444916, "step": 1271, "token_acc": 0.9365031597953656 }, { "epoch": 0.2683544303797468, "grad_norm": 0.7265625, "learning_rate": 9.66901213478074e-07, "loss": 0.23926323652267456, "step": 1272, "token_acc": 0.9300341296928327 }, { "epoch": 0.26856540084388186, "grad_norm": 0.69921875, "learning_rate": 9.668406901431816e-07, "loss": 0.2380378544330597, "step": 1273, "token_acc": 0.9354371742906775 }, { "epoch": 0.2687763713080169, "grad_norm": 0.8046875, "learning_rate": 9.667801134211054e-07, "loss": 0.2831774353981018, "step": 1274, "token_acc": 0.9198927933293627 }, { "epoch": 0.2689873417721519, "grad_norm": 0.7109375, "learning_rate": 9.667194833187722e-07, "loss": 0.25443994998931885, "step": 1275, "token_acc": 0.927960927960928 }, { "epoch": 0.26919831223628693, "grad_norm": 0.68359375, "learning_rate": 9.66658799843116e-07, "loss": 0.2516897916793823, "step": 1276, "token_acc": 0.9287622439893143 }, { "epoch": 0.2694092827004219, "grad_norm": 0.890625, "learning_rate": 9.665980630010762e-07, "loss": 0.2877979874610901, "step": 1277, "token_acc": 0.9202143495087823 }, { "epoch": 0.26962025316455696, "grad_norm": 0.74609375, "learning_rate": 9.665372727995985e-07, "loss": 0.2647358775138855, "step": 1278, "token_acc": 0.9289904291447978 }, { "epoch": 0.269831223628692, "grad_norm": 0.66015625, "learning_rate": 9.66476429245635e-07, "loss": 0.22759586572647095, "step": 1279, "token_acc": 0.9376068376068376 }, { "epoch": 0.270042194092827, "grad_norm": 0.65234375, "learning_rate": 9.664155323461436e-07, "loss": 0.25757282972335815, "step": 1280, "token_acc": 0.927784222737819 }, { "epoch": 0.270253164556962, "grad_norm": 0.6796875, "learning_rate": 9.663545821080884e-07, "loss": 0.26837342977523804, "step": 1281, "token_acc": 0.9224065223502952 }, { "epoch": 0.27046413502109706, "grad_norm": 0.6484375, "learning_rate": 9.662935785384395e-07, "loss": 0.28320473432540894, "step": 1282, "token_acc": 0.9173576561636263 }, { "epoch": 0.27067510548523205, "grad_norm": 0.859375, "learning_rate": 9.662325216441733e-07, "loss": 0.23279771208763123, "step": 1283, "token_acc": 0.9394752534287418 }, { "epoch": 0.2708860759493671, "grad_norm": 0.671875, "learning_rate": 9.66171411432272e-07, "loss": 0.2523917555809021, "step": 1284, "token_acc": 0.9287851585876721 }, { "epoch": 0.27109704641350213, "grad_norm": 0.69921875, "learning_rate": 9.661102479097241e-07, "loss": 0.2521704435348511, "step": 1285, "token_acc": 0.9344652963955913 }, { "epoch": 0.2713080168776371, "grad_norm": 1.0, "learning_rate": 9.660490310835243e-07, "loss": 0.22997595369815826, "step": 1286, "token_acc": 0.9331612162937464 }, { "epoch": 0.27151898734177216, "grad_norm": 0.734375, "learning_rate": 9.659877609606732e-07, "loss": 0.2617974579334259, "step": 1287, "token_acc": 0.9331476323119777 }, { "epoch": 0.2717299578059072, "grad_norm": 0.796875, "learning_rate": 9.659264375481777e-07, "loss": 0.2871522605419159, "step": 1288, "token_acc": 0.9222536984576645 }, { "epoch": 0.2719409282700422, "grad_norm": 0.828125, "learning_rate": 9.658650608530503e-07, "loss": 0.2694163918495178, "step": 1289, "token_acc": 0.9271523178807947 }, { "epoch": 0.2721518987341772, "grad_norm": 1.015625, "learning_rate": 9.658036308823101e-07, "loss": 0.2750623822212219, "step": 1290, "token_acc": 0.9309335674568335 }, { "epoch": 0.27236286919831226, "grad_norm": 1.0703125, "learning_rate": 9.657421476429823e-07, "loss": 0.24293242394924164, "step": 1291, "token_acc": 0.9321070234113712 }, { "epoch": 0.27257383966244725, "grad_norm": 0.734375, "learning_rate": 9.656806111420978e-07, "loss": 0.26937633752822876, "step": 1292, "token_acc": 0.9326610279765778 }, { "epoch": 0.2727848101265823, "grad_norm": 0.75, "learning_rate": 9.656190213866942e-07, "loss": 0.258544385433197, "step": 1293, "token_acc": 0.9324324324324325 }, { "epoch": 0.2729957805907173, "grad_norm": 0.80078125, "learning_rate": 9.655573783838142e-07, "loss": 0.30913591384887695, "step": 1294, "token_acc": 0.9182915506035283 }, { "epoch": 0.2732067510548523, "grad_norm": 0.70703125, "learning_rate": 9.654956821405076e-07, "loss": 0.2871444821357727, "step": 1295, "token_acc": 0.9226289517470881 }, { "epoch": 0.27341772151898736, "grad_norm": 0.71484375, "learning_rate": 9.6543393266383e-07, "loss": 0.2559030055999756, "step": 1296, "token_acc": 0.9310548025928108 }, { "epoch": 0.27362869198312234, "grad_norm": 0.875, "learning_rate": 9.653721299608425e-07, "loss": 0.2956152558326721, "step": 1297, "token_acc": 0.9208739297313256 }, { "epoch": 0.2738396624472574, "grad_norm": 0.71484375, "learning_rate": 9.653102740386134e-07, "loss": 0.21772214770317078, "step": 1298, "token_acc": 0.9364857302742026 }, { "epoch": 0.2740506329113924, "grad_norm": 0.734375, "learning_rate": 9.65248364904216e-07, "loss": 0.30199459195137024, "step": 1299, "token_acc": 0.9205334815226451 }, { "epoch": 0.2742616033755274, "grad_norm": 0.87109375, "learning_rate": 9.651864025647303e-07, "loss": 0.24779856204986572, "step": 1300, "token_acc": 0.9276583381754794 }, { "epoch": 0.27447257383966245, "grad_norm": 0.68359375, "learning_rate": 9.65124387027242e-07, "loss": 0.2366098165512085, "step": 1301, "token_acc": 0.931989247311828 }, { "epoch": 0.2746835443037975, "grad_norm": 0.609375, "learning_rate": 9.650623182988434e-07, "loss": 0.24503357708454132, "step": 1302, "token_acc": 0.9304397815464214 }, { "epoch": 0.2748945147679325, "grad_norm": 0.765625, "learning_rate": 9.650001963866324e-07, "loss": 0.24037089943885803, "step": 1303, "token_acc": 0.9337579617834395 }, { "epoch": 0.2751054852320675, "grad_norm": 1.21875, "learning_rate": 9.64938021297713e-07, "loss": 0.24094949662685394, "step": 1304, "token_acc": 0.9320071258907363 }, { "epoch": 0.27531645569620256, "grad_norm": 0.8125, "learning_rate": 9.64875793039196e-07, "loss": 0.26656514406204224, "step": 1305, "token_acc": 0.9331143624962676 }, { "epoch": 0.27552742616033754, "grad_norm": 0.71484375, "learning_rate": 9.64813511618197e-07, "loss": 0.27888917922973633, "step": 1306, "token_acc": 0.9265919811320755 }, { "epoch": 0.2757383966244726, "grad_norm": 0.89453125, "learning_rate": 9.64751177041839e-07, "loss": 0.2879449725151062, "step": 1307, "token_acc": 0.9250924784217016 }, { "epoch": 0.2759493670886076, "grad_norm": 0.71875, "learning_rate": 9.646887893172504e-07, "loss": 0.2646368443965912, "step": 1308, "token_acc": 0.9223654708520179 }, { "epoch": 0.2761603375527426, "grad_norm": 0.82421875, "learning_rate": 9.646263484515657e-07, "loss": 0.23699407279491425, "step": 1309, "token_acc": 0.9358803986710963 }, { "epoch": 0.27637130801687765, "grad_norm": 0.66015625, "learning_rate": 9.645638544519253e-07, "loss": 0.20495551824569702, "step": 1310, "token_acc": 0.9403642773207991 }, { "epoch": 0.27658227848101263, "grad_norm": 0.73828125, "learning_rate": 9.645013073254762e-07, "loss": 0.24341881275177002, "step": 1311, "token_acc": 0.9322884012539185 }, { "epoch": 0.2767932489451477, "grad_norm": 0.640625, "learning_rate": 9.64438707079371e-07, "loss": 0.23052801191806793, "step": 1312, "token_acc": 0.9312657166806371 }, { "epoch": 0.2770042194092827, "grad_norm": 0.7109375, "learning_rate": 9.643760537207688e-07, "loss": 0.3025025725364685, "step": 1313, "token_acc": 0.9218884120171674 }, { "epoch": 0.2772151898734177, "grad_norm": 0.640625, "learning_rate": 9.643133472568344e-07, "loss": 0.20893415808677673, "step": 1314, "token_acc": 0.9359681372549019 }, { "epoch": 0.27742616033755274, "grad_norm": 0.8046875, "learning_rate": 9.642505876947386e-07, "loss": 0.2443920075893402, "step": 1315, "token_acc": 0.928676254769592 }, { "epoch": 0.2776371308016878, "grad_norm": 1.234375, "learning_rate": 9.64187775041659e-07, "loss": 0.25716376304626465, "step": 1316, "token_acc": 0.9291851851851852 }, { "epoch": 0.27784810126582277, "grad_norm": 0.68359375, "learning_rate": 9.641249093047784e-07, "loss": 0.23076121509075165, "step": 1317, "token_acc": 0.9285714285714286 }, { "epoch": 0.2780590717299578, "grad_norm": 0.9375, "learning_rate": 9.640619904912859e-07, "loss": 0.2742810845375061, "step": 1318, "token_acc": 0.9297956493078444 }, { "epoch": 0.27827004219409285, "grad_norm": 0.609375, "learning_rate": 9.63999018608377e-07, "loss": 0.22001489996910095, "step": 1319, "token_acc": 0.936816192560175 }, { "epoch": 0.27848101265822783, "grad_norm": 0.65625, "learning_rate": 9.639359936632535e-07, "loss": 0.2529153525829315, "step": 1320, "token_acc": 0.9257350712337071 }, { "epoch": 0.2786919831223629, "grad_norm": 0.65625, "learning_rate": 9.63872915663122e-07, "loss": 0.23905983567237854, "step": 1321, "token_acc": 0.9326950971859588 }, { "epoch": 0.2789029535864979, "grad_norm": 0.63671875, "learning_rate": 9.638097846151965e-07, "loss": 0.2406499683856964, "step": 1322, "token_acc": 0.930352798053528 }, { "epoch": 0.2791139240506329, "grad_norm": 0.98046875, "learning_rate": 9.637466005266963e-07, "loss": 0.26858270168304443, "step": 1323, "token_acc": 0.9200705260064649 }, { "epoch": 0.27932489451476794, "grad_norm": 0.859375, "learning_rate": 9.636833634048475e-07, "loss": 0.2660675644874573, "step": 1324, "token_acc": 0.9298245614035088 }, { "epoch": 0.2795358649789029, "grad_norm": 0.77734375, "learning_rate": 9.636200732568814e-07, "loss": 0.26166456937789917, "step": 1325, "token_acc": 0.9308510638297872 }, { "epoch": 0.27974683544303797, "grad_norm": 0.8671875, "learning_rate": 9.63556730090036e-07, "loss": 0.2833145260810852, "step": 1326, "token_acc": 0.9278801123946301 }, { "epoch": 0.279957805907173, "grad_norm": 0.81640625, "learning_rate": 9.634933339115547e-07, "loss": 0.28151270747184753, "step": 1327, "token_acc": 0.9252669039145908 }, { "epoch": 0.280168776371308, "grad_norm": 0.76953125, "learning_rate": 9.63429884728688e-07, "loss": 0.2141590267419815, "step": 1328, "token_acc": 0.9334285714285714 }, { "epoch": 0.28037974683544303, "grad_norm": 0.79296875, "learning_rate": 9.633663825486914e-07, "loss": 0.2234039306640625, "step": 1329, "token_acc": 0.9323050556983719 }, { "epoch": 0.2805907172995781, "grad_norm": 0.7734375, "learning_rate": 9.633028273788272e-07, "loss": 0.2672632336616516, "step": 1330, "token_acc": 0.9254259080681453 }, { "epoch": 0.28080168776371306, "grad_norm": 0.69140625, "learning_rate": 9.63239219226363e-07, "loss": 0.2287733405828476, "step": 1331, "token_acc": 0.9398969662278192 }, { "epoch": 0.2810126582278481, "grad_norm": 1.34375, "learning_rate": 9.631755580985735e-07, "loss": 0.26213157176971436, "step": 1332, "token_acc": 0.9299184505606524 }, { "epoch": 0.28122362869198314, "grad_norm": 0.79296875, "learning_rate": 9.631118440027386e-07, "loss": 0.28404700756073, "step": 1333, "token_acc": 0.9141087017741482 }, { "epoch": 0.2814345991561181, "grad_norm": 0.7109375, "learning_rate": 9.630480769461447e-07, "loss": 0.239895761013031, "step": 1334, "token_acc": 0.9308807134894092 }, { "epoch": 0.28164556962025317, "grad_norm": 0.78125, "learning_rate": 9.629842569360838e-07, "loss": 0.26395344734191895, "step": 1335, "token_acc": 0.9277073732718893 }, { "epoch": 0.2818565400843882, "grad_norm": 0.609375, "learning_rate": 9.629203839798546e-07, "loss": 0.21334236860275269, "step": 1336, "token_acc": 0.936906584992343 }, { "epoch": 0.2820675105485232, "grad_norm": 0.6796875, "learning_rate": 9.628564580847613e-07, "loss": 0.23438285291194916, "step": 1337, "token_acc": 0.9287510477787091 }, { "epoch": 0.28227848101265823, "grad_norm": 0.74609375, "learning_rate": 9.627924792581143e-07, "loss": 0.26787251234054565, "step": 1338, "token_acc": 0.9322139303482587 }, { "epoch": 0.2824894514767933, "grad_norm": 0.62890625, "learning_rate": 9.627284475072303e-07, "loss": 0.25793296098709106, "step": 1339, "token_acc": 0.927893175074184 }, { "epoch": 0.28270042194092826, "grad_norm": 0.71484375, "learning_rate": 9.626643628394321e-07, "loss": 0.2458716779947281, "step": 1340, "token_acc": 0.9302045728038508 }, { "epoch": 0.2829113924050633, "grad_norm": 0.73828125, "learning_rate": 9.626002252620478e-07, "loss": 0.2576259970664978, "step": 1341, "token_acc": 0.9324796274738067 }, { "epoch": 0.2831223628691983, "grad_norm": 0.7421875, "learning_rate": 9.625360347824123e-07, "loss": 0.27822190523147583, "step": 1342, "token_acc": 0.9268867924528302 }, { "epoch": 0.2833333333333333, "grad_norm": 0.71875, "learning_rate": 9.624717914078666e-07, "loss": 0.2430208921432495, "step": 1343, "token_acc": 0.9362327358213341 }, { "epoch": 0.28354430379746837, "grad_norm": 0.9453125, "learning_rate": 9.62407495145757e-07, "loss": 0.2376978099346161, "step": 1344, "token_acc": 0.9352154531946508 }, { "epoch": 0.28375527426160335, "grad_norm": 0.69921875, "learning_rate": 9.623431460034365e-07, "loss": 0.28660112619400024, "step": 1345, "token_acc": 0.9231664726426076 }, { "epoch": 0.2839662447257384, "grad_norm": 1.078125, "learning_rate": 9.622787439882642e-07, "loss": 0.22470048069953918, "step": 1346, "token_acc": 0.9390454284071306 }, { "epoch": 0.28417721518987343, "grad_norm": 0.78125, "learning_rate": 9.622142891076049e-07, "loss": 0.3022032380104065, "step": 1347, "token_acc": 0.9156626506024096 }, { "epoch": 0.2843881856540084, "grad_norm": 0.890625, "learning_rate": 9.621497813688292e-07, "loss": 0.24632784724235535, "step": 1348, "token_acc": 0.9298975672215108 }, { "epoch": 0.28459915611814346, "grad_norm": 0.6328125, "learning_rate": 9.620852207793146e-07, "loss": 0.26282811164855957, "step": 1349, "token_acc": 0.9305875576036866 }, { "epoch": 0.2848101265822785, "grad_norm": 0.7109375, "learning_rate": 9.620206073464438e-07, "loss": 0.2368626892566681, "step": 1350, "token_acc": 0.9320417287630403 }, { "epoch": 0.2850210970464135, "grad_norm": 0.7421875, "learning_rate": 9.619559410776062e-07, "loss": 0.27300721406936646, "step": 1351, "token_acc": 0.9214989059080962 }, { "epoch": 0.2852320675105485, "grad_norm": 0.73046875, "learning_rate": 9.618912219801968e-07, "loss": 0.3053458333015442, "step": 1352, "token_acc": 0.9201213346814965 }, { "epoch": 0.28544303797468357, "grad_norm": 0.796875, "learning_rate": 9.618264500616164e-07, "loss": 0.2645854353904724, "step": 1353, "token_acc": 0.9302251823660006 }, { "epoch": 0.28565400843881855, "grad_norm": 0.6640625, "learning_rate": 9.61761625329273e-07, "loss": 0.27379700541496277, "step": 1354, "token_acc": 0.9288623404833016 }, { "epoch": 0.2858649789029536, "grad_norm": 0.6640625, "learning_rate": 9.61696747790579e-07, "loss": 0.2626631557941437, "step": 1355, "token_acc": 0.9282009282009283 }, { "epoch": 0.28607594936708863, "grad_norm": 0.69140625, "learning_rate": 9.616318174529544e-07, "loss": 0.2584306001663208, "step": 1356, "token_acc": 0.9292520935604967 }, { "epoch": 0.2862869198312236, "grad_norm": 0.55078125, "learning_rate": 9.615668343238243e-07, "loss": 0.25304126739501953, "step": 1357, "token_acc": 0.9346613545816733 }, { "epoch": 0.28649789029535866, "grad_norm": 1.171875, "learning_rate": 9.6150179841062e-07, "loss": 0.3054851293563843, "step": 1358, "token_acc": 0.9169445286450439 }, { "epoch": 0.28670886075949364, "grad_norm": 0.86328125, "learning_rate": 9.614367097207788e-07, "loss": 0.33616381883621216, "step": 1359, "token_acc": 0.9140366696723775 }, { "epoch": 0.2869198312236287, "grad_norm": 0.703125, "learning_rate": 9.613715682617442e-07, "loss": 0.2768439054489136, "step": 1360, "token_acc": 0.9238784370477569 }, { "epoch": 0.2871308016877637, "grad_norm": 0.6875, "learning_rate": 9.61306374040966e-07, "loss": 0.2657453417778015, "step": 1361, "token_acc": 0.9254338394793926 }, { "epoch": 0.2873417721518987, "grad_norm": 0.765625, "learning_rate": 9.612411270658994e-07, "loss": 0.30385810136795044, "step": 1362, "token_acc": 0.9217443249701314 }, { "epoch": 0.28755274261603375, "grad_norm": 0.83203125, "learning_rate": 9.611758273440058e-07, "loss": 0.2552698254585266, "step": 1363, "token_acc": 0.9243302954984811 }, { "epoch": 0.2877637130801688, "grad_norm": 0.68359375, "learning_rate": 9.611104748827533e-07, "loss": 0.290875107049942, "step": 1364, "token_acc": 0.9198113207547169 }, { "epoch": 0.2879746835443038, "grad_norm": 0.71484375, "learning_rate": 9.61045069689615e-07, "loss": 0.24223566055297852, "step": 1365, "token_acc": 0.931023419955085 }, { "epoch": 0.2881856540084388, "grad_norm": 0.66015625, "learning_rate": 9.609796117720708e-07, "loss": 0.2444673329591751, "step": 1366, "token_acc": 0.933668936926266 }, { "epoch": 0.28839662447257386, "grad_norm": 0.875, "learning_rate": 9.609141011376062e-07, "loss": 0.28656303882598877, "step": 1367, "token_acc": 0.9205431956082057 }, { "epoch": 0.28860759493670884, "grad_norm": 0.71484375, "learning_rate": 9.60848537793713e-07, "loss": 0.22629567980766296, "step": 1368, "token_acc": 0.9353916523727844 }, { "epoch": 0.2888185654008439, "grad_norm": 0.7265625, "learning_rate": 9.60782921747889e-07, "loss": 0.21632830798625946, "step": 1369, "token_acc": 0.9382108822625269 }, { "epoch": 0.2890295358649789, "grad_norm": 0.95703125, "learning_rate": 9.607172530076377e-07, "loss": 0.27289730310440063, "step": 1370, "token_acc": 0.9276901987662782 }, { "epoch": 0.2892405063291139, "grad_norm": 0.671875, "learning_rate": 9.606515315804691e-07, "loss": 0.23308590054512024, "step": 1371, "token_acc": 0.933515731874145 }, { "epoch": 0.28945147679324895, "grad_norm": 0.7578125, "learning_rate": 9.605857574738991e-07, "loss": 0.23849669098854065, "step": 1372, "token_acc": 0.9353348729792148 }, { "epoch": 0.289662447257384, "grad_norm": 1.3828125, "learning_rate": 9.605199306954492e-07, "loss": 0.24058642983436584, "step": 1373, "token_acc": 0.9371130661453242 }, { "epoch": 0.289873417721519, "grad_norm": 0.67578125, "learning_rate": 9.604540512526475e-07, "loss": 0.27291715145111084, "step": 1374, "token_acc": 0.92439293598234 }, { "epoch": 0.290084388185654, "grad_norm": 0.75390625, "learning_rate": 9.603881191530279e-07, "loss": 0.26681673526763916, "step": 1375, "token_acc": 0.9251798561151079 }, { "epoch": 0.290295358649789, "grad_norm": 0.6796875, "learning_rate": 9.6032213440413e-07, "loss": 0.3197612166404724, "step": 1376, "token_acc": 0.9159369527145359 }, { "epoch": 0.29050632911392404, "grad_norm": 0.70703125, "learning_rate": 9.602560970134998e-07, "loss": 0.2690644860267639, "step": 1377, "token_acc": 0.9309221244705116 }, { "epoch": 0.2907172995780591, "grad_norm": 0.65234375, "learning_rate": 9.601900069886896e-07, "loss": 0.2277158945798874, "step": 1378, "token_acc": 0.9329285920786354 }, { "epoch": 0.29092827004219407, "grad_norm": 0.75390625, "learning_rate": 9.601238643372568e-07, "loss": 0.2508387565612793, "step": 1379, "token_acc": 0.9271716101694916 }, { "epoch": 0.2911392405063291, "grad_norm": 0.78125, "learning_rate": 9.600576690667659e-07, "loss": 0.30543452501296997, "step": 1380, "token_acc": 0.9198903441973805 }, { "epoch": 0.29135021097046415, "grad_norm": 0.6796875, "learning_rate": 9.599914211847866e-07, "loss": 0.2164490967988968, "step": 1381, "token_acc": 0.9413218035824583 }, { "epoch": 0.29156118143459914, "grad_norm": 0.6953125, "learning_rate": 9.59925120698895e-07, "loss": 0.2733488082885742, "step": 1382, "token_acc": 0.9247685185185185 }, { "epoch": 0.2917721518987342, "grad_norm": 0.59375, "learning_rate": 9.59858767616673e-07, "loss": 0.24653790891170502, "step": 1383, "token_acc": 0.9360535931790499 }, { "epoch": 0.2919831223628692, "grad_norm": 0.77734375, "learning_rate": 9.597923619457085e-07, "loss": 0.297510027885437, "step": 1384, "token_acc": 0.9154051647373108 }, { "epoch": 0.2921940928270042, "grad_norm": 0.70703125, "learning_rate": 9.59725903693596e-07, "loss": 0.2266286015510559, "step": 1385, "token_acc": 0.9409148665819568 }, { "epoch": 0.29240506329113924, "grad_norm": 0.65234375, "learning_rate": 9.596593928679354e-07, "loss": 0.2754872441291809, "step": 1386, "token_acc": 0.927317523868186 }, { "epoch": 0.2926160337552743, "grad_norm": 0.703125, "learning_rate": 9.595928294763324e-07, "loss": 0.2459161877632141, "step": 1387, "token_acc": 0.9298745724059293 }, { "epoch": 0.29282700421940927, "grad_norm": 0.74609375, "learning_rate": 9.595262135263996e-07, "loss": 0.23659512400627136, "step": 1388, "token_acc": 0.9361702127659575 }, { "epoch": 0.2930379746835443, "grad_norm": 0.8125, "learning_rate": 9.594595450257549e-07, "loss": 0.3071826100349426, "step": 1389, "token_acc": 0.9145141451414515 }, { "epoch": 0.29324894514767935, "grad_norm": 0.73046875, "learning_rate": 9.593928239820225e-07, "loss": 0.23544950783252716, "step": 1390, "token_acc": 0.9367909238249594 }, { "epoch": 0.29345991561181434, "grad_norm": 0.828125, "learning_rate": 9.59326050402832e-07, "loss": 0.25213587284088135, "step": 1391, "token_acc": 0.931865516215412 }, { "epoch": 0.2936708860759494, "grad_norm": 0.80078125, "learning_rate": 9.5925922429582e-07, "loss": 0.304402619600296, "step": 1392, "token_acc": 0.9191438763376932 }, { "epoch": 0.29388185654008436, "grad_norm": 0.7109375, "learning_rate": 9.59192345668629e-07, "loss": 0.24235710501670837, "step": 1393, "token_acc": 0.931580519868539 }, { "epoch": 0.2940928270042194, "grad_norm": 0.56640625, "learning_rate": 9.591254145289066e-07, "loss": 0.26642921566963196, "step": 1394, "token_acc": 0.9295583852614467 }, { "epoch": 0.29430379746835444, "grad_norm": 0.6875, "learning_rate": 9.590584308843067e-07, "loss": 0.25638318061828613, "step": 1395, "token_acc": 0.9266789895255699 }, { "epoch": 0.29451476793248943, "grad_norm": 0.72265625, "learning_rate": 9.5899139474249e-07, "loss": 0.27632462978363037, "step": 1396, "token_acc": 0.9211767971135165 }, { "epoch": 0.29472573839662447, "grad_norm": 0.70703125, "learning_rate": 9.589243061111224e-07, "loss": 0.2598225772380829, "step": 1397, "token_acc": 0.9285505978419364 }, { "epoch": 0.2949367088607595, "grad_norm": 0.77734375, "learning_rate": 9.58857164997876e-07, "loss": 0.2595940828323364, "step": 1398, "token_acc": 0.9247278382581648 }, { "epoch": 0.2951476793248945, "grad_norm": 0.73828125, "learning_rate": 9.587899714104294e-07, "loss": 0.25946277379989624, "step": 1399, "token_acc": 0.9253688989784336 }, { "epoch": 0.29535864978902954, "grad_norm": 0.625, "learning_rate": 9.58722725356466e-07, "loss": 0.20068538188934326, "step": 1400, "token_acc": 0.93887460725507 }, { "epoch": 0.29535864978902954, "eval_loss": 0.43371880054473877, "eval_runtime": 246.0163, "eval_samples_per_second": 137.003, "eval_steps_per_second": 2.142, "eval_token_acc": 0.8990397559005654, "step": 1400 }, { "epoch": 0.2955696202531646, "grad_norm": 0.609375, "learning_rate": 9.586554268436765e-07, "loss": 0.1949131339788437, "step": 1401, "token_acc": 0.9440918706557873 }, { "epoch": 0.29578059071729956, "grad_norm": 0.671875, "learning_rate": 9.585880758797569e-07, "loss": 0.2542431950569153, "step": 1402, "token_acc": 0.9296116504854369 }, { "epoch": 0.2959915611814346, "grad_norm": 0.84375, "learning_rate": 9.585206724724094e-07, "loss": 0.2671182155609131, "step": 1403, "token_acc": 0.9345066358915176 }, { "epoch": 0.29620253164556964, "grad_norm": 1.46875, "learning_rate": 9.584532166293422e-07, "loss": 0.23155038058757782, "step": 1404, "token_acc": 0.9271665642286416 }, { "epoch": 0.29641350210970463, "grad_norm": 0.80859375, "learning_rate": 9.583857083582691e-07, "loss": 0.3051491379737854, "step": 1405, "token_acc": 0.9181929181929182 }, { "epoch": 0.29662447257383967, "grad_norm": 0.69921875, "learning_rate": 9.583181476669108e-07, "loss": 0.2135426104068756, "step": 1406, "token_acc": 0.9333539987600744 }, { "epoch": 0.2968354430379747, "grad_norm": 0.71875, "learning_rate": 9.58250534562993e-07, "loss": 0.24095328152179718, "step": 1407, "token_acc": 0.9283995186522263 }, { "epoch": 0.2970464135021097, "grad_norm": 1.125, "learning_rate": 9.58182869054248e-07, "loss": 0.2426932454109192, "step": 1408, "token_acc": 0.928467998841587 }, { "epoch": 0.29725738396624474, "grad_norm": 0.93359375, "learning_rate": 9.581151511484137e-07, "loss": 0.2751479148864746, "step": 1409, "token_acc": 0.9293640054127199 }, { "epoch": 0.2974683544303797, "grad_norm": 1.921875, "learning_rate": 9.580473808532348e-07, "loss": 0.2397601157426834, "step": 1410, "token_acc": 0.9367764915405165 }, { "epoch": 0.29767932489451476, "grad_norm": 0.90625, "learning_rate": 9.579795581764606e-07, "loss": 0.24342301487922668, "step": 1411, "token_acc": 0.9319277108433734 }, { "epoch": 0.2978902953586498, "grad_norm": 0.6640625, "learning_rate": 9.579116831258478e-07, "loss": 0.24738198518753052, "step": 1412, "token_acc": 0.9260881467287161 }, { "epoch": 0.2981012658227848, "grad_norm": 0.7890625, "learning_rate": 9.578437557091586e-07, "loss": 0.30160653591156006, "step": 1413, "token_acc": 0.9232409381663113 }, { "epoch": 0.29831223628691983, "grad_norm": 0.66015625, "learning_rate": 9.577757759341603e-07, "loss": 0.25977736711502075, "step": 1414, "token_acc": 0.9217270194986072 }, { "epoch": 0.29852320675105487, "grad_norm": 0.67578125, "learning_rate": 9.577077438086276e-07, "loss": 0.2611219882965088, "step": 1415, "token_acc": 0.9298686784017882 }, { "epoch": 0.29873417721518986, "grad_norm": 0.7890625, "learning_rate": 9.576396593403405e-07, "loss": 0.3144347369670868, "step": 1416, "token_acc": 0.9093191088798243 }, { "epoch": 0.2989451476793249, "grad_norm": 0.68359375, "learning_rate": 9.57571522537085e-07, "loss": 0.24973896145820618, "step": 1417, "token_acc": 0.9304597701149425 }, { "epoch": 0.29915611814345994, "grad_norm": 0.9296875, "learning_rate": 9.575033334066527e-07, "loss": 0.2714025378227234, "step": 1418, "token_acc": 0.9243498817966903 }, { "epoch": 0.2993670886075949, "grad_norm": 0.71875, "learning_rate": 9.574350919568421e-07, "loss": 0.24550315737724304, "step": 1419, "token_acc": 0.9313218390804597 }, { "epoch": 0.29957805907172996, "grad_norm": 0.71484375, "learning_rate": 9.573667981954573e-07, "loss": 0.24378708004951477, "step": 1420, "token_acc": 0.9386270167668459 }, { "epoch": 0.299789029535865, "grad_norm": 0.734375, "learning_rate": 9.572984521303076e-07, "loss": 0.25155583024024963, "step": 1421, "token_acc": 0.9310128566948886 }, { "epoch": 0.3, "grad_norm": 0.82421875, "learning_rate": 9.572300537692094e-07, "loss": 0.27014413475990295, "step": 1422, "token_acc": 0.9274395329441201 }, { "epoch": 0.30021097046413503, "grad_norm": 0.66015625, "learning_rate": 9.57161603119985e-07, "loss": 0.259027898311615, "step": 1423, "token_acc": 0.9269093895428415 }, { "epoch": 0.30042194092827, "grad_norm": 0.96484375, "learning_rate": 9.570931001904616e-07, "loss": 0.30663132667541504, "step": 1424, "token_acc": 0.921604938271605 }, { "epoch": 0.30063291139240506, "grad_norm": 0.7578125, "learning_rate": 9.570245449884733e-07, "loss": 0.2871406376361847, "step": 1425, "token_acc": 0.9261591299370349 }, { "epoch": 0.3008438818565401, "grad_norm": 0.6640625, "learning_rate": 9.5695593752186e-07, "loss": 0.26376545429229736, "step": 1426, "token_acc": 0.9314728682170542 }, { "epoch": 0.3010548523206751, "grad_norm": 0.734375, "learning_rate": 9.568872777984681e-07, "loss": 0.24171243607997894, "step": 1427, "token_acc": 0.9354838709677419 }, { "epoch": 0.3012658227848101, "grad_norm": 0.90234375, "learning_rate": 9.568185658261485e-07, "loss": 0.29318904876708984, "step": 1428, "token_acc": 0.9312581063553826 }, { "epoch": 0.30147679324894516, "grad_norm": 0.703125, "learning_rate": 9.567498016127595e-07, "loss": 0.21925503015518188, "step": 1429, "token_acc": 0.9372488408037094 }, { "epoch": 0.30168776371308015, "grad_norm": 0.796875, "learning_rate": 9.566809851661648e-07, "loss": 0.28585126996040344, "step": 1430, "token_acc": 0.9222972972972973 }, { "epoch": 0.3018987341772152, "grad_norm": 0.65625, "learning_rate": 9.56612116494234e-07, "loss": 0.22612613439559937, "step": 1431, "token_acc": 0.93677303906949 }, { "epoch": 0.30210970464135023, "grad_norm": 0.73828125, "learning_rate": 9.56543195604843e-07, "loss": 0.26892632246017456, "step": 1432, "token_acc": 0.9255504055619931 }, { "epoch": 0.3023206751054852, "grad_norm": 0.796875, "learning_rate": 9.564742225058734e-07, "loss": 0.27015364170074463, "step": 1433, "token_acc": 0.9248231132075472 }, { "epoch": 0.30253164556962026, "grad_norm": 0.7421875, "learning_rate": 9.564051972052132e-07, "loss": 0.24233023822307587, "step": 1434, "token_acc": 0.9338118022328549 }, { "epoch": 0.3027426160337553, "grad_norm": 0.76953125, "learning_rate": 9.563361197107554e-07, "loss": 0.2551451623439789, "step": 1435, "token_acc": 0.9307293256026854 }, { "epoch": 0.3029535864978903, "grad_norm": 0.71484375, "learning_rate": 9.562669900304002e-07, "loss": 0.2726331949234009, "step": 1436, "token_acc": 0.9276739197057922 }, { "epoch": 0.3031645569620253, "grad_norm": 0.703125, "learning_rate": 9.561978081720524e-07, "loss": 0.24130704998970032, "step": 1437, "token_acc": 0.9269317814419658 }, { "epoch": 0.30337552742616036, "grad_norm": 2.0, "learning_rate": 9.561285741436245e-07, "loss": 0.2952617108821869, "step": 1438, "token_acc": 0.9171339563862928 }, { "epoch": 0.30358649789029535, "grad_norm": 0.71484375, "learning_rate": 9.560592879530333e-07, "loss": 0.2580583691596985, "step": 1439, "token_acc": 0.9276932084309133 }, { "epoch": 0.3037974683544304, "grad_norm": 1.1640625, "learning_rate": 9.559899496082024e-07, "loss": 0.2909284234046936, "step": 1440, "token_acc": 0.9199145755472504 }, { "epoch": 0.3040084388185654, "grad_norm": 0.66015625, "learning_rate": 9.559205591170614e-07, "loss": 0.25040456652641296, "step": 1441, "token_acc": 0.9311377245508982 }, { "epoch": 0.3042194092827004, "grad_norm": 0.70703125, "learning_rate": 9.558511164875455e-07, "loss": 0.23942893743515015, "step": 1442, "token_acc": 0.93700356066831 }, { "epoch": 0.30443037974683546, "grad_norm": 0.69921875, "learning_rate": 9.557816217275962e-07, "loss": 0.24013203382492065, "step": 1443, "token_acc": 0.9334166927164739 }, { "epoch": 0.30464135021097044, "grad_norm": 0.7109375, "learning_rate": 9.557120748451608e-07, "loss": 0.2695329189300537, "step": 1444, "token_acc": 0.9281867145421903 }, { "epoch": 0.3048523206751055, "grad_norm": 0.73828125, "learning_rate": 9.556424758481926e-07, "loss": 0.28713494539260864, "step": 1445, "token_acc": 0.9243258915627718 }, { "epoch": 0.3050632911392405, "grad_norm": 0.65625, "learning_rate": 9.555728247446505e-07, "loss": 0.2532760500907898, "step": 1446, "token_acc": 0.9253039953676896 }, { "epoch": 0.3052742616033755, "grad_norm": 0.94140625, "learning_rate": 9.555031215425e-07, "loss": 0.25795722007751465, "step": 1447, "token_acc": 0.924613987284287 }, { "epoch": 0.30548523206751055, "grad_norm": 1.0390625, "learning_rate": 9.554333662497122e-07, "loss": 0.24081915616989136, "step": 1448, "token_acc": 0.925282098200671 }, { "epoch": 0.3056962025316456, "grad_norm": 0.6015625, "learning_rate": 9.553635588742644e-07, "loss": 0.2439633309841156, "step": 1449, "token_acc": 0.929632999696694 }, { "epoch": 0.3059071729957806, "grad_norm": 0.8515625, "learning_rate": 9.552936994241394e-07, "loss": 0.2797650694847107, "step": 1450, "token_acc": 0.9248277927523211 }, { "epoch": 0.3061181434599156, "grad_norm": 1.03125, "learning_rate": 9.552237879073262e-07, "loss": 0.23650625348091125, "step": 1451, "token_acc": 0.927787406123628 }, { "epoch": 0.30632911392405066, "grad_norm": 0.9921875, "learning_rate": 9.5515382433182e-07, "loss": 0.23584780097007751, "step": 1452, "token_acc": 0.9325115562403699 }, { "epoch": 0.30654008438818564, "grad_norm": 0.66796875, "learning_rate": 9.550838087056215e-07, "loss": 0.25513389706611633, "step": 1453, "token_acc": 0.9239160839160839 }, { "epoch": 0.3067510548523207, "grad_norm": 0.6953125, "learning_rate": 9.550137410367379e-07, "loss": 0.2748698592185974, "step": 1454, "token_acc": 0.9194828092859242 }, { "epoch": 0.3069620253164557, "grad_norm": 0.6640625, "learning_rate": 9.549436213331814e-07, "loss": 0.2806450426578522, "step": 1455, "token_acc": 0.9268160950580611 }, { "epoch": 0.3071729957805907, "grad_norm": 0.6171875, "learning_rate": 9.548734496029715e-07, "loss": 0.24432675540447235, "step": 1456, "token_acc": 0.9289112790372236 }, { "epoch": 0.30738396624472575, "grad_norm": 0.62109375, "learning_rate": 9.548032258541325e-07, "loss": 0.2699507772922516, "step": 1457, "token_acc": 0.9230322393261691 }, { "epoch": 0.30759493670886073, "grad_norm": 0.66015625, "learning_rate": 9.547329500946951e-07, "loss": 0.266490638256073, "step": 1458, "token_acc": 0.926647564469914 }, { "epoch": 0.3078059071729958, "grad_norm": 0.6875, "learning_rate": 9.546626223326964e-07, "loss": 0.2475104182958603, "step": 1459, "token_acc": 0.9266259032795998 }, { "epoch": 0.3080168776371308, "grad_norm": 0.87109375, "learning_rate": 9.545922425761782e-07, "loss": 0.27554529905319214, "step": 1460, "token_acc": 0.9233644859813084 }, { "epoch": 0.3082278481012658, "grad_norm": 0.7421875, "learning_rate": 9.545218108331895e-07, "loss": 0.20847618579864502, "step": 1461, "token_acc": 0.9386682242990654 }, { "epoch": 0.30843881856540084, "grad_norm": 0.72265625, "learning_rate": 9.54451327111785e-07, "loss": 0.2351740300655365, "step": 1462, "token_acc": 0.9326732673267327 }, { "epoch": 0.3086497890295359, "grad_norm": 0.65234375, "learning_rate": 9.543807914200244e-07, "loss": 0.26930472254753113, "step": 1463, "token_acc": 0.9294723294723295 }, { "epoch": 0.30886075949367087, "grad_norm": 3.5, "learning_rate": 9.543102037659746e-07, "loss": 0.26640209555625916, "step": 1464, "token_acc": 0.9225462527438069 }, { "epoch": 0.3090717299578059, "grad_norm": 0.69921875, "learning_rate": 9.542395641577079e-07, "loss": 0.2432868480682373, "step": 1465, "token_acc": 0.930205618302925 }, { "epoch": 0.30928270042194095, "grad_norm": 0.69140625, "learning_rate": 9.541688726033022e-07, "loss": 0.21304035186767578, "step": 1466, "token_acc": 0.9346386409444285 }, { "epoch": 0.30949367088607593, "grad_norm": 0.55859375, "learning_rate": 9.540981291108419e-07, "loss": 0.23767802119255066, "step": 1467, "token_acc": 0.9310522253666066 }, { "epoch": 0.309704641350211, "grad_norm": 0.6484375, "learning_rate": 9.54027333688417e-07, "loss": 0.24264025688171387, "step": 1468, "token_acc": 0.9302891110510673 }, { "epoch": 0.309915611814346, "grad_norm": 0.734375, "learning_rate": 9.539564863441239e-07, "loss": 0.24002739787101746, "step": 1469, "token_acc": 0.9333333333333333 }, { "epoch": 0.310126582278481, "grad_norm": 0.8125, "learning_rate": 9.53885587086064e-07, "loss": 0.2675400376319885, "step": 1470, "token_acc": 0.9192083062946139 }, { "epoch": 0.31033755274261604, "grad_norm": 0.62109375, "learning_rate": 9.538146359223457e-07, "loss": 0.23460690677165985, "step": 1471, "token_acc": 0.9331210191082803 }, { "epoch": 0.3105485232067511, "grad_norm": 0.72265625, "learning_rate": 9.537436328610829e-07, "loss": 0.26020288467407227, "step": 1472, "token_acc": 0.9312090530077427 }, { "epoch": 0.31075949367088607, "grad_norm": 0.65625, "learning_rate": 9.536725779103952e-07, "loss": 0.2224973738193512, "step": 1473, "token_acc": 0.9334952930458549 }, { "epoch": 0.3109704641350211, "grad_norm": 0.6484375, "learning_rate": 9.536014710784082e-07, "loss": 0.23821187019348145, "step": 1474, "token_acc": 0.9365303244005642 }, { "epoch": 0.3111814345991561, "grad_norm": 0.74609375, "learning_rate": 9.535303123732537e-07, "loss": 0.24408169090747833, "step": 1475, "token_acc": 0.9283845650752126 }, { "epoch": 0.31139240506329113, "grad_norm": 0.9921875, "learning_rate": 9.534591018030693e-07, "loss": 0.2836637794971466, "step": 1476, "token_acc": 0.9262971698113207 }, { "epoch": 0.3116033755274262, "grad_norm": 0.7578125, "learning_rate": 9.533878393759988e-07, "loss": 0.21933302283287048, "step": 1477, "token_acc": 0.9352976913730255 }, { "epoch": 0.31181434599156116, "grad_norm": 0.65234375, "learning_rate": 9.533165251001912e-07, "loss": 0.2876891493797302, "step": 1478, "token_acc": 0.9213543055185284 }, { "epoch": 0.3120253164556962, "grad_norm": 1.0390625, "learning_rate": 9.532451589838022e-07, "loss": 0.22867098450660706, "step": 1479, "token_acc": 0.9380315917375456 }, { "epoch": 0.31223628691983124, "grad_norm": 0.72265625, "learning_rate": 9.53173741034993e-07, "loss": 0.20845067501068115, "step": 1480, "token_acc": 0.9393183707398172 }, { "epoch": 0.3124472573839662, "grad_norm": 0.65234375, "learning_rate": 9.53102271261931e-07, "loss": 0.22937451303005219, "step": 1481, "token_acc": 0.9368231046931408 }, { "epoch": 0.31265822784810127, "grad_norm": 0.625, "learning_rate": 9.530307496727891e-07, "loss": 0.24746280908584595, "step": 1482, "token_acc": 0.9314791403286978 }, { "epoch": 0.3128691983122363, "grad_norm": 0.6796875, "learning_rate": 9.529591762757468e-07, "loss": 0.2423558235168457, "step": 1483, "token_acc": 0.9336126329358685 }, { "epoch": 0.3130801687763713, "grad_norm": 0.7890625, "learning_rate": 9.528875510789885e-07, "loss": 0.23221366107463837, "step": 1484, "token_acc": 0.9326707277973709 }, { "epoch": 0.31329113924050633, "grad_norm": 0.7109375, "learning_rate": 9.528158740907058e-07, "loss": 0.2552267014980316, "step": 1485, "token_acc": 0.9284696494727843 }, { "epoch": 0.3135021097046414, "grad_norm": 0.63671875, "learning_rate": 9.527441453190951e-07, "loss": 0.22586306929588318, "step": 1486, "token_acc": 0.9339469409853817 }, { "epoch": 0.31371308016877636, "grad_norm": 0.74609375, "learning_rate": 9.526723647723596e-07, "loss": 0.2447504699230194, "step": 1487, "token_acc": 0.927639751552795 }, { "epoch": 0.3139240506329114, "grad_norm": 0.66796875, "learning_rate": 9.526005324587076e-07, "loss": 0.27702945470809937, "step": 1488, "token_acc": 0.9251620506776664 }, { "epoch": 0.31413502109704644, "grad_norm": 0.94921875, "learning_rate": 9.525286483863542e-07, "loss": 0.29128116369247437, "step": 1489, "token_acc": 0.916892502258356 }, { "epoch": 0.3143459915611814, "grad_norm": 0.73046875, "learning_rate": 9.524567125635195e-07, "loss": 0.2566947042942047, "step": 1490, "token_acc": 0.9236556143772638 }, { "epoch": 0.31455696202531647, "grad_norm": 0.8671875, "learning_rate": 9.523847249984303e-07, "loss": 0.23654621839523315, "step": 1491, "token_acc": 0.9337433603578418 }, { "epoch": 0.31476793248945145, "grad_norm": 0.66015625, "learning_rate": 9.523126856993187e-07, "loss": 0.22157318890094757, "step": 1492, "token_acc": 0.9311002178649237 }, { "epoch": 0.3149789029535865, "grad_norm": 0.6875, "learning_rate": 9.52240594674423e-07, "loss": 0.24354051053524017, "step": 1493, "token_acc": 0.9276387377584331 }, { "epoch": 0.31518987341772153, "grad_norm": 0.75, "learning_rate": 9.521684519319878e-07, "loss": 0.2520773410797119, "step": 1494, "token_acc": 0.9277808522412839 }, { "epoch": 0.3154008438818565, "grad_norm": 0.69140625, "learning_rate": 9.520962574802628e-07, "loss": 0.2067497968673706, "step": 1495, "token_acc": 0.9425287356321839 }, { "epoch": 0.31561181434599156, "grad_norm": 0.66015625, "learning_rate": 9.520240113275046e-07, "loss": 0.24253101646900177, "step": 1496, "token_acc": 0.9343544857768052 }, { "epoch": 0.3158227848101266, "grad_norm": 0.71875, "learning_rate": 9.519517134819746e-07, "loss": 0.23171034455299377, "step": 1497, "token_acc": 0.934695244474213 }, { "epoch": 0.3160337552742616, "grad_norm": 0.65234375, "learning_rate": 9.518793639519408e-07, "loss": 0.20974554121494293, "step": 1498, "token_acc": 0.9291677888499865 }, { "epoch": 0.3162447257383966, "grad_norm": 0.82421875, "learning_rate": 9.518069627456771e-07, "loss": 0.26031211018562317, "step": 1499, "token_acc": 0.932449105490438 }, { "epoch": 0.31645569620253167, "grad_norm": 0.81640625, "learning_rate": 9.517345098714631e-07, "loss": 0.2840519845485687, "step": 1500, "token_acc": 0.9211150652431791 }, { "epoch": 0.31666666666666665, "grad_norm": 1.4609375, "learning_rate": 9.516620053375845e-07, "loss": 0.24605998396873474, "step": 1501, "token_acc": 0.9327284105131415 }, { "epoch": 0.3168776371308017, "grad_norm": 0.7578125, "learning_rate": 9.515894491523328e-07, "loss": 0.2669103443622589, "step": 1502, "token_acc": 0.9236850106415324 }, { "epoch": 0.31708860759493673, "grad_norm": 0.80078125, "learning_rate": 9.515168413240054e-07, "loss": 0.33311209082603455, "step": 1503, "token_acc": 0.9080691642651297 }, { "epoch": 0.3172995780590717, "grad_norm": 0.76953125, "learning_rate": 9.514441818609055e-07, "loss": 0.24630479514598846, "step": 1504, "token_acc": 0.9333128457283344 }, { "epoch": 0.31751054852320676, "grad_norm": 0.6875, "learning_rate": 9.513714707713424e-07, "loss": 0.2654748857021332, "step": 1505, "token_acc": 0.9272495213784301 }, { "epoch": 0.31772151898734174, "grad_norm": 0.671875, "learning_rate": 9.512987080636312e-07, "loss": 0.27504950761795044, "step": 1506, "token_acc": 0.9268217054263566 }, { "epoch": 0.3179324894514768, "grad_norm": 0.6484375, "learning_rate": 9.512258937460931e-07, "loss": 0.23799118399620056, "step": 1507, "token_acc": 0.9329538266919671 }, { "epoch": 0.3181434599156118, "grad_norm": 0.6640625, "learning_rate": 9.511530278270548e-07, "loss": 0.24022099375724792, "step": 1508, "token_acc": 0.9324768449357633 }, { "epoch": 0.3183544303797468, "grad_norm": 0.7734375, "learning_rate": 9.510801103148494e-07, "loss": 0.2613435983657837, "step": 1509, "token_acc": 0.9316982303632412 }, { "epoch": 0.31856540084388185, "grad_norm": 0.609375, "learning_rate": 9.510071412178153e-07, "loss": 0.24355687201023102, "step": 1510, "token_acc": 0.9279303780255643 }, { "epoch": 0.3187763713080169, "grad_norm": 0.76171875, "learning_rate": 9.509341205442973e-07, "loss": 0.2355407327413559, "step": 1511, "token_acc": 0.9361508057160232 }, { "epoch": 0.3189873417721519, "grad_norm": 0.65234375, "learning_rate": 9.508610483026461e-07, "loss": 0.2515805661678314, "step": 1512, "token_acc": 0.9299863387978142 }, { "epoch": 0.3191983122362869, "grad_norm": 0.8671875, "learning_rate": 9.507879245012178e-07, "loss": 0.2919672131538391, "step": 1513, "token_acc": 0.9186514073615837 }, { "epoch": 0.31940928270042196, "grad_norm": 0.62890625, "learning_rate": 9.507147491483749e-07, "loss": 0.249205082654953, "step": 1514, "token_acc": 0.9292244175759363 }, { "epoch": 0.31962025316455694, "grad_norm": 0.6796875, "learning_rate": 9.506415222524857e-07, "loss": 0.24349896609783173, "step": 1515, "token_acc": 0.9286709389802174 }, { "epoch": 0.319831223628692, "grad_norm": 0.65625, "learning_rate": 9.505682438219242e-07, "loss": 0.2532562017440796, "step": 1516, "token_acc": 0.9333507579717721 }, { "epoch": 0.320042194092827, "grad_norm": 0.703125, "learning_rate": 9.504949138650705e-07, "loss": 0.2774209976196289, "step": 1517, "token_acc": 0.9235361000568505 }, { "epoch": 0.320253164556962, "grad_norm": 0.703125, "learning_rate": 9.504215323903105e-07, "loss": 0.2720155119895935, "step": 1518, "token_acc": 0.9254856480139171 }, { "epoch": 0.32046413502109705, "grad_norm": 0.67578125, "learning_rate": 9.503480994060357e-07, "loss": 0.2988170087337494, "step": 1519, "token_acc": 0.9205357142857142 }, { "epoch": 0.3206751054852321, "grad_norm": 0.89453125, "learning_rate": 9.502746149206442e-07, "loss": 0.21745893359184265, "step": 1520, "token_acc": 0.933983286908078 }, { "epoch": 0.3208860759493671, "grad_norm": 0.67578125, "learning_rate": 9.502010789425393e-07, "loss": 0.24026605486869812, "step": 1521, "token_acc": 0.9371310507674144 }, { "epoch": 0.3210970464135021, "grad_norm": 0.84765625, "learning_rate": 9.501274914801306e-07, "loss": 0.3082854747772217, "step": 1522, "token_acc": 0.9195114312558722 }, { "epoch": 0.3213080168776371, "grad_norm": 0.74609375, "learning_rate": 9.500538525418333e-07, "loss": 0.26113343238830566, "step": 1523, "token_acc": 0.9287790697674418 }, { "epoch": 0.32151898734177214, "grad_norm": 0.625, "learning_rate": 9.49980162136069e-07, "loss": 0.23083993792533875, "step": 1524, "token_acc": 0.9333127508490274 }, { "epoch": 0.3217299578059072, "grad_norm": 1.0546875, "learning_rate": 9.499064202712643e-07, "loss": 0.26131874322891235, "step": 1525, "token_acc": 0.9319688671086769 }, { "epoch": 0.32194092827004217, "grad_norm": 0.6328125, "learning_rate": 9.498326269558525e-07, "loss": 0.23430953919887543, "step": 1526, "token_acc": 0.9324556382369776 }, { "epoch": 0.3221518987341772, "grad_norm": 0.76953125, "learning_rate": 9.497587821982727e-07, "loss": 0.2851739525794983, "step": 1527, "token_acc": 0.9238906846899795 }, { "epoch": 0.32236286919831225, "grad_norm": 0.69921875, "learning_rate": 9.496848860069691e-07, "loss": 0.22896860539913177, "step": 1528, "token_acc": 0.9341647331786543 }, { "epoch": 0.32257383966244724, "grad_norm": 0.86328125, "learning_rate": 9.496109383903929e-07, "loss": 0.2737294137477875, "step": 1529, "token_acc": 0.9315191387559809 }, { "epoch": 0.3227848101265823, "grad_norm": 0.62890625, "learning_rate": 9.495369393570003e-07, "loss": 0.2521006464958191, "step": 1530, "token_acc": 0.9293909973521624 }, { "epoch": 0.3229957805907173, "grad_norm": 0.671875, "learning_rate": 9.494628889152539e-07, "loss": 0.23891186714172363, "step": 1531, "token_acc": 0.9335803876852907 }, { "epoch": 0.3232067510548523, "grad_norm": 0.6796875, "learning_rate": 9.493887870736218e-07, "loss": 0.23902195692062378, "step": 1532, "token_acc": 0.9280039721946375 }, { "epoch": 0.32341772151898734, "grad_norm": 0.78125, "learning_rate": 9.493146338405784e-07, "loss": 0.29924818873405457, "step": 1533, "token_acc": 0.9268981089472199 }, { "epoch": 0.3236286919831224, "grad_norm": 0.78515625, "learning_rate": 9.492404292246037e-07, "loss": 0.230657160282135, "step": 1534, "token_acc": 0.9317676927959379 }, { "epoch": 0.32383966244725737, "grad_norm": 0.78515625, "learning_rate": 9.491661732341836e-07, "loss": 0.28479820489883423, "step": 1535, "token_acc": 0.924031007751938 }, { "epoch": 0.3240506329113924, "grad_norm": 0.7109375, "learning_rate": 9.490918658778098e-07, "loss": 0.25515246391296387, "step": 1536, "token_acc": 0.9273451870018393 }, { "epoch": 0.32426160337552745, "grad_norm": 0.73828125, "learning_rate": 9.4901750716398e-07, "loss": 0.2776564359664917, "step": 1537, "token_acc": 0.9246202350243623 }, { "epoch": 0.32447257383966244, "grad_norm": 0.6484375, "learning_rate": 9.489430971011978e-07, "loss": 0.23764348030090332, "step": 1538, "token_acc": 0.9334790755777639 }, { "epoch": 0.3246835443037975, "grad_norm": 1.203125, "learning_rate": 9.488686356979727e-07, "loss": 0.2838374972343445, "step": 1539, "token_acc": 0.9257334963325183 }, { "epoch": 0.32489451476793246, "grad_norm": 0.90625, "learning_rate": 9.487941229628199e-07, "loss": 0.3395375907421112, "step": 1540, "token_acc": 0.9116038882138517 }, { "epoch": 0.3251054852320675, "grad_norm": 0.796875, "learning_rate": 9.487195589042606e-07, "loss": 0.2461426556110382, "step": 1541, "token_acc": 0.9316965690903368 }, { "epoch": 0.32531645569620254, "grad_norm": 0.62109375, "learning_rate": 9.486449435308218e-07, "loss": 0.22606943547725677, "step": 1542, "token_acc": 0.9363494539781592 }, { "epoch": 0.32552742616033753, "grad_norm": 0.62890625, "learning_rate": 9.485702768510364e-07, "loss": 0.2177889347076416, "step": 1543, "token_acc": 0.9346268656716418 }, { "epoch": 0.32573839662447257, "grad_norm": 0.82421875, "learning_rate": 9.484955588734431e-07, "loss": 0.2693382501602173, "step": 1544, "token_acc": 0.9304897314375987 }, { "epoch": 0.3259493670886076, "grad_norm": 0.578125, "learning_rate": 9.484207896065868e-07, "loss": 0.24160149693489075, "step": 1545, "token_acc": 0.936786410470621 }, { "epoch": 0.3261603375527426, "grad_norm": 0.68359375, "learning_rate": 9.483459690590176e-07, "loss": 0.2740339934825897, "step": 1546, "token_acc": 0.9267431597528685 }, { "epoch": 0.32637130801687764, "grad_norm": 1.21875, "learning_rate": 9.482710972392922e-07, "loss": 0.22905594110488892, "step": 1547, "token_acc": 0.9407459044963402 }, { "epoch": 0.3265822784810127, "grad_norm": 0.75, "learning_rate": 9.481961741559725e-07, "loss": 0.27816396951675415, "step": 1548, "token_acc": 0.9170243204577968 }, { "epoch": 0.32679324894514766, "grad_norm": 0.765625, "learning_rate": 9.48121199817627e-07, "loss": 0.2953186631202698, "step": 1549, "token_acc": 0.9229046705054382 }, { "epoch": 0.3270042194092827, "grad_norm": 0.640625, "learning_rate": 9.480461742328294e-07, "loss": 0.21503537893295288, "step": 1550, "token_acc": 0.9387460271597804 }, { "epoch": 0.32721518987341774, "grad_norm": 0.703125, "learning_rate": 9.479710974101594e-07, "loss": 0.2728448808193207, "step": 1551, "token_acc": 0.9233208417411358 }, { "epoch": 0.32742616033755273, "grad_norm": 0.65234375, "learning_rate": 9.47895969358203e-07, "loss": 0.25554656982421875, "step": 1552, "token_acc": 0.9293216009083167 }, { "epoch": 0.32763713080168777, "grad_norm": 0.75, "learning_rate": 9.478207900855515e-07, "loss": 0.23449862003326416, "step": 1553, "token_acc": 0.939615736505032 }, { "epoch": 0.3278481012658228, "grad_norm": 0.75390625, "learning_rate": 9.477455596008022e-07, "loss": 0.28548648953437805, "step": 1554, "token_acc": 0.9239766081871345 }, { "epoch": 0.3280590717299578, "grad_norm": 0.7421875, "learning_rate": 9.476702779125585e-07, "loss": 0.27637791633605957, "step": 1555, "token_acc": 0.9250645994832042 }, { "epoch": 0.32827004219409284, "grad_norm": 0.69921875, "learning_rate": 9.475949450294297e-07, "loss": 0.2447734922170639, "step": 1556, "token_acc": 0.9353186039065001 }, { "epoch": 0.3284810126582278, "grad_norm": 0.66796875, "learning_rate": 9.475195609600303e-07, "loss": 0.27998554706573486, "step": 1557, "token_acc": 0.9227260531258384 }, { "epoch": 0.32869198312236286, "grad_norm": 0.6015625, "learning_rate": 9.474441257129813e-07, "loss": 0.2545619606971741, "step": 1558, "token_acc": 0.9234523503724634 }, { "epoch": 0.3289029535864979, "grad_norm": 0.76953125, "learning_rate": 9.473686392969096e-07, "loss": 0.25500959157943726, "step": 1559, "token_acc": 0.9239380022962113 }, { "epoch": 0.3291139240506329, "grad_norm": 0.62890625, "learning_rate": 9.472931017204473e-07, "loss": 0.22655850648880005, "step": 1560, "token_acc": 0.9381243063263041 }, { "epoch": 0.32932489451476793, "grad_norm": 0.87109375, "learning_rate": 9.47217512992233e-07, "loss": 0.30268189311027527, "step": 1561, "token_acc": 0.9165935030728709 }, { "epoch": 0.32953586497890297, "grad_norm": 0.70703125, "learning_rate": 9.471418731209108e-07, "loss": 0.2656475007534027, "step": 1562, "token_acc": 0.9258801729462631 }, { "epoch": 0.32974683544303796, "grad_norm": 0.734375, "learning_rate": 9.47066182115131e-07, "loss": 0.2406896948814392, "step": 1563, "token_acc": 0.9350610664283586 }, { "epoch": 0.329957805907173, "grad_norm": 0.68359375, "learning_rate": 9.469904399835493e-07, "loss": 0.2999385595321655, "step": 1564, "token_acc": 0.9215285880980163 }, { "epoch": 0.33016877637130804, "grad_norm": 0.76953125, "learning_rate": 9.469146467348274e-07, "loss": 0.2671598792076111, "step": 1565, "token_acc": 0.9232323232323232 }, { "epoch": 0.330379746835443, "grad_norm": 0.77734375, "learning_rate": 9.46838802377633e-07, "loss": 0.27922165393829346, "step": 1566, "token_acc": 0.9190307328605201 }, { "epoch": 0.33059071729957806, "grad_norm": 0.625, "learning_rate": 9.467629069206397e-07, "loss": 0.24752941727638245, "step": 1567, "token_acc": 0.9327052489905787 }, { "epoch": 0.3308016877637131, "grad_norm": 0.90234375, "learning_rate": 9.466869603725265e-07, "loss": 0.2795766294002533, "step": 1568, "token_acc": 0.9258549057206775 }, { "epoch": 0.3310126582278481, "grad_norm": 0.66015625, "learning_rate": 9.466109627419788e-07, "loss": 0.2570780813694, "step": 1569, "token_acc": 0.9308101714961561 }, { "epoch": 0.33122362869198313, "grad_norm": 0.765625, "learning_rate": 9.465349140376871e-07, "loss": 0.2272178828716278, "step": 1570, "token_acc": 0.9376088218224028 }, { "epoch": 0.33143459915611817, "grad_norm": 0.65625, "learning_rate": 9.464588142683488e-07, "loss": 0.2625764012336731, "step": 1571, "token_acc": 0.9222898230088495 }, { "epoch": 0.33164556962025316, "grad_norm": 1.046875, "learning_rate": 9.463826634426661e-07, "loss": 0.2913757562637329, "step": 1572, "token_acc": 0.9228368794326242 }, { "epoch": 0.3318565400843882, "grad_norm": 1.015625, "learning_rate": 9.463064615693479e-07, "loss": 0.2961352467536926, "step": 1573, "token_acc": 0.9242700729927007 }, { "epoch": 0.3320675105485232, "grad_norm": 0.9453125, "learning_rate": 9.462302086571081e-07, "loss": 0.2536572813987732, "step": 1574, "token_acc": 0.9272577545978589 }, { "epoch": 0.3322784810126582, "grad_norm": 0.91796875, "learning_rate": 9.461539047146672e-07, "loss": 0.2897554039955139, "step": 1575, "token_acc": 0.9191452497834248 }, { "epoch": 0.33248945147679326, "grad_norm": 0.78515625, "learning_rate": 9.460775497507512e-07, "loss": 0.2721807062625885, "step": 1576, "token_acc": 0.9249371859296482 }, { "epoch": 0.33270042194092825, "grad_norm": 0.7265625, "learning_rate": 9.460011437740916e-07, "loss": 0.2651718556880951, "step": 1577, "token_acc": 0.9248257047590179 }, { "epoch": 0.3329113924050633, "grad_norm": 0.6328125, "learning_rate": 9.459246867934263e-07, "loss": 0.2130395919084549, "step": 1578, "token_acc": 0.9348395546823838 }, { "epoch": 0.33312236286919833, "grad_norm": 0.7109375, "learning_rate": 9.45848178817499e-07, "loss": 0.25840145349502563, "step": 1579, "token_acc": 0.9321983273596177 }, { "epoch": 0.3333333333333333, "grad_norm": 0.5703125, "learning_rate": 9.457716198550586e-07, "loss": 0.24820664525032043, "step": 1580, "token_acc": 0.9322164948453608 }, { "epoch": 0.33354430379746836, "grad_norm": 0.6328125, "learning_rate": 9.456950099148606e-07, "loss": 0.23400051891803741, "step": 1581, "token_acc": 0.9332925336597307 }, { "epoch": 0.3337552742616034, "grad_norm": 0.80859375, "learning_rate": 9.456183490056659e-07, "loss": 0.27070870995521545, "step": 1582, "token_acc": 0.9294269990288119 }, { "epoch": 0.3339662447257384, "grad_norm": 0.66015625, "learning_rate": 9.455416371362413e-07, "loss": 0.26411256194114685, "step": 1583, "token_acc": 0.9285919128190422 }, { "epoch": 0.3341772151898734, "grad_norm": 1.1171875, "learning_rate": 9.454648743153593e-07, "loss": 0.3222864866256714, "step": 1584, "token_acc": 0.9186418962203715 }, { "epoch": 0.33438818565400846, "grad_norm": 0.64453125, "learning_rate": 9.453880605517986e-07, "loss": 0.25822392106056213, "step": 1585, "token_acc": 0.9284223083805547 }, { "epoch": 0.33459915611814345, "grad_norm": 0.9453125, "learning_rate": 9.453111958543436e-07, "loss": 0.26547372341156006, "step": 1586, "token_acc": 0.9233236151603499 }, { "epoch": 0.3348101265822785, "grad_norm": 0.82421875, "learning_rate": 9.452342802317841e-07, "loss": 0.2866516411304474, "step": 1587, "token_acc": 0.9244858611825193 }, { "epoch": 0.33502109704641353, "grad_norm": 0.6015625, "learning_rate": 9.451573136929163e-07, "loss": 0.25004857778549194, "step": 1588, "token_acc": 0.9289326590364653 }, { "epoch": 0.3352320675105485, "grad_norm": 0.7109375, "learning_rate": 9.450802962465418e-07, "loss": 0.2522854804992676, "step": 1589, "token_acc": 0.9273840769903762 }, { "epoch": 0.33544303797468356, "grad_norm": 0.78125, "learning_rate": 9.450032279014686e-07, "loss": 0.320743203163147, "step": 1590, "token_acc": 0.9178940770918207 }, { "epoch": 0.33565400843881854, "grad_norm": 0.7890625, "learning_rate": 9.449261086665095e-07, "loss": 0.2889062762260437, "step": 1591, "token_acc": 0.930829420970266 }, { "epoch": 0.3358649789029536, "grad_norm": 0.85546875, "learning_rate": 9.448489385504842e-07, "loss": 0.2749912738800049, "step": 1592, "token_acc": 0.9262124367747694 }, { "epoch": 0.3360759493670886, "grad_norm": 0.76171875, "learning_rate": 9.447717175622175e-07, "loss": 0.3473696708679199, "step": 1593, "token_acc": 0.9091406677613574 }, { "epoch": 0.3362869198312236, "grad_norm": 0.65625, "learning_rate": 9.446944457105405e-07, "loss": 0.26022714376449585, "step": 1594, "token_acc": 0.9322671683913453 }, { "epoch": 0.33649789029535865, "grad_norm": 0.75, "learning_rate": 9.446171230042897e-07, "loss": 0.22268863022327423, "step": 1595, "token_acc": 0.9359122401847575 }, { "epoch": 0.3367088607594937, "grad_norm": 0.8203125, "learning_rate": 9.445397494523077e-07, "loss": 0.30218058824539185, "step": 1596, "token_acc": 0.9221233312142403 }, { "epoch": 0.3369198312236287, "grad_norm": 0.7890625, "learning_rate": 9.444623250634427e-07, "loss": 0.2494884729385376, "step": 1597, "token_acc": 0.930500917912405 }, { "epoch": 0.3371308016877637, "grad_norm": 0.71875, "learning_rate": 9.44384849846549e-07, "loss": 0.2274206578731537, "step": 1598, "token_acc": 0.9355882352941176 }, { "epoch": 0.33734177215189876, "grad_norm": 0.59765625, "learning_rate": 9.443073238104865e-07, "loss": 0.26012539863586426, "step": 1599, "token_acc": 0.930809804529941 }, { "epoch": 0.33755274261603374, "grad_norm": 0.73046875, "learning_rate": 9.44229746964121e-07, "loss": 0.23634777963161469, "step": 1600, "token_acc": 0.9323394495412844 }, { "epoch": 0.33755274261603374, "eval_loss": 0.4337185323238373, "eval_runtime": 245.7809, "eval_samples_per_second": 137.134, "eval_steps_per_second": 2.144, "eval_token_acc": 0.8991530557300548, "step": 1600 }, { "epoch": 0.3377637130801688, "grad_norm": 0.67578125, "learning_rate": 9.441521193163238e-07, "loss": 0.23944209516048431, "step": 1601, "token_acc": 0.927382319173364 }, { "epoch": 0.3379746835443038, "grad_norm": 0.80078125, "learning_rate": 9.440744408759727e-07, "loss": 0.24298495054244995, "step": 1602, "token_acc": 0.9336686576548255 }, { "epoch": 0.3381856540084388, "grad_norm": 0.76953125, "learning_rate": 9.439967116519505e-07, "loss": 0.2587997019290924, "step": 1603, "token_acc": 0.9240464344941957 }, { "epoch": 0.33839662447257385, "grad_norm": 0.71875, "learning_rate": 9.439189316531464e-07, "loss": 0.2167397290468216, "step": 1604, "token_acc": 0.9390922401171303 }, { "epoch": 0.33860759493670883, "grad_norm": 0.6640625, "learning_rate": 9.438411008884553e-07, "loss": 0.2602764368057251, "step": 1605, "token_acc": 0.9219173952366596 }, { "epoch": 0.3388185654008439, "grad_norm": 0.64453125, "learning_rate": 9.437632193667775e-07, "loss": 0.23749463260173798, "step": 1606, "token_acc": 0.934610705596107 }, { "epoch": 0.3390295358649789, "grad_norm": 0.71875, "learning_rate": 9.436852870970196e-07, "loss": 0.2818450331687927, "step": 1607, "token_acc": 0.9215006305170239 }, { "epoch": 0.3392405063291139, "grad_norm": 0.71484375, "learning_rate": 9.436073040880939e-07, "loss": 0.2736986577510834, "step": 1608, "token_acc": 0.9178852643419573 }, { "epoch": 0.33945147679324894, "grad_norm": 0.609375, "learning_rate": 9.435292703489184e-07, "loss": 0.24205069243907928, "step": 1609, "token_acc": 0.9281653746770026 }, { "epoch": 0.339662447257384, "grad_norm": 0.96875, "learning_rate": 9.434511858884167e-07, "loss": 0.23683682084083557, "step": 1610, "token_acc": 0.9266888821569221 }, { "epoch": 0.33987341772151897, "grad_norm": 0.70703125, "learning_rate": 9.433730507155184e-07, "loss": 0.24786365032196045, "step": 1611, "token_acc": 0.9293495175848117 }, { "epoch": 0.340084388185654, "grad_norm": 0.72265625, "learning_rate": 9.432948648391593e-07, "loss": 0.2611381411552429, "step": 1612, "token_acc": 0.9250978200111795 }, { "epoch": 0.34029535864978905, "grad_norm": 0.6953125, "learning_rate": 9.432166282682803e-07, "loss": 0.2261401265859604, "step": 1613, "token_acc": 0.9268707482993197 }, { "epoch": 0.34050632911392403, "grad_norm": 0.69140625, "learning_rate": 9.431383410118286e-07, "loss": 0.2952437996864319, "step": 1614, "token_acc": 0.9218795888399413 }, { "epoch": 0.3407172995780591, "grad_norm": 0.73828125, "learning_rate": 9.430600030787568e-07, "loss": 0.2706619203090668, "step": 1615, "token_acc": 0.9214936783299029 }, { "epoch": 0.3409282700421941, "grad_norm": 0.75390625, "learning_rate": 9.429816144780236e-07, "loss": 0.28954198956489563, "step": 1616, "token_acc": 0.9216836734693877 }, { "epoch": 0.3411392405063291, "grad_norm": 0.6171875, "learning_rate": 9.429031752185936e-07, "loss": 0.23225362598896027, "step": 1617, "token_acc": 0.9354280772556933 }, { "epoch": 0.34135021097046414, "grad_norm": 1.2109375, "learning_rate": 9.428246853094366e-07, "loss": 0.2903389632701874, "step": 1618, "token_acc": 0.9204334365325078 }, { "epoch": 0.3415611814345992, "grad_norm": 0.6796875, "learning_rate": 9.427461447595288e-07, "loss": 0.2608135938644409, "step": 1619, "token_acc": 0.9234987661091308 }, { "epoch": 0.34177215189873417, "grad_norm": 0.91796875, "learning_rate": 9.426675535778522e-07, "loss": 0.3057047724723816, "step": 1620, "token_acc": 0.9140136864028563 }, { "epoch": 0.3419831223628692, "grad_norm": 0.62109375, "learning_rate": 9.425889117733939e-07, "loss": 0.2640751600265503, "step": 1621, "token_acc": 0.9277286135693216 }, { "epoch": 0.3421940928270042, "grad_norm": 0.62109375, "learning_rate": 9.425102193551477e-07, "loss": 0.28919440507888794, "step": 1622, "token_acc": 0.9247822644497229 }, { "epoch": 0.34240506329113923, "grad_norm": 0.7734375, "learning_rate": 9.424314763321124e-07, "loss": 0.2618682384490967, "step": 1623, "token_acc": 0.9260844748858448 }, { "epoch": 0.3426160337552743, "grad_norm": 0.7109375, "learning_rate": 9.423526827132931e-07, "loss": 0.2545069754123688, "step": 1624, "token_acc": 0.9270595897101921 }, { "epoch": 0.34282700421940926, "grad_norm": 0.69140625, "learning_rate": 9.422738385077005e-07, "loss": 0.27819639444351196, "step": 1625, "token_acc": 0.9325095057034221 }, { "epoch": 0.3430379746835443, "grad_norm": 0.7421875, "learning_rate": 9.421949437243511e-07, "loss": 0.2696912884712219, "step": 1626, "token_acc": 0.9227120535714286 }, { "epoch": 0.34324894514767934, "grad_norm": 0.67578125, "learning_rate": 9.421159983722671e-07, "loss": 0.23221710324287415, "step": 1627, "token_acc": 0.9322323462414579 }, { "epoch": 0.3434599156118143, "grad_norm": 0.70703125, "learning_rate": 9.420370024604767e-07, "loss": 0.21929380297660828, "step": 1628, "token_acc": 0.9286370597243492 }, { "epoch": 0.34367088607594937, "grad_norm": 0.81640625, "learning_rate": 9.419579559980136e-07, "loss": 0.2439175844192505, "step": 1629, "token_acc": 0.9296130117779025 }, { "epoch": 0.3438818565400844, "grad_norm": 0.80859375, "learning_rate": 9.418788589939177e-07, "loss": 0.23102378845214844, "step": 1630, "token_acc": 0.9315107913669065 }, { "epoch": 0.3440928270042194, "grad_norm": 0.703125, "learning_rate": 9.417997114572342e-07, "loss": 0.2606005072593689, "step": 1631, "token_acc": 0.9257028112449799 }, { "epoch": 0.34430379746835443, "grad_norm": 0.6328125, "learning_rate": 9.417205133970143e-07, "loss": 0.24455326795578003, "step": 1632, "token_acc": 0.9317640835757736 }, { "epoch": 0.3445147679324895, "grad_norm": 0.73828125, "learning_rate": 9.41641264822315e-07, "loss": 0.2429400384426117, "step": 1633, "token_acc": 0.9239130434782609 }, { "epoch": 0.34472573839662446, "grad_norm": 1.34375, "learning_rate": 9.415619657421991e-07, "loss": 0.2420717179775238, "step": 1634, "token_acc": 0.9310043668122271 }, { "epoch": 0.3449367088607595, "grad_norm": 0.65234375, "learning_rate": 9.41482616165735e-07, "loss": 0.27381402254104614, "step": 1635, "token_acc": 0.9236192714453584 }, { "epoch": 0.34514767932489454, "grad_norm": 1.5859375, "learning_rate": 9.41403216101997e-07, "loss": 0.24875643849372864, "step": 1636, "token_acc": 0.931766704416761 }, { "epoch": 0.3453586497890295, "grad_norm": 1.015625, "learning_rate": 9.413237655600654e-07, "loss": 0.2728778123855591, "step": 1637, "token_acc": 0.9263157894736842 }, { "epoch": 0.34556962025316457, "grad_norm": 0.7109375, "learning_rate": 9.412442645490257e-07, "loss": 0.30332183837890625, "step": 1638, "token_acc": 0.9168779938010707 }, { "epoch": 0.34578059071729955, "grad_norm": 0.78125, "learning_rate": 9.411647130779699e-07, "loss": 0.26190561056137085, "step": 1639, "token_acc": 0.9347617524784139 }, { "epoch": 0.3459915611814346, "grad_norm": 0.90234375, "learning_rate": 9.41085111155995e-07, "loss": 0.2752026915550232, "step": 1640, "token_acc": 0.9264008921103987 }, { "epoch": 0.34620253164556963, "grad_norm": 0.78515625, "learning_rate": 9.410054587922043e-07, "loss": 0.26243409514427185, "step": 1641, "token_acc": 0.928486646884273 }, { "epoch": 0.3464135021097046, "grad_norm": 0.84765625, "learning_rate": 9.409257559957069e-07, "loss": 0.2628988027572632, "step": 1642, "token_acc": 0.9306260575296108 }, { "epoch": 0.34662447257383966, "grad_norm": 0.640625, "learning_rate": 9.408460027756172e-07, "loss": 0.2401159405708313, "step": 1643, "token_acc": 0.9343711843711844 }, { "epoch": 0.3468354430379747, "grad_norm": 0.80859375, "learning_rate": 9.407661991410558e-07, "loss": 0.2749597132205963, "step": 1644, "token_acc": 0.9255230125523013 }, { "epoch": 0.3470464135021097, "grad_norm": 0.88671875, "learning_rate": 9.40686345101149e-07, "loss": 0.2682425379753113, "step": 1645, "token_acc": 0.9265940902021773 }, { "epoch": 0.3472573839662447, "grad_norm": 0.62890625, "learning_rate": 9.406064406650287e-07, "loss": 0.2411586046218872, "step": 1646, "token_acc": 0.9323656578134651 }, { "epoch": 0.34746835443037977, "grad_norm": 0.66015625, "learning_rate": 9.405264858418326e-07, "loss": 0.21946536004543304, "step": 1647, "token_acc": 0.9317245438493231 }, { "epoch": 0.34767932489451475, "grad_norm": 0.9375, "learning_rate": 9.404464806407042e-07, "loss": 0.296546071767807, "step": 1648, "token_acc": 0.9222361024359775 }, { "epoch": 0.3478902953586498, "grad_norm": 0.59375, "learning_rate": 9.40366425070793e-07, "loss": 0.20803073048591614, "step": 1649, "token_acc": 0.9425218176346675 }, { "epoch": 0.34810126582278483, "grad_norm": 0.83984375, "learning_rate": 9.402863191412537e-07, "loss": 0.3020900785923004, "step": 1650, "token_acc": 0.9220455254131588 }, { "epoch": 0.3483122362869198, "grad_norm": 0.6484375, "learning_rate": 9.402061628612472e-07, "loss": 0.25674012303352356, "step": 1651, "token_acc": 0.9310624493106245 }, { "epoch": 0.34852320675105486, "grad_norm": 0.76171875, "learning_rate": 9.401259562399403e-07, "loss": 0.28478553891181946, "step": 1652, "token_acc": 0.9299287410926366 }, { "epoch": 0.3487341772151899, "grad_norm": 0.80859375, "learning_rate": 9.40045699286505e-07, "loss": 0.23817691206932068, "step": 1653, "token_acc": 0.9281288723667905 }, { "epoch": 0.3489451476793249, "grad_norm": 0.6171875, "learning_rate": 9.399653920101195e-07, "loss": 0.22525762021541595, "step": 1654, "token_acc": 0.9331259720062208 }, { "epoch": 0.3491561181434599, "grad_norm": 0.70703125, "learning_rate": 9.398850344199675e-07, "loss": 0.23875603079795837, "step": 1655, "token_acc": 0.930642750373692 }, { "epoch": 0.3493670886075949, "grad_norm": 0.921875, "learning_rate": 9.398046265252388e-07, "loss": 0.25053465366363525, "step": 1656, "token_acc": 0.9324946302546794 }, { "epoch": 0.34957805907172995, "grad_norm": 0.69921875, "learning_rate": 9.397241683351285e-07, "loss": 0.2564641833305359, "step": 1657, "token_acc": 0.9294522512188127 }, { "epoch": 0.349789029535865, "grad_norm": 0.78125, "learning_rate": 9.396436598588378e-07, "loss": 0.2557925283908844, "step": 1658, "token_acc": 0.9266431924882629 }, { "epoch": 0.35, "grad_norm": 0.73046875, "learning_rate": 9.395631011055734e-07, "loss": 0.26249969005584717, "step": 1659, "token_acc": 0.9319962394233783 }, { "epoch": 0.350210970464135, "grad_norm": 0.6953125, "learning_rate": 9.394824920845481e-07, "loss": 0.23513028025627136, "step": 1660, "token_acc": 0.9312829038514824 }, { "epoch": 0.35042194092827006, "grad_norm": 0.7734375, "learning_rate": 9.394018328049799e-07, "loss": 0.2548867464065552, "step": 1661, "token_acc": 0.9336269267065115 }, { "epoch": 0.35063291139240504, "grad_norm": 0.7734375, "learning_rate": 9.393211232760932e-07, "loss": 0.23487508296966553, "step": 1662, "token_acc": 0.9349162011173184 }, { "epoch": 0.3508438818565401, "grad_norm": 0.7421875, "learning_rate": 9.392403635071176e-07, "loss": 0.260843962430954, "step": 1663, "token_acc": 0.9274122159929182 }, { "epoch": 0.3510548523206751, "grad_norm": 0.796875, "learning_rate": 9.391595535072887e-07, "loss": 0.2631385922431946, "step": 1664, "token_acc": 0.9209509658246656 }, { "epoch": 0.3512658227848101, "grad_norm": 0.8359375, "learning_rate": 9.390786932858479e-07, "loss": 0.23830120265483856, "step": 1665, "token_acc": 0.9282700421940928 }, { "epoch": 0.35147679324894515, "grad_norm": 0.6953125, "learning_rate": 9.389977828520421e-07, "loss": 0.26736193895339966, "step": 1666, "token_acc": 0.9238440616500453 }, { "epoch": 0.3516877637130802, "grad_norm": 0.69921875, "learning_rate": 9.389168222151243e-07, "loss": 0.27793848514556885, "step": 1667, "token_acc": 0.9270031365839749 }, { "epoch": 0.3518987341772152, "grad_norm": 0.57421875, "learning_rate": 9.388358113843529e-07, "loss": 0.25020474195480347, "step": 1668, "token_acc": 0.9275283937263386 }, { "epoch": 0.3521097046413502, "grad_norm": 0.85546875, "learning_rate": 9.387547503689921e-07, "loss": 0.2771265506744385, "step": 1669, "token_acc": 0.9255952380952381 }, { "epoch": 0.35232067510548526, "grad_norm": 0.76953125, "learning_rate": 9.386736391783121e-07, "loss": 0.29730749130249023, "step": 1670, "token_acc": 0.9246809835045129 }, { "epoch": 0.35253164556962024, "grad_norm": 0.67578125, "learning_rate": 9.385924778215885e-07, "loss": 0.2390887439250946, "step": 1671, "token_acc": 0.93340922026181 }, { "epoch": 0.3527426160337553, "grad_norm": 0.671875, "learning_rate": 9.385112663081028e-07, "loss": 0.2280537337064743, "step": 1672, "token_acc": 0.9354485776805251 }, { "epoch": 0.35295358649789027, "grad_norm": 0.80078125, "learning_rate": 9.384300046471424e-07, "loss": 0.3002857565879822, "step": 1673, "token_acc": 0.9189271563547698 }, { "epoch": 0.3531645569620253, "grad_norm": 0.8828125, "learning_rate": 9.38348692848e-07, "loss": 0.2538878917694092, "step": 1674, "token_acc": 0.9297851875601154 }, { "epoch": 0.35337552742616035, "grad_norm": 0.703125, "learning_rate": 9.382673309199745e-07, "loss": 0.22456809878349304, "step": 1675, "token_acc": 0.9339071626191208 }, { "epoch": 0.35358649789029534, "grad_norm": 0.71875, "learning_rate": 9.381859188723702e-07, "loss": 0.25327855348587036, "step": 1676, "token_acc": 0.929019929019929 }, { "epoch": 0.3537974683544304, "grad_norm": 0.65234375, "learning_rate": 9.381044567144973e-07, "loss": 0.24936312437057495, "step": 1677, "token_acc": 0.9296285953644233 }, { "epoch": 0.3540084388185654, "grad_norm": 0.72265625, "learning_rate": 9.380229444556717e-07, "loss": 0.231843501329422, "step": 1678, "token_acc": 0.9309262166405023 }, { "epoch": 0.3542194092827004, "grad_norm": 0.81640625, "learning_rate": 9.379413821052151e-07, "loss": 0.2699410319328308, "step": 1679, "token_acc": 0.9276815557581283 }, { "epoch": 0.35443037974683544, "grad_norm": 0.75, "learning_rate": 9.378597696724546e-07, "loss": 0.24168401956558228, "step": 1680, "token_acc": 0.9271465741543798 }, { "epoch": 0.3546413502109705, "grad_norm": 0.85546875, "learning_rate": 9.377781071667235e-07, "loss": 0.27143609523773193, "step": 1681, "token_acc": 0.9236213506340313 }, { "epoch": 0.35485232067510547, "grad_norm": 0.63671875, "learning_rate": 9.376963945973606e-07, "loss": 0.2371048629283905, "step": 1682, "token_acc": 0.9340296866410115 }, { "epoch": 0.3550632911392405, "grad_norm": 0.7265625, "learning_rate": 9.376146319737102e-07, "loss": 0.2859129309654236, "step": 1683, "token_acc": 0.9200359389038635 }, { "epoch": 0.35527426160337555, "grad_norm": 0.8828125, "learning_rate": 9.375328193051227e-07, "loss": 0.28455692529678345, "step": 1684, "token_acc": 0.9202853598014888 }, { "epoch": 0.35548523206751054, "grad_norm": 0.87109375, "learning_rate": 9.374509566009542e-07, "loss": 0.2839798331260681, "step": 1685, "token_acc": 0.9208261617900172 }, { "epoch": 0.3556962025316456, "grad_norm": 0.91796875, "learning_rate": 9.373690438705661e-07, "loss": 0.2920804023742676, "step": 1686, "token_acc": 0.9253557943653791 }, { "epoch": 0.35590717299578056, "grad_norm": 0.74609375, "learning_rate": 9.372870811233261e-07, "loss": 0.29054540395736694, "step": 1687, "token_acc": 0.9222520107238605 }, { "epoch": 0.3561181434599156, "grad_norm": 0.69140625, "learning_rate": 9.372050683686071e-07, "loss": 0.2947998046875, "step": 1688, "token_acc": 0.918534718425369 }, { "epoch": 0.35632911392405064, "grad_norm": 0.765625, "learning_rate": 9.371230056157882e-07, "loss": 0.2592851519584656, "step": 1689, "token_acc": 0.9262319268220714 }, { "epoch": 0.35654008438818563, "grad_norm": 0.7265625, "learning_rate": 9.370408928742537e-07, "loss": 0.26862451434135437, "step": 1690, "token_acc": 0.9255605381165919 }, { "epoch": 0.35675105485232067, "grad_norm": 1.1875, "learning_rate": 9.369587301533941e-07, "loss": 0.27907925844192505, "step": 1691, "token_acc": 0.9208823529411765 }, { "epoch": 0.3569620253164557, "grad_norm": 1.1015625, "learning_rate": 9.368765174626052e-07, "loss": 0.27880504727363586, "step": 1692, "token_acc": 0.9164239953407105 }, { "epoch": 0.3571729957805907, "grad_norm": 0.6484375, "learning_rate": 9.367942548112889e-07, "loss": 0.22724300622940063, "step": 1693, "token_acc": 0.9299659126123334 }, { "epoch": 0.35738396624472574, "grad_norm": 0.63671875, "learning_rate": 9.367119422088526e-07, "loss": 0.2403573840856552, "step": 1694, "token_acc": 0.9322590271560728 }, { "epoch": 0.3575949367088608, "grad_norm": 0.59375, "learning_rate": 9.366295796647093e-07, "loss": 0.22870095074176788, "step": 1695, "token_acc": 0.9367552703941339 }, { "epoch": 0.35780590717299576, "grad_norm": 0.7578125, "learning_rate": 9.365471671882781e-07, "loss": 0.28150975704193115, "step": 1696, "token_acc": 0.9182144997004195 }, { "epoch": 0.3580168776371308, "grad_norm": 0.80859375, "learning_rate": 9.364647047889833e-07, "loss": 0.31994009017944336, "step": 1697, "token_acc": 0.9140805334701205 }, { "epoch": 0.35822784810126584, "grad_norm": 0.7734375, "learning_rate": 9.363821924762554e-07, "loss": 0.2513180077075958, "step": 1698, "token_acc": 0.9310533515731874 }, { "epoch": 0.35843881856540083, "grad_norm": 0.72265625, "learning_rate": 9.362996302595303e-07, "loss": 0.29430505633354187, "step": 1699, "token_acc": 0.9234194122885129 }, { "epoch": 0.35864978902953587, "grad_norm": 0.75, "learning_rate": 9.362170181482496e-07, "loss": 0.2608153820037842, "step": 1700, "token_acc": 0.9316065192083819 }, { "epoch": 0.3588607594936709, "grad_norm": 0.96875, "learning_rate": 9.361343561518608e-07, "loss": 0.27800315618515015, "step": 1701, "token_acc": 0.9215134459036898 }, { "epoch": 0.3590717299578059, "grad_norm": 0.77734375, "learning_rate": 9.36051644279817e-07, "loss": 0.26527389883995056, "step": 1702, "token_acc": 0.9273917108133375 }, { "epoch": 0.35928270042194094, "grad_norm": 0.7421875, "learning_rate": 9.359688825415768e-07, "loss": 0.24779286980628967, "step": 1703, "token_acc": 0.930368636629608 }, { "epoch": 0.3594936708860759, "grad_norm": 0.66796875, "learning_rate": 9.35886070946605e-07, "loss": 0.2481088787317276, "step": 1704, "token_acc": 0.936046511627907 }, { "epoch": 0.35970464135021096, "grad_norm": 0.6796875, "learning_rate": 9.358032095043716e-07, "loss": 0.23535574972629547, "step": 1705, "token_acc": 0.9337689337689338 }, { "epoch": 0.359915611814346, "grad_norm": 0.8359375, "learning_rate": 9.357202982243526e-07, "loss": 0.29152315855026245, "step": 1706, "token_acc": 0.9170403587443946 }, { "epoch": 0.360126582278481, "grad_norm": 0.578125, "learning_rate": 9.356373371160298e-07, "loss": 0.21768781542778015, "step": 1707, "token_acc": 0.939873417721519 }, { "epoch": 0.36033755274261603, "grad_norm": 0.70703125, "learning_rate": 9.3555432618889e-07, "loss": 0.2704032063484192, "step": 1708, "token_acc": 0.9234184239733629 }, { "epoch": 0.36054852320675107, "grad_norm": 0.71484375, "learning_rate": 9.354712654524267e-07, "loss": 0.24188432097434998, "step": 1709, "token_acc": 0.9323047858942065 }, { "epoch": 0.36075949367088606, "grad_norm": 0.6484375, "learning_rate": 9.353881549161383e-07, "loss": 0.26301664113998413, "step": 1710, "token_acc": 0.9259364358683314 }, { "epoch": 0.3609704641350211, "grad_norm": 0.6484375, "learning_rate": 9.353049945895293e-07, "loss": 0.24812474846839905, "step": 1711, "token_acc": 0.9318505845451953 }, { "epoch": 0.36118143459915614, "grad_norm": 0.70703125, "learning_rate": 9.352217844821098e-07, "loss": 0.2484617680311203, "step": 1712, "token_acc": 0.9366929133858267 }, { "epoch": 0.3613924050632911, "grad_norm": 0.73046875, "learning_rate": 9.351385246033956e-07, "loss": 0.23907536268234253, "step": 1713, "token_acc": 0.9324242424242424 }, { "epoch": 0.36160337552742616, "grad_norm": 0.6171875, "learning_rate": 9.35055214962908e-07, "loss": 0.23805205523967743, "step": 1714, "token_acc": 0.9346926713947991 }, { "epoch": 0.3618143459915612, "grad_norm": 0.578125, "learning_rate": 9.349718555701744e-07, "loss": 0.2235066294670105, "step": 1715, "token_acc": 0.9360208062418726 }, { "epoch": 0.3620253164556962, "grad_norm": 0.96875, "learning_rate": 9.348884464347275e-07, "loss": 0.28189563751220703, "step": 1716, "token_acc": 0.9183735860593091 }, { "epoch": 0.36223628691983123, "grad_norm": 0.91015625, "learning_rate": 9.348049875661059e-07, "loss": 0.28244683146476746, "step": 1717, "token_acc": 0.9196454103517301 }, { "epoch": 0.36244725738396627, "grad_norm": 0.6875, "learning_rate": 9.347214789738538e-07, "loss": 0.3043467700481415, "step": 1718, "token_acc": 0.9222814164838609 }, { "epoch": 0.36265822784810126, "grad_norm": 0.95703125, "learning_rate": 9.346379206675211e-07, "loss": 0.2614938020706177, "step": 1719, "token_acc": 0.9301948051948052 }, { "epoch": 0.3628691983122363, "grad_norm": 0.85546875, "learning_rate": 9.345543126566635e-07, "loss": 0.2756979167461395, "step": 1720, "token_acc": 0.9246906939214632 }, { "epoch": 0.3630801687763713, "grad_norm": 0.8671875, "learning_rate": 9.344706549508421e-07, "loss": 0.28513869643211365, "step": 1721, "token_acc": 0.9201053555750659 }, { "epoch": 0.3632911392405063, "grad_norm": 0.86328125, "learning_rate": 9.343869475596241e-07, "loss": 0.2887800931930542, "step": 1722, "token_acc": 0.9226091763405196 }, { "epoch": 0.36350210970464136, "grad_norm": 0.640625, "learning_rate": 9.34303190492582e-07, "loss": 0.23534713685512543, "step": 1723, "token_acc": 0.935820895522388 }, { "epoch": 0.36371308016877635, "grad_norm": 0.70703125, "learning_rate": 9.342193837592941e-07, "loss": 0.30718207359313965, "step": 1724, "token_acc": 0.9156766154737758 }, { "epoch": 0.3639240506329114, "grad_norm": 0.76171875, "learning_rate": 9.341355273693446e-07, "loss": 0.2970879077911377, "step": 1725, "token_acc": 0.9235145385587863 }, { "epoch": 0.36413502109704643, "grad_norm": 0.84765625, "learning_rate": 9.340516213323228e-07, "loss": 0.2779674530029297, "step": 1726, "token_acc": 0.926529357516139 }, { "epoch": 0.3643459915611814, "grad_norm": 0.67578125, "learning_rate": 9.339676656578245e-07, "loss": 0.301363468170166, "step": 1727, "token_acc": 0.9251336898395722 }, { "epoch": 0.36455696202531646, "grad_norm": 0.5859375, "learning_rate": 9.338836603554505e-07, "loss": 0.25844764709472656, "step": 1728, "token_acc": 0.9302995391705069 }, { "epoch": 0.3647679324894515, "grad_norm": 0.6875, "learning_rate": 9.337996054348076e-07, "loss": 0.2536547780036926, "step": 1729, "token_acc": 0.9244303432362273 }, { "epoch": 0.3649789029535865, "grad_norm": 0.6015625, "learning_rate": 9.337155009055081e-07, "loss": 0.2093231976032257, "step": 1730, "token_acc": 0.9381860196418256 }, { "epoch": 0.3651898734177215, "grad_norm": 0.734375, "learning_rate": 9.336313467771701e-07, "loss": 0.2366497814655304, "step": 1731, "token_acc": 0.9375562894025818 }, { "epoch": 0.36540084388185656, "grad_norm": 0.61328125, "learning_rate": 9.335471430594175e-07, "loss": 0.23666155338287354, "step": 1732, "token_acc": 0.9285078611687927 }, { "epoch": 0.36561181434599155, "grad_norm": 0.69140625, "learning_rate": 9.334628897618797e-07, "loss": 0.2761836349964142, "step": 1733, "token_acc": 0.9200749297533563 }, { "epoch": 0.3658227848101266, "grad_norm": 0.69921875, "learning_rate": 9.333785868941915e-07, "loss": 0.22819164395332336, "step": 1734, "token_acc": 0.9270286047869235 }, { "epoch": 0.36603375527426163, "grad_norm": 0.765625, "learning_rate": 9.332942344659938e-07, "loss": 0.25955134630203247, "step": 1735, "token_acc": 0.9318693693693694 }, { "epoch": 0.3662447257383966, "grad_norm": 0.74609375, "learning_rate": 9.332098324869329e-07, "loss": 0.24414655566215515, "step": 1736, "token_acc": 0.9310754604872252 }, { "epoch": 0.36645569620253166, "grad_norm": 0.6953125, "learning_rate": 9.331253809666611e-07, "loss": 0.27255892753601074, "step": 1737, "token_acc": 0.9247685185185185 }, { "epoch": 0.36666666666666664, "grad_norm": 0.671875, "learning_rate": 9.330408799148362e-07, "loss": 0.2682799994945526, "step": 1738, "token_acc": 0.9308393586922351 }, { "epoch": 0.3668776371308017, "grad_norm": 0.90625, "learning_rate": 9.329563293411211e-07, "loss": 0.2974281311035156, "step": 1739, "token_acc": 0.9198951354500436 }, { "epoch": 0.3670886075949367, "grad_norm": 0.73828125, "learning_rate": 9.328717292551855e-07, "loss": 0.22290384769439697, "step": 1740, "token_acc": 0.9367441860465117 }, { "epoch": 0.3672995780590717, "grad_norm": 0.65234375, "learning_rate": 9.327870796667038e-07, "loss": 0.22769173979759216, "step": 1741, "token_acc": 0.933920704845815 }, { "epoch": 0.36751054852320675, "grad_norm": 0.703125, "learning_rate": 9.327023805853564e-07, "loss": 0.2551524043083191, "step": 1742, "token_acc": 0.9279952195996415 }, { "epoch": 0.3677215189873418, "grad_norm": 0.67578125, "learning_rate": 9.326176320208296e-07, "loss": 0.21800342202186584, "step": 1743, "token_acc": 0.9341448189762797 }, { "epoch": 0.3679324894514768, "grad_norm": 0.75, "learning_rate": 9.325328339828147e-07, "loss": 0.29383134841918945, "step": 1744, "token_acc": 0.9246655031995347 }, { "epoch": 0.3681434599156118, "grad_norm": 0.66796875, "learning_rate": 9.324479864810094e-07, "loss": 0.24657410383224487, "step": 1745, "token_acc": 0.9288770053475935 }, { "epoch": 0.36835443037974686, "grad_norm": 0.77734375, "learning_rate": 9.323630895251167e-07, "loss": 0.24168427288532257, "step": 1746, "token_acc": 0.9318403115871471 }, { "epoch": 0.36856540084388184, "grad_norm": 0.78515625, "learning_rate": 9.322781431248452e-07, "loss": 0.2549583315849304, "step": 1747, "token_acc": 0.9271541950113379 }, { "epoch": 0.3687763713080169, "grad_norm": 0.74609375, "learning_rate": 9.321931472899092e-07, "loss": 0.2550113797187805, "step": 1748, "token_acc": 0.9294592914853946 }, { "epoch": 0.3689873417721519, "grad_norm": 0.73828125, "learning_rate": 9.321081020300288e-07, "loss": 0.2872239947319031, "step": 1749, "token_acc": 0.9191132414619533 }, { "epoch": 0.3691983122362869, "grad_norm": 0.6640625, "learning_rate": 9.320230073549295e-07, "loss": 0.22348162531852722, "step": 1750, "token_acc": 0.9379411764705883 }, { "epoch": 0.36940928270042195, "grad_norm": 0.640625, "learning_rate": 9.319378632743429e-07, "loss": 0.25937214493751526, "step": 1751, "token_acc": 0.9258928571428572 }, { "epoch": 0.369620253164557, "grad_norm": 0.921875, "learning_rate": 9.318526697980056e-07, "loss": 0.277938574552536, "step": 1752, "token_acc": 0.9225372698041509 }, { "epoch": 0.369831223628692, "grad_norm": 1.1171875, "learning_rate": 9.317674269356604e-07, "loss": 0.3054434061050415, "step": 1753, "token_acc": 0.915146249637996 }, { "epoch": 0.370042194092827, "grad_norm": 0.75390625, "learning_rate": 9.316821346970554e-07, "loss": 0.2515072822570801, "step": 1754, "token_acc": 0.9270833333333334 }, { "epoch": 0.370253164556962, "grad_norm": 0.8046875, "learning_rate": 9.315967930919445e-07, "loss": 0.2604433298110962, "step": 1755, "token_acc": 0.9382022471910112 }, { "epoch": 0.37046413502109704, "grad_norm": 0.83984375, "learning_rate": 9.315114021300874e-07, "loss": 0.30326345562934875, "step": 1756, "token_acc": 0.9137931034482759 }, { "epoch": 0.3706751054852321, "grad_norm": 0.6796875, "learning_rate": 9.314259618212492e-07, "loss": 0.2592795491218567, "step": 1757, "token_acc": 0.9306569343065694 }, { "epoch": 0.37088607594936707, "grad_norm": 0.6328125, "learning_rate": 9.313404721752008e-07, "loss": 0.24161499738693237, "step": 1758, "token_acc": 0.9281354051054383 }, { "epoch": 0.3710970464135021, "grad_norm": 0.62890625, "learning_rate": 9.312549332017183e-07, "loss": 0.2770785093307495, "step": 1759, "token_acc": 0.9250313676286073 }, { "epoch": 0.37130801687763715, "grad_norm": 0.703125, "learning_rate": 9.311693449105844e-07, "loss": 0.2557133734226227, "step": 1760, "token_acc": 0.926200451176281 }, { "epoch": 0.37151898734177213, "grad_norm": 0.76171875, "learning_rate": 9.310837073115862e-07, "loss": 0.3002878427505493, "step": 1761, "token_acc": 0.9207248018120046 }, { "epoch": 0.3717299578059072, "grad_norm": 0.68359375, "learning_rate": 9.309980204145176e-07, "loss": 0.24686521291732788, "step": 1762, "token_acc": 0.9330046403712297 }, { "epoch": 0.3719409282700422, "grad_norm": 0.76953125, "learning_rate": 9.309122842291774e-07, "loss": 0.292415589094162, "step": 1763, "token_acc": 0.9288702928870293 }, { "epoch": 0.3721518987341772, "grad_norm": 0.7109375, "learning_rate": 9.308264987653703e-07, "loss": 0.25645536184310913, "step": 1764, "token_acc": 0.9317073170731708 }, { "epoch": 0.37236286919831224, "grad_norm": 0.65234375, "learning_rate": 9.307406640329065e-07, "loss": 0.22903116047382355, "step": 1765, "token_acc": 0.9329577464788732 }, { "epoch": 0.3725738396624473, "grad_norm": 0.6875, "learning_rate": 9.306547800416022e-07, "loss": 0.2557244300842285, "step": 1766, "token_acc": 0.9314949201741655 }, { "epoch": 0.37278481012658227, "grad_norm": 0.8125, "learning_rate": 9.305688468012787e-07, "loss": 0.19353802502155304, "step": 1767, "token_acc": 0.9413680781758957 }, { "epoch": 0.3729957805907173, "grad_norm": 1.171875, "learning_rate": 9.304828643217631e-07, "loss": 0.31406620144844055, "step": 1768, "token_acc": 0.9192907367777438 }, { "epoch": 0.37320675105485235, "grad_norm": 0.57421875, "learning_rate": 9.303968326128884e-07, "loss": 0.21769124269485474, "step": 1769, "token_acc": 0.9381910972497873 }, { "epoch": 0.37341772151898733, "grad_norm": 0.79296875, "learning_rate": 9.303107516844932e-07, "loss": 0.2673788070678711, "step": 1770, "token_acc": 0.9337557603686636 }, { "epoch": 0.3736286919831224, "grad_norm": 0.68359375, "learning_rate": 9.302246215464213e-07, "loss": 0.24331021308898926, "step": 1771, "token_acc": 0.9333902647309992 }, { "epoch": 0.37383966244725736, "grad_norm": 0.8828125, "learning_rate": 9.301384422085227e-07, "loss": 0.27998149394989014, "step": 1772, "token_acc": 0.9219777079165475 }, { "epoch": 0.3740506329113924, "grad_norm": 0.73828125, "learning_rate": 9.300522136806524e-07, "loss": 0.2714657187461853, "step": 1773, "token_acc": 0.9230088495575222 }, { "epoch": 0.37426160337552744, "grad_norm": 0.62890625, "learning_rate": 9.299659359726717e-07, "loss": 0.23993107676506042, "step": 1774, "token_acc": 0.9355118565644881 }, { "epoch": 0.3744725738396624, "grad_norm": 0.69921875, "learning_rate": 9.298796090944468e-07, "loss": 0.27614468336105347, "step": 1775, "token_acc": 0.9254237288135593 }, { "epoch": 0.37468354430379747, "grad_norm": 0.67578125, "learning_rate": 9.297932330558503e-07, "loss": 0.26223164796829224, "step": 1776, "token_acc": 0.9227665706051873 }, { "epoch": 0.3748945147679325, "grad_norm": 0.61328125, "learning_rate": 9.297068078667598e-07, "loss": 0.21317782998085022, "step": 1777, "token_acc": 0.9325064897605999 }, { "epoch": 0.3751054852320675, "grad_norm": 0.7421875, "learning_rate": 9.296203335370587e-07, "loss": 0.30340367555618286, "step": 1778, "token_acc": 0.9200648123143397 }, { "epoch": 0.37531645569620253, "grad_norm": 0.734375, "learning_rate": 9.295338100766364e-07, "loss": 0.21391162276268005, "step": 1779, "token_acc": 0.9396666666666667 }, { "epoch": 0.3755274261603376, "grad_norm": 0.625, "learning_rate": 9.294472374953872e-07, "loss": 0.2524837851524353, "step": 1780, "token_acc": 0.9336639801611903 }, { "epoch": 0.37573839662447256, "grad_norm": 0.78515625, "learning_rate": 9.293606158032117e-07, "loss": 0.24630481004714966, "step": 1781, "token_acc": 0.931282722513089 }, { "epoch": 0.3759493670886076, "grad_norm": 0.7109375, "learning_rate": 9.292739450100155e-07, "loss": 0.2903074026107788, "step": 1782, "token_acc": 0.9166461765429064 }, { "epoch": 0.37616033755274264, "grad_norm": 0.65625, "learning_rate": 9.291872251257107e-07, "loss": 0.2113291174173355, "step": 1783, "token_acc": 0.9350509930220076 }, { "epoch": 0.3763713080168776, "grad_norm": 0.62109375, "learning_rate": 9.291004561602138e-07, "loss": 0.2671849727630615, "step": 1784, "token_acc": 0.9261083743842364 }, { "epoch": 0.37658227848101267, "grad_norm": 1.1875, "learning_rate": 9.290136381234479e-07, "loss": 0.34464550018310547, "step": 1785, "token_acc": 0.9090909090909091 }, { "epoch": 0.37679324894514765, "grad_norm": 0.6484375, "learning_rate": 9.289267710253415e-07, "loss": 0.24422526359558105, "step": 1786, "token_acc": 0.9316384180790961 }, { "epoch": 0.3770042194092827, "grad_norm": 0.6484375, "learning_rate": 9.288398548758283e-07, "loss": 0.2332957684993744, "step": 1787, "token_acc": 0.9384615384615385 }, { "epoch": 0.37721518987341773, "grad_norm": 0.76171875, "learning_rate": 9.28752889684848e-07, "loss": 0.32483792304992676, "step": 1788, "token_acc": 0.9187627464309993 }, { "epoch": 0.3774261603375527, "grad_norm": 0.671875, "learning_rate": 9.286658754623458e-07, "loss": 0.2529968023300171, "step": 1789, "token_acc": 0.9290078556881001 }, { "epoch": 0.37763713080168776, "grad_norm": 0.6953125, "learning_rate": 9.285788122182728e-07, "loss": 0.21313020586967468, "step": 1790, "token_acc": 0.9386806160999709 }, { "epoch": 0.3778481012658228, "grad_norm": 0.79296875, "learning_rate": 9.284916999625849e-07, "loss": 0.28534191846847534, "step": 1791, "token_acc": 0.918232044198895 }, { "epoch": 0.3780590717299578, "grad_norm": 0.7578125, "learning_rate": 9.284045387052444e-07, "loss": 0.2752482295036316, "step": 1792, "token_acc": 0.9267692307692308 }, { "epoch": 0.3782700421940928, "grad_norm": 0.703125, "learning_rate": 9.283173284562189e-07, "loss": 0.23181575536727905, "step": 1793, "token_acc": 0.9338775510204081 }, { "epoch": 0.37848101265822787, "grad_norm": 0.69921875, "learning_rate": 9.282300692254818e-07, "loss": 0.2837159037590027, "step": 1794, "token_acc": 0.9179869524697111 }, { "epoch": 0.37869198312236285, "grad_norm": 0.74609375, "learning_rate": 9.281427610230117e-07, "loss": 0.2748425602912903, "step": 1795, "token_acc": 0.9209206255532606 }, { "epoch": 0.3789029535864979, "grad_norm": 0.73828125, "learning_rate": 9.280554038587931e-07, "loss": 0.27525874972343445, "step": 1796, "token_acc": 0.9241778319123021 }, { "epoch": 0.37911392405063293, "grad_norm": 0.765625, "learning_rate": 9.27967997742816e-07, "loss": 0.24282464385032654, "step": 1797, "token_acc": 0.9311178247734139 }, { "epoch": 0.3793248945147679, "grad_norm": 0.80078125, "learning_rate": 9.278805426850761e-07, "loss": 0.2466842383146286, "step": 1798, "token_acc": 0.9305689488910318 }, { "epoch": 0.37953586497890296, "grad_norm": 0.79296875, "learning_rate": 9.277930386955745e-07, "loss": 0.28594300150871277, "step": 1799, "token_acc": 0.9248895434462445 }, { "epoch": 0.379746835443038, "grad_norm": 0.73828125, "learning_rate": 9.277054857843183e-07, "loss": 0.2887975573539734, "step": 1800, "token_acc": 0.9212067955477445 }, { "epoch": 0.379746835443038, "eval_loss": 0.43369510769844055, "eval_runtime": 245.8672, "eval_samples_per_second": 137.086, "eval_steps_per_second": 2.143, "eval_token_acc": 0.899108184510455, "step": 1800 }, { "epoch": 0.379957805907173, "grad_norm": 0.7421875, "learning_rate": 9.276178839613196e-07, "loss": 0.2607673108577728, "step": 1801, "token_acc": 0.928168130489335 }, { "epoch": 0.380168776371308, "grad_norm": 0.6640625, "learning_rate": 9.275302332365965e-07, "loss": 0.2245202362537384, "step": 1802, "token_acc": 0.9333936106088004 }, { "epoch": 0.380379746835443, "grad_norm": 0.66015625, "learning_rate": 9.274425336201728e-07, "loss": 0.23835879564285278, "step": 1803, "token_acc": 0.9261068702290076 }, { "epoch": 0.38059071729957805, "grad_norm": 0.76171875, "learning_rate": 9.273547851220775e-07, "loss": 0.24824509024620056, "step": 1804, "token_acc": 0.9263188918361333 }, { "epoch": 0.3808016877637131, "grad_norm": 0.83984375, "learning_rate": 9.272669877523454e-07, "loss": 0.23923823237419128, "step": 1805, "token_acc": 0.9348637015781922 }, { "epoch": 0.3810126582278481, "grad_norm": 0.65234375, "learning_rate": 9.271791415210168e-07, "loss": 0.26697322726249695, "step": 1806, "token_acc": 0.9220437956204379 }, { "epoch": 0.3812236286919831, "grad_norm": 0.7109375, "learning_rate": 9.270912464381377e-07, "loss": 0.24041783809661865, "step": 1807, "token_acc": 0.93335325762104 }, { "epoch": 0.38143459915611816, "grad_norm": 0.73046875, "learning_rate": 9.270033025137598e-07, "loss": 0.27658283710479736, "step": 1808, "token_acc": 0.9247249565720903 }, { "epoch": 0.38164556962025314, "grad_norm": 0.703125, "learning_rate": 9.269153097579401e-07, "loss": 0.22341014444828033, "step": 1809, "token_acc": 0.9340346886551038 }, { "epoch": 0.3818565400843882, "grad_norm": 0.65625, "learning_rate": 9.268272681807415e-07, "loss": 0.2665144205093384, "step": 1810, "token_acc": 0.9275118947663028 }, { "epoch": 0.3820675105485232, "grad_norm": 0.96875, "learning_rate": 9.26739177792232e-07, "loss": 0.25565192103385925, "step": 1811, "token_acc": 0.927958307786634 }, { "epoch": 0.3822784810126582, "grad_norm": 0.79296875, "learning_rate": 9.266510386024858e-07, "loss": 0.27763834595680237, "step": 1812, "token_acc": 0.9273282442748092 }, { "epoch": 0.38248945147679325, "grad_norm": 0.96484375, "learning_rate": 9.265628506215819e-07, "loss": 0.2819390594959259, "step": 1813, "token_acc": 0.9262629432067775 }, { "epoch": 0.3827004219409283, "grad_norm": 0.7890625, "learning_rate": 9.264746138596058e-07, "loss": 0.29503488540649414, "step": 1814, "token_acc": 0.920820189274448 }, { "epoch": 0.3829113924050633, "grad_norm": 0.83984375, "learning_rate": 9.26386328326648e-07, "loss": 0.2595806121826172, "step": 1815, "token_acc": 0.9269662921348315 }, { "epoch": 0.3831223628691983, "grad_norm": 1.03125, "learning_rate": 9.262979940328046e-07, "loss": 0.24287866055965424, "step": 1816, "token_acc": 0.9247341913822048 }, { "epoch": 0.38333333333333336, "grad_norm": 0.75, "learning_rate": 9.262096109881774e-07, "loss": 0.2156772017478943, "step": 1817, "token_acc": 0.9401315789473684 }, { "epoch": 0.38354430379746834, "grad_norm": 0.8359375, "learning_rate": 9.261211792028738e-07, "loss": 0.2951451539993286, "step": 1818, "token_acc": 0.9252308608876973 }, { "epoch": 0.3837552742616034, "grad_norm": 0.72265625, "learning_rate": 9.260326986870066e-07, "loss": 0.27076175808906555, "step": 1819, "token_acc": 0.9258073901658423 }, { "epoch": 0.38396624472573837, "grad_norm": 0.6875, "learning_rate": 9.259441694506944e-07, "loss": 0.22185054421424866, "step": 1820, "token_acc": 0.9377751687701791 }, { "epoch": 0.3841772151898734, "grad_norm": 1.0390625, "learning_rate": 9.258555915040614e-07, "loss": 0.22383590042591095, "step": 1821, "token_acc": 0.9407185628742515 }, { "epoch": 0.38438818565400845, "grad_norm": 1.1171875, "learning_rate": 9.257669648572371e-07, "loss": 0.2371908277273178, "step": 1822, "token_acc": 0.938851142680667 }, { "epoch": 0.38459915611814344, "grad_norm": 0.671875, "learning_rate": 9.256782895203567e-07, "loss": 0.26829174160957336, "step": 1823, "token_acc": 0.9228368794326242 }, { "epoch": 0.3848101265822785, "grad_norm": 0.79296875, "learning_rate": 9.255895655035608e-07, "loss": 0.2719237506389618, "step": 1824, "token_acc": 0.9227082085080887 }, { "epoch": 0.3850210970464135, "grad_norm": 0.82421875, "learning_rate": 9.255007928169961e-07, "loss": 0.2967223823070526, "step": 1825, "token_acc": 0.9157397107897665 }, { "epoch": 0.3852320675105485, "grad_norm": 0.75390625, "learning_rate": 9.254119714708142e-07, "loss": 0.24009615182876587, "step": 1826, "token_acc": 0.9363372093023256 }, { "epoch": 0.38544303797468354, "grad_norm": 0.7109375, "learning_rate": 9.253231014751729e-07, "loss": 0.307847797870636, "step": 1827, "token_acc": 0.9175531914893617 }, { "epoch": 0.3856540084388186, "grad_norm": 1.1328125, "learning_rate": 9.252341828402349e-07, "loss": 0.23958738148212433, "step": 1828, "token_acc": 0.9360444907890163 }, { "epoch": 0.38586497890295357, "grad_norm": 1.078125, "learning_rate": 9.25145215576169e-07, "loss": 0.2339942902326584, "step": 1829, "token_acc": 0.9394484412470024 }, { "epoch": 0.3860759493670886, "grad_norm": 0.66796875, "learning_rate": 9.250561996931492e-07, "loss": 0.225580632686615, "step": 1830, "token_acc": 0.9307692307692308 }, { "epoch": 0.38628691983122365, "grad_norm": 0.78125, "learning_rate": 9.249671352013553e-07, "loss": 0.26309460401535034, "step": 1831, "token_acc": 0.9265518362040949 }, { "epoch": 0.38649789029535864, "grad_norm": 0.6640625, "learning_rate": 9.248780221109728e-07, "loss": 0.24800172448158264, "step": 1832, "token_acc": 0.9343544857768052 }, { "epoch": 0.3867088607594937, "grad_norm": 0.76171875, "learning_rate": 9.247888604321923e-07, "loss": 0.25037574768066406, "step": 1833, "token_acc": 0.9315494710640946 }, { "epoch": 0.3869198312236287, "grad_norm": 0.68359375, "learning_rate": 9.2469965017521e-07, "loss": 0.28050029277801514, "step": 1834, "token_acc": 0.9182209469153515 }, { "epoch": 0.3871308016877637, "grad_norm": 1.171875, "learning_rate": 9.246103913502282e-07, "loss": 0.26002001762390137, "step": 1835, "token_acc": 0.9284110050533408 }, { "epoch": 0.38734177215189874, "grad_norm": 0.7734375, "learning_rate": 9.245210839674543e-07, "loss": 0.2603076696395874, "step": 1836, "token_acc": 0.9262130347401665 }, { "epoch": 0.38755274261603373, "grad_norm": 0.55859375, "learning_rate": 9.244317280371013e-07, "loss": 0.21622005105018616, "step": 1837, "token_acc": 0.9399153737658674 }, { "epoch": 0.38776371308016877, "grad_norm": 0.72265625, "learning_rate": 9.243423235693879e-07, "loss": 0.2640265226364136, "step": 1838, "token_acc": 0.9272887842213718 }, { "epoch": 0.3879746835443038, "grad_norm": 0.7890625, "learning_rate": 9.242528705745381e-07, "loss": 0.2691488265991211, "step": 1839, "token_acc": 0.9298516687268232 }, { "epoch": 0.3881856540084388, "grad_norm": 0.7421875, "learning_rate": 9.241633690627818e-07, "loss": 0.24178001284599304, "step": 1840, "token_acc": 0.9338363319791062 }, { "epoch": 0.38839662447257384, "grad_norm": 0.66796875, "learning_rate": 9.240738190443541e-07, "loss": 0.25345578789711, "step": 1841, "token_acc": 0.9266702878870179 }, { "epoch": 0.3886075949367089, "grad_norm": 0.62109375, "learning_rate": 9.239842205294959e-07, "loss": 0.2360486388206482, "step": 1842, "token_acc": 0.9328785811732606 }, { "epoch": 0.38881856540084386, "grad_norm": 0.73828125, "learning_rate": 9.238945735284534e-07, "loss": 0.2449038028717041, "step": 1843, "token_acc": 0.9268953068592057 }, { "epoch": 0.3890295358649789, "grad_norm": 0.62890625, "learning_rate": 9.238048780514787e-07, "loss": 0.26960039138793945, "step": 1844, "token_acc": 0.9270607375271149 }, { "epoch": 0.38924050632911394, "grad_norm": 0.83203125, "learning_rate": 9.237151341088292e-07, "loss": 0.2575567662715912, "step": 1845, "token_acc": 0.9342750072066878 }, { "epoch": 0.38945147679324893, "grad_norm": 0.6953125, "learning_rate": 9.236253417107676e-07, "loss": 0.2754535675048828, "step": 1846, "token_acc": 0.9288154897494305 }, { "epoch": 0.38966244725738397, "grad_norm": 0.78515625, "learning_rate": 9.23535500867563e-07, "loss": 0.2439945489168167, "step": 1847, "token_acc": 0.9362714013950539 }, { "epoch": 0.389873417721519, "grad_norm": 0.6640625, "learning_rate": 9.234456115894888e-07, "loss": 0.2606812119483948, "step": 1848, "token_acc": 0.9236545682102628 }, { "epoch": 0.390084388185654, "grad_norm": 0.9140625, "learning_rate": 9.233556738868249e-07, "loss": 0.27625495195388794, "step": 1849, "token_acc": 0.9225721784776902 }, { "epoch": 0.39029535864978904, "grad_norm": 0.6640625, "learning_rate": 9.232656877698566e-07, "loss": 0.2793102264404297, "step": 1850, "token_acc": 0.9202629322663618 }, { "epoch": 0.3905063291139241, "grad_norm": 0.6875, "learning_rate": 9.231756532488743e-07, "loss": 0.2455964833498001, "step": 1851, "token_acc": 0.9258962011771 }, { "epoch": 0.39071729957805906, "grad_norm": 0.5078125, "learning_rate": 9.230855703341743e-07, "loss": 0.19969472289085388, "step": 1852, "token_acc": 0.9432416617905208 }, { "epoch": 0.3909282700421941, "grad_norm": 0.7265625, "learning_rate": 9.229954390360584e-07, "loss": 0.28671932220458984, "step": 1853, "token_acc": 0.9230058515552818 }, { "epoch": 0.3911392405063291, "grad_norm": 0.75, "learning_rate": 9.229052593648339e-07, "loss": 0.26401039958000183, "step": 1854, "token_acc": 0.9225410977988298 }, { "epoch": 0.39135021097046413, "grad_norm": 0.7734375, "learning_rate": 9.228150313308134e-07, "loss": 0.233763188123703, "step": 1855, "token_acc": 0.9356511131442513 }, { "epoch": 0.39156118143459917, "grad_norm": 0.62109375, "learning_rate": 9.227247549443156e-07, "loss": 0.19187316298484802, "step": 1856, "token_acc": 0.9460545193687231 }, { "epoch": 0.39177215189873416, "grad_norm": 0.82421875, "learning_rate": 9.226344302156641e-07, "loss": 0.24813725054264069, "step": 1857, "token_acc": 0.9283582089552239 }, { "epoch": 0.3919831223628692, "grad_norm": 0.7421875, "learning_rate": 9.225440571551882e-07, "loss": 0.28099197149276733, "step": 1858, "token_acc": 0.9286861548345647 }, { "epoch": 0.39219409282700424, "grad_norm": 0.65234375, "learning_rate": 9.224536357732231e-07, "loss": 0.23781321942806244, "step": 1859, "token_acc": 0.9274464239607539 }, { "epoch": 0.3924050632911392, "grad_norm": 0.68359375, "learning_rate": 9.223631660801093e-07, "loss": 0.3117380440235138, "step": 1860, "token_acc": 0.9198289684660609 }, { "epoch": 0.39261603375527426, "grad_norm": 0.81640625, "learning_rate": 9.222726480861922e-07, "loss": 0.2948303818702698, "step": 1861, "token_acc": 0.9180280882774434 }, { "epoch": 0.3928270042194093, "grad_norm": 0.58203125, "learning_rate": 9.22182081801824e-07, "loss": 0.2569146454334259, "step": 1862, "token_acc": 0.9329383248047401 }, { "epoch": 0.3930379746835443, "grad_norm": 0.94921875, "learning_rate": 9.220914672373614e-07, "loss": 0.32216933369636536, "step": 1863, "token_acc": 0.9064083457526081 }, { "epoch": 0.39324894514767933, "grad_norm": 0.9453125, "learning_rate": 9.220008044031669e-07, "loss": 0.27637138962745667, "step": 1864, "token_acc": 0.9210816777041942 }, { "epoch": 0.39345991561181437, "grad_norm": 0.81640625, "learning_rate": 9.219100933096086e-07, "loss": 0.26345184445381165, "step": 1865, "token_acc": 0.9283387622149837 }, { "epoch": 0.39367088607594936, "grad_norm": 0.8671875, "learning_rate": 9.218193339670601e-07, "loss": 0.3173444867134094, "step": 1866, "token_acc": 0.9129682997118156 }, { "epoch": 0.3938818565400844, "grad_norm": 0.625, "learning_rate": 9.217285263859007e-07, "loss": 0.22335606813430786, "step": 1867, "token_acc": 0.935064935064935 }, { "epoch": 0.39409282700421944, "grad_norm": 0.7265625, "learning_rate": 9.216376705765147e-07, "loss": 0.2642119526863098, "step": 1868, "token_acc": 0.9246898995865328 }, { "epoch": 0.3943037974683544, "grad_norm": 0.6484375, "learning_rate": 9.215467665492923e-07, "loss": 0.23183171451091766, "step": 1869, "token_acc": 0.9344312290332418 }, { "epoch": 0.39451476793248946, "grad_norm": 0.73046875, "learning_rate": 9.214558143146292e-07, "loss": 0.272377073764801, "step": 1870, "token_acc": 0.9305785123966942 }, { "epoch": 0.39472573839662445, "grad_norm": 0.8359375, "learning_rate": 9.213648138829266e-07, "loss": 0.26784923672676086, "step": 1871, "token_acc": 0.928849902534113 }, { "epoch": 0.3949367088607595, "grad_norm": 0.890625, "learning_rate": 9.212737652645913e-07, "loss": 0.23856118321418762, "step": 1872, "token_acc": 0.9337503554165482 }, { "epoch": 0.39514767932489453, "grad_norm": 0.6875, "learning_rate": 9.211826684700351e-07, "loss": 0.2687574625015259, "step": 1873, "token_acc": 0.9265722752716496 }, { "epoch": 0.3953586497890295, "grad_norm": 0.8046875, "learning_rate": 9.210915235096759e-07, "loss": 0.2604142427444458, "step": 1874, "token_acc": 0.9201069201069201 }, { "epoch": 0.39556962025316456, "grad_norm": 0.59765625, "learning_rate": 9.210003303939371e-07, "loss": 0.21657685935497284, "step": 1875, "token_acc": 0.9314020224104946 }, { "epoch": 0.3957805907172996, "grad_norm": 0.6875, "learning_rate": 9.20909089133247e-07, "loss": 0.24224431812763214, "step": 1876, "token_acc": 0.9318985849056604 }, { "epoch": 0.3959915611814346, "grad_norm": 0.765625, "learning_rate": 9.208177997380399e-07, "loss": 0.29792293906211853, "step": 1877, "token_acc": 0.9213075060532687 }, { "epoch": 0.3962025316455696, "grad_norm": 0.67578125, "learning_rate": 9.20726462218756e-07, "loss": 0.2578182816505432, "step": 1878, "token_acc": 0.924163783160323 }, { "epoch": 0.39641350210970466, "grad_norm": 0.6640625, "learning_rate": 9.2063507658584e-07, "loss": 0.23330868780612946, "step": 1879, "token_acc": 0.9328333876752527 }, { "epoch": 0.39662447257383965, "grad_norm": 0.890625, "learning_rate": 9.205436428497426e-07, "loss": 0.28994351625442505, "step": 1880, "token_acc": 0.9161849710982659 }, { "epoch": 0.3968354430379747, "grad_norm": 0.75390625, "learning_rate": 9.204521610209202e-07, "loss": 0.2730024456977844, "step": 1881, "token_acc": 0.9229598051157125 }, { "epoch": 0.39704641350210973, "grad_norm": 0.78515625, "learning_rate": 9.203606311098347e-07, "loss": 0.24187864363193512, "step": 1882, "token_acc": 0.9261853448275862 }, { "epoch": 0.3972573839662447, "grad_norm": 1.8984375, "learning_rate": 9.202690531269531e-07, "loss": 0.2717437148094177, "step": 1883, "token_acc": 0.9216603332358959 }, { "epoch": 0.39746835443037976, "grad_norm": 0.76171875, "learning_rate": 9.201774270827481e-07, "loss": 0.281715989112854, "step": 1884, "token_acc": 0.9242509892594686 }, { "epoch": 0.39767932489451474, "grad_norm": 0.7890625, "learning_rate": 9.200857529876978e-07, "loss": 0.2318015843629837, "step": 1885, "token_acc": 0.9304397815464214 }, { "epoch": 0.3978902953586498, "grad_norm": 0.65234375, "learning_rate": 9.199940308522862e-07, "loss": 0.27065324783325195, "step": 1886, "token_acc": 0.92619825708061 }, { "epoch": 0.3981012658227848, "grad_norm": 0.83984375, "learning_rate": 9.199022606870024e-07, "loss": 0.2438652217388153, "step": 1887, "token_acc": 0.9273247496423462 }, { "epoch": 0.3983122362869198, "grad_norm": 0.703125, "learning_rate": 9.198104425023411e-07, "loss": 0.24134406447410583, "step": 1888, "token_acc": 0.9285099052540913 }, { "epoch": 0.39852320675105485, "grad_norm": 0.9765625, "learning_rate": 9.197185763088024e-07, "loss": 0.24476462602615356, "step": 1889, "token_acc": 0.92578125 }, { "epoch": 0.3987341772151899, "grad_norm": 0.70703125, "learning_rate": 9.19626662116892e-07, "loss": 0.24153611063957214, "step": 1890, "token_acc": 0.9318809450751764 }, { "epoch": 0.3989451476793249, "grad_norm": 0.671875, "learning_rate": 9.195346999371211e-07, "loss": 0.271758496761322, "step": 1891, "token_acc": 0.9209603452926896 }, { "epoch": 0.3991561181434599, "grad_norm": 0.6328125, "learning_rate": 9.194426897800064e-07, "loss": 0.26978251338005066, "step": 1892, "token_acc": 0.9290673105946272 }, { "epoch": 0.39936708860759496, "grad_norm": 0.76171875, "learning_rate": 9.1935063165607e-07, "loss": 0.28870701789855957, "step": 1893, "token_acc": 0.9254198690577854 }, { "epoch": 0.39957805907172994, "grad_norm": 0.8125, "learning_rate": 9.192585255758394e-07, "loss": 0.2580242455005646, "step": 1894, "token_acc": 0.9339540296770439 }, { "epoch": 0.399789029535865, "grad_norm": 1.375, "learning_rate": 9.191663715498478e-07, "loss": 0.21934179961681366, "step": 1895, "token_acc": 0.9387427838357921 }, { "epoch": 0.4, "grad_norm": 0.80859375, "learning_rate": 9.19074169588634e-07, "loss": 0.27658939361572266, "step": 1896, "token_acc": 0.9278290993071594 }, { "epoch": 0.400210970464135, "grad_norm": 0.68359375, "learning_rate": 9.189819197027418e-07, "loss": 0.22759562730789185, "step": 1897, "token_acc": 0.9377700950734659 }, { "epoch": 0.40042194092827005, "grad_norm": 0.70703125, "learning_rate": 9.188896219027209e-07, "loss": 0.2513328790664673, "step": 1898, "token_acc": 0.9277489925158319 }, { "epoch": 0.4006329113924051, "grad_norm": 0.62890625, "learning_rate": 9.187972761991263e-07, "loss": 0.22657060623168945, "step": 1899, "token_acc": 0.935752688172043 }, { "epoch": 0.4008438818565401, "grad_norm": 1.0390625, "learning_rate": 9.187048826025184e-07, "loss": 0.2751932442188263, "step": 1900, "token_acc": 0.9230091096091684 }, { "epoch": 0.4010548523206751, "grad_norm": 0.7890625, "learning_rate": 9.186124411234632e-07, "loss": 0.27560490369796753, "step": 1901, "token_acc": 0.9197261978842564 }, { "epoch": 0.4012658227848101, "grad_norm": 0.7265625, "learning_rate": 9.185199517725324e-07, "loss": 0.24792620539665222, "step": 1902, "token_acc": 0.9330645161290323 }, { "epoch": 0.40147679324894514, "grad_norm": 0.87890625, "learning_rate": 9.184274145603029e-07, "loss": 0.3180472254753113, "step": 1903, "token_acc": 0.9188658057271196 }, { "epoch": 0.4016877637130802, "grad_norm": 0.71875, "learning_rate": 9.183348294973568e-07, "loss": 0.24376609921455383, "step": 1904, "token_acc": 0.9286992840095465 }, { "epoch": 0.40189873417721517, "grad_norm": 0.76171875, "learning_rate": 9.182421965942821e-07, "loss": 0.28026020526885986, "step": 1905, "token_acc": 0.9222288438617402 }, { "epoch": 0.4021097046413502, "grad_norm": 3.65625, "learning_rate": 9.181495158616725e-07, "loss": 0.25579574704170227, "step": 1906, "token_acc": 0.9286159600997507 }, { "epoch": 0.40232067510548525, "grad_norm": 0.6328125, "learning_rate": 9.180567873101265e-07, "loss": 0.23290948569774628, "step": 1907, "token_acc": 0.9333922782198645 }, { "epoch": 0.40253164556962023, "grad_norm": 0.5859375, "learning_rate": 9.179640109502484e-07, "loss": 0.2401217222213745, "step": 1908, "token_acc": 0.9341425619834711 }, { "epoch": 0.4027426160337553, "grad_norm": 0.8203125, "learning_rate": 9.17871186792648e-07, "loss": 0.26812300086021423, "step": 1909, "token_acc": 0.9327782917052112 }, { "epoch": 0.4029535864978903, "grad_norm": 0.7578125, "learning_rate": 9.177783148479408e-07, "loss": 0.2858043909072876, "step": 1910, "token_acc": 0.9216884198833657 }, { "epoch": 0.4031645569620253, "grad_norm": 0.8046875, "learning_rate": 9.176853951267469e-07, "loss": 0.23564667999744415, "step": 1911, "token_acc": 0.9349294045426643 }, { "epoch": 0.40337552742616034, "grad_norm": 0.75, "learning_rate": 9.175924276396931e-07, "loss": 0.24908998608589172, "step": 1912, "token_acc": 0.9320012890750886 }, { "epoch": 0.4035864978902954, "grad_norm": 0.796875, "learning_rate": 9.174994123974105e-07, "loss": 0.25956442952156067, "step": 1913, "token_acc": 0.9315411065958112 }, { "epoch": 0.40379746835443037, "grad_norm": 0.546875, "learning_rate": 9.174063494105366e-07, "loss": 0.1772785186767578, "step": 1914, "token_acc": 0.9457151570600792 }, { "epoch": 0.4040084388185654, "grad_norm": 1.078125, "learning_rate": 9.173132386897136e-07, "loss": 0.26592162251472473, "step": 1915, "token_acc": 0.9237465181058496 }, { "epoch": 0.40421940928270045, "grad_norm": 0.65234375, "learning_rate": 9.172200802455898e-07, "loss": 0.25685757398605347, "step": 1916, "token_acc": 0.9311940759024993 }, { "epoch": 0.40443037974683543, "grad_norm": 0.65234375, "learning_rate": 9.171268740888182e-07, "loss": 0.250326544046402, "step": 1917, "token_acc": 0.9311023622047244 }, { "epoch": 0.4046413502109705, "grad_norm": 1.1875, "learning_rate": 9.170336202300583e-07, "loss": 0.2700883150100708, "step": 1918, "token_acc": 0.9242227979274611 }, { "epoch": 0.40485232067510546, "grad_norm": 0.74609375, "learning_rate": 9.169403186799741e-07, "loss": 0.2698970437049866, "step": 1919, "token_acc": 0.9252514270182115 }, { "epoch": 0.4050632911392405, "grad_norm": 0.7421875, "learning_rate": 9.168469694492355e-07, "loss": 0.28905174136161804, "step": 1920, "token_acc": 0.9209056159952955 }, { "epoch": 0.40527426160337554, "grad_norm": 0.73046875, "learning_rate": 9.167535725485178e-07, "loss": 0.23809051513671875, "step": 1921, "token_acc": 0.9302013422818792 }, { "epoch": 0.4054852320675105, "grad_norm": 0.78515625, "learning_rate": 9.166601279885017e-07, "loss": 0.23999746143817902, "step": 1922, "token_acc": 0.935251798561151 }, { "epoch": 0.40569620253164557, "grad_norm": 0.6796875, "learning_rate": 9.165666357798733e-07, "loss": 0.23197510838508606, "step": 1923, "token_acc": 0.9327980969372583 }, { "epoch": 0.4059071729957806, "grad_norm": 0.86328125, "learning_rate": 9.164730959333245e-07, "loss": 0.29541683197021484, "step": 1924, "token_acc": 0.9220892909250072 }, { "epoch": 0.4061181434599156, "grad_norm": 0.60546875, "learning_rate": 9.16379508459552e-07, "loss": 0.27687525749206543, "step": 1925, "token_acc": 0.9200790737079921 }, { "epoch": 0.40632911392405063, "grad_norm": 1.2109375, "learning_rate": 9.162858733692585e-07, "loss": 0.3053325116634369, "step": 1926, "token_acc": 0.916566265060241 }, { "epoch": 0.4065400843881857, "grad_norm": 0.69140625, "learning_rate": 9.16192190673152e-07, "loss": 0.2050454467535019, "step": 1927, "token_acc": 0.9362162162162162 }, { "epoch": 0.40675105485232066, "grad_norm": 0.7734375, "learning_rate": 9.160984603819459e-07, "loss": 0.2896081805229187, "step": 1928, "token_acc": 0.9183372641509434 }, { "epoch": 0.4069620253164557, "grad_norm": 0.75390625, "learning_rate": 9.160046825063591e-07, "loss": 0.30498236417770386, "step": 1929, "token_acc": 0.925463948889565 }, { "epoch": 0.40717299578059074, "grad_norm": 0.7109375, "learning_rate": 9.159108570571157e-07, "loss": 0.26336434483528137, "step": 1930, "token_acc": 0.9244799088059276 }, { "epoch": 0.4073839662447257, "grad_norm": 0.78515625, "learning_rate": 9.158169840449457e-07, "loss": 0.2476760447025299, "step": 1931, "token_acc": 0.9316361556064073 }, { "epoch": 0.40759493670886077, "grad_norm": 0.8359375, "learning_rate": 9.157230634805839e-07, "loss": 0.23430106043815613, "step": 1932, "token_acc": 0.9355336212214682 }, { "epoch": 0.4078059071729958, "grad_norm": 0.7421875, "learning_rate": 9.156290953747714e-07, "loss": 0.2746797502040863, "step": 1933, "token_acc": 0.9278319123020706 }, { "epoch": 0.4080168776371308, "grad_norm": 0.76171875, "learning_rate": 9.155350797382537e-07, "loss": 0.2650185525417328, "step": 1934, "token_acc": 0.9247412982126059 }, { "epoch": 0.40822784810126583, "grad_norm": 0.7421875, "learning_rate": 9.154410165817828e-07, "loss": 0.30540603399276733, "step": 1935, "token_acc": 0.9162790697674419 }, { "epoch": 0.4084388185654008, "grad_norm": 0.78515625, "learning_rate": 9.153469059161153e-07, "loss": 0.27233344316482544, "step": 1936, "token_acc": 0.9290521592821088 }, { "epoch": 0.40864978902953586, "grad_norm": 2.515625, "learning_rate": 9.152527477520137e-07, "loss": 0.3095840811729431, "step": 1937, "token_acc": 0.9161658653846154 }, { "epoch": 0.4088607594936709, "grad_norm": 1.390625, "learning_rate": 9.151585421002457e-07, "loss": 0.31651750206947327, "step": 1938, "token_acc": 0.9165916591659166 }, { "epoch": 0.4090717299578059, "grad_norm": 0.75, "learning_rate": 9.150642889715845e-07, "loss": 0.26749229431152344, "step": 1939, "token_acc": 0.9320086929524992 }, { "epoch": 0.4092827004219409, "grad_norm": 0.6484375, "learning_rate": 9.149699883768088e-07, "loss": 0.21710315346717834, "step": 1940, "token_acc": 0.9348079161816065 }, { "epoch": 0.40949367088607597, "grad_norm": 0.73828125, "learning_rate": 9.148756403267026e-07, "loss": 0.34100958704948425, "step": 1941, "token_acc": 0.9185356200527705 }, { "epoch": 0.40970464135021095, "grad_norm": 0.7734375, "learning_rate": 9.147812448320554e-07, "loss": 0.25636026263237, "step": 1942, "token_acc": 0.9277210884353742 }, { "epoch": 0.409915611814346, "grad_norm": 0.75390625, "learning_rate": 9.146868019036623e-07, "loss": 0.31487444043159485, "step": 1943, "token_acc": 0.916615194564546 }, { "epoch": 0.41012658227848103, "grad_norm": 0.83984375, "learning_rate": 9.145923115523236e-07, "loss": 0.2474036067724228, "step": 1944, "token_acc": 0.932229377491567 }, { "epoch": 0.410337552742616, "grad_norm": 0.8046875, "learning_rate": 9.144977737888448e-07, "loss": 0.23942765593528748, "step": 1945, "token_acc": 0.935179358086847 }, { "epoch": 0.41054852320675106, "grad_norm": 0.71875, "learning_rate": 9.144031886240373e-07, "loss": 0.321605384349823, "step": 1946, "token_acc": 0.9218192627824019 }, { "epoch": 0.4107594936708861, "grad_norm": 0.63671875, "learning_rate": 9.143085560687179e-07, "loss": 0.24766230583190918, "step": 1947, "token_acc": 0.9277746793084216 }, { "epoch": 0.4109704641350211, "grad_norm": 0.7421875, "learning_rate": 9.142138761337082e-07, "loss": 0.26399165391921997, "step": 1948, "token_acc": 0.9282596835788325 }, { "epoch": 0.4111814345991561, "grad_norm": 0.69921875, "learning_rate": 9.141191488298361e-07, "loss": 0.2344694286584854, "step": 1949, "token_acc": 0.9286647504572773 }, { "epoch": 0.41139240506329117, "grad_norm": 0.69140625, "learning_rate": 9.140243741679341e-07, "loss": 0.20495407283306122, "step": 1950, "token_acc": 0.937727724412057 }, { "epoch": 0.41160337552742615, "grad_norm": 0.7265625, "learning_rate": 9.139295521588406e-07, "loss": 0.2814640998840332, "step": 1951, "token_acc": 0.9239944521497919 }, { "epoch": 0.4118143459915612, "grad_norm": 0.6875, "learning_rate": 9.138346828133995e-07, "loss": 0.257242351770401, "step": 1952, "token_acc": 0.9294289897510981 }, { "epoch": 0.4120253164556962, "grad_norm": 0.7734375, "learning_rate": 9.137397661424596e-07, "loss": 0.30631783604621887, "step": 1953, "token_acc": 0.9265954533004657 }, { "epoch": 0.4122362869198312, "grad_norm": 0.66796875, "learning_rate": 9.136448021568757e-07, "loss": 0.2396744191646576, "step": 1954, "token_acc": 0.9292063492063493 }, { "epoch": 0.41244725738396626, "grad_norm": 0.54296875, "learning_rate": 9.135497908675076e-07, "loss": 0.2213076651096344, "step": 1955, "token_acc": 0.9362022269034005 }, { "epoch": 0.41265822784810124, "grad_norm": 0.73828125, "learning_rate": 9.134547322852206e-07, "loss": 0.2861933708190918, "step": 1956, "token_acc": 0.9219481735872619 }, { "epoch": 0.4128691983122363, "grad_norm": 0.71875, "learning_rate": 9.133596264208856e-07, "loss": 0.2793295383453369, "step": 1957, "token_acc": 0.9232751216719153 }, { "epoch": 0.4130801687763713, "grad_norm": 0.78515625, "learning_rate": 9.132644732853785e-07, "loss": 0.23415514826774597, "step": 1958, "token_acc": 0.936848752762867 }, { "epoch": 0.4132911392405063, "grad_norm": 0.7109375, "learning_rate": 9.131692728895811e-07, "loss": 0.29440468549728394, "step": 1959, "token_acc": 0.9173300673606859 }, { "epoch": 0.41350210970464135, "grad_norm": 0.62890625, "learning_rate": 9.130740252443803e-07, "loss": 0.25713711977005005, "step": 1960, "token_acc": 0.9248366013071896 }, { "epoch": 0.4137130801687764, "grad_norm": 0.734375, "learning_rate": 9.129787303606687e-07, "loss": 0.28025707602500916, "step": 1961, "token_acc": 0.9208227669766131 }, { "epoch": 0.4139240506329114, "grad_norm": 0.83984375, "learning_rate": 9.128833882493436e-07, "loss": 0.25988930463790894, "step": 1962, "token_acc": 0.9253945480631277 }, { "epoch": 0.4141350210970464, "grad_norm": 1.015625, "learning_rate": 9.127879989213086e-07, "loss": 0.2681414484977722, "step": 1963, "token_acc": 0.924223602484472 }, { "epoch": 0.41434599156118146, "grad_norm": 0.609375, "learning_rate": 9.126925623874719e-07, "loss": 0.23518618941307068, "step": 1964, "token_acc": 0.9347695990424896 }, { "epoch": 0.41455696202531644, "grad_norm": 0.84375, "learning_rate": 9.125970786587479e-07, "loss": 0.2538262605667114, "step": 1965, "token_acc": 0.9263786242183059 }, { "epoch": 0.4147679324894515, "grad_norm": 0.80078125, "learning_rate": 9.125015477460556e-07, "loss": 0.2897471785545349, "step": 1966, "token_acc": 0.9163509180996794 }, { "epoch": 0.41497890295358647, "grad_norm": 0.6640625, "learning_rate": 9.124059696603201e-07, "loss": 0.2630343437194824, "step": 1967, "token_acc": 0.9244324970131422 }, { "epoch": 0.4151898734177215, "grad_norm": 0.7421875, "learning_rate": 9.123103444124713e-07, "loss": 0.25306540727615356, "step": 1968, "token_acc": 0.9272513933704899 }, { "epoch": 0.41540084388185655, "grad_norm": 0.62109375, "learning_rate": 9.12214672013445e-07, "loss": 0.22293509542942047, "step": 1969, "token_acc": 0.9381590196637218 }, { "epoch": 0.41561181434599154, "grad_norm": 0.5625, "learning_rate": 9.121189524741817e-07, "loss": 0.23128658533096313, "step": 1970, "token_acc": 0.9351984013702541 }, { "epoch": 0.4158227848101266, "grad_norm": 0.54296875, "learning_rate": 9.120231858056282e-07, "loss": 0.2618774175643921, "step": 1971, "token_acc": 0.9295605057194462 }, { "epoch": 0.4160337552742616, "grad_norm": 0.72265625, "learning_rate": 9.119273720187361e-07, "loss": 0.25696003437042236, "step": 1972, "token_acc": 0.9253450439146801 }, { "epoch": 0.4162447257383966, "grad_norm": 1.1328125, "learning_rate": 9.118315111244624e-07, "loss": 0.22687079012393951, "step": 1973, "token_acc": 0.9345570630486831 }, { "epoch": 0.41645569620253164, "grad_norm": 0.8671875, "learning_rate": 9.117356031337698e-07, "loss": 0.25750643014907837, "step": 1974, "token_acc": 0.9301022090339598 }, { "epoch": 0.4166666666666667, "grad_norm": 0.91796875, "learning_rate": 9.11639648057626e-07, "loss": 0.2913302481174469, "step": 1975, "token_acc": 0.9276166456494326 }, { "epoch": 0.41687763713080167, "grad_norm": 0.86328125, "learning_rate": 9.115436459070044e-07, "loss": 0.2763899564743042, "step": 1976, "token_acc": 0.9230769230769231 }, { "epoch": 0.4170886075949367, "grad_norm": 0.640625, "learning_rate": 9.114475966928836e-07, "loss": 0.2513253390789032, "step": 1977, "token_acc": 0.9275092936802974 }, { "epoch": 0.41729957805907175, "grad_norm": 0.72265625, "learning_rate": 9.113515004262475e-07, "loss": 0.24095700681209564, "step": 1978, "token_acc": 0.9372488408037094 }, { "epoch": 0.41751054852320674, "grad_norm": 0.765625, "learning_rate": 9.112553571180858e-07, "loss": 0.2281455397605896, "step": 1979, "token_acc": 0.9321780699133553 }, { "epoch": 0.4177215189873418, "grad_norm": 0.765625, "learning_rate": 9.111591667793933e-07, "loss": 0.23691600561141968, "step": 1980, "token_acc": 0.934855403348554 }, { "epoch": 0.4179324894514768, "grad_norm": 0.78515625, "learning_rate": 9.1106292942117e-07, "loss": 0.25448983907699585, "step": 1981, "token_acc": 0.9298789947254111 }, { "epoch": 0.4181434599156118, "grad_norm": 1.265625, "learning_rate": 9.109666450544213e-07, "loss": 0.2553502023220062, "step": 1982, "token_acc": 0.93343653250774 }, { "epoch": 0.41835443037974684, "grad_norm": 0.7734375, "learning_rate": 9.108703136901587e-07, "loss": 0.28221210837364197, "step": 1983, "token_acc": 0.9225239616613419 }, { "epoch": 0.41856540084388183, "grad_norm": 0.71875, "learning_rate": 9.10773935339398e-07, "loss": 0.27611416578292847, "step": 1984, "token_acc": 0.9298349056603774 }, { "epoch": 0.41877637130801687, "grad_norm": 0.78515625, "learning_rate": 9.106775100131608e-07, "loss": 0.2644606828689575, "step": 1985, "token_acc": 0.9235033259423503 }, { "epoch": 0.4189873417721519, "grad_norm": 0.64453125, "learning_rate": 9.105810377224745e-07, "loss": 0.2672709822654724, "step": 1986, "token_acc": 0.927710843373494 }, { "epoch": 0.4191983122362869, "grad_norm": 0.71484375, "learning_rate": 9.104845184783716e-07, "loss": 0.2812398076057434, "step": 1987, "token_acc": 0.9225935447015139 }, { "epoch": 0.41940928270042194, "grad_norm": 0.5546875, "learning_rate": 9.103879522918896e-07, "loss": 0.26493459939956665, "step": 1988, "token_acc": 0.9258202567760342 }, { "epoch": 0.419620253164557, "grad_norm": 0.67578125, "learning_rate": 9.102913391740716e-07, "loss": 0.23307015001773834, "step": 1989, "token_acc": 0.936281241417193 }, { "epoch": 0.41983122362869196, "grad_norm": 0.99609375, "learning_rate": 9.101946791359665e-07, "loss": 0.28101563453674316, "step": 1990, "token_acc": 0.923546511627907 }, { "epoch": 0.420042194092827, "grad_norm": 0.6953125, "learning_rate": 9.100979721886279e-07, "loss": 0.24740473926067352, "step": 1991, "token_acc": 0.9285905322278684 }, { "epoch": 0.42025316455696204, "grad_norm": 0.7578125, "learning_rate": 9.100012183431152e-07, "loss": 0.25005412101745605, "step": 1992, "token_acc": 0.9281230382925298 }, { "epoch": 0.42046413502109703, "grad_norm": 0.7578125, "learning_rate": 9.099044176104929e-07, "loss": 0.20773842930793762, "step": 1993, "token_acc": 0.9379124175164967 }, { "epoch": 0.42067510548523207, "grad_norm": 0.859375, "learning_rate": 9.098075700018311e-07, "loss": 0.28273704648017883, "step": 1994, "token_acc": 0.9198189460071128 }, { "epoch": 0.4208860759493671, "grad_norm": 1.0546875, "learning_rate": 9.097106755282049e-07, "loss": 0.28721871972084045, "step": 1995, "token_acc": 0.9265134347685335 }, { "epoch": 0.4210970464135021, "grad_norm": 1.8359375, "learning_rate": 9.096137342006953e-07, "loss": 0.27081963419914246, "step": 1996, "token_acc": 0.9250157529930687 }, { "epoch": 0.42130801687763714, "grad_norm": 0.640625, "learning_rate": 9.095167460303883e-07, "loss": 0.23872046172618866, "step": 1997, "token_acc": 0.9386761842959117 }, { "epoch": 0.4215189873417722, "grad_norm": 0.66796875, "learning_rate": 9.094197110283752e-07, "loss": 0.26904159784317017, "step": 1998, "token_acc": 0.9290484140233722 }, { "epoch": 0.42172995780590716, "grad_norm": 0.98828125, "learning_rate": 9.093226292057529e-07, "loss": 0.3138263523578644, "step": 1999, "token_acc": 0.9206625980819529 }, { "epoch": 0.4219409282700422, "grad_norm": 0.55078125, "learning_rate": 9.092255005736236e-07, "loss": 0.1999545842409134, "step": 2000, "token_acc": 0.9382038694773318 }, { "epoch": 0.4219409282700422, "eval_loss": 0.43380746245384216, "eval_runtime": 245.6162, "eval_samples_per_second": 137.226, "eval_steps_per_second": 2.146, "eval_token_acc": 0.8990778964372251, "step": 2000 }, { "epoch": 0.4221518987341772, "grad_norm": 0.90234375, "learning_rate": 9.091283251430943e-07, "loss": 0.28299134969711304, "step": 2001, "token_acc": 0.9268440145102781 }, { "epoch": 0.42236286919831223, "grad_norm": 0.9140625, "learning_rate": 9.090311029252785e-07, "loss": 0.30973243713378906, "step": 2002, "token_acc": 0.9175925925925926 }, { "epoch": 0.42257383966244727, "grad_norm": 0.72265625, "learning_rate": 9.08933833931294e-07, "loss": 0.2593654990196228, "step": 2003, "token_acc": 0.9329677026203534 }, { "epoch": 0.42278481012658226, "grad_norm": 0.71875, "learning_rate": 9.088365181722644e-07, "loss": 0.302176296710968, "step": 2004, "token_acc": 0.9175856065002902 }, { "epoch": 0.4229957805907173, "grad_norm": 0.73046875, "learning_rate": 9.087391556593185e-07, "loss": 0.22570964694023132, "step": 2005, "token_acc": 0.9386650631389056 }, { "epoch": 0.42320675105485234, "grad_norm": 0.7421875, "learning_rate": 9.086417464035907e-07, "loss": 0.2694271206855774, "step": 2006, "token_acc": 0.921993216801461 }, { "epoch": 0.4234177215189873, "grad_norm": 0.875, "learning_rate": 9.085442904162203e-07, "loss": 0.3252578675746918, "step": 2007, "token_acc": 0.9135548368127021 }, { "epoch": 0.42362869198312236, "grad_norm": 0.7109375, "learning_rate": 9.084467877083526e-07, "loss": 0.238933265209198, "step": 2008, "token_acc": 0.9316681022107379 }, { "epoch": 0.4238396624472574, "grad_norm": 0.8046875, "learning_rate": 9.083492382911377e-07, "loss": 0.24017579853534698, "step": 2009, "token_acc": 0.9259684361549498 }, { "epoch": 0.4240506329113924, "grad_norm": 0.65234375, "learning_rate": 9.08251642175731e-07, "loss": 0.26246100664138794, "step": 2010, "token_acc": 0.9270131968758416 }, { "epoch": 0.42426160337552743, "grad_norm": 0.87890625, "learning_rate": 9.081539993732936e-07, "loss": 0.2740520238876343, "step": 2011, "token_acc": 0.9259037944427846 }, { "epoch": 0.42447257383966247, "grad_norm": 0.91796875, "learning_rate": 9.080563098949919e-07, "loss": 0.3123852610588074, "step": 2012, "token_acc": 0.917912822144448 }, { "epoch": 0.42468354430379746, "grad_norm": 0.796875, "learning_rate": 9.079585737519973e-07, "loss": 0.28245824575424194, "step": 2013, "token_acc": 0.9229088639200999 }, { "epoch": 0.4248945147679325, "grad_norm": 0.765625, "learning_rate": 9.078607909554869e-07, "loss": 0.28732019662857056, "step": 2014, "token_acc": 0.9224526600541028 }, { "epoch": 0.42510548523206754, "grad_norm": 0.859375, "learning_rate": 9.07762961516643e-07, "loss": 0.2762022912502289, "step": 2015, "token_acc": 0.9247247842903897 }, { "epoch": 0.4253164556962025, "grad_norm": 0.8359375, "learning_rate": 9.076650854466532e-07, "loss": 0.24369433522224426, "step": 2016, "token_acc": 0.9378579610538373 }, { "epoch": 0.42552742616033756, "grad_norm": 0.765625, "learning_rate": 9.075671627567103e-07, "loss": 0.2983117699623108, "step": 2017, "token_acc": 0.9194429273620972 }, { "epoch": 0.42573839662447255, "grad_norm": 0.77734375, "learning_rate": 9.074691934580128e-07, "loss": 0.22820597887039185, "step": 2018, "token_acc": 0.932734125833575 }, { "epoch": 0.4259493670886076, "grad_norm": 1.09375, "learning_rate": 9.073711775617643e-07, "loss": 0.24428993463516235, "step": 2019, "token_acc": 0.9322805139186295 }, { "epoch": 0.42616033755274263, "grad_norm": 0.625, "learning_rate": 9.072731150791735e-07, "loss": 0.25292932987213135, "step": 2020, "token_acc": 0.927382319173364 }, { "epoch": 0.4263713080168776, "grad_norm": 0.72265625, "learning_rate": 9.07175006021455e-07, "loss": 0.23769620060920715, "step": 2021, "token_acc": 0.9357142857142857 }, { "epoch": 0.42658227848101266, "grad_norm": 0.65625, "learning_rate": 9.070768503998282e-07, "loss": 0.2435709685087204, "step": 2022, "token_acc": 0.9310344827586207 }, { "epoch": 0.4267932489451477, "grad_norm": 0.640625, "learning_rate": 9.069786482255182e-07, "loss": 0.28426623344421387, "step": 2023, "token_acc": 0.9265389082462253 }, { "epoch": 0.4270042194092827, "grad_norm": 0.66015625, "learning_rate": 9.068803995097549e-07, "loss": 0.30951380729675293, "step": 2024, "token_acc": 0.9211136890951276 }, { "epoch": 0.4272151898734177, "grad_norm": 0.64453125, "learning_rate": 9.067821042637742e-07, "loss": 0.259574294090271, "step": 2025, "token_acc": 0.9300994275384152 }, { "epoch": 0.42742616033755276, "grad_norm": 0.58984375, "learning_rate": 9.066837624988171e-07, "loss": 0.27088677883148193, "step": 2026, "token_acc": 0.9207119741100324 }, { "epoch": 0.42763713080168775, "grad_norm": 0.734375, "learning_rate": 9.065853742261293e-07, "loss": 0.2423410415649414, "step": 2027, "token_acc": 0.9306083650190115 }, { "epoch": 0.4278481012658228, "grad_norm": 1.0625, "learning_rate": 9.06486939456963e-07, "loss": 0.2796346843242645, "step": 2028, "token_acc": 0.9206700379266751 }, { "epoch": 0.42805907172995783, "grad_norm": 2.234375, "learning_rate": 9.063884582025745e-07, "loss": 0.27198976278305054, "step": 2029, "token_acc": 0.9281923197002809 }, { "epoch": 0.4282700421940928, "grad_norm": 0.6328125, "learning_rate": 9.062899304742261e-07, "loss": 0.24360674619674683, "step": 2030, "token_acc": 0.9331550802139037 }, { "epoch": 0.42848101265822786, "grad_norm": 0.74609375, "learning_rate": 9.061913562831853e-07, "loss": 0.23410849273204803, "step": 2031, "token_acc": 0.9399752475247525 }, { "epoch": 0.4286919831223629, "grad_norm": 0.9453125, "learning_rate": 9.060927356407251e-07, "loss": 0.24269482493400574, "step": 2032, "token_acc": 0.9330185895544408 }, { "epoch": 0.4289029535864979, "grad_norm": 0.80078125, "learning_rate": 9.059940685581233e-07, "loss": 0.24459296464920044, "step": 2033, "token_acc": 0.9313672922252011 }, { "epoch": 0.4291139240506329, "grad_norm": 0.7265625, "learning_rate": 9.058953550466638e-07, "loss": 0.27956610918045044, "step": 2034, "token_acc": 0.9240992321323095 }, { "epoch": 0.4293248945147679, "grad_norm": 0.921875, "learning_rate": 9.057965951176345e-07, "loss": 0.2567591369152069, "step": 2035, "token_acc": 0.9278495227400337 }, { "epoch": 0.42953586497890295, "grad_norm": 0.55078125, "learning_rate": 9.056977887823302e-07, "loss": 0.2104652225971222, "step": 2036, "token_acc": 0.9372947148402508 }, { "epoch": 0.429746835443038, "grad_norm": 0.5546875, "learning_rate": 9.055989360520498e-07, "loss": 0.21626508235931396, "step": 2037, "token_acc": 0.9346444780635401 }, { "epoch": 0.429957805907173, "grad_norm": 0.73828125, "learning_rate": 9.055000369380983e-07, "loss": 0.24856555461883545, "step": 2038, "token_acc": 0.9285312324141812 }, { "epoch": 0.430168776371308, "grad_norm": 0.7265625, "learning_rate": 9.054010914517852e-07, "loss": 0.23962201178073883, "step": 2039, "token_acc": 0.9331395348837209 }, { "epoch": 0.43037974683544306, "grad_norm": 0.6171875, "learning_rate": 9.053020996044261e-07, "loss": 0.22111834585666656, "step": 2040, "token_acc": 0.929965249933173 }, { "epoch": 0.43059071729957804, "grad_norm": 0.68359375, "learning_rate": 9.052030614073416e-07, "loss": 0.23133578896522522, "step": 2041, "token_acc": 0.9337461300309597 }, { "epoch": 0.4308016877637131, "grad_norm": 0.8046875, "learning_rate": 9.051039768718572e-07, "loss": 0.2812628448009491, "step": 2042, "token_acc": 0.9241266375545851 }, { "epoch": 0.4310126582278481, "grad_norm": 0.6640625, "learning_rate": 9.050048460093045e-07, "loss": 0.2149478793144226, "step": 2043, "token_acc": 0.9357326478149101 }, { "epoch": 0.4312236286919831, "grad_norm": 0.796875, "learning_rate": 9.049056688310196e-07, "loss": 0.29091382026672363, "step": 2044, "token_acc": 0.921418826739427 }, { "epoch": 0.43143459915611815, "grad_norm": 0.96875, "learning_rate": 9.048064453483444e-07, "loss": 0.29459765553474426, "step": 2045, "token_acc": 0.924181696726787 }, { "epoch": 0.4316455696202532, "grad_norm": 0.66796875, "learning_rate": 9.047071755726259e-07, "loss": 0.23687945306301117, "step": 2046, "token_acc": 0.9377119950662967 }, { "epoch": 0.4318565400843882, "grad_norm": 0.61328125, "learning_rate": 9.046078595152165e-07, "loss": 0.2584837079048157, "step": 2047, "token_acc": 0.926093916755603 }, { "epoch": 0.4320675105485232, "grad_norm": 0.9296875, "learning_rate": 9.045084971874737e-07, "loss": 0.2333284616470337, "step": 2048, "token_acc": 0.9298853639029592 }, { "epoch": 0.43227848101265826, "grad_norm": 0.68359375, "learning_rate": 9.044090886007605e-07, "loss": 0.30107617378234863, "step": 2049, "token_acc": 0.9216965742251223 }, { "epoch": 0.43248945147679324, "grad_norm": 0.890625, "learning_rate": 9.043096337664454e-07, "loss": 0.3218232989311218, "step": 2050, "token_acc": 0.915603532875368 }, { "epoch": 0.4327004219409283, "grad_norm": 0.69140625, "learning_rate": 9.042101326959013e-07, "loss": 0.2680596709251404, "step": 2051, "token_acc": 0.9270833333333334 }, { "epoch": 0.43291139240506327, "grad_norm": 0.60546875, "learning_rate": 9.041105854005075e-07, "loss": 0.20740154385566711, "step": 2052, "token_acc": 0.9399198931909212 }, { "epoch": 0.4331223628691983, "grad_norm": 0.7421875, "learning_rate": 9.040109918916478e-07, "loss": 0.26231849193573, "step": 2053, "token_acc": 0.9298590626024255 }, { "epoch": 0.43333333333333335, "grad_norm": 0.9921875, "learning_rate": 9.039113521807117e-07, "loss": 0.25735440850257874, "step": 2054, "token_acc": 0.92787913340935 }, { "epoch": 0.43354430379746833, "grad_norm": 0.625, "learning_rate": 9.038116662790938e-07, "loss": 0.2551949620246887, "step": 2055, "token_acc": 0.9349544072948328 }, { "epoch": 0.4337552742616034, "grad_norm": 0.671875, "learning_rate": 9.037119341981941e-07, "loss": 0.2311941534280777, "step": 2056, "token_acc": 0.9320836729149687 }, { "epoch": 0.4339662447257384, "grad_norm": 0.69140625, "learning_rate": 9.036121559494175e-07, "loss": 0.24836215376853943, "step": 2057, "token_acc": 0.926224426534408 }, { "epoch": 0.4341772151898734, "grad_norm": 0.7578125, "learning_rate": 9.035123315441748e-07, "loss": 0.28423619270324707, "step": 2058, "token_acc": 0.9228350957416405 }, { "epoch": 0.43438818565400844, "grad_norm": 0.74609375, "learning_rate": 9.034124609938818e-07, "loss": 0.2160068154335022, "step": 2059, "token_acc": 0.9409814323607427 }, { "epoch": 0.4345991561181435, "grad_norm": 0.62890625, "learning_rate": 9.033125443099594e-07, "loss": 0.23352009057998657, "step": 2060, "token_acc": 0.933939393939394 }, { "epoch": 0.43481012658227847, "grad_norm": 0.6171875, "learning_rate": 9.03212581503834e-07, "loss": 0.2348017394542694, "step": 2061, "token_acc": 0.9326923076923077 }, { "epoch": 0.4350210970464135, "grad_norm": 0.640625, "learning_rate": 9.03112572586937e-07, "loss": 0.21551939845085144, "step": 2062, "token_acc": 0.9380373554699081 }, { "epoch": 0.43523206751054855, "grad_norm": 0.9453125, "learning_rate": 9.030125175707057e-07, "loss": 0.24949979782104492, "step": 2063, "token_acc": 0.9251488517153388 }, { "epoch": 0.43544303797468353, "grad_norm": 0.7109375, "learning_rate": 9.029124164665815e-07, "loss": 0.26215875148773193, "step": 2064, "token_acc": 0.93125 }, { "epoch": 0.4356540084388186, "grad_norm": 0.65625, "learning_rate": 9.028122692860126e-07, "loss": 0.26301145553588867, "step": 2065, "token_acc": 0.9341355288157062 }, { "epoch": 0.43586497890295356, "grad_norm": 0.6640625, "learning_rate": 9.027120760404511e-07, "loss": 0.2367100715637207, "step": 2066, "token_acc": 0.9342258440046566 }, { "epoch": 0.4360759493670886, "grad_norm": 0.71875, "learning_rate": 9.026118367413554e-07, "loss": 0.2691326141357422, "step": 2067, "token_acc": 0.9281159420289855 }, { "epoch": 0.43628691983122364, "grad_norm": 0.734375, "learning_rate": 9.025115514001885e-07, "loss": 0.2248142808675766, "step": 2068, "token_acc": 0.9379799173065564 }, { "epoch": 0.4364978902953586, "grad_norm": 0.76953125, "learning_rate": 9.024112200284187e-07, "loss": 0.2388959378004074, "step": 2069, "token_acc": 0.9336244541484716 }, { "epoch": 0.43670886075949367, "grad_norm": 0.67578125, "learning_rate": 9.023108426375198e-07, "loss": 0.22400923073291779, "step": 2070, "token_acc": 0.9390096618357487 }, { "epoch": 0.4369198312236287, "grad_norm": 0.671875, "learning_rate": 9.022104192389712e-07, "loss": 0.2216733992099762, "step": 2071, "token_acc": 0.9343434343434344 }, { "epoch": 0.4371308016877637, "grad_norm": 0.703125, "learning_rate": 9.021099498442568e-07, "loss": 0.2284812182188034, "step": 2072, "token_acc": 0.9399889380530974 }, { "epoch": 0.43734177215189873, "grad_norm": 1.328125, "learning_rate": 9.020094344648661e-07, "loss": 0.2963327169418335, "step": 2073, "token_acc": 0.9173697992435264 }, { "epoch": 0.4375527426160338, "grad_norm": 0.6875, "learning_rate": 9.019088731122941e-07, "loss": 0.22249482572078705, "step": 2074, "token_acc": 0.9357463524130191 }, { "epoch": 0.43776371308016876, "grad_norm": 0.55078125, "learning_rate": 9.018082657980407e-07, "loss": 0.23072096705436707, "step": 2075, "token_acc": 0.9282548476454293 }, { "epoch": 0.4379746835443038, "grad_norm": 0.6796875, "learning_rate": 9.017076125336111e-07, "loss": 0.26543164253234863, "step": 2076, "token_acc": 0.9238235294117647 }, { "epoch": 0.43818565400843884, "grad_norm": 0.8125, "learning_rate": 9.016069133305162e-07, "loss": 0.26815545558929443, "step": 2077, "token_acc": 0.9215811284666879 }, { "epoch": 0.4383966244725738, "grad_norm": 0.69140625, "learning_rate": 9.015061682002714e-07, "loss": 0.2680891454219818, "step": 2078, "token_acc": 0.9266609145815358 }, { "epoch": 0.43860759493670887, "grad_norm": 0.81640625, "learning_rate": 9.014053771543981e-07, "loss": 0.25884371995925903, "step": 2079, "token_acc": 0.9298716800954939 }, { "epoch": 0.4388185654008439, "grad_norm": 0.78515625, "learning_rate": 9.013045402044224e-07, "loss": 0.25434935092926025, "step": 2080, "token_acc": 0.9343497199751089 }, { "epoch": 0.4390295358649789, "grad_norm": 0.99609375, "learning_rate": 9.012036573618757e-07, "loss": 0.2669466435909271, "step": 2081, "token_acc": 0.9280685644322008 }, { "epoch": 0.43924050632911393, "grad_norm": 1.25, "learning_rate": 9.011027286382953e-07, "loss": 0.24821606278419495, "step": 2082, "token_acc": 0.9322137404580153 }, { "epoch": 0.4394514767932489, "grad_norm": 0.67578125, "learning_rate": 9.010017540452228e-07, "loss": 0.28434619307518005, "step": 2083, "token_acc": 0.92463509085493 }, { "epoch": 0.43966244725738396, "grad_norm": 0.765625, "learning_rate": 9.009007335942058e-07, "loss": 0.28264421224594116, "step": 2084, "token_acc": 0.9226569608735213 }, { "epoch": 0.439873417721519, "grad_norm": 0.6484375, "learning_rate": 9.007996672967968e-07, "loss": 0.2543209493160248, "step": 2085, "token_acc": 0.9261838440111421 }, { "epoch": 0.440084388185654, "grad_norm": 0.66015625, "learning_rate": 9.006985551645533e-07, "loss": 0.25419509410858154, "step": 2086, "token_acc": 0.9327782917052112 }, { "epoch": 0.440295358649789, "grad_norm": 0.625, "learning_rate": 9.005973972090386e-07, "loss": 0.27107924222946167, "step": 2087, "token_acc": 0.925207756232687 }, { "epoch": 0.44050632911392407, "grad_norm": 0.64453125, "learning_rate": 9.004961934418209e-07, "loss": 0.23417389392852783, "step": 2088, "token_acc": 0.936888608393815 }, { "epoch": 0.44071729957805905, "grad_norm": 0.67578125, "learning_rate": 9.003949438744738e-07, "loss": 0.24583196640014648, "step": 2089, "token_acc": 0.9249321676213446 }, { "epoch": 0.4409282700421941, "grad_norm": 0.7578125, "learning_rate": 9.002936485185758e-07, "loss": 0.30659905076026917, "step": 2090, "token_acc": 0.9176837922225376 }, { "epoch": 0.44113924050632913, "grad_norm": 0.8125, "learning_rate": 9.001923073857112e-07, "loss": 0.29318469762802124, "step": 2091, "token_acc": 0.9294618425053207 }, { "epoch": 0.4413502109704641, "grad_norm": 1.140625, "learning_rate": 9.000909204874689e-07, "loss": 0.2595710754394531, "step": 2092, "token_acc": 0.9296212549462973 }, { "epoch": 0.44156118143459916, "grad_norm": 0.95703125, "learning_rate": 8.999894878354435e-07, "loss": 0.21192219853401184, "step": 2093, "token_acc": 0.9372086646558816 }, { "epoch": 0.4417721518987342, "grad_norm": 0.86328125, "learning_rate": 8.998880094412347e-07, "loss": 0.22217011451721191, "step": 2094, "token_acc": 0.9376406300225008 }, { "epoch": 0.4419831223628692, "grad_norm": 0.70703125, "learning_rate": 8.997864853164474e-07, "loss": 0.23877094686031342, "step": 2095, "token_acc": 0.9314504265960577 }, { "epoch": 0.4421940928270042, "grad_norm": 0.7421875, "learning_rate": 8.996849154726918e-07, "loss": 0.2676827907562256, "step": 2096, "token_acc": 0.9252487227749395 }, { "epoch": 0.44240506329113927, "grad_norm": 0.8203125, "learning_rate": 8.995832999215832e-07, "loss": 0.2686436176300049, "step": 2097, "token_acc": 0.924968474148802 }, { "epoch": 0.44261603375527425, "grad_norm": 0.66796875, "learning_rate": 8.994816386747421e-07, "loss": 0.23035119473934174, "step": 2098, "token_acc": 0.9348214285714286 }, { "epoch": 0.4428270042194093, "grad_norm": 0.765625, "learning_rate": 8.993799317437946e-07, "loss": 0.23237064480781555, "step": 2099, "token_acc": 0.930239099859353 }, { "epoch": 0.4430379746835443, "grad_norm": 0.7734375, "learning_rate": 8.992781791403714e-07, "loss": 0.26407796144485474, "step": 2100, "token_acc": 0.9246745195288283 }, { "epoch": 0.4432489451476793, "grad_norm": 0.66796875, "learning_rate": 8.991763808761091e-07, "loss": 0.21231698989868164, "step": 2101, "token_acc": 0.9392376038979651 }, { "epoch": 0.44345991561181436, "grad_norm": 0.6796875, "learning_rate": 8.990745369626491e-07, "loss": 0.23250027000904083, "step": 2102, "token_acc": 0.9328899637243047 }, { "epoch": 0.44367088607594934, "grad_norm": 0.68359375, "learning_rate": 8.989726474116379e-07, "loss": 0.280051052570343, "step": 2103, "token_acc": 0.9312169312169312 }, { "epoch": 0.4438818565400844, "grad_norm": 0.55859375, "learning_rate": 8.988707122347277e-07, "loss": 0.24222272634506226, "step": 2104, "token_acc": 0.9333132166566084 }, { "epoch": 0.4440928270042194, "grad_norm": 1.0546875, "learning_rate": 8.987687314435754e-07, "loss": 0.3396711051464081, "step": 2105, "token_acc": 0.9100864553314121 }, { "epoch": 0.4443037974683544, "grad_norm": 0.68359375, "learning_rate": 8.986667050498435e-07, "loss": 0.2831103503704071, "step": 2106, "token_acc": 0.9225251076040172 }, { "epoch": 0.44451476793248945, "grad_norm": 0.8125, "learning_rate": 8.985646330651998e-07, "loss": 0.22755396366119385, "step": 2107, "token_acc": 0.9360557138334916 }, { "epoch": 0.4447257383966245, "grad_norm": 0.90234375, "learning_rate": 8.984625155013168e-07, "loss": 0.26585981249809265, "step": 2108, "token_acc": 0.9303356554781508 }, { "epoch": 0.4449367088607595, "grad_norm": 0.7265625, "learning_rate": 8.983603523698726e-07, "loss": 0.23549145460128784, "step": 2109, "token_acc": 0.9315028901734104 }, { "epoch": 0.4451476793248945, "grad_norm": 0.58203125, "learning_rate": 8.982581436825503e-07, "loss": 0.20205682516098022, "step": 2110, "token_acc": 0.9398826979472141 }, { "epoch": 0.44535864978902956, "grad_norm": 0.625, "learning_rate": 8.981558894510386e-07, "loss": 0.23253054916858673, "step": 2111, "token_acc": 0.939816281279696 }, { "epoch": 0.44556962025316454, "grad_norm": 0.75, "learning_rate": 8.980535896870308e-07, "loss": 0.20445546507835388, "step": 2112, "token_acc": 0.9392712550607287 }, { "epoch": 0.4457805907172996, "grad_norm": 0.828125, "learning_rate": 8.979512444022261e-07, "loss": 0.24986101686954498, "step": 2113, "token_acc": 0.9240806642941874 }, { "epoch": 0.4459915611814346, "grad_norm": 0.64453125, "learning_rate": 8.978488536083281e-07, "loss": 0.2899099886417389, "step": 2114, "token_acc": 0.9161184210526315 }, { "epoch": 0.4462025316455696, "grad_norm": 0.671875, "learning_rate": 8.977464173170466e-07, "loss": 0.24575838446617126, "step": 2115, "token_acc": 0.9335180055401662 }, { "epoch": 0.44641350210970465, "grad_norm": 0.77734375, "learning_rate": 8.976439355400956e-07, "loss": 0.2049473524093628, "step": 2116, "token_acc": 0.942992125984252 }, { "epoch": 0.44662447257383964, "grad_norm": 0.734375, "learning_rate": 8.975414082891951e-07, "loss": 0.23525948822498322, "step": 2117, "token_acc": 0.9304901689282747 }, { "epoch": 0.4468354430379747, "grad_norm": 0.78515625, "learning_rate": 8.974388355760698e-07, "loss": 0.2617686688899994, "step": 2118, "token_acc": 0.9258320126782884 }, { "epoch": 0.4470464135021097, "grad_norm": 0.87109375, "learning_rate": 8.973362174124497e-07, "loss": 0.28202760219573975, "step": 2119, "token_acc": 0.9148103552077061 }, { "epoch": 0.4472573839662447, "grad_norm": 0.65234375, "learning_rate": 8.972335538100699e-07, "loss": 0.207890123128891, "step": 2120, "token_acc": 0.9443351138599944 }, { "epoch": 0.44746835443037974, "grad_norm": 0.59765625, "learning_rate": 8.971308447806713e-07, "loss": 0.2131594568490982, "step": 2121, "token_acc": 0.938488071284852 }, { "epoch": 0.4476793248945148, "grad_norm": 0.77734375, "learning_rate": 8.970280903359992e-07, "loss": 0.2549141049385071, "step": 2122, "token_acc": 0.9296018465089441 }, { "epoch": 0.44789029535864977, "grad_norm": 0.875, "learning_rate": 8.969252904878048e-07, "loss": 0.20591279864311218, "step": 2123, "token_acc": 0.9394221808014911 }, { "epoch": 0.4481012658227848, "grad_norm": 0.59765625, "learning_rate": 8.968224452478437e-07, "loss": 0.24989059567451477, "step": 2124, "token_acc": 0.930804248861912 }, { "epoch": 0.44831223628691985, "grad_norm": 0.83203125, "learning_rate": 8.967195546278772e-07, "loss": 0.2711721658706665, "step": 2125, "token_acc": 0.9283995186522263 }, { "epoch": 0.44852320675105484, "grad_norm": 0.7578125, "learning_rate": 8.966166186396719e-07, "loss": 0.2819213271141052, "step": 2126, "token_acc": 0.9204771371769384 }, { "epoch": 0.4487341772151899, "grad_norm": 0.5546875, "learning_rate": 8.965136372949995e-07, "loss": 0.20328068733215332, "step": 2127, "token_acc": 0.94021270480023 }, { "epoch": 0.4489451476793249, "grad_norm": 0.6796875, "learning_rate": 8.964106106056363e-07, "loss": 0.24182623624801636, "step": 2128, "token_acc": 0.9245777518928363 }, { "epoch": 0.4491561181434599, "grad_norm": 0.7109375, "learning_rate": 8.96307538583365e-07, "loss": 0.25695544481277466, "step": 2129, "token_acc": 0.9318695511785599 }, { "epoch": 0.44936708860759494, "grad_norm": 0.61328125, "learning_rate": 8.96204421239972e-07, "loss": 0.24647077918052673, "step": 2130, "token_acc": 0.9301675977653632 }, { "epoch": 0.44957805907173, "grad_norm": 0.71484375, "learning_rate": 8.961012585872501e-07, "loss": 0.264778733253479, "step": 2131, "token_acc": 0.9276729559748428 }, { "epoch": 0.44978902953586497, "grad_norm": 0.69140625, "learning_rate": 8.959980506369966e-07, "loss": 0.22039973735809326, "step": 2132, "token_acc": 0.9324162679425837 }, { "epoch": 0.45, "grad_norm": 0.7890625, "learning_rate": 8.958947974010145e-07, "loss": 0.27548354864120483, "step": 2133, "token_acc": 0.9247431269091919 }, { "epoch": 0.450210970464135, "grad_norm": 0.65625, "learning_rate": 8.957914988911113e-07, "loss": 0.2655940651893616, "step": 2134, "token_acc": 0.9300212056952438 }, { "epoch": 0.45042194092827004, "grad_norm": 0.70703125, "learning_rate": 8.956881551191002e-07, "loss": 0.2579268217086792, "step": 2135, "token_acc": 0.9271859146523426 }, { "epoch": 0.4506329113924051, "grad_norm": 0.578125, "learning_rate": 8.955847660967996e-07, "loss": 0.21672308444976807, "step": 2136, "token_acc": 0.9414893617021277 }, { "epoch": 0.45084388185654006, "grad_norm": 0.79296875, "learning_rate": 8.95481331836033e-07, "loss": 0.2529594302177429, "step": 2137, "token_acc": 0.9335825186889016 }, { "epoch": 0.4510548523206751, "grad_norm": 0.703125, "learning_rate": 8.953778523486285e-07, "loss": 0.2122552990913391, "step": 2138, "token_acc": 0.9404648201209805 }, { "epoch": 0.45126582278481014, "grad_norm": 0.66796875, "learning_rate": 8.952743276464203e-07, "loss": 0.22124779224395752, "step": 2139, "token_acc": 0.9332943241661791 }, { "epoch": 0.45147679324894513, "grad_norm": 0.6796875, "learning_rate": 8.95170757741247e-07, "loss": 0.27871444821357727, "step": 2140, "token_acc": 0.9219877103927331 }, { "epoch": 0.45168776371308017, "grad_norm": 1.171875, "learning_rate": 8.95067142644953e-07, "loss": 0.2621363401412964, "step": 2141, "token_acc": 0.9252486365094642 }, { "epoch": 0.4518987341772152, "grad_norm": 0.7890625, "learning_rate": 8.949634823693874e-07, "loss": 0.25620999932289124, "step": 2142, "token_acc": 0.925171939477304 }, { "epoch": 0.4521097046413502, "grad_norm": 0.87109375, "learning_rate": 8.948597769264046e-07, "loss": 0.25033503770828247, "step": 2143, "token_acc": 0.9312388855957321 }, { "epoch": 0.45232067510548524, "grad_norm": 0.68359375, "learning_rate": 8.947560263278641e-07, "loss": 0.27634549140930176, "step": 2144, "token_acc": 0.9190049175585768 }, { "epoch": 0.4525316455696203, "grad_norm": 0.84765625, "learning_rate": 8.94652230585631e-07, "loss": 0.2885071337223053, "step": 2145, "token_acc": 0.9198913781398507 }, { "epoch": 0.45274261603375526, "grad_norm": 0.70703125, "learning_rate": 8.94548389711575e-07, "loss": 0.25526076555252075, "step": 2146, "token_acc": 0.9299108515216723 }, { "epoch": 0.4529535864978903, "grad_norm": 1.21875, "learning_rate": 8.944445037175712e-07, "loss": 0.27157655358314514, "step": 2147, "token_acc": 0.922173274596182 }, { "epoch": 0.4531645569620253, "grad_norm": 0.75, "learning_rate": 8.943405726154997e-07, "loss": 0.23408940434455872, "step": 2148, "token_acc": 0.9387211367673179 }, { "epoch": 0.45337552742616033, "grad_norm": 0.72265625, "learning_rate": 8.942365964172462e-07, "loss": 0.290885865688324, "step": 2149, "token_acc": 0.9250480461242793 }, { "epoch": 0.45358649789029537, "grad_norm": 0.85546875, "learning_rate": 8.94132575134701e-07, "loss": 0.2796597182750702, "step": 2150, "token_acc": 0.919929349425964 }, { "epoch": 0.45379746835443036, "grad_norm": 0.75390625, "learning_rate": 8.940285087797601e-07, "loss": 0.2531481981277466, "step": 2151, "token_acc": 0.9296440489432704 }, { "epoch": 0.4540084388185654, "grad_norm": 0.703125, "learning_rate": 8.93924397364324e-07, "loss": 0.26309943199157715, "step": 2152, "token_acc": 0.9271386430678467 }, { "epoch": 0.45421940928270044, "grad_norm": 0.875, "learning_rate": 8.938202409002991e-07, "loss": 0.31176134943962097, "step": 2153, "token_acc": 0.9181514476614699 }, { "epoch": 0.4544303797468354, "grad_norm": 0.578125, "learning_rate": 8.937160393995962e-07, "loss": 0.24332989752292633, "step": 2154, "token_acc": 0.9279794226921978 }, { "epoch": 0.45464135021097046, "grad_norm": 0.8359375, "learning_rate": 8.93611792874132e-07, "loss": 0.22825264930725098, "step": 2155, "token_acc": 0.9365280863218026 }, { "epoch": 0.4548523206751055, "grad_norm": 0.703125, "learning_rate": 8.935075013358276e-07, "loss": 0.25085559487342834, "step": 2156, "token_acc": 0.9258142340168878 }, { "epoch": 0.4550632911392405, "grad_norm": 0.7109375, "learning_rate": 8.9340316479661e-07, "loss": 0.2757166028022766, "step": 2157, "token_acc": 0.9258639910813824 }, { "epoch": 0.45527426160337553, "grad_norm": 0.578125, "learning_rate": 8.932987832684106e-07, "loss": 0.24141676723957062, "step": 2158, "token_acc": 0.9337490257209665 }, { "epoch": 0.45548523206751057, "grad_norm": 0.70703125, "learning_rate": 8.931943567631666e-07, "loss": 0.30009013414382935, "step": 2159, "token_acc": 0.9196307358131958 }, { "epoch": 0.45569620253164556, "grad_norm": 0.734375, "learning_rate": 8.930898852928197e-07, "loss": 0.24191243946552277, "step": 2160, "token_acc": 0.9262013729977117 }, { "epoch": 0.4559071729957806, "grad_norm": 0.75, "learning_rate": 8.929853688693176e-07, "loss": 0.27977827191352844, "step": 2161, "token_acc": 0.9263373282665888 }, { "epoch": 0.45611814345991564, "grad_norm": 0.828125, "learning_rate": 8.928808075046122e-07, "loss": 0.2501257061958313, "step": 2162, "token_acc": 0.9353503184713375 }, { "epoch": 0.4563291139240506, "grad_norm": 0.7578125, "learning_rate": 8.927762012106612e-07, "loss": 0.2849234938621521, "step": 2163, "token_acc": 0.9206946454413892 }, { "epoch": 0.45654008438818566, "grad_norm": 1.0859375, "learning_rate": 8.926715499994272e-07, "loss": 0.2490084171295166, "step": 2164, "token_acc": 0.9274475524475524 }, { "epoch": 0.45675105485232065, "grad_norm": 1.015625, "learning_rate": 8.925668538828779e-07, "loss": 0.3561903238296509, "step": 2165, "token_acc": 0.9056540649046504 }, { "epoch": 0.4569620253164557, "grad_norm": 0.7265625, "learning_rate": 8.924621128729862e-07, "loss": 0.25659793615341187, "step": 2166, "token_acc": 0.9307905259920025 }, { "epoch": 0.45717299578059073, "grad_norm": 1.03125, "learning_rate": 8.923573269817299e-07, "loss": 0.2436445951461792, "step": 2167, "token_acc": 0.9285714285714286 }, { "epoch": 0.4573839662447257, "grad_norm": 0.765625, "learning_rate": 8.922524962210927e-07, "loss": 0.22715000808238983, "step": 2168, "token_acc": 0.9323599523241954 }, { "epoch": 0.45759493670886076, "grad_norm": 0.6484375, "learning_rate": 8.921476206030625e-07, "loss": 0.25212275981903076, "step": 2169, "token_acc": 0.9287425149700599 }, { "epoch": 0.4578059071729958, "grad_norm": 0.8046875, "learning_rate": 8.920427001396326e-07, "loss": 0.29239213466644287, "step": 2170, "token_acc": 0.9237599510104103 }, { "epoch": 0.4580168776371308, "grad_norm": 0.80859375, "learning_rate": 8.919377348428018e-07, "loss": 0.23967140913009644, "step": 2171, "token_acc": 0.9340753424657534 }, { "epoch": 0.4582278481012658, "grad_norm": 0.796875, "learning_rate": 8.918327247245737e-07, "loss": 0.31007927656173706, "step": 2172, "token_acc": 0.9201277955271565 }, { "epoch": 0.45843881856540086, "grad_norm": 0.640625, "learning_rate": 8.917276697969572e-07, "loss": 0.22768999636173248, "step": 2173, "token_acc": 0.9381785283474066 }, { "epoch": 0.45864978902953585, "grad_norm": 0.68359375, "learning_rate": 8.916225700719659e-07, "loss": 0.2603572607040405, "step": 2174, "token_acc": 0.9233830845771144 }, { "epoch": 0.4588607594936709, "grad_norm": 0.71484375, "learning_rate": 8.915174255616191e-07, "loss": 0.22243662178516388, "step": 2175, "token_acc": 0.9349835967790039 }, { "epoch": 0.45907172995780593, "grad_norm": 0.8046875, "learning_rate": 8.91412236277941e-07, "loss": 0.31355756521224976, "step": 2176, "token_acc": 0.9138931297709924 }, { "epoch": 0.4592827004219409, "grad_norm": 0.72265625, "learning_rate": 8.913070022329608e-07, "loss": 0.2620652914047241, "step": 2177, "token_acc": 0.9295291549734955 }, { "epoch": 0.45949367088607596, "grad_norm": 0.7265625, "learning_rate": 8.912017234387128e-07, "loss": 0.2871960699558258, "step": 2178, "token_acc": 0.9183874139626352 }, { "epoch": 0.459704641350211, "grad_norm": 0.734375, "learning_rate": 8.910963999072367e-07, "loss": 0.2123001515865326, "step": 2179, "token_acc": 0.9378579610538373 }, { "epoch": 0.459915611814346, "grad_norm": 0.77734375, "learning_rate": 8.909910316505769e-07, "loss": 0.2081177532672882, "step": 2180, "token_acc": 0.9405046480743692 }, { "epoch": 0.460126582278481, "grad_norm": 0.7421875, "learning_rate": 8.908856186807834e-07, "loss": 0.2759869694709778, "step": 2181, "token_acc": 0.9224086279209107 }, { "epoch": 0.460337552742616, "grad_norm": 0.80859375, "learning_rate": 8.907801610099111e-07, "loss": 0.2771528363227844, "step": 2182, "token_acc": 0.9218657159833631 }, { "epoch": 0.46054852320675105, "grad_norm": 0.7109375, "learning_rate": 8.906746586500196e-07, "loss": 0.2757055163383484, "step": 2183, "token_acc": 0.9235979409374153 }, { "epoch": 0.4607594936708861, "grad_norm": 0.875, "learning_rate": 8.905691116131745e-07, "loss": 0.28040647506713867, "step": 2184, "token_acc": 0.923297262889879 }, { "epoch": 0.4609704641350211, "grad_norm": 0.76953125, "learning_rate": 8.904635199114457e-07, "loss": 0.2516011595726013, "step": 2185, "token_acc": 0.9299610894941635 }, { "epoch": 0.4611814345991561, "grad_norm": 0.59765625, "learning_rate": 8.903578835569084e-07, "loss": 0.22504326701164246, "step": 2186, "token_acc": 0.9359619686800895 }, { "epoch": 0.46139240506329116, "grad_norm": 1.3203125, "learning_rate": 8.902522025616432e-07, "loss": 0.3159291744232178, "step": 2187, "token_acc": 0.9180018850141376 }, { "epoch": 0.46160337552742614, "grad_norm": 0.70703125, "learning_rate": 8.901464769377356e-07, "loss": 0.2836325764656067, "step": 2188, "token_acc": 0.9164963503649635 }, { "epoch": 0.4618143459915612, "grad_norm": 0.76953125, "learning_rate": 8.900407066972762e-07, "loss": 0.25892630219459534, "step": 2189, "token_acc": 0.9296969696969697 }, { "epoch": 0.4620253164556962, "grad_norm": 0.859375, "learning_rate": 8.899348918523607e-07, "loss": 0.30012282729148865, "step": 2190, "token_acc": 0.9179734620024126 }, { "epoch": 0.4622362869198312, "grad_norm": 0.640625, "learning_rate": 8.898290324150899e-07, "loss": 0.27340683341026306, "step": 2191, "token_acc": 0.9237209302325582 }, { "epoch": 0.46244725738396625, "grad_norm": 0.72265625, "learning_rate": 8.897231283975697e-07, "loss": 0.24067184329032898, "step": 2192, "token_acc": 0.931273462315911 }, { "epoch": 0.4626582278481013, "grad_norm": 0.63671875, "learning_rate": 8.896171798119114e-07, "loss": 0.2594500184059143, "step": 2193, "token_acc": 0.9323894828084369 }, { "epoch": 0.4628691983122363, "grad_norm": 0.6953125, "learning_rate": 8.895111866702307e-07, "loss": 0.22804009914398193, "step": 2194, "token_acc": 0.9370650529500757 }, { "epoch": 0.4630801687763713, "grad_norm": 0.609375, "learning_rate": 8.894051489846491e-07, "loss": 0.23836784064769745, "step": 2195, "token_acc": 0.9367228516164686 }, { "epoch": 0.46329113924050636, "grad_norm": 1.203125, "learning_rate": 8.892990667672927e-07, "loss": 0.24842369556427002, "step": 2196, "token_acc": 0.9326230021936697 }, { "epoch": 0.46350210970464134, "grad_norm": 0.84375, "learning_rate": 8.891929400302931e-07, "loss": 0.3193819522857666, "step": 2197, "token_acc": 0.9160748870238864 }, { "epoch": 0.4637130801687764, "grad_norm": 0.67578125, "learning_rate": 8.890867687857867e-07, "loss": 0.24350924789905548, "step": 2198, "token_acc": 0.9309139784946237 }, { "epoch": 0.46392405063291137, "grad_norm": 0.7890625, "learning_rate": 8.889805530459152e-07, "loss": 0.25490716099739075, "step": 2199, "token_acc": 0.9314400458978772 }, { "epoch": 0.4641350210970464, "grad_norm": 0.79296875, "learning_rate": 8.88874292822825e-07, "loss": 0.2642059028148651, "step": 2200, "token_acc": 0.9267192784667418 }, { "epoch": 0.4641350210970464, "eval_loss": 0.43365225195884705, "eval_runtime": 245.8009, "eval_samples_per_second": 137.123, "eval_steps_per_second": 2.144, "eval_token_acc": 0.8991721259983846, "step": 2200 }, { "epoch": 0.46434599156118145, "grad_norm": 0.73046875, "learning_rate": 8.88767988128668e-07, "loss": 0.26544106006622314, "step": 2201, "token_acc": 0.928051267113312 }, { "epoch": 0.46455696202531643, "grad_norm": 0.75390625, "learning_rate": 8.886616389756009e-07, "loss": 0.2734348773956299, "step": 2202, "token_acc": 0.9289004029936673 }, { "epoch": 0.4647679324894515, "grad_norm": 0.79296875, "learning_rate": 8.885552453757861e-07, "loss": 0.23951369524002075, "step": 2203, "token_acc": 0.9345703125 }, { "epoch": 0.4649789029535865, "grad_norm": 0.67578125, "learning_rate": 8.884488073413902e-07, "loss": 0.2713561952114105, "step": 2204, "token_acc": 0.9230550823395798 }, { "epoch": 0.4651898734177215, "grad_norm": 0.76171875, "learning_rate": 8.883423248845853e-07, "loss": 0.24950376152992249, "step": 2205, "token_acc": 0.9314121037463977 }, { "epoch": 0.46540084388185654, "grad_norm": 0.71484375, "learning_rate": 8.882357980175488e-07, "loss": 0.2450135052204132, "step": 2206, "token_acc": 0.9293308317698562 }, { "epoch": 0.4656118143459916, "grad_norm": 0.78515625, "learning_rate": 8.881292267524626e-07, "loss": 0.2385254055261612, "step": 2207, "token_acc": 0.9313612866168869 }, { "epoch": 0.46582278481012657, "grad_norm": 0.66015625, "learning_rate": 8.880226111015143e-07, "loss": 0.32733994722366333, "step": 2208, "token_acc": 0.9151018249140439 }, { "epoch": 0.4660337552742616, "grad_norm": 0.80078125, "learning_rate": 8.879159510768964e-07, "loss": 0.24726706743240356, "step": 2209, "token_acc": 0.9348075818495117 }, { "epoch": 0.46624472573839665, "grad_norm": 0.6875, "learning_rate": 8.878092466908058e-07, "loss": 0.22801968455314636, "step": 2210, "token_acc": 0.9347242921013413 }, { "epoch": 0.46645569620253163, "grad_norm": 0.74609375, "learning_rate": 8.877024979554457e-07, "loss": 0.27816176414489746, "step": 2211, "token_acc": 0.925 }, { "epoch": 0.4666666666666667, "grad_norm": 0.93359375, "learning_rate": 8.875957048830234e-07, "loss": 0.24744361639022827, "step": 2212, "token_acc": 0.9312788906009245 }, { "epoch": 0.4668776371308017, "grad_norm": 0.78515625, "learning_rate": 8.874888674857518e-07, "loss": 0.2620353400707245, "step": 2213, "token_acc": 0.9281542056074766 }, { "epoch": 0.4670886075949367, "grad_norm": 0.84765625, "learning_rate": 8.873819857758485e-07, "loss": 0.28596949577331543, "step": 2214, "token_acc": 0.9214586255259467 }, { "epoch": 0.46729957805907174, "grad_norm": 0.6875, "learning_rate": 8.872750597655363e-07, "loss": 0.2453969120979309, "step": 2215, "token_acc": 0.9373313343328336 }, { "epoch": 0.4675105485232067, "grad_norm": 1.015625, "learning_rate": 8.87168089467043e-07, "loss": 0.29788151383399963, "step": 2216, "token_acc": 0.920080206244629 }, { "epoch": 0.46772151898734177, "grad_norm": 0.62109375, "learning_rate": 8.870610748926018e-07, "loss": 0.2774806022644043, "step": 2217, "token_acc": 0.9222657426935824 }, { "epoch": 0.4679324894514768, "grad_norm": 0.8046875, "learning_rate": 8.869540160544507e-07, "loss": 0.24728184938430786, "step": 2218, "token_acc": 0.9338616275525754 }, { "epoch": 0.4681434599156118, "grad_norm": 0.58203125, "learning_rate": 8.868469129648327e-07, "loss": 0.25519004464149475, "step": 2219, "token_acc": 0.930442919525889 }, { "epoch": 0.46835443037974683, "grad_norm": 0.64453125, "learning_rate": 8.867397656359959e-07, "loss": 0.2387292981147766, "step": 2220, "token_acc": 0.9319486198414867 }, { "epoch": 0.4685654008438819, "grad_norm": 0.73046875, "learning_rate": 8.866325740801937e-07, "loss": 0.2573626637458801, "step": 2221, "token_acc": 0.9276536312849162 }, { "epoch": 0.46877637130801686, "grad_norm": 0.78515625, "learning_rate": 8.86525338309684e-07, "loss": 0.26939404010772705, "step": 2222, "token_acc": 0.9291091593475533 }, { "epoch": 0.4689873417721519, "grad_norm": 0.75, "learning_rate": 8.864180583367304e-07, "loss": 0.27353376150131226, "step": 2223, "token_acc": 0.9237603897965033 }, { "epoch": 0.46919831223628694, "grad_norm": 0.76171875, "learning_rate": 8.863107341736014e-07, "loss": 0.299623966217041, "step": 2224, "token_acc": 0.9207062600321028 }, { "epoch": 0.4694092827004219, "grad_norm": 0.70703125, "learning_rate": 8.862033658325701e-07, "loss": 0.2493649572134018, "step": 2225, "token_acc": 0.9290941206156952 }, { "epoch": 0.46962025316455697, "grad_norm": 0.65625, "learning_rate": 8.860959533259151e-07, "loss": 0.26844432950019836, "step": 2226, "token_acc": 0.9202328966521106 }, { "epoch": 0.469831223628692, "grad_norm": 0.71875, "learning_rate": 8.859884966659199e-07, "loss": 0.2697129249572754, "step": 2227, "token_acc": 0.9254996971532404 }, { "epoch": 0.470042194092827, "grad_norm": 0.77734375, "learning_rate": 8.858809958648732e-07, "loss": 0.2647143602371216, "step": 2228, "token_acc": 0.9287298946200776 }, { "epoch": 0.47025316455696203, "grad_norm": 0.7421875, "learning_rate": 8.857734509350686e-07, "loss": 0.2677483558654785, "step": 2229, "token_acc": 0.9282627484874676 }, { "epoch": 0.4704641350210971, "grad_norm": 0.59765625, "learning_rate": 8.856658618888047e-07, "loss": 0.24569208920001984, "step": 2230, "token_acc": 0.9334133173365327 }, { "epoch": 0.47067510548523206, "grad_norm": 0.64453125, "learning_rate": 8.855582287383851e-07, "loss": 0.24396231770515442, "step": 2231, "token_acc": 0.9277628032345013 }, { "epoch": 0.4708860759493671, "grad_norm": 0.68359375, "learning_rate": 8.854505514961189e-07, "loss": 0.2265896052122116, "step": 2232, "token_acc": 0.9303062302006336 }, { "epoch": 0.4710970464135021, "grad_norm": 0.5625, "learning_rate": 8.853428301743197e-07, "loss": 0.2401953637599945, "step": 2233, "token_acc": 0.9332171893147503 }, { "epoch": 0.4713080168776371, "grad_norm": 0.79296875, "learning_rate": 8.852350647853062e-07, "loss": 0.2581579089164734, "step": 2234, "token_acc": 0.9284288565725691 }, { "epoch": 0.47151898734177217, "grad_norm": 0.5859375, "learning_rate": 8.851272553414026e-07, "loss": 0.22068506479263306, "step": 2235, "token_acc": 0.9346266276906723 }, { "epoch": 0.47172995780590715, "grad_norm": 0.68359375, "learning_rate": 8.850194018549375e-07, "loss": 0.2433212399482727, "step": 2236, "token_acc": 0.9351824817518248 }, { "epoch": 0.4719409282700422, "grad_norm": 0.734375, "learning_rate": 8.849115043382451e-07, "loss": 0.2532302737236023, "step": 2237, "token_acc": 0.9301283965362794 }, { "epoch": 0.47215189873417723, "grad_norm": 0.78125, "learning_rate": 8.848035628036643e-07, "loss": 0.2520287036895752, "step": 2238, "token_acc": 0.9295994993742178 }, { "epoch": 0.4723628691983122, "grad_norm": 0.66796875, "learning_rate": 8.846955772635392e-07, "loss": 0.24156486988067627, "step": 2239, "token_acc": 0.9334817701085444 }, { "epoch": 0.47257383966244726, "grad_norm": 0.87109375, "learning_rate": 8.845875477302188e-07, "loss": 0.30612218379974365, "step": 2240, "token_acc": 0.920201720557698 }, { "epoch": 0.4727848101265823, "grad_norm": 0.63671875, "learning_rate": 8.844794742160572e-07, "loss": 0.262297123670578, "step": 2241, "token_acc": 0.9226450405489707 }, { "epoch": 0.4729957805907173, "grad_norm": 0.66796875, "learning_rate": 8.843713567334134e-07, "loss": 0.26972663402557373, "step": 2242, "token_acc": 0.9206349206349206 }, { "epoch": 0.4732067510548523, "grad_norm": 0.90625, "learning_rate": 8.842631952946518e-07, "loss": 0.23537132143974304, "step": 2243, "token_acc": 0.9285505978419364 }, { "epoch": 0.47341772151898737, "grad_norm": 0.828125, "learning_rate": 8.841549899121413e-07, "loss": 0.2477632761001587, "step": 2244, "token_acc": 0.9291384886798001 }, { "epoch": 0.47362869198312235, "grad_norm": 0.65234375, "learning_rate": 8.840467405982564e-07, "loss": 0.23782771825790405, "step": 2245, "token_acc": 0.9316856428769786 }, { "epoch": 0.4738396624472574, "grad_norm": 0.875, "learning_rate": 8.839384473653761e-07, "loss": 0.257307767868042, "step": 2246, "token_acc": 0.9285321605277626 }, { "epoch": 0.4740506329113924, "grad_norm": 0.765625, "learning_rate": 8.838301102258847e-07, "loss": 0.2552265226840973, "step": 2247, "token_acc": 0.9269177126917713 }, { "epoch": 0.4742616033755274, "grad_norm": 0.67578125, "learning_rate": 8.837217291921715e-07, "loss": 0.25588980317115784, "step": 2248, "token_acc": 0.9309033280507132 }, { "epoch": 0.47447257383966246, "grad_norm": 0.70703125, "learning_rate": 8.836133042766308e-07, "loss": 0.2541846036911011, "step": 2249, "token_acc": 0.9296296296296296 }, { "epoch": 0.47468354430379744, "grad_norm": 0.70703125, "learning_rate": 8.835048354916617e-07, "loss": 0.27813607454299927, "step": 2250, "token_acc": 0.9220496894409937 }, { "epoch": 0.4748945147679325, "grad_norm": 0.76953125, "learning_rate": 8.833963228496689e-07, "loss": 0.27021002769470215, "step": 2251, "token_acc": 0.9276139410187667 }, { "epoch": 0.4751054852320675, "grad_norm": 0.859375, "learning_rate": 8.832877663630615e-07, "loss": 0.28146517276763916, "step": 2252, "token_acc": 0.9292297564186965 }, { "epoch": 0.4753164556962025, "grad_norm": 1.0625, "learning_rate": 8.831791660442538e-07, "loss": 0.2347206473350525, "step": 2253, "token_acc": 0.9354059764563839 }, { "epoch": 0.47552742616033755, "grad_norm": 1.1328125, "learning_rate": 8.830705219056654e-07, "loss": 0.2681208550930023, "step": 2254, "token_acc": 0.928 }, { "epoch": 0.4757383966244726, "grad_norm": 0.7578125, "learning_rate": 8.829618339597205e-07, "loss": 0.27639251947402954, "step": 2255, "token_acc": 0.923466516601013 }, { "epoch": 0.4759493670886076, "grad_norm": 0.71875, "learning_rate": 8.828531022188485e-07, "loss": 0.2656307816505432, "step": 2256, "token_acc": 0.9222468179302712 }, { "epoch": 0.4761603375527426, "grad_norm": 0.7578125, "learning_rate": 8.827443266954837e-07, "loss": 0.25257235765457153, "step": 2257, "token_acc": 0.9298937784522003 }, { "epoch": 0.47637130801687766, "grad_norm": 0.73046875, "learning_rate": 8.826355074020659e-07, "loss": 0.2333572506904602, "step": 2258, "token_acc": 0.9334500875656743 }, { "epoch": 0.47658227848101264, "grad_norm": 0.77734375, "learning_rate": 8.82526644351039e-07, "loss": 0.21845462918281555, "step": 2259, "token_acc": 0.9393468118195957 }, { "epoch": 0.4767932489451477, "grad_norm": 0.6640625, "learning_rate": 8.824177375548527e-07, "loss": 0.28741222620010376, "step": 2260, "token_acc": 0.9261802575107296 }, { "epoch": 0.4770042194092827, "grad_norm": 0.7890625, "learning_rate": 8.823087870259614e-07, "loss": 0.26964008808135986, "step": 2261, "token_acc": 0.9205552274069698 }, { "epoch": 0.4772151898734177, "grad_norm": 0.77734375, "learning_rate": 8.821997927768243e-07, "loss": 0.2560656666755676, "step": 2262, "token_acc": 0.930405019965773 }, { "epoch": 0.47742616033755275, "grad_norm": 0.6875, "learning_rate": 8.820907548199061e-07, "loss": 0.2408961057662964, "step": 2263, "token_acc": 0.9322501532801962 }, { "epoch": 0.47763713080168774, "grad_norm": 0.70703125, "learning_rate": 8.819816731676761e-07, "loss": 0.2337110936641693, "step": 2264, "token_acc": 0.9364942528735632 }, { "epoch": 0.4778481012658228, "grad_norm": 0.6796875, "learning_rate": 8.818725478326087e-07, "loss": 0.25228989124298096, "step": 2265, "token_acc": 0.9346354166666667 }, { "epoch": 0.4780590717299578, "grad_norm": 0.73046875, "learning_rate": 8.817633788271835e-07, "loss": 0.2806975245475769, "step": 2266, "token_acc": 0.9253891572732152 }, { "epoch": 0.4782700421940928, "grad_norm": 0.98828125, "learning_rate": 8.816541661638844e-07, "loss": 0.3271723687648773, "step": 2267, "token_acc": 0.9130695443645084 }, { "epoch": 0.47848101265822784, "grad_norm": 0.65625, "learning_rate": 8.815449098552012e-07, "loss": 0.25784432888031006, "step": 2268, "token_acc": 0.9243744728703964 }, { "epoch": 0.4786919831223629, "grad_norm": 0.65234375, "learning_rate": 8.814356099136283e-07, "loss": 0.24910078942775726, "step": 2269, "token_acc": 0.9287592008412198 }, { "epoch": 0.47890295358649787, "grad_norm": 0.7578125, "learning_rate": 8.813262663516649e-07, "loss": 0.2932097017765045, "step": 2270, "token_acc": 0.9178123132098027 }, { "epoch": 0.4791139240506329, "grad_norm": 0.64453125, "learning_rate": 8.812168791818155e-07, "loss": 0.22607073187828064, "step": 2271, "token_acc": 0.9363039131676664 }, { "epoch": 0.47932489451476795, "grad_norm": 0.7421875, "learning_rate": 8.811074484165893e-07, "loss": 0.24255219101905823, "step": 2272, "token_acc": 0.9297346200241254 }, { "epoch": 0.47953586497890294, "grad_norm": 0.8046875, "learning_rate": 8.809979740685005e-07, "loss": 0.2513260245323181, "step": 2273, "token_acc": 0.9312481557981706 }, { "epoch": 0.479746835443038, "grad_norm": 0.74609375, "learning_rate": 8.808884561500689e-07, "loss": 0.29963961243629456, "step": 2274, "token_acc": 0.9203539823008849 }, { "epoch": 0.479957805907173, "grad_norm": 0.93359375, "learning_rate": 8.807788946738184e-07, "loss": 0.258513480424881, "step": 2275, "token_acc": 0.9333125584294173 }, { "epoch": 0.480168776371308, "grad_norm": 0.61328125, "learning_rate": 8.806692896522784e-07, "loss": 0.1824427843093872, "step": 2276, "token_acc": 0.9467048710601719 }, { "epoch": 0.48037974683544304, "grad_norm": 0.80078125, "learning_rate": 8.805596410979833e-07, "loss": 0.2728447914123535, "step": 2277, "token_acc": 0.9248696718797915 }, { "epoch": 0.4805907172995781, "grad_norm": 0.72265625, "learning_rate": 8.80449949023472e-07, "loss": 0.24283793568611145, "step": 2278, "token_acc": 0.9304976369196553 }, { "epoch": 0.48080168776371307, "grad_norm": 0.63671875, "learning_rate": 8.803402134412892e-07, "loss": 0.2337758094072342, "step": 2279, "token_acc": 0.9311657879320445 }, { "epoch": 0.4810126582278481, "grad_norm": 0.76171875, "learning_rate": 8.802304343639837e-07, "loss": 0.20924966037273407, "step": 2280, "token_acc": 0.9414361389052384 }, { "epoch": 0.4812236286919831, "grad_norm": 0.69140625, "learning_rate": 8.801206118041097e-07, "loss": 0.23609662055969238, "step": 2281, "token_acc": 0.9304615384615385 }, { "epoch": 0.48143459915611814, "grad_norm": 1.0234375, "learning_rate": 8.800107457742265e-07, "loss": 0.28985005617141724, "step": 2282, "token_acc": 0.9207389749702026 }, { "epoch": 0.4816455696202532, "grad_norm": 0.6875, "learning_rate": 8.79900836286898e-07, "loss": 0.23934713006019592, "step": 2283, "token_acc": 0.9289756649688738 }, { "epoch": 0.48185654008438816, "grad_norm": 0.76171875, "learning_rate": 8.797908833546937e-07, "loss": 0.28975826501846313, "step": 2284, "token_acc": 0.9243485570187727 }, { "epoch": 0.4820675105485232, "grad_norm": 0.66796875, "learning_rate": 8.796808869901871e-07, "loss": 0.2605797052383423, "step": 2285, "token_acc": 0.925 }, { "epoch": 0.48227848101265824, "grad_norm": 0.67578125, "learning_rate": 8.795708472059576e-07, "loss": 0.2401445508003235, "step": 2286, "token_acc": 0.9342144788224692 }, { "epoch": 0.48248945147679323, "grad_norm": 0.6796875, "learning_rate": 8.794607640145891e-07, "loss": 0.24231047928333282, "step": 2287, "token_acc": 0.9350692155570204 }, { "epoch": 0.48270042194092827, "grad_norm": 0.78125, "learning_rate": 8.793506374286703e-07, "loss": 0.26586514711380005, "step": 2288, "token_acc": 0.9301297983982325 }, { "epoch": 0.4829113924050633, "grad_norm": 0.625, "learning_rate": 8.792404674607955e-07, "loss": 0.24635982513427734, "step": 2289, "token_acc": 0.9306599832915622 }, { "epoch": 0.4831223628691983, "grad_norm": 0.765625, "learning_rate": 8.791302541235633e-07, "loss": 0.27919480204582214, "step": 2290, "token_acc": 0.9156874265569918 }, { "epoch": 0.48333333333333334, "grad_norm": 0.80859375, "learning_rate": 8.790199974295778e-07, "loss": 0.3042367398738861, "step": 2291, "token_acc": 0.9152186938286399 }, { "epoch": 0.4835443037974684, "grad_norm": 0.765625, "learning_rate": 8.789096973914474e-07, "loss": 0.2661081552505493, "step": 2292, "token_acc": 0.9249793899422918 }, { "epoch": 0.48375527426160336, "grad_norm": 0.63671875, "learning_rate": 8.787993540217859e-07, "loss": 0.2303192913532257, "step": 2293, "token_acc": 0.9358974358974359 }, { "epoch": 0.4839662447257384, "grad_norm": 0.671875, "learning_rate": 8.786889673332124e-07, "loss": 0.2582243084907532, "step": 2294, "token_acc": 0.934476085153442 }, { "epoch": 0.48417721518987344, "grad_norm": 0.6953125, "learning_rate": 8.785785373383502e-07, "loss": 0.25178927183151245, "step": 2295, "token_acc": 0.9333917616126205 }, { "epoch": 0.48438818565400843, "grad_norm": 0.79296875, "learning_rate": 8.784680640498281e-07, "loss": 0.2827037274837494, "step": 2296, "token_acc": 0.9202546998180715 }, { "epoch": 0.48459915611814347, "grad_norm": 0.65625, "learning_rate": 8.783575474802793e-07, "loss": 0.23185110092163086, "step": 2297, "token_acc": 0.9372384937238494 }, { "epoch": 0.48481012658227846, "grad_norm": 0.52734375, "learning_rate": 8.782469876423429e-07, "loss": 0.20491400361061096, "step": 2298, "token_acc": 0.9335389792484576 }, { "epoch": 0.4850210970464135, "grad_norm": 1.1328125, "learning_rate": 8.781363845486615e-07, "loss": 0.2390601634979248, "step": 2299, "token_acc": 0.9255986316989738 }, { "epoch": 0.48523206751054854, "grad_norm": 0.7109375, "learning_rate": 8.780257382118845e-07, "loss": 0.23099708557128906, "step": 2300, "token_acc": 0.9300173510699826 }, { "epoch": 0.4854430379746835, "grad_norm": 1.0703125, "learning_rate": 8.779150486446644e-07, "loss": 0.2245050072669983, "step": 2301, "token_acc": 0.9327354260089686 }, { "epoch": 0.48565400843881856, "grad_norm": 0.59765625, "learning_rate": 8.778043158596601e-07, "loss": 0.2470259666442871, "step": 2302, "token_acc": 0.9319262782401903 }, { "epoch": 0.4858649789029536, "grad_norm": 0.65234375, "learning_rate": 8.776935398695344e-07, "loss": 0.268807053565979, "step": 2303, "token_acc": 0.9265339074273412 }, { "epoch": 0.4860759493670886, "grad_norm": 0.6796875, "learning_rate": 8.775827206869559e-07, "loss": 0.222184956073761, "step": 2304, "token_acc": 0.9379329454142422 }, { "epoch": 0.48628691983122363, "grad_norm": 0.98046875, "learning_rate": 8.774718583245972e-07, "loss": 0.2492075115442276, "step": 2305, "token_acc": 0.9299120234604106 }, { "epoch": 0.48649789029535867, "grad_norm": 0.6953125, "learning_rate": 8.773609527951367e-07, "loss": 0.232702374458313, "step": 2306, "token_acc": 0.9354194407456724 }, { "epoch": 0.48670886075949366, "grad_norm": 0.609375, "learning_rate": 8.772500041112572e-07, "loss": 0.22076931595802307, "step": 2307, "token_acc": 0.9356040447046301 }, { "epoch": 0.4869198312236287, "grad_norm": 0.7265625, "learning_rate": 8.771390122856468e-07, "loss": 0.22544875741004944, "step": 2308, "token_acc": 0.9322946175637393 }, { "epoch": 0.48713080168776374, "grad_norm": 0.6015625, "learning_rate": 8.770279773309982e-07, "loss": 0.2299356460571289, "step": 2309, "token_acc": 0.9305478693967902 }, { "epoch": 0.4873417721518987, "grad_norm": 0.76171875, "learning_rate": 8.769168992600094e-07, "loss": 0.2857593894004822, "step": 2310, "token_acc": 0.9228187919463087 }, { "epoch": 0.48755274261603376, "grad_norm": 0.671875, "learning_rate": 8.768057780853827e-07, "loss": 0.2371712476015091, "step": 2311, "token_acc": 0.9325876662636033 }, { "epoch": 0.4877637130801688, "grad_norm": 0.734375, "learning_rate": 8.766946138198262e-07, "loss": 0.2902355194091797, "step": 2312, "token_acc": 0.9223057644110275 }, { "epoch": 0.4879746835443038, "grad_norm": 0.55078125, "learning_rate": 8.765834064760523e-07, "loss": 0.23535096645355225, "step": 2313, "token_acc": 0.9319221967963387 }, { "epoch": 0.48818565400843883, "grad_norm": 0.66796875, "learning_rate": 8.764721560667785e-07, "loss": 0.2871108651161194, "step": 2314, "token_acc": 0.9229943654413738 }, { "epoch": 0.4883966244725738, "grad_norm": 0.95703125, "learning_rate": 8.763608626047272e-07, "loss": 0.29208487272262573, "step": 2315, "token_acc": 0.9216575922565033 }, { "epoch": 0.48860759493670886, "grad_norm": 0.66015625, "learning_rate": 8.762495261026257e-07, "loss": 0.28344714641571045, "step": 2316, "token_acc": 0.9238329238329238 }, { "epoch": 0.4888185654008439, "grad_norm": 0.75, "learning_rate": 8.761381465732063e-07, "loss": 0.2757223844528198, "step": 2317, "token_acc": 0.9201085318058486 }, { "epoch": 0.4890295358649789, "grad_norm": 0.625, "learning_rate": 8.760267240292065e-07, "loss": 0.23700553178787231, "step": 2318, "token_acc": 0.9288378766140603 }, { "epoch": 0.4892405063291139, "grad_norm": 0.75, "learning_rate": 8.759152584833677e-07, "loss": 0.2396867424249649, "step": 2319, "token_acc": 0.926785176257909 }, { "epoch": 0.48945147679324896, "grad_norm": 0.7890625, "learning_rate": 8.758037499484378e-07, "loss": 0.2612053155899048, "step": 2320, "token_acc": 0.9263013698630137 }, { "epoch": 0.48966244725738395, "grad_norm": 0.703125, "learning_rate": 8.756921984371681e-07, "loss": 0.25060030817985535, "step": 2321, "token_acc": 0.9302117506710409 }, { "epoch": 0.489873417721519, "grad_norm": 0.5859375, "learning_rate": 8.755806039623157e-07, "loss": 0.23008044064044952, "step": 2322, "token_acc": 0.9371980676328503 }, { "epoch": 0.49008438818565403, "grad_norm": 0.55859375, "learning_rate": 8.754689665366424e-07, "loss": 0.21028254926204681, "step": 2323, "token_acc": 0.9384572542467279 }, { "epoch": 0.490295358649789, "grad_norm": 0.828125, "learning_rate": 8.753572861729149e-07, "loss": 0.29556161165237427, "step": 2324, "token_acc": 0.9231199329046688 }, { "epoch": 0.49050632911392406, "grad_norm": 0.74609375, "learning_rate": 8.752455628839046e-07, "loss": 0.23340703547000885, "step": 2325, "token_acc": 0.9351066939491377 }, { "epoch": 0.4907172995780591, "grad_norm": 1.9296875, "learning_rate": 8.751337966823881e-07, "loss": 0.27585211396217346, "step": 2326, "token_acc": 0.926048898279505 }, { "epoch": 0.4909282700421941, "grad_norm": 0.6875, "learning_rate": 8.75021987581147e-07, "loss": 0.32190290093421936, "step": 2327, "token_acc": 0.9153699153699154 }, { "epoch": 0.4911392405063291, "grad_norm": 0.828125, "learning_rate": 8.749101355929673e-07, "loss": 0.26779577136039734, "step": 2328, "token_acc": 0.9182468138016786 }, { "epoch": 0.4913502109704641, "grad_norm": 0.78515625, "learning_rate": 8.747982407306405e-07, "loss": 0.25948038697242737, "step": 2329, "token_acc": 0.9274143302180685 }, { "epoch": 0.49156118143459915, "grad_norm": 0.99609375, "learning_rate": 8.746863030069625e-07, "loss": 0.24680930376052856, "step": 2330, "token_acc": 0.9244917715392061 }, { "epoch": 0.4917721518987342, "grad_norm": 0.82421875, "learning_rate": 8.745743224347346e-07, "loss": 0.284185528755188, "step": 2331, "token_acc": 0.9154581919160752 }, { "epoch": 0.4919831223628692, "grad_norm": 0.67578125, "learning_rate": 8.744622990267624e-07, "loss": 0.2825831174850464, "step": 2332, "token_acc": 0.9271392262072861 }, { "epoch": 0.4921940928270042, "grad_norm": 0.81640625, "learning_rate": 8.74350232795857e-07, "loss": 0.29751309752464294, "step": 2333, "token_acc": 0.9175055928411633 }, { "epoch": 0.49240506329113926, "grad_norm": 0.85546875, "learning_rate": 8.742381237548339e-07, "loss": 0.3089081645011902, "step": 2334, "token_acc": 0.9193789190803224 }, { "epoch": 0.49261603375527424, "grad_norm": 0.59375, "learning_rate": 8.741259719165137e-07, "loss": 0.24617190659046173, "step": 2335, "token_acc": 0.9312602291325696 }, { "epoch": 0.4928270042194093, "grad_norm": 0.65234375, "learning_rate": 8.740137772937224e-07, "loss": 0.2492576241493225, "step": 2336, "token_acc": 0.9289893617021276 }, { "epoch": 0.4930379746835443, "grad_norm": 0.78515625, "learning_rate": 8.739015398992897e-07, "loss": 0.24594742059707642, "step": 2337, "token_acc": 0.9242092076417162 }, { "epoch": 0.4932489451476793, "grad_norm": 0.78125, "learning_rate": 8.737892597460515e-07, "loss": 0.2689642310142517, "step": 2338, "token_acc": 0.9282831418814033 }, { "epoch": 0.49345991561181435, "grad_norm": 0.6328125, "learning_rate": 8.736769368468475e-07, "loss": 0.2513454556465149, "step": 2339, "token_acc": 0.930279458369346 }, { "epoch": 0.4936708860759494, "grad_norm": 0.71875, "learning_rate": 8.735645712145231e-07, "loss": 0.31461864709854126, "step": 2340, "token_acc": 0.9145348837209303 }, { "epoch": 0.4938818565400844, "grad_norm": 0.9765625, "learning_rate": 8.73452162861928e-07, "loss": 0.2764248847961426, "step": 2341, "token_acc": 0.9247737556561086 }, { "epoch": 0.4940928270042194, "grad_norm": 0.66015625, "learning_rate": 8.733397118019175e-07, "loss": 0.22401633858680725, "step": 2342, "token_acc": 0.9372011475932419 }, { "epoch": 0.49430379746835446, "grad_norm": 0.75390625, "learning_rate": 8.732272180473507e-07, "loss": 0.27256378531455994, "step": 2343, "token_acc": 0.9262246117084827 }, { "epoch": 0.49451476793248944, "grad_norm": 0.81640625, "learning_rate": 8.731146816110928e-07, "loss": 0.2438003271818161, "step": 2344, "token_acc": 0.9315403422982885 }, { "epoch": 0.4947257383966245, "grad_norm": 0.92578125, "learning_rate": 8.73002102506013e-07, "loss": 0.30492281913757324, "step": 2345, "token_acc": 0.9179487179487179 }, { "epoch": 0.49493670886075947, "grad_norm": 0.8671875, "learning_rate": 8.728894807449856e-07, "loss": 0.25407928228378296, "step": 2346, "token_acc": 0.9272042673360528 }, { "epoch": 0.4951476793248945, "grad_norm": 0.73046875, "learning_rate": 8.727768163408902e-07, "loss": 0.21123304963111877, "step": 2347, "token_acc": 0.9420376456528234 }, { "epoch": 0.49535864978902955, "grad_norm": 0.6875, "learning_rate": 8.726641093066105e-07, "loss": 0.2497667670249939, "step": 2348, "token_acc": 0.9323024054982818 }, { "epoch": 0.49556962025316453, "grad_norm": 0.73828125, "learning_rate": 8.725513596550356e-07, "loss": 0.2544287443161011, "step": 2349, "token_acc": 0.9243822566239952 }, { "epoch": 0.4957805907172996, "grad_norm": 0.6796875, "learning_rate": 8.724385673990597e-07, "loss": 0.27138179540634155, "step": 2350, "token_acc": 0.9246704331450094 }, { "epoch": 0.4959915611814346, "grad_norm": 0.87890625, "learning_rate": 8.723257325515811e-07, "loss": 0.2654998302459717, "step": 2351, "token_acc": 0.9276139410187667 }, { "epoch": 0.4962025316455696, "grad_norm": 0.7265625, "learning_rate": 8.72212855125504e-07, "loss": 0.2335270345211029, "step": 2352, "token_acc": 0.9358974358974359 }, { "epoch": 0.49641350210970464, "grad_norm": 0.7265625, "learning_rate": 8.720999351337362e-07, "loss": 0.27236318588256836, "step": 2353, "token_acc": 0.9270152505446623 }, { "epoch": 0.4966244725738397, "grad_norm": 0.70703125, "learning_rate": 8.719869725891915e-07, "loss": 0.2116774022579193, "step": 2354, "token_acc": 0.9348612786489746 }, { "epoch": 0.49683544303797467, "grad_norm": 0.6796875, "learning_rate": 8.71873967504788e-07, "loss": 0.228180930018425, "step": 2355, "token_acc": 0.9303468997541655 }, { "epoch": 0.4970464135021097, "grad_norm": 0.6640625, "learning_rate": 8.717609198934488e-07, "loss": 0.32046496868133545, "step": 2356, "token_acc": 0.9176435226931169 }, { "epoch": 0.49725738396624475, "grad_norm": 0.66796875, "learning_rate": 8.716478297681018e-07, "loss": 0.2344343364238739, "step": 2357, "token_acc": 0.9327779373743177 }, { "epoch": 0.49746835443037973, "grad_norm": 0.6953125, "learning_rate": 8.715346971416799e-07, "loss": 0.2466604858636856, "step": 2358, "token_acc": 0.9306389950846532 }, { "epoch": 0.4976793248945148, "grad_norm": 0.64453125, "learning_rate": 8.714215220271208e-07, "loss": 0.2530885934829712, "step": 2359, "token_acc": 0.9289647577092511 }, { "epoch": 0.4978902953586498, "grad_norm": 0.75390625, "learning_rate": 8.71308304437367e-07, "loss": 0.27250760793685913, "step": 2360, "token_acc": 0.9288888888888889 }, { "epoch": 0.4981012658227848, "grad_norm": 0.828125, "learning_rate": 8.711950443853657e-07, "loss": 0.277432918548584, "step": 2361, "token_acc": 0.9221892515661062 }, { "epoch": 0.49831223628691984, "grad_norm": 0.77734375, "learning_rate": 8.710817418840694e-07, "loss": 0.28303825855255127, "step": 2362, "token_acc": 0.9182089552238806 }, { "epoch": 0.4985232067510548, "grad_norm": 0.7890625, "learning_rate": 8.709683969464352e-07, "loss": 0.2992926239967346, "step": 2363, "token_acc": 0.9152383920224368 }, { "epoch": 0.49873417721518987, "grad_norm": 0.671875, "learning_rate": 8.708550095854248e-07, "loss": 0.2452237904071808, "step": 2364, "token_acc": 0.9259796806966618 }, { "epoch": 0.4989451476793249, "grad_norm": 0.7734375, "learning_rate": 8.707415798140052e-07, "loss": 0.2567756175994873, "step": 2365, "token_acc": 0.9297395517867959 }, { "epoch": 0.4991561181434599, "grad_norm": 0.72265625, "learning_rate": 8.70628107645148e-07, "loss": 0.231769859790802, "step": 2366, "token_acc": 0.9313508383422967 }, { "epoch": 0.49936708860759493, "grad_norm": 0.81640625, "learning_rate": 8.705145930918299e-07, "loss": 0.23743245005607605, "step": 2367, "token_acc": 0.9265389082462253 }, { "epoch": 0.49957805907173, "grad_norm": 0.82421875, "learning_rate": 8.704010361670317e-07, "loss": 0.2503661513328552, "step": 2368, "token_acc": 0.9290034183539311 }, { "epoch": 0.49978902953586496, "grad_norm": 0.78125, "learning_rate": 8.702874368837403e-07, "loss": 0.287828266620636, "step": 2369, "token_acc": 0.9235955056179775 }, { "epoch": 0.5, "grad_norm": 1.0234375, "learning_rate": 8.701737952549464e-07, "loss": 0.2757514715194702, "step": 2370, "token_acc": 0.9259592326139089 }, { "epoch": 0.500210970464135, "grad_norm": 0.59375, "learning_rate": 8.700601112936457e-07, "loss": 0.21430604159832, "step": 2371, "token_acc": 0.9361513449600946 }, { "epoch": 0.5004219409282701, "grad_norm": 0.66796875, "learning_rate": 8.699463850128393e-07, "loss": 0.2605946660041809, "step": 2372, "token_acc": 0.9278903456495828 }, { "epoch": 0.5006329113924051, "grad_norm": 0.734375, "learning_rate": 8.698326164255323e-07, "loss": 0.25237998366355896, "step": 2373, "token_acc": 0.927791771620487 }, { "epoch": 0.50084388185654, "grad_norm": 0.60546875, "learning_rate": 8.697188055447355e-07, "loss": 0.2035323679447174, "step": 2374, "token_acc": 0.9390547263681592 }, { "epoch": 0.5010548523206751, "grad_norm": 0.7421875, "learning_rate": 8.696049523834641e-07, "loss": 0.2428814321756363, "step": 2375, "token_acc": 0.9266409266409267 }, { "epoch": 0.5012658227848101, "grad_norm": 0.6796875, "learning_rate": 8.69491056954738e-07, "loss": 0.2293740063905716, "step": 2376, "token_acc": 0.9345679012345679 }, { "epoch": 0.5014767932489451, "grad_norm": 0.7421875, "learning_rate": 8.693771192715823e-07, "loss": 0.25546467304229736, "step": 2377, "token_acc": 0.9275521405049396 }, { "epoch": 0.5016877637130802, "grad_norm": 0.6640625, "learning_rate": 8.692631393470265e-07, "loss": 0.2149960696697235, "step": 2378, "token_acc": 0.934860612751863 }, { "epoch": 0.5018987341772152, "grad_norm": 0.71484375, "learning_rate": 8.691491171941055e-07, "loss": 0.28372305631637573, "step": 2379, "token_acc": 0.9209328782707622 }, { "epoch": 0.5021097046413502, "grad_norm": 0.66015625, "learning_rate": 8.690350528258582e-07, "loss": 0.2718750834465027, "step": 2380, "token_acc": 0.9235747303543914 }, { "epoch": 0.5023206751054853, "grad_norm": 0.6328125, "learning_rate": 8.689209462553293e-07, "loss": 0.22622206807136536, "step": 2381, "token_acc": 0.9394208037825059 }, { "epoch": 0.5025316455696203, "grad_norm": 0.65625, "learning_rate": 8.688067974955677e-07, "loss": 0.21774986386299133, "step": 2382, "token_acc": 0.9371376258773269 }, { "epoch": 0.5027426160337553, "grad_norm": 0.6328125, "learning_rate": 8.686926065596272e-07, "loss": 0.2107391059398651, "step": 2383, "token_acc": 0.9379776601998824 }, { "epoch": 0.5029535864978903, "grad_norm": 0.70703125, "learning_rate": 8.685783734605665e-07, "loss": 0.2711818814277649, "step": 2384, "token_acc": 0.923418095801301 }, { "epoch": 0.5031645569620253, "grad_norm": 0.59375, "learning_rate": 8.684640982114492e-07, "loss": 0.26013168692588806, "step": 2385, "token_acc": 0.9312517903179605 }, { "epoch": 0.5033755274261603, "grad_norm": 0.859375, "learning_rate": 8.683497808253436e-07, "loss": 0.28625619411468506, "step": 2386, "token_acc": 0.9227532311391644 }, { "epoch": 0.5035864978902953, "grad_norm": 0.6953125, "learning_rate": 8.682354213153229e-07, "loss": 0.21498313546180725, "step": 2387, "token_acc": 0.9360135900339751 }, { "epoch": 0.5037974683544304, "grad_norm": 0.8515625, "learning_rate": 8.681210196944651e-07, "loss": 0.25864896178245544, "step": 2388, "token_acc": 0.926822241428983 }, { "epoch": 0.5040084388185654, "grad_norm": 0.6171875, "learning_rate": 8.680065759758529e-07, "loss": 0.250183641910553, "step": 2389, "token_acc": 0.9227444624527282 }, { "epoch": 0.5042194092827004, "grad_norm": 0.765625, "learning_rate": 8.678920901725739e-07, "loss": 0.2598094642162323, "step": 2390, "token_acc": 0.930056087099967 }, { "epoch": 0.5044303797468355, "grad_norm": 0.703125, "learning_rate": 8.677775622977205e-07, "loss": 0.23525072634220123, "step": 2391, "token_acc": 0.9293624663274469 }, { "epoch": 0.5046413502109705, "grad_norm": 0.8046875, "learning_rate": 8.676629923643904e-07, "loss": 0.2582184672355652, "step": 2392, "token_acc": 0.9281635106702735 }, { "epoch": 0.5048523206751054, "grad_norm": 0.796875, "learning_rate": 8.675483803856849e-07, "loss": 0.28031355142593384, "step": 2393, "token_acc": 0.9242819843342036 }, { "epoch": 0.5050632911392405, "grad_norm": 0.625, "learning_rate": 8.674337263747114e-07, "loss": 0.2500080466270447, "step": 2394, "token_acc": 0.9338278931750742 }, { "epoch": 0.5052742616033755, "grad_norm": 0.859375, "learning_rate": 8.673190303445811e-07, "loss": 0.2783668637275696, "step": 2395, "token_acc": 0.9237334801762115 }, { "epoch": 0.5054852320675105, "grad_norm": 0.9453125, "learning_rate": 8.672042923084109e-07, "loss": 0.20730604231357574, "step": 2396, "token_acc": 0.937239738251041 }, { "epoch": 0.5056962025316456, "grad_norm": 1.1171875, "learning_rate": 8.670895122793218e-07, "loss": 0.28636008501052856, "step": 2397, "token_acc": 0.9249468246733515 }, { "epoch": 0.5059071729957806, "grad_norm": 0.82421875, "learning_rate": 8.6697469027044e-07, "loss": 0.2764488458633423, "step": 2398, "token_acc": 0.9267166762839008 }, { "epoch": 0.5061181434599156, "grad_norm": 0.7890625, "learning_rate": 8.668598262948963e-07, "loss": 0.2795909643173218, "step": 2399, "token_acc": 0.9227871939736346 }, { "epoch": 0.5063291139240507, "grad_norm": 0.890625, "learning_rate": 8.667449203658263e-07, "loss": 0.3210592269897461, "step": 2400, "token_acc": 0.9167391935015956 }, { "epoch": 0.5063291139240507, "eval_loss": 0.433657705783844, "eval_runtime": 245.6817, "eval_samples_per_second": 137.19, "eval_steps_per_second": 2.145, "eval_token_acc": 0.8991384725836848, "step": 2400 }, { "epoch": 0.5065400843881857, "grad_norm": 0.6796875, "learning_rate": 8.666299724963708e-07, "loss": 0.2220650166273117, "step": 2401, "token_acc": 0.933641975308642 }, { "epoch": 0.5067510548523206, "grad_norm": 1.0625, "learning_rate": 8.665149826996746e-07, "loss": 0.27462315559387207, "step": 2402, "token_acc": 0.9295261239368166 }, { "epoch": 0.5069620253164557, "grad_norm": 0.74609375, "learning_rate": 8.663999509888881e-07, "loss": 0.26826533675193787, "step": 2403, "token_acc": 0.925561797752809 }, { "epoch": 0.5071729957805907, "grad_norm": 0.78125, "learning_rate": 8.662848773771658e-07, "loss": 0.26744621992111206, "step": 2404, "token_acc": 0.925282098200671 }, { "epoch": 0.5073839662447257, "grad_norm": 0.66796875, "learning_rate": 8.661697618776678e-07, "loss": 0.2565361559391022, "step": 2405, "token_acc": 0.9211365613221224 }, { "epoch": 0.5075949367088608, "grad_norm": 0.83203125, "learning_rate": 8.660546045035582e-07, "loss": 0.32611238956451416, "step": 2406, "token_acc": 0.9123367972288836 }, { "epoch": 0.5078059071729958, "grad_norm": 0.65234375, "learning_rate": 8.659394052680064e-07, "loss": 0.2602841258049011, "step": 2407, "token_acc": 0.9248440158820193 }, { "epoch": 0.5080168776371308, "grad_norm": 0.6875, "learning_rate": 8.658241641841862e-07, "loss": 0.2297843098640442, "step": 2408, "token_acc": 0.9332753118653901 }, { "epoch": 0.5082278481012659, "grad_norm": 0.6484375, "learning_rate": 8.657088812652765e-07, "loss": 0.2420978844165802, "step": 2409, "token_acc": 0.9344581440622972 }, { "epoch": 0.5084388185654009, "grad_norm": 0.77734375, "learning_rate": 8.655935565244609e-07, "loss": 0.2358092963695526, "step": 2410, "token_acc": 0.9337368588722523 }, { "epoch": 0.5086497890295358, "grad_norm": 0.71484375, "learning_rate": 8.654781899749279e-07, "loss": 0.26209738850593567, "step": 2411, "token_acc": 0.9253508510002986 }, { "epoch": 0.5088607594936709, "grad_norm": 0.76171875, "learning_rate": 8.653627816298703e-07, "loss": 0.23714405298233032, "step": 2412, "token_acc": 0.9315883402736467 }, { "epoch": 0.5090717299578059, "grad_norm": 0.71875, "learning_rate": 8.652473315024864e-07, "loss": 0.2499234825372696, "step": 2413, "token_acc": 0.9284521158129176 }, { "epoch": 0.5092827004219409, "grad_norm": 0.796875, "learning_rate": 8.651318396059786e-07, "loss": 0.26731836795806885, "step": 2414, "token_acc": 0.9226031065881093 }, { "epoch": 0.509493670886076, "grad_norm": 0.73828125, "learning_rate": 8.650163059535545e-07, "loss": 0.2651350498199463, "step": 2415, "token_acc": 0.9303944315545244 }, { "epoch": 0.509704641350211, "grad_norm": 0.68359375, "learning_rate": 8.649007305584263e-07, "loss": 0.2561206817626953, "step": 2416, "token_acc": 0.9255349500713267 }, { "epoch": 0.509915611814346, "grad_norm": 0.8046875, "learning_rate": 8.647851134338113e-07, "loss": 0.25718846917152405, "step": 2417, "token_acc": 0.9216283577956245 }, { "epoch": 0.5101265822784811, "grad_norm": 0.69921875, "learning_rate": 8.646694545929307e-07, "loss": 0.2495173066854477, "step": 2418, "token_acc": 0.9315693430656934 }, { "epoch": 0.510337552742616, "grad_norm": 0.78125, "learning_rate": 8.645537540490118e-07, "loss": 0.24702274799346924, "step": 2419, "token_acc": 0.9299302473050095 }, { "epoch": 0.510548523206751, "grad_norm": 1.28125, "learning_rate": 8.644380118152853e-07, "loss": 0.2602848410606384, "step": 2420, "token_acc": 0.925796178343949 }, { "epoch": 0.510759493670886, "grad_norm": 0.90234375, "learning_rate": 8.643222279049877e-07, "loss": 0.29092293977737427, "step": 2421, "token_acc": 0.9222860598729967 }, { "epoch": 0.5109704641350211, "grad_norm": 0.92578125, "learning_rate": 8.642064023313597e-07, "loss": 0.2597566843032837, "step": 2422, "token_acc": 0.9293624663274469 }, { "epoch": 0.5111814345991561, "grad_norm": 2.46875, "learning_rate": 8.640905351076469e-07, "loss": 0.2636124789714813, "step": 2423, "token_acc": 0.9239401496259352 }, { "epoch": 0.5113924050632911, "grad_norm": 0.83984375, "learning_rate": 8.639746262470998e-07, "loss": 0.27759090065956116, "step": 2424, "token_acc": 0.9255352156376048 }, { "epoch": 0.5116033755274262, "grad_norm": 0.671875, "learning_rate": 8.638586757629735e-07, "loss": 0.22381386160850525, "step": 2425, "token_acc": 0.9398471252907943 }, { "epoch": 0.5118143459915612, "grad_norm": 0.7578125, "learning_rate": 8.637426836685282e-07, "loss": 0.20817376673221588, "step": 2426, "token_acc": 0.9425357251444207 }, { "epoch": 0.5120253164556962, "grad_norm": 0.79296875, "learning_rate": 8.63626649977028e-07, "loss": 0.2539479732513428, "step": 2427, "token_acc": 0.9304040106163374 }, { "epoch": 0.5122362869198313, "grad_norm": 0.80078125, "learning_rate": 8.63510574701743e-07, "loss": 0.2605137228965759, "step": 2428, "token_acc": 0.9265553869499241 }, { "epoch": 0.5124472573839662, "grad_norm": 0.6796875, "learning_rate": 8.633944578559467e-07, "loss": 0.2617969512939453, "step": 2429, "token_acc": 0.9281914893617021 }, { "epoch": 0.5126582278481012, "grad_norm": 0.6484375, "learning_rate": 8.632782994529186e-07, "loss": 0.2611042261123657, "step": 2430, "token_acc": 0.9250905040378724 }, { "epoch": 0.5128691983122363, "grad_norm": 0.73046875, "learning_rate": 8.631620995059421e-07, "loss": 0.2714442014694214, "step": 2431, "token_acc": 0.9170662905500705 }, { "epoch": 0.5130801687763713, "grad_norm": 0.59375, "learning_rate": 8.63045858028306e-07, "loss": 0.23238138854503632, "step": 2432, "token_acc": 0.9353836595215905 }, { "epoch": 0.5132911392405063, "grad_norm": 0.77734375, "learning_rate": 8.62929575033303e-07, "loss": 0.2850421369075775, "step": 2433, "token_acc": 0.9262738400227726 }, { "epoch": 0.5135021097046414, "grad_norm": 0.6640625, "learning_rate": 8.628132505342313e-07, "loss": 0.23337498307228088, "step": 2434, "token_acc": 0.9331369661266569 }, { "epoch": 0.5137130801687764, "grad_norm": 0.79296875, "learning_rate": 8.626968845443936e-07, "loss": 0.29799211025238037, "step": 2435, "token_acc": 0.9259772664263931 }, { "epoch": 0.5139240506329114, "grad_norm": 0.8515625, "learning_rate": 8.625804770770973e-07, "loss": 0.3144356310367584, "step": 2436, "token_acc": 0.915684496826836 }, { "epoch": 0.5141350210970465, "grad_norm": 0.6953125, "learning_rate": 8.624640281456546e-07, "loss": 0.2649328112602234, "step": 2437, "token_acc": 0.9269420468557337 }, { "epoch": 0.5143459915611814, "grad_norm": 0.6640625, "learning_rate": 8.623475377633825e-07, "loss": 0.23344099521636963, "step": 2438, "token_acc": 0.9327119137780367 }, { "epoch": 0.5145569620253164, "grad_norm": 0.765625, "learning_rate": 8.622310059436024e-07, "loss": 0.238409623503685, "step": 2439, "token_acc": 0.9295981226166031 }, { "epoch": 0.5147679324894515, "grad_norm": 0.70703125, "learning_rate": 8.621144326996409e-07, "loss": 0.2880132794380188, "step": 2440, "token_acc": 0.9211908931698775 }, { "epoch": 0.5149789029535865, "grad_norm": 0.6953125, "learning_rate": 8.619978180448292e-07, "loss": 0.2760353684425354, "step": 2441, "token_acc": 0.9227799227799228 }, { "epoch": 0.5151898734177215, "grad_norm": 0.703125, "learning_rate": 8.61881161992503e-07, "loss": 0.22869959473609924, "step": 2442, "token_acc": 0.933037037037037 }, { "epoch": 0.5154008438818566, "grad_norm": 0.86328125, "learning_rate": 8.617644645560031e-07, "loss": 0.2857171595096588, "step": 2443, "token_acc": 0.9207523897625656 }, { "epoch": 0.5156118143459916, "grad_norm": 0.75390625, "learning_rate": 8.616477257486747e-07, "loss": 0.238547220826149, "step": 2444, "token_acc": 0.932398316970547 }, { "epoch": 0.5158227848101266, "grad_norm": 0.7734375, "learning_rate": 8.615309455838677e-07, "loss": 0.2387697994709015, "step": 2445, "token_acc": 0.9356814701378254 }, { "epoch": 0.5160337552742617, "grad_norm": 0.87890625, "learning_rate": 8.614141240749373e-07, "loss": 0.2545635402202606, "step": 2446, "token_acc": 0.9312295325989878 }, { "epoch": 0.5162447257383966, "grad_norm": 0.6953125, "learning_rate": 8.612972612352428e-07, "loss": 0.27836939692497253, "step": 2447, "token_acc": 0.9207174421627242 }, { "epoch": 0.5164556962025316, "grad_norm": 0.69140625, "learning_rate": 8.611803570781486e-07, "loss": 0.2463090419769287, "step": 2448, "token_acc": 0.9342779620138679 }, { "epoch": 0.5166666666666667, "grad_norm": 0.69140625, "learning_rate": 8.610634116170234e-07, "loss": 0.2754680812358856, "step": 2449, "token_acc": 0.9263062518876473 }, { "epoch": 0.5168776371308017, "grad_norm": 1.7109375, "learning_rate": 8.609464248652411e-07, "loss": 0.2973451614379883, "step": 2450, "token_acc": 0.9169278996865203 }, { "epoch": 0.5170886075949367, "grad_norm": 0.6796875, "learning_rate": 8.6082939683618e-07, "loss": 0.2534092664718628, "step": 2451, "token_acc": 0.9319882850634559 }, { "epoch": 0.5172995780590718, "grad_norm": 0.703125, "learning_rate": 8.607123275432235e-07, "loss": 0.24445638060569763, "step": 2452, "token_acc": 0.9296745725317154 }, { "epoch": 0.5175105485232068, "grad_norm": 0.73828125, "learning_rate": 8.605952169997592e-07, "loss": 0.3004859685897827, "step": 2453, "token_acc": 0.917799939558779 }, { "epoch": 0.5177215189873418, "grad_norm": 0.671875, "learning_rate": 8.604780652191798e-07, "loss": 0.23142023384571075, "step": 2454, "token_acc": 0.9372359686179843 }, { "epoch": 0.5179324894514767, "grad_norm": 0.6953125, "learning_rate": 8.603608722148826e-07, "loss": 0.26562827825546265, "step": 2455, "token_acc": 0.9286120591581343 }, { "epoch": 0.5181434599156118, "grad_norm": 0.796875, "learning_rate": 8.602436380002695e-07, "loss": 0.26223617792129517, "step": 2456, "token_acc": 0.9266216612855885 }, { "epoch": 0.5183544303797468, "grad_norm": 0.73828125, "learning_rate": 8.601263625887475e-07, "loss": 0.3517577052116394, "step": 2457, "token_acc": 0.9124957439564181 }, { "epoch": 0.5185654008438818, "grad_norm": 0.6328125, "learning_rate": 8.600090459937277e-07, "loss": 0.21878892183303833, "step": 2458, "token_acc": 0.9389159736162891 }, { "epoch": 0.5187763713080169, "grad_norm": 0.77734375, "learning_rate": 8.598916882286264e-07, "loss": 0.22228926420211792, "step": 2459, "token_acc": 0.9371108343711083 }, { "epoch": 0.5189873417721519, "grad_norm": 0.6796875, "learning_rate": 8.597742893068647e-07, "loss": 0.26276683807373047, "step": 2460, "token_acc": 0.9265671641791045 }, { "epoch": 0.5191983122362869, "grad_norm": 0.62890625, "learning_rate": 8.596568492418677e-07, "loss": 0.22646142542362213, "step": 2461, "token_acc": 0.936 }, { "epoch": 0.519409282700422, "grad_norm": 0.8046875, "learning_rate": 8.595393680470659e-07, "loss": 0.306907594203949, "step": 2462, "token_acc": 0.9214351995051037 }, { "epoch": 0.519620253164557, "grad_norm": 0.83203125, "learning_rate": 8.594218457358942e-07, "loss": 0.2099585235118866, "step": 2463, "token_acc": 0.9369774919614148 }, { "epoch": 0.5198312236286919, "grad_norm": 0.75, "learning_rate": 8.593042823217923e-07, "loss": 0.2557579576969147, "step": 2464, "token_acc": 0.9266648692369911 }, { "epoch": 0.520042194092827, "grad_norm": 0.78125, "learning_rate": 8.591866778182046e-07, "loss": 0.2521863877773285, "step": 2465, "token_acc": 0.9329146445903511 }, { "epoch": 0.520253164556962, "grad_norm": 0.66796875, "learning_rate": 8.590690322385803e-07, "loss": 0.25757896900177, "step": 2466, "token_acc": 0.9229888348124821 }, { "epoch": 0.520464135021097, "grad_norm": 0.83203125, "learning_rate": 8.589513455963729e-07, "loss": 0.3220735192298889, "step": 2467, "token_acc": 0.917519566526189 }, { "epoch": 0.5206751054852321, "grad_norm": 0.6171875, "learning_rate": 8.588336179050411e-07, "loss": 0.22446265816688538, "step": 2468, "token_acc": 0.9407806191117093 }, { "epoch": 0.5208860759493671, "grad_norm": 0.81640625, "learning_rate": 8.587158491780477e-07, "loss": 0.23208804428577423, "step": 2469, "token_acc": 0.9324651726226529 }, { "epoch": 0.5210970464135021, "grad_norm": 0.76171875, "learning_rate": 8.585980394288612e-07, "loss": 0.24118682742118835, "step": 2470, "token_acc": 0.9320495185694635 }, { "epoch": 0.5213080168776372, "grad_norm": 0.71875, "learning_rate": 8.584801886709534e-07, "loss": 0.24908380210399628, "step": 2471, "token_acc": 0.9314145744029394 }, { "epoch": 0.5215189873417722, "grad_norm": 0.765625, "learning_rate": 8.58362296917802e-07, "loss": 0.29010510444641113, "step": 2472, "token_acc": 0.9234247814290021 }, { "epoch": 0.5217299578059071, "grad_norm": 0.6484375, "learning_rate": 8.582443641828888e-07, "loss": 0.2637540400028229, "step": 2473, "token_acc": 0.9303721488595438 }, { "epoch": 0.5219409282700422, "grad_norm": 0.82421875, "learning_rate": 8.581263904797004e-07, "loss": 0.24447697401046753, "step": 2474, "token_acc": 0.9357456881974975 }, { "epoch": 0.5221518987341772, "grad_norm": 0.66796875, "learning_rate": 8.58008375821728e-07, "loss": 0.2402324229478836, "step": 2475, "token_acc": 0.9314000623635796 }, { "epoch": 0.5223628691983122, "grad_norm": 0.6953125, "learning_rate": 8.578903202224679e-07, "loss": 0.24342304468154907, "step": 2476, "token_acc": 0.9342498505678422 }, { "epoch": 0.5225738396624473, "grad_norm": 0.59375, "learning_rate": 8.577722236954202e-07, "loss": 0.2419014871120453, "step": 2477, "token_acc": 0.9394124847001224 }, { "epoch": 0.5227848101265823, "grad_norm": 0.65625, "learning_rate": 8.576540862540908e-07, "loss": 0.22893425822257996, "step": 2478, "token_acc": 0.9395846444304594 }, { "epoch": 0.5229957805907173, "grad_norm": 0.7890625, "learning_rate": 8.575359079119893e-07, "loss": 0.28049004077911377, "step": 2479, "token_acc": 0.9243485570187727 }, { "epoch": 0.5232067510548524, "grad_norm": 0.84765625, "learning_rate": 8.574176886826308e-07, "loss": 0.24520409107208252, "step": 2480, "token_acc": 0.9287897310513448 }, { "epoch": 0.5234177215189874, "grad_norm": 1.109375, "learning_rate": 8.572994285795343e-07, "loss": 0.27223461866378784, "step": 2481, "token_acc": 0.9214720370906984 }, { "epoch": 0.5236286919831223, "grad_norm": 1.3828125, "learning_rate": 8.57181127616224e-07, "loss": 0.24474994838237762, "step": 2482, "token_acc": 0.9342764515178308 }, { "epoch": 0.5238396624472574, "grad_norm": 0.859375, "learning_rate": 8.570627858062286e-07, "loss": 0.2651122808456421, "step": 2483, "token_acc": 0.9313022700119474 }, { "epoch": 0.5240506329113924, "grad_norm": 0.7109375, "learning_rate": 8.569444031630815e-07, "loss": 0.25621944665908813, "step": 2484, "token_acc": 0.9315233785822021 }, { "epoch": 0.5242616033755274, "grad_norm": 0.66015625, "learning_rate": 8.568259797003207e-07, "loss": 0.25606659054756165, "step": 2485, "token_acc": 0.933968058968059 }, { "epoch": 0.5244725738396624, "grad_norm": 0.83203125, "learning_rate": 8.567075154314889e-07, "loss": 0.2756558656692505, "step": 2486, "token_acc": 0.9290305991257393 }, { "epoch": 0.5246835443037975, "grad_norm": 0.71484375, "learning_rate": 8.565890103701337e-07, "loss": 0.22563622891902924, "step": 2487, "token_acc": 0.9330232558139535 }, { "epoch": 0.5248945147679325, "grad_norm": 0.7421875, "learning_rate": 8.564704645298071e-07, "loss": 0.30218616127967834, "step": 2488, "token_acc": 0.9169787765293383 }, { "epoch": 0.5251054852320675, "grad_norm": 0.703125, "learning_rate": 8.563518779240655e-07, "loss": 0.2556324005126953, "step": 2489, "token_acc": 0.923202614379085 }, { "epoch": 0.5253164556962026, "grad_norm": 1.2265625, "learning_rate": 8.562332505664706e-07, "loss": 0.2872592806816101, "step": 2490, "token_acc": 0.9206865936667653 }, { "epoch": 0.5255274261603375, "grad_norm": 0.640625, "learning_rate": 8.561145824705884e-07, "loss": 0.24566414952278137, "step": 2491, "token_acc": 0.9289340101522843 }, { "epoch": 0.5257383966244725, "grad_norm": 1.015625, "learning_rate": 8.559958736499897e-07, "loss": 0.2547341287136078, "step": 2492, "token_acc": 0.9276683328432814 }, { "epoch": 0.5259493670886076, "grad_norm": 0.765625, "learning_rate": 8.558771241182497e-07, "loss": 0.26815658807754517, "step": 2493, "token_acc": 0.9291285181417429 }, { "epoch": 0.5261603375527426, "grad_norm": 0.62109375, "learning_rate": 8.557583338889482e-07, "loss": 0.21880872547626495, "step": 2494, "token_acc": 0.9383561643835616 }, { "epoch": 0.5263713080168776, "grad_norm": 0.7109375, "learning_rate": 8.556395029756702e-07, "loss": 0.2580570876598358, "step": 2495, "token_acc": 0.9271220348671049 }, { "epoch": 0.5265822784810127, "grad_norm": 0.81640625, "learning_rate": 8.555206313920049e-07, "loss": 0.2814309000968933, "step": 2496, "token_acc": 0.9171319424037614 }, { "epoch": 0.5267932489451477, "grad_norm": 0.68359375, "learning_rate": 8.554017191515465e-07, "loss": 0.3364434838294983, "step": 2497, "token_acc": 0.9142607174103237 }, { "epoch": 0.5270042194092827, "grad_norm": 0.61328125, "learning_rate": 8.552827662678932e-07, "loss": 0.2199023813009262, "step": 2498, "token_acc": 0.9355213476619227 }, { "epoch": 0.5272151898734178, "grad_norm": 0.671875, "learning_rate": 8.551637727546486e-07, "loss": 0.24434736371040344, "step": 2499, "token_acc": 0.9320872274143303 }, { "epoch": 0.5274261603375527, "grad_norm": 0.609375, "learning_rate": 8.550447386254205e-07, "loss": 0.23291081190109253, "step": 2500, "token_acc": 0.934733893557423 }, { "epoch": 0.5276371308016877, "grad_norm": 0.921875, "learning_rate": 8.549256638938213e-07, "loss": 0.2421799898147583, "step": 2501, "token_acc": 0.9354550862581796 }, { "epoch": 0.5278481012658228, "grad_norm": 0.68359375, "learning_rate": 8.548065485734686e-07, "loss": 0.2299726903438568, "step": 2502, "token_acc": 0.9346629986244842 }, { "epoch": 0.5280590717299578, "grad_norm": 0.78125, "learning_rate": 8.54687392677984e-07, "loss": 0.29011255502700806, "step": 2503, "token_acc": 0.9178403755868545 }, { "epoch": 0.5282700421940928, "grad_norm": 0.6171875, "learning_rate": 8.545681962209938e-07, "loss": 0.25929439067840576, "step": 2504, "token_acc": 0.9292899408284023 }, { "epoch": 0.5284810126582279, "grad_norm": 0.91015625, "learning_rate": 8.544489592161295e-07, "loss": 0.24807614088058472, "step": 2505, "token_acc": 0.9324362209139333 }, { "epoch": 0.5286919831223629, "grad_norm": 0.68359375, "learning_rate": 8.543296816770267e-07, "loss": 0.23297901451587677, "step": 2506, "token_acc": 0.931261207411835 }, { "epoch": 0.5289029535864979, "grad_norm": 0.85546875, "learning_rate": 8.542103636173256e-07, "loss": 0.287057489156723, "step": 2507, "token_acc": 0.9156471345782357 }, { "epoch": 0.529113924050633, "grad_norm": 0.8203125, "learning_rate": 8.540910050506715e-07, "loss": 0.25185930728912354, "step": 2508, "token_acc": 0.9279576999339062 }, { "epoch": 0.5293248945147679, "grad_norm": 0.72265625, "learning_rate": 8.539716059907141e-07, "loss": 0.2848084270954132, "step": 2509, "token_acc": 0.9244791666666666 }, { "epoch": 0.5295358649789029, "grad_norm": 1.0546875, "learning_rate": 8.538521664511073e-07, "loss": 0.25254541635513306, "step": 2510, "token_acc": 0.928911456680419 }, { "epoch": 0.529746835443038, "grad_norm": 0.625, "learning_rate": 8.537326864455105e-07, "loss": 0.26126307249069214, "step": 2511, "token_acc": 0.9250559284116331 }, { "epoch": 0.529957805907173, "grad_norm": 0.6484375, "learning_rate": 8.536131659875869e-07, "loss": 0.2512105107307434, "step": 2512, "token_acc": 0.9298989240299967 }, { "epoch": 0.530168776371308, "grad_norm": 0.6953125, "learning_rate": 8.534936050910049e-07, "loss": 0.2810831069946289, "step": 2513, "token_acc": 0.9219620958751393 }, { "epoch": 0.5303797468354431, "grad_norm": 0.609375, "learning_rate": 8.533740037694371e-07, "loss": 0.2434484362602234, "step": 2514, "token_acc": 0.9297602256699576 }, { "epoch": 0.5305907172995781, "grad_norm": 1.0703125, "learning_rate": 8.532543620365611e-07, "loss": 0.25123822689056396, "step": 2515, "token_acc": 0.9271303824149353 }, { "epoch": 0.5308016877637131, "grad_norm": 0.8203125, "learning_rate": 8.531346799060589e-07, "loss": 0.24271540343761444, "step": 2516, "token_acc": 0.9361119561910557 }, { "epoch": 0.5310126582278482, "grad_norm": 0.6328125, "learning_rate": 8.530149573916172e-07, "loss": 0.2507641613483429, "step": 2517, "token_acc": 0.927969556944822 }, { "epoch": 0.5312236286919831, "grad_norm": 0.71875, "learning_rate": 8.528951945069271e-07, "loss": 0.26492685079574585, "step": 2518, "token_acc": 0.9248009436744323 }, { "epoch": 0.5314345991561181, "grad_norm": 0.75390625, "learning_rate": 8.527753912656848e-07, "loss": 0.2783654034137726, "step": 2519, "token_acc": 0.924896510940272 }, { "epoch": 0.5316455696202531, "grad_norm": 0.75390625, "learning_rate": 8.526555476815905e-07, "loss": 0.2902885675430298, "step": 2520, "token_acc": 0.924300744162176 }, { "epoch": 0.5318565400843882, "grad_norm": 0.8359375, "learning_rate": 8.525356637683494e-07, "loss": 0.26359689235687256, "step": 2521, "token_acc": 0.9227877385772123 }, { "epoch": 0.5320675105485232, "grad_norm": 0.71484375, "learning_rate": 8.524157395396715e-07, "loss": 0.2716497778892517, "step": 2522, "token_acc": 0.9304267161410018 }, { "epoch": 0.5322784810126582, "grad_norm": 0.5703125, "learning_rate": 8.522957750092709e-07, "loss": 0.20532673597335815, "step": 2523, "token_acc": 0.9395079594790159 }, { "epoch": 0.5324894514767933, "grad_norm": 0.76171875, "learning_rate": 8.521757701908667e-07, "loss": 0.22517184913158417, "step": 2524, "token_acc": 0.9341870160810006 }, { "epoch": 0.5327004219409283, "grad_norm": 0.765625, "learning_rate": 8.520557250981821e-07, "loss": 0.27452120184898376, "step": 2525, "token_acc": 0.9231734085920351 }, { "epoch": 0.5329113924050632, "grad_norm": 0.71875, "learning_rate": 8.519356397449458e-07, "loss": 0.2842588722705841, "step": 2526, "token_acc": 0.9281177829099307 }, { "epoch": 0.5331223628691983, "grad_norm": 0.7421875, "learning_rate": 8.518155141448904e-07, "loss": 0.2685563564300537, "step": 2527, "token_acc": 0.9254515599343186 }, { "epoch": 0.5333333333333333, "grad_norm": 0.74609375, "learning_rate": 8.516953483117529e-07, "loss": 0.31973540782928467, "step": 2528, "token_acc": 0.9127372933251684 }, { "epoch": 0.5335443037974683, "grad_norm": 0.69921875, "learning_rate": 8.515751422592759e-07, "loss": 0.2862204313278198, "step": 2529, "token_acc": 0.9247058823529412 }, { "epoch": 0.5337552742616034, "grad_norm": 0.63671875, "learning_rate": 8.514548960012055e-07, "loss": 0.2583780884742737, "step": 2530, "token_acc": 0.9300841311285175 }, { "epoch": 0.5339662447257384, "grad_norm": 0.8203125, "learning_rate": 8.513346095512932e-07, "loss": 0.2810664474964142, "step": 2531, "token_acc": 0.9199391943248036 }, { "epoch": 0.5341772151898734, "grad_norm": 0.83203125, "learning_rate": 8.512142829232944e-07, "loss": 0.30742138624191284, "step": 2532, "token_acc": 0.914881462439303 }, { "epoch": 0.5343881856540085, "grad_norm": 0.703125, "learning_rate": 8.510939161309699e-07, "loss": 0.2908022403717041, "step": 2533, "token_acc": 0.9199761122723201 }, { "epoch": 0.5345991561181435, "grad_norm": 0.8125, "learning_rate": 8.509735091880844e-07, "loss": 0.26338642835617065, "step": 2534, "token_acc": 0.9259051361212461 }, { "epoch": 0.5348101265822784, "grad_norm": 0.7109375, "learning_rate": 8.508530621084076e-07, "loss": 0.27282026410102844, "step": 2535, "token_acc": 0.9235181644359465 }, { "epoch": 0.5350210970464135, "grad_norm": 1.3125, "learning_rate": 8.507325749057134e-07, "loss": 0.2689321041107178, "step": 2536, "token_acc": 0.9262270400481782 }, { "epoch": 0.5352320675105485, "grad_norm": 1.7578125, "learning_rate": 8.506120475937808e-07, "loss": 0.30176982283592224, "step": 2537, "token_acc": 0.9222781623662681 }, { "epoch": 0.5354430379746835, "grad_norm": 0.6640625, "learning_rate": 8.504914801863928e-07, "loss": 0.2510386109352112, "step": 2538, "token_acc": 0.935154608225758 }, { "epoch": 0.5356540084388186, "grad_norm": 0.68359375, "learning_rate": 8.503708726973377e-07, "loss": 0.3038210868835449, "step": 2539, "token_acc": 0.9151857835218093 }, { "epoch": 0.5358649789029536, "grad_norm": 0.73046875, "learning_rate": 8.502502251404077e-07, "loss": 0.26024386286735535, "step": 2540, "token_acc": 0.9252501471453797 }, { "epoch": 0.5360759493670886, "grad_norm": 0.64453125, "learning_rate": 8.501295375294e-07, "loss": 0.24608099460601807, "step": 2541, "token_acc": 0.9323417906095072 }, { "epoch": 0.5362869198312237, "grad_norm": 0.734375, "learning_rate": 8.500088098781162e-07, "loss": 0.2552410066127777, "step": 2542, "token_acc": 0.9226925338036449 }, { "epoch": 0.5364978902953587, "grad_norm": 0.63671875, "learning_rate": 8.498880422003626e-07, "loss": 0.25837090611457825, "step": 2543, "token_acc": 0.9286343612334802 }, { "epoch": 0.5367088607594936, "grad_norm": 0.703125, "learning_rate": 8.497672345099498e-07, "loss": 0.25335896015167236, "step": 2544, "token_acc": 0.932460577209164 }, { "epoch": 0.5369198312236287, "grad_norm": 0.72265625, "learning_rate": 8.496463868206934e-07, "loss": 0.253152996301651, "step": 2545, "token_acc": 0.9286912751677853 }, { "epoch": 0.5371308016877637, "grad_norm": 0.75, "learning_rate": 8.495254991464134e-07, "loss": 0.2589966058731079, "step": 2546, "token_acc": 0.9304703476482618 }, { "epoch": 0.5373417721518987, "grad_norm": 0.65234375, "learning_rate": 8.494045715009342e-07, "loss": 0.24122105538845062, "step": 2547, "token_acc": 0.9351535836177475 }, { "epoch": 0.5375527426160338, "grad_norm": 0.74609375, "learning_rate": 8.492836038980848e-07, "loss": 0.28431183099746704, "step": 2548, "token_acc": 0.9243090452261307 }, { "epoch": 0.5377637130801688, "grad_norm": 0.80859375, "learning_rate": 8.491625963516989e-07, "loss": 0.2857764959335327, "step": 2549, "token_acc": 0.9214711729622267 }, { "epoch": 0.5379746835443038, "grad_norm": 0.78125, "learning_rate": 8.490415488756149e-07, "loss": 0.22653721272945404, "step": 2550, "token_acc": 0.9364864864864865 }, { "epoch": 0.5381856540084389, "grad_norm": 0.703125, "learning_rate": 8.489204614836755e-07, "loss": 0.2659202814102173, "step": 2551, "token_acc": 0.9259911894273127 }, { "epoch": 0.5383966244725739, "grad_norm": 0.6953125, "learning_rate": 8.487993341897281e-07, "loss": 0.22629112005233765, "step": 2552, "token_acc": 0.9346692286576997 }, { "epoch": 0.5386075949367088, "grad_norm": 0.94140625, "learning_rate": 8.486781670076248e-07, "loss": 0.3022962808609009, "step": 2553, "token_acc": 0.9146103896103897 }, { "epoch": 0.5388185654008438, "grad_norm": 1.359375, "learning_rate": 8.485569599512218e-07, "loss": 0.28023257851600647, "step": 2554, "token_acc": 0.9242593627724986 }, { "epoch": 0.5390295358649789, "grad_norm": 0.80078125, "learning_rate": 8.484357130343802e-07, "loss": 0.3295441269874573, "step": 2555, "token_acc": 0.9104477611940298 }, { "epoch": 0.5392405063291139, "grad_norm": 1.2265625, "learning_rate": 8.483144262709658e-07, "loss": 0.27596184611320496, "step": 2556, "token_acc": 0.9232902033271719 }, { "epoch": 0.5394514767932489, "grad_norm": 0.6796875, "learning_rate": 8.481930996748486e-07, "loss": 0.22927063703536987, "step": 2557, "token_acc": 0.9365079365079365 }, { "epoch": 0.539662447257384, "grad_norm": 0.82421875, "learning_rate": 8.480717332599032e-07, "loss": 0.2843690514564514, "step": 2558, "token_acc": 0.9215856481481481 }, { "epoch": 0.539873417721519, "grad_norm": 0.80078125, "learning_rate": 8.479503270400093e-07, "loss": 0.25006401538848877, "step": 2559, "token_acc": 0.9348082595870206 }, { "epoch": 0.540084388185654, "grad_norm": 0.921875, "learning_rate": 8.478288810290504e-07, "loss": 0.28820398449897766, "step": 2560, "token_acc": 0.9172324603944391 }, { "epoch": 0.5402953586497891, "grad_norm": 0.72265625, "learning_rate": 8.477073952409148e-07, "loss": 0.2397686392068863, "step": 2561, "token_acc": 0.9310035842293907 }, { "epoch": 0.540506329113924, "grad_norm": 0.796875, "learning_rate": 8.475858696894957e-07, "loss": 0.25805336236953735, "step": 2562, "token_acc": 0.9334384858044164 }, { "epoch": 0.540717299578059, "grad_norm": 0.8046875, "learning_rate": 8.474643043886904e-07, "loss": 0.2584247291088104, "step": 2563, "token_acc": 0.9266837169650469 }, { "epoch": 0.5409282700421941, "grad_norm": 0.8125, "learning_rate": 8.473426993524011e-07, "loss": 0.2660512924194336, "step": 2564, "token_acc": 0.9236923076923077 }, { "epoch": 0.5411392405063291, "grad_norm": 0.6875, "learning_rate": 8.472210545945342e-07, "loss": 0.2601029872894287, "step": 2565, "token_acc": 0.9255970606246172 }, { "epoch": 0.5413502109704641, "grad_norm": 0.73046875, "learning_rate": 8.470993701290008e-07, "loss": 0.21796312928199768, "step": 2566, "token_acc": 0.9377628259041211 }, { "epoch": 0.5415611814345992, "grad_norm": 0.76953125, "learning_rate": 8.469776459697167e-07, "loss": 0.2508326768875122, "step": 2567, "token_acc": 0.9285911221395092 }, { "epoch": 0.5417721518987342, "grad_norm": 0.7890625, "learning_rate": 8.468558821306017e-07, "loss": 0.24336743354797363, "step": 2568, "token_acc": 0.9286553141514974 }, { "epoch": 0.5419831223628692, "grad_norm": 0.85546875, "learning_rate": 8.467340786255811e-07, "loss": 0.2455786168575287, "step": 2569, "token_acc": 0.9322697170379289 }, { "epoch": 0.5421940928270043, "grad_norm": 0.65625, "learning_rate": 8.466122354685838e-07, "loss": 0.25604596734046936, "step": 2570, "token_acc": 0.9221238938053097 }, { "epoch": 0.5424050632911392, "grad_norm": 0.80859375, "learning_rate": 8.464903526735437e-07, "loss": 0.28357216715812683, "step": 2571, "token_acc": 0.9187948350071736 }, { "epoch": 0.5426160337552742, "grad_norm": 0.765625, "learning_rate": 8.46368430254399e-07, "loss": 0.24011358618736267, "step": 2572, "token_acc": 0.9420243433696348 }, { "epoch": 0.5428270042194093, "grad_norm": 0.89453125, "learning_rate": 8.462464682250928e-07, "loss": 0.2642650306224823, "step": 2573, "token_acc": 0.927845793175306 }, { "epoch": 0.5430379746835443, "grad_norm": 0.640625, "learning_rate": 8.461244665995723e-07, "loss": 0.256681889295578, "step": 2574, "token_acc": 0.9306811332127788 }, { "epoch": 0.5432489451476793, "grad_norm": 0.734375, "learning_rate": 8.460024253917894e-07, "loss": 0.23439694941043854, "step": 2575, "token_acc": 0.9362615587846763 }, { "epoch": 0.5434599156118144, "grad_norm": 0.91796875, "learning_rate": 8.458803446157008e-07, "loss": 0.20310401916503906, "step": 2576, "token_acc": 0.9405304045004018 }, { "epoch": 0.5436708860759494, "grad_norm": 0.6484375, "learning_rate": 8.45758224285267e-07, "loss": 0.22486239671707153, "step": 2577, "token_acc": 0.9365977922445514 }, { "epoch": 0.5438818565400844, "grad_norm": 0.81640625, "learning_rate": 8.45636064414454e-07, "loss": 0.25661221146583557, "step": 2578, "token_acc": 0.9317738791423001 }, { "epoch": 0.5440928270042195, "grad_norm": 0.78125, "learning_rate": 8.455138650172315e-07, "loss": 0.28514364361763, "step": 2579, "token_acc": 0.9185823754789272 }, { "epoch": 0.5443037974683544, "grad_norm": 0.59375, "learning_rate": 8.453916261075743e-07, "loss": 0.2316652238368988, "step": 2580, "token_acc": 0.932656023222061 }, { "epoch": 0.5445147679324894, "grad_norm": 0.65625, "learning_rate": 8.45269347699461e-07, "loss": 0.24590986967086792, "step": 2581, "token_acc": 0.9323262839879154 }, { "epoch": 0.5447257383966245, "grad_norm": 0.609375, "learning_rate": 8.451470298068757e-07, "loss": 0.23515790700912476, "step": 2582, "token_acc": 0.9325681492109039 }, { "epoch": 0.5449367088607595, "grad_norm": 0.73828125, "learning_rate": 8.450246724438062e-07, "loss": 0.20705512166023254, "step": 2583, "token_acc": 0.9384920634920635 }, { "epoch": 0.5451476793248945, "grad_norm": 0.6953125, "learning_rate": 8.44902275624245e-07, "loss": 0.25460585951805115, "step": 2584, "token_acc": 0.9258763714209258 }, { "epoch": 0.5453586497890295, "grad_norm": 0.71875, "learning_rate": 8.447798393621895e-07, "loss": 0.31564241647720337, "step": 2585, "token_acc": 0.9212575713873666 }, { "epoch": 0.5455696202531646, "grad_norm": 0.7265625, "learning_rate": 8.446573636716411e-07, "loss": 0.2664661109447479, "step": 2586, "token_acc": 0.9278861033735686 }, { "epoch": 0.5457805907172996, "grad_norm": 0.6796875, "learning_rate": 8.44534848566606e-07, "loss": 0.26693183183670044, "step": 2587, "token_acc": 0.9242658423493045 }, { "epoch": 0.5459915611814345, "grad_norm": 0.91015625, "learning_rate": 8.444122940610949e-07, "loss": 0.3010410666465759, "step": 2588, "token_acc": 0.9134363159428738 }, { "epoch": 0.5462025316455696, "grad_norm": 0.66015625, "learning_rate": 8.442897001691227e-07, "loss": 0.22087593376636505, "step": 2589, "token_acc": 0.9403122130394858 }, { "epoch": 0.5464135021097046, "grad_norm": 0.62109375, "learning_rate": 8.441670669047096e-07, "loss": 0.22192814946174622, "step": 2590, "token_acc": 0.9399882903981265 }, { "epoch": 0.5466244725738396, "grad_norm": 0.8671875, "learning_rate": 8.440443942818791e-07, "loss": 0.3086521625518799, "step": 2591, "token_acc": 0.9114367633004141 }, { "epoch": 0.5468354430379747, "grad_norm": 0.71484375, "learning_rate": 8.439216823146603e-07, "loss": 0.27392855286598206, "step": 2592, "token_acc": 0.9269242288110212 }, { "epoch": 0.5470464135021097, "grad_norm": 0.84375, "learning_rate": 8.437989310170861e-07, "loss": 0.2849975526332855, "step": 2593, "token_acc": 0.921875 }, { "epoch": 0.5472573839662447, "grad_norm": 0.7421875, "learning_rate": 8.436761404031943e-07, "loss": 0.27943551540374756, "step": 2594, "token_acc": 0.9233809668592278 }, { "epoch": 0.5474683544303798, "grad_norm": 0.6484375, "learning_rate": 8.435533104870269e-07, "loss": 0.20296721160411835, "step": 2595, "token_acc": 0.9368421052631579 }, { "epoch": 0.5476793248945148, "grad_norm": 0.80078125, "learning_rate": 8.434304412826308e-07, "loss": 0.27841201424598694, "step": 2596, "token_acc": 0.919015047879617 }, { "epoch": 0.5478902953586497, "grad_norm": 0.56640625, "learning_rate": 8.433075328040569e-07, "loss": 0.23092350363731384, "step": 2597, "token_acc": 0.9328550932568149 }, { "epoch": 0.5481012658227848, "grad_norm": 0.6953125, "learning_rate": 8.431845850653608e-07, "loss": 0.2339271903038025, "step": 2598, "token_acc": 0.9331210191082803 }, { "epoch": 0.5483122362869198, "grad_norm": 0.83984375, "learning_rate": 8.430615980806029e-07, "loss": 0.27288681268692017, "step": 2599, "token_acc": 0.9220963172804533 }, { "epoch": 0.5485232067510548, "grad_norm": 0.7265625, "learning_rate": 8.429385718638474e-07, "loss": 0.2347344011068344, "step": 2600, "token_acc": 0.9344608879492601 }, { "epoch": 0.5485232067510548, "eval_loss": 0.4336564242839813, "eval_runtime": 245.5268, "eval_samples_per_second": 137.276, "eval_steps_per_second": 2.146, "eval_token_acc": 0.8990924795835951, "step": 2600 }, { "epoch": 0.5487341772151899, "grad_norm": 0.703125, "learning_rate": 8.428155064291636e-07, "loss": 0.2879828214645386, "step": 2601, "token_acc": 0.9228314476356252 }, { "epoch": 0.5489451476793249, "grad_norm": 0.66796875, "learning_rate": 8.426924017906252e-07, "loss": 0.29434141516685486, "step": 2602, "token_acc": 0.9221347331583553 }, { "epoch": 0.5491561181434599, "grad_norm": 0.83203125, "learning_rate": 8.425692579623101e-07, "loss": 0.31449979543685913, "step": 2603, "token_acc": 0.9129682997118156 }, { "epoch": 0.549367088607595, "grad_norm": 0.76953125, "learning_rate": 8.424460749583009e-07, "loss": 0.28606468439102173, "step": 2604, "token_acc": 0.9260977118119975 }, { "epoch": 0.54957805907173, "grad_norm": 0.73828125, "learning_rate": 8.423228527926844e-07, "loss": 0.28885042667388916, "step": 2605, "token_acc": 0.9201474201474201 }, { "epoch": 0.549789029535865, "grad_norm": 0.69921875, "learning_rate": 8.421995914795525e-07, "loss": 0.28102123737335205, "step": 2606, "token_acc": 0.9231619679380874 }, { "epoch": 0.55, "grad_norm": 0.62109375, "learning_rate": 8.42076291033001e-07, "loss": 0.2174537181854248, "step": 2607, "token_acc": 0.9384146341463414 }, { "epoch": 0.550210970464135, "grad_norm": 0.953125, "learning_rate": 8.419529514671302e-07, "loss": 0.2687872350215912, "step": 2608, "token_acc": 0.9322766570605188 }, { "epoch": 0.55042194092827, "grad_norm": 0.68359375, "learning_rate": 8.418295727960452e-07, "loss": 0.2529855966567993, "step": 2609, "token_acc": 0.9293266555370061 }, { "epoch": 0.5506329113924051, "grad_norm": 0.625, "learning_rate": 8.417061550338551e-07, "loss": 0.23254430294036865, "step": 2610, "token_acc": 0.9334648999154215 }, { "epoch": 0.5508438818565401, "grad_norm": 0.64453125, "learning_rate": 8.41582698194674e-07, "loss": 0.23072651028633118, "step": 2611, "token_acc": 0.9321041849575651 }, { "epoch": 0.5510548523206751, "grad_norm": 0.71875, "learning_rate": 8.414592022926206e-07, "loss": 0.2386835664510727, "step": 2612, "token_acc": 0.9336476928258673 }, { "epoch": 0.5512658227848102, "grad_norm": 0.62890625, "learning_rate": 8.413356673418171e-07, "loss": 0.2478591501712799, "step": 2613, "token_acc": 0.9271483794231341 }, { "epoch": 0.5514767932489452, "grad_norm": 0.65625, "learning_rate": 8.412120933563909e-07, "loss": 0.24771589040756226, "step": 2614, "token_acc": 0.9313179527976664 }, { "epoch": 0.5516877637130801, "grad_norm": 0.6796875, "learning_rate": 8.410884803504741e-07, "loss": 0.26035720109939575, "step": 2615, "token_acc": 0.9254331683168316 }, { "epoch": 0.5518987341772152, "grad_norm": 0.765625, "learning_rate": 8.409648283382024e-07, "loss": 0.2916993200778961, "step": 2616, "token_acc": 0.9223576083876452 }, { "epoch": 0.5521097046413502, "grad_norm": 0.72265625, "learning_rate": 8.408411373337168e-07, "loss": 0.23952031135559082, "step": 2617, "token_acc": 0.9317464869515343 }, { "epoch": 0.5523206751054852, "grad_norm": 1.1328125, "learning_rate": 8.407174073511622e-07, "loss": 0.24818843603134155, "step": 2618, "token_acc": 0.9275659824046921 }, { "epoch": 0.5525316455696202, "grad_norm": 0.6171875, "learning_rate": 8.405936384046884e-07, "loss": 0.24904057383537292, "step": 2619, "token_acc": 0.9323546344271733 }, { "epoch": 0.5527426160337553, "grad_norm": 0.8046875, "learning_rate": 8.40469830508449e-07, "loss": 0.25590598583221436, "step": 2620, "token_acc": 0.9309651474530831 }, { "epoch": 0.5529535864978903, "grad_norm": 0.69921875, "learning_rate": 8.40345983676603e-07, "loss": 0.25597819685935974, "step": 2621, "token_acc": 0.9286150091519219 }, { "epoch": 0.5531645569620253, "grad_norm": 0.9296875, "learning_rate": 8.40222097923313e-07, "loss": 0.32118701934814453, "step": 2622, "token_acc": 0.9164656212303981 }, { "epoch": 0.5533755274261604, "grad_norm": 0.7734375, "learning_rate": 8.400981732627466e-07, "loss": 0.23506760597229004, "step": 2623, "token_acc": 0.9312725090036015 }, { "epoch": 0.5535864978902953, "grad_norm": 0.828125, "learning_rate": 8.399742097090754e-07, "loss": 0.27046120166778564, "step": 2624, "token_acc": 0.9221574344023323 }, { "epoch": 0.5537974683544303, "grad_norm": 1.671875, "learning_rate": 8.398502072764759e-07, "loss": 0.2787461578845978, "step": 2625, "token_acc": 0.9241176470588235 }, { "epoch": 0.5540084388185654, "grad_norm": 0.640625, "learning_rate": 8.397261659791286e-07, "loss": 0.2294963002204895, "step": 2626, "token_acc": 0.9301622216112181 }, { "epoch": 0.5542194092827004, "grad_norm": 0.72265625, "learning_rate": 8.39602085831219e-07, "loss": 0.2696976065635681, "step": 2627, "token_acc": 0.9249259137306552 }, { "epoch": 0.5544303797468354, "grad_norm": 0.87109375, "learning_rate": 8.394779668469363e-07, "loss": 0.24690134823322296, "step": 2628, "token_acc": 0.9289587852494577 }, { "epoch": 0.5546413502109705, "grad_norm": 0.7734375, "learning_rate": 8.39353809040475e-07, "loss": 0.22493812441825867, "step": 2629, "token_acc": 0.9393063583815029 }, { "epoch": 0.5548523206751055, "grad_norm": 0.65234375, "learning_rate": 8.392296124260332e-07, "loss": 0.21133512258529663, "step": 2630, "token_acc": 0.9405131096701438 }, { "epoch": 0.5550632911392405, "grad_norm": 0.6875, "learning_rate": 8.39105377017814e-07, "loss": 0.25263702869415283, "step": 2631, "token_acc": 0.9295895096921323 }, { "epoch": 0.5552742616033756, "grad_norm": 0.6015625, "learning_rate": 8.389811028300247e-07, "loss": 0.23291683197021484, "step": 2632, "token_acc": 0.932630995286942 }, { "epoch": 0.5554852320675105, "grad_norm": 0.8203125, "learning_rate": 8.388567898768774e-07, "loss": 0.2954631447792053, "step": 2633, "token_acc": 0.9178605539637058 }, { "epoch": 0.5556962025316455, "grad_norm": 0.671875, "learning_rate": 8.38732438172588e-07, "loss": 0.26403701305389404, "step": 2634, "token_acc": 0.9285266457680251 }, { "epoch": 0.5559071729957806, "grad_norm": 0.8359375, "learning_rate": 8.386080477313772e-07, "loss": 0.2658252418041229, "step": 2635, "token_acc": 0.922476135377495 }, { "epoch": 0.5561181434599156, "grad_norm": 0.74609375, "learning_rate": 8.384836185674704e-07, "loss": 0.2872653305530548, "step": 2636, "token_acc": 0.9276967930029154 }, { "epoch": 0.5563291139240506, "grad_norm": 0.546875, "learning_rate": 8.383591506950968e-07, "loss": 0.1877198964357376, "step": 2637, "token_acc": 0.9443635304398486 }, { "epoch": 0.5565400843881857, "grad_norm": 0.6875, "learning_rate": 8.382346441284904e-07, "loss": 0.25758838653564453, "step": 2638, "token_acc": 0.932240099009901 }, { "epoch": 0.5567510548523207, "grad_norm": 0.890625, "learning_rate": 8.381100988818898e-07, "loss": 0.2883344292640686, "step": 2639, "token_acc": 0.9247842170160296 }, { "epoch": 0.5569620253164557, "grad_norm": 0.78515625, "learning_rate": 8.379855149695376e-07, "loss": 0.2700793147087097, "step": 2640, "token_acc": 0.9275277615429574 }, { "epoch": 0.5571729957805908, "grad_norm": 0.78125, "learning_rate": 8.37860892405681e-07, "loss": 0.24384114146232605, "step": 2641, "token_acc": 0.927238215754508 }, { "epoch": 0.5573839662447257, "grad_norm": 0.63671875, "learning_rate": 8.377362312045717e-07, "loss": 0.23657594621181488, "step": 2642, "token_acc": 0.931585292344786 }, { "epoch": 0.5575949367088607, "grad_norm": 0.74609375, "learning_rate": 8.37611531380466e-07, "loss": 0.2722514271736145, "step": 2643, "token_acc": 0.9285925925925926 }, { "epoch": 0.5578059071729958, "grad_norm": 0.765625, "learning_rate": 8.374867929476239e-07, "loss": 0.30291199684143066, "step": 2644, "token_acc": 0.9136057941024315 }, { "epoch": 0.5580168776371308, "grad_norm": 0.65234375, "learning_rate": 8.373620159203107e-07, "loss": 0.2476034164428711, "step": 2645, "token_acc": 0.9291204099060631 }, { "epoch": 0.5582278481012658, "grad_norm": 0.79296875, "learning_rate": 8.372372003127956e-07, "loss": 0.27616044878959656, "step": 2646, "token_acc": 0.9306742640075973 }, { "epoch": 0.5584388185654009, "grad_norm": 0.97265625, "learning_rate": 8.371123461393523e-07, "loss": 0.27190765738487244, "step": 2647, "token_acc": 0.9267202859696158 }, { "epoch": 0.5586497890295359, "grad_norm": 0.6875, "learning_rate": 8.369874534142588e-07, "loss": 0.23533740639686584, "step": 2648, "token_acc": 0.9349159775940251 }, { "epoch": 0.5588607594936709, "grad_norm": 0.6796875, "learning_rate": 8.368625221517977e-07, "loss": 0.2168492078781128, "step": 2649, "token_acc": 0.9408825978351374 }, { "epoch": 0.5590717299578059, "grad_norm": 0.64453125, "learning_rate": 8.367375523662562e-07, "loss": 0.2400965392589569, "step": 2650, "token_acc": 0.9309944911568571 }, { "epoch": 0.559282700421941, "grad_norm": 0.78125, "learning_rate": 8.366125440719254e-07, "loss": 0.25689658522605896, "step": 2651, "token_acc": 0.9324283559577677 }, { "epoch": 0.5594936708860759, "grad_norm": 0.71484375, "learning_rate": 8.364874972831011e-07, "loss": 0.24038714170455933, "step": 2652, "token_acc": 0.929819366301451 }, { "epoch": 0.5597046413502109, "grad_norm": 0.93359375, "learning_rate": 8.363624120140835e-07, "loss": 0.2736024856567383, "step": 2653, "token_acc": 0.9210944957047407 }, { "epoch": 0.559915611814346, "grad_norm": 0.57421875, "learning_rate": 8.362372882791772e-07, "loss": 0.23916926980018616, "step": 2654, "token_acc": 0.9308009422850412 }, { "epoch": 0.560126582278481, "grad_norm": 0.65234375, "learning_rate": 8.361121260926911e-07, "loss": 0.25289350748062134, "step": 2655, "token_acc": 0.9297520661157025 }, { "epoch": 0.560337552742616, "grad_norm": 0.70703125, "learning_rate": 8.359869254689384e-07, "loss": 0.23803725838661194, "step": 2656, "token_acc": 0.9330505907300818 }, { "epoch": 0.5605485232067511, "grad_norm": 0.6875, "learning_rate": 8.358616864222371e-07, "loss": 0.2833866775035858, "step": 2657, "token_acc": 0.9187406296851575 }, { "epoch": 0.5607594936708861, "grad_norm": 0.78515625, "learning_rate": 8.357364089669092e-07, "loss": 0.28516727685928345, "step": 2658, "token_acc": 0.9209474007997539 }, { "epoch": 0.560970464135021, "grad_norm": 0.671875, "learning_rate": 8.356110931172812e-07, "loss": 0.2699287533760071, "step": 2659, "token_acc": 0.9282339043118725 }, { "epoch": 0.5611814345991561, "grad_norm": 0.8984375, "learning_rate": 8.354857388876844e-07, "loss": 0.30059972405433655, "step": 2660, "token_acc": 0.9227764423076923 }, { "epoch": 0.5613924050632911, "grad_norm": 0.69140625, "learning_rate": 8.353603462924537e-07, "loss": 0.23169153928756714, "step": 2661, "token_acc": 0.9344632768361582 }, { "epoch": 0.5616033755274261, "grad_norm": 0.73828125, "learning_rate": 8.352349153459289e-07, "loss": 0.26684826612472534, "step": 2662, "token_acc": 0.9217061057551855 }, { "epoch": 0.5618143459915612, "grad_norm": 0.9609375, "learning_rate": 8.351094460624542e-07, "loss": 0.26441407203674316, "step": 2663, "token_acc": 0.9281354051054383 }, { "epoch": 0.5620253164556962, "grad_norm": 0.7265625, "learning_rate": 8.349839384563777e-07, "loss": 0.2654181122779846, "step": 2664, "token_acc": 0.9254372506903958 }, { "epoch": 0.5622362869198312, "grad_norm": 0.67578125, "learning_rate": 8.348583925420529e-07, "loss": 0.2508045732975006, "step": 2665, "token_acc": 0.9302777777777778 }, { "epoch": 0.5624472573839663, "grad_norm": 0.6171875, "learning_rate": 8.347328083338366e-07, "loss": 0.21960122883319855, "step": 2666, "token_acc": 0.9345307068366164 }, { "epoch": 0.5626582278481013, "grad_norm": 0.68359375, "learning_rate": 8.346071858460903e-07, "loss": 0.22674736380577087, "step": 2667, "token_acc": 0.9377791009228937 }, { "epoch": 0.5628691983122363, "grad_norm": 0.8515625, "learning_rate": 8.344815250931805e-07, "loss": 0.25689297914505005, "step": 2668, "token_acc": 0.9283297446939404 }, { "epoch": 0.5630801687763713, "grad_norm": 0.82421875, "learning_rate": 8.343558260894772e-07, "loss": 0.2350965291261673, "step": 2669, "token_acc": 0.9362084456424079 }, { "epoch": 0.5632911392405063, "grad_norm": 0.71484375, "learning_rate": 8.342300888493552e-07, "loss": 0.2454356551170349, "step": 2670, "token_acc": 0.9325468844525105 }, { "epoch": 0.5635021097046413, "grad_norm": 0.88671875, "learning_rate": 8.341043133871935e-07, "loss": 0.2703838646411896, "step": 2671, "token_acc": 0.9278678106132825 }, { "epoch": 0.5637130801687764, "grad_norm": 0.671875, "learning_rate": 8.33978499717376e-07, "loss": 0.19629967212677002, "step": 2672, "token_acc": 0.9454094292803971 }, { "epoch": 0.5639240506329114, "grad_norm": 0.71484375, "learning_rate": 8.338526478542902e-07, "loss": 0.2869308590888977, "step": 2673, "token_acc": 0.919976289270895 }, { "epoch": 0.5641350210970464, "grad_norm": 0.62109375, "learning_rate": 8.337267578123281e-07, "loss": 0.18096670508384705, "step": 2674, "token_acc": 0.9426619132501486 }, { "epoch": 0.5643459915611815, "grad_norm": 1.8125, "learning_rate": 8.33600829605887e-07, "loss": 0.28571566939353943, "step": 2675, "token_acc": 0.9192185850052799 }, { "epoch": 0.5645569620253165, "grad_norm": 0.71875, "learning_rate": 8.334748632493674e-07, "loss": 0.2322588711977005, "step": 2676, "token_acc": 0.9389444636081408 }, { "epoch": 0.5647679324894515, "grad_norm": 0.7265625, "learning_rate": 8.333488587571744e-07, "loss": 0.28094035387039185, "step": 2677, "token_acc": 0.9224137931034483 }, { "epoch": 0.5649789029535865, "grad_norm": 0.671875, "learning_rate": 8.332228161437183e-07, "loss": 0.22083649039268494, "step": 2678, "token_acc": 0.9334285714285714 }, { "epoch": 0.5651898734177215, "grad_norm": 0.73046875, "learning_rate": 8.330967354234126e-07, "loss": 0.23791581392288208, "step": 2679, "token_acc": 0.9317860175488254 }, { "epoch": 0.5654008438818565, "grad_norm": 0.8828125, "learning_rate": 8.32970616610676e-07, "loss": 0.2533890902996063, "step": 2680, "token_acc": 0.9279208951340099 }, { "epoch": 0.5656118143459916, "grad_norm": 0.59375, "learning_rate": 8.32844459719931e-07, "loss": 0.2348913699388504, "step": 2681, "token_acc": 0.9307978267086073 }, { "epoch": 0.5658227848101266, "grad_norm": 0.7109375, "learning_rate": 8.32718264765605e-07, "loss": 0.27703386545181274, "step": 2682, "token_acc": 0.9255289914811762 }, { "epoch": 0.5660337552742616, "grad_norm": 0.97265625, "learning_rate": 8.325920317621293e-07, "loss": 0.24374079704284668, "step": 2683, "token_acc": 0.9315970234688037 }, { "epoch": 0.5662447257383966, "grad_norm": 0.88671875, "learning_rate": 8.324657607239395e-07, "loss": 0.2532427906990051, "step": 2684, "token_acc": 0.927681660899654 }, { "epoch": 0.5664556962025317, "grad_norm": 0.78125, "learning_rate": 8.323394516654762e-07, "loss": 0.2598961293697357, "step": 2685, "token_acc": 0.9305144884683619 }, { "epoch": 0.5666666666666667, "grad_norm": 0.56640625, "learning_rate": 8.322131046011838e-07, "loss": 0.21831172704696655, "step": 2686, "token_acc": 0.9374820762833381 }, { "epoch": 0.5668776371308016, "grad_norm": 0.71875, "learning_rate": 8.32086719545511e-07, "loss": 0.25684654712677, "step": 2687, "token_acc": 0.9263610315186246 }, { "epoch": 0.5670886075949367, "grad_norm": 0.71875, "learning_rate": 8.31960296512911e-07, "loss": 0.22141197323799133, "step": 2688, "token_acc": 0.9365867107802591 }, { "epoch": 0.5672995780590717, "grad_norm": 0.65234375, "learning_rate": 8.318338355178414e-07, "loss": 0.25332045555114746, "step": 2689, "token_acc": 0.925513698630137 }, { "epoch": 0.5675105485232067, "grad_norm": 0.53515625, "learning_rate": 8.317073365747642e-07, "loss": 0.21818161010742188, "step": 2690, "token_acc": 0.9353546910755148 }, { "epoch": 0.5677215189873418, "grad_norm": 0.64453125, "learning_rate": 8.315807996981454e-07, "loss": 0.2513672113418579, "step": 2691, "token_acc": 0.9288750354207991 }, { "epoch": 0.5679324894514768, "grad_norm": 0.5703125, "learning_rate": 8.314542249024558e-07, "loss": 0.2252429872751236, "step": 2692, "token_acc": 0.9282831418814033 }, { "epoch": 0.5681434599156118, "grad_norm": 0.73046875, "learning_rate": 8.313276122021699e-07, "loss": 0.2333730012178421, "step": 2693, "token_acc": 0.9374461979913917 }, { "epoch": 0.5683544303797469, "grad_norm": 0.6796875, "learning_rate": 8.312009616117676e-07, "loss": 0.26267820596694946, "step": 2694, "token_acc": 0.9245810055865922 }, { "epoch": 0.5685654008438819, "grad_norm": 0.75390625, "learning_rate": 8.310742731457317e-07, "loss": 0.2679429352283478, "step": 2695, "token_acc": 0.9265919811320755 }, { "epoch": 0.5687763713080168, "grad_norm": 0.7421875, "learning_rate": 8.309475468185507e-07, "loss": 0.23267057538032532, "step": 2696, "token_acc": 0.9364925854287557 }, { "epoch": 0.5689873417721519, "grad_norm": 0.765625, "learning_rate": 8.308207826447165e-07, "loss": 0.2867773175239563, "step": 2697, "token_acc": 0.9221859221859222 }, { "epoch": 0.5691983122362869, "grad_norm": 0.62890625, "learning_rate": 8.306939806387259e-07, "loss": 0.25969308614730835, "step": 2698, "token_acc": 0.9296732026143791 }, { "epoch": 0.5694092827004219, "grad_norm": 0.703125, "learning_rate": 8.305671408150796e-07, "loss": 0.2742140293121338, "step": 2699, "token_acc": 0.9242982704848313 }, { "epoch": 0.569620253164557, "grad_norm": 0.73828125, "learning_rate": 8.304402631882828e-07, "loss": 0.31750059127807617, "step": 2700, "token_acc": 0.9233836513813728 }, { "epoch": 0.569831223628692, "grad_norm": 0.8125, "learning_rate": 8.30313347772845e-07, "loss": 0.2553901672363281, "step": 2701, "token_acc": 0.9252085816448152 }, { "epoch": 0.570042194092827, "grad_norm": 1.0859375, "learning_rate": 8.301863945832803e-07, "loss": 0.28199517726898193, "step": 2702, "token_acc": 0.9218487394957983 }, { "epoch": 0.5702531645569621, "grad_norm": 0.65625, "learning_rate": 8.300594036341066e-07, "loss": 0.27853235602378845, "step": 2703, "token_acc": 0.9235531628532975 }, { "epoch": 0.570464135021097, "grad_norm": 0.5625, "learning_rate": 8.299323749398466e-07, "loss": 0.234930157661438, "step": 2704, "token_acc": 0.9338019917984769 }, { "epoch": 0.570675105485232, "grad_norm": 0.78125, "learning_rate": 8.298053085150267e-07, "loss": 0.27506327629089355, "step": 2705, "token_acc": 0.9275527666856817 }, { "epoch": 0.5708860759493671, "grad_norm": 0.6015625, "learning_rate": 8.296782043741786e-07, "loss": 0.23263251781463623, "step": 2706, "token_acc": 0.9332023575638507 }, { "epoch": 0.5710970464135021, "grad_norm": 0.875, "learning_rate": 8.295510625318374e-07, "loss": 0.2742362916469574, "step": 2707, "token_acc": 0.923568915893828 }, { "epoch": 0.5713080168776371, "grad_norm": 0.82421875, "learning_rate": 8.294238830025429e-07, "loss": 0.2526121437549591, "step": 2708, "token_acc": 0.9350947731188972 }, { "epoch": 0.5715189873417722, "grad_norm": 0.765625, "learning_rate": 8.292966658008391e-07, "loss": 0.28160983324050903, "step": 2709, "token_acc": 0.9240292628024761 }, { "epoch": 0.5717299578059072, "grad_norm": 0.7109375, "learning_rate": 8.291694109412746e-07, "loss": 0.26464521884918213, "step": 2710, "token_acc": 0.9221663213968629 }, { "epoch": 0.5719409282700422, "grad_norm": 0.6953125, "learning_rate": 8.290421184384017e-07, "loss": 0.2662385404109955, "step": 2711, "token_acc": 0.9317897371714643 }, { "epoch": 0.5721518987341773, "grad_norm": 0.69140625, "learning_rate": 8.289147883067776e-07, "loss": 0.2539859414100647, "step": 2712, "token_acc": 0.932868352223191 }, { "epoch": 0.5723628691983123, "grad_norm": 0.97265625, "learning_rate": 8.287874205609635e-07, "loss": 0.2625485062599182, "step": 2713, "token_acc": 0.9315951780207458 }, { "epoch": 0.5725738396624472, "grad_norm": 0.71875, "learning_rate": 8.286600152155252e-07, "loss": 0.2616201937198639, "step": 2714, "token_acc": 0.9289737171464331 }, { "epoch": 0.5727848101265823, "grad_norm": 0.765625, "learning_rate": 8.285325722850323e-07, "loss": 0.2657352685928345, "step": 2715, "token_acc": 0.9250759365507931 }, { "epoch": 0.5729957805907173, "grad_norm": 0.69921875, "learning_rate": 8.284050917840591e-07, "loss": 0.24797630310058594, "step": 2716, "token_acc": 0.9314185228604924 }, { "epoch": 0.5732067510548523, "grad_norm": 0.65234375, "learning_rate": 8.282775737271841e-07, "loss": 0.21330232918262482, "step": 2717, "token_acc": 0.9318373071528752 }, { "epoch": 0.5734177215189873, "grad_norm": 0.95703125, "learning_rate": 8.281500181289899e-07, "loss": 0.2646872401237488, "step": 2718, "token_acc": 0.9281842818428184 }, { "epoch": 0.5736286919831224, "grad_norm": 0.7734375, "learning_rate": 8.280224250040637e-07, "loss": 0.2921873927116394, "step": 2719, "token_acc": 0.9260129183793306 }, { "epoch": 0.5738396624472574, "grad_norm": 0.73046875, "learning_rate": 8.278947943669969e-07, "loss": 0.22265753149986267, "step": 2720, "token_acc": 0.9395635142697258 }, { "epoch": 0.5740506329113924, "grad_norm": 0.78125, "learning_rate": 8.277671262323851e-07, "loss": 0.2447950839996338, "step": 2721, "token_acc": 0.9318181818181818 }, { "epoch": 0.5742616033755275, "grad_norm": 0.71875, "learning_rate": 8.276394206148283e-07, "loss": 0.22373399138450623, "step": 2722, "token_acc": 0.935989010989011 }, { "epoch": 0.5744725738396624, "grad_norm": 0.71484375, "learning_rate": 8.275116775289304e-07, "loss": 0.27078738808631897, "step": 2723, "token_acc": 0.9280337247816922 }, { "epoch": 0.5746835443037974, "grad_norm": 0.55859375, "learning_rate": 8.273838969893003e-07, "loss": 0.2107236385345459, "step": 2724, "token_acc": 0.9361835900660169 }, { "epoch": 0.5748945147679325, "grad_norm": 0.8203125, "learning_rate": 8.272560790105506e-07, "loss": 0.3053116798400879, "step": 2725, "token_acc": 0.9181320265206111 }, { "epoch": 0.5751054852320675, "grad_norm": 0.6640625, "learning_rate": 8.271282236072985e-07, "loss": 0.28734153509140015, "step": 2726, "token_acc": 0.918015102481122 }, { "epoch": 0.5753164556962025, "grad_norm": 0.6171875, "learning_rate": 8.270003307941651e-07, "loss": 0.2245621681213379, "step": 2727, "token_acc": 0.9306451612903226 }, { "epoch": 0.5755274261603376, "grad_norm": 0.9375, "learning_rate": 8.268724005857761e-07, "loss": 0.28480589389801025, "step": 2728, "token_acc": 0.9261763115197404 }, { "epoch": 0.5757383966244726, "grad_norm": 0.7421875, "learning_rate": 8.267444329967617e-07, "loss": 0.21688902378082275, "step": 2729, "token_acc": 0.9366218236173394 }, { "epoch": 0.5759493670886076, "grad_norm": 0.6640625, "learning_rate": 8.266164280417556e-07, "loss": 0.26106923818588257, "step": 2730, "token_acc": 0.9258864302852573 }, { "epoch": 0.5761603375527427, "grad_norm": 0.66015625, "learning_rate": 8.264883857353966e-07, "loss": 0.2671416997909546, "step": 2731, "token_acc": 0.9307842590008373 }, { "epoch": 0.5763713080168776, "grad_norm": 0.671875, "learning_rate": 8.263603060923273e-07, "loss": 0.2618143558502197, "step": 2732, "token_acc": 0.9267175572519084 }, { "epoch": 0.5765822784810126, "grad_norm": 0.8046875, "learning_rate": 8.262321891271948e-07, "loss": 0.22360292077064514, "step": 2733, "token_acc": 0.9355942750466708 }, { "epoch": 0.5767932489451477, "grad_norm": 0.765625, "learning_rate": 8.261040348546501e-07, "loss": 0.27134057879447937, "step": 2734, "token_acc": 0.9265793966989186 }, { "epoch": 0.5770042194092827, "grad_norm": 0.6328125, "learning_rate": 8.259758432893488e-07, "loss": 0.24803023040294647, "step": 2735, "token_acc": 0.932520325203252 }, { "epoch": 0.5772151898734177, "grad_norm": 0.9609375, "learning_rate": 8.258476144459512e-07, "loss": 0.2681199312210083, "step": 2736, "token_acc": 0.9264900662251656 }, { "epoch": 0.5774261603375528, "grad_norm": 0.80078125, "learning_rate": 8.257193483391207e-07, "loss": 0.26183950901031494, "step": 2737, "token_acc": 0.9217494793216304 }, { "epoch": 0.5776371308016878, "grad_norm": 0.76171875, "learning_rate": 8.255910449835258e-07, "loss": 0.2246299684047699, "step": 2738, "token_acc": 0.929097605893186 }, { "epoch": 0.5778481012658228, "grad_norm": 0.68359375, "learning_rate": 8.254627043938392e-07, "loss": 0.24866081774234772, "step": 2739, "token_acc": 0.9261763877148492 }, { "epoch": 0.5780590717299579, "grad_norm": 0.93359375, "learning_rate": 8.253343265847377e-07, "loss": 0.2962813377380371, "step": 2740, "token_acc": 0.9244299674267101 }, { "epoch": 0.5782700421940928, "grad_norm": 0.75, "learning_rate": 8.252059115709022e-07, "loss": 0.27512338757514954, "step": 2741, "token_acc": 0.9279136233724992 }, { "epoch": 0.5784810126582278, "grad_norm": 0.6875, "learning_rate": 8.250774593670182e-07, "loss": 0.24102629721164703, "step": 2742, "token_acc": 0.9331896551724138 }, { "epoch": 0.5786919831223629, "grad_norm": 0.6171875, "learning_rate": 8.249489699877754e-07, "loss": 0.24846646189689636, "step": 2743, "token_acc": 0.9324946302546794 }, { "epoch": 0.5789029535864979, "grad_norm": 0.8828125, "learning_rate": 8.248204434478676e-07, "loss": 0.30632054805755615, "step": 2744, "token_acc": 0.9144481005085253 }, { "epoch": 0.5791139240506329, "grad_norm": 0.73828125, "learning_rate": 8.246918797619926e-07, "loss": 0.2345350980758667, "step": 2745, "token_acc": 0.9382004482869036 }, { "epoch": 0.579324894514768, "grad_norm": 1.015625, "learning_rate": 8.245632789448531e-07, "loss": 0.2546769976615906, "step": 2746, "token_acc": 0.9234209055338177 }, { "epoch": 0.579535864978903, "grad_norm": 0.6640625, "learning_rate": 8.244346410111556e-07, "loss": 0.21427327394485474, "step": 2747, "token_acc": 0.9373467719967311 }, { "epoch": 0.579746835443038, "grad_norm": 1.421875, "learning_rate": 8.243059659756109e-07, "loss": 0.23843082785606384, "step": 2748, "token_acc": 0.9294558019003744 }, { "epoch": 0.5799578059071729, "grad_norm": 0.9375, "learning_rate": 8.241772538529342e-07, "loss": 0.2841801643371582, "step": 2749, "token_acc": 0.9207612456747405 }, { "epoch": 0.580168776371308, "grad_norm": 0.671875, "learning_rate": 8.240485046578445e-07, "loss": 0.27378368377685547, "step": 2750, "token_acc": 0.9247098782904047 }, { "epoch": 0.580379746835443, "grad_norm": 1.046875, "learning_rate": 8.239197184050657e-07, "loss": 0.2440560758113861, "step": 2751, "token_acc": 0.927027027027027 }, { "epoch": 0.580590717299578, "grad_norm": 0.5859375, "learning_rate": 8.237908951093253e-07, "loss": 0.2358388900756836, "step": 2752, "token_acc": 0.932463295269168 }, { "epoch": 0.5808016877637131, "grad_norm": 0.6484375, "learning_rate": 8.236620347853557e-07, "loss": 0.23560798168182373, "step": 2753, "token_acc": 0.9371681415929204 }, { "epoch": 0.5810126582278481, "grad_norm": 0.54296875, "learning_rate": 8.235331374478929e-07, "loss": 0.24375225603580475, "step": 2754, "token_acc": 0.9386993603411514 }, { "epoch": 0.5812236286919831, "grad_norm": 0.7421875, "learning_rate": 8.234042031116775e-07, "loss": 0.288701593875885, "step": 2755, "token_acc": 0.9222813238770685 }, { "epoch": 0.5814345991561182, "grad_norm": 0.796875, "learning_rate": 8.232752317914541e-07, "loss": 0.27194279432296753, "step": 2756, "token_acc": 0.9229005445686443 }, { "epoch": 0.5816455696202532, "grad_norm": 0.90234375, "learning_rate": 8.231462235019717e-07, "loss": 0.24534761905670166, "step": 2757, "token_acc": 0.9320541050644857 }, { "epoch": 0.5818565400843881, "grad_norm": 0.9296875, "learning_rate": 8.230171782579837e-07, "loss": 0.2602353096008301, "step": 2758, "token_acc": 0.9245454545454546 }, { "epoch": 0.5820675105485232, "grad_norm": 0.71484375, "learning_rate": 8.228880960742473e-07, "loss": 0.2598557770252228, "step": 2759, "token_acc": 0.93159977388355 }, { "epoch": 0.5822784810126582, "grad_norm": 0.94921875, "learning_rate": 8.227589769655243e-07, "loss": 0.2807292342185974, "step": 2760, "token_acc": 0.9202610669693531 }, { "epoch": 0.5824894514767932, "grad_norm": 0.80078125, "learning_rate": 8.226298209465805e-07, "loss": 0.26177534461021423, "step": 2761, "token_acc": 0.9305647840531561 }, { "epoch": 0.5827004219409283, "grad_norm": 0.8046875, "learning_rate": 8.225006280321858e-07, "loss": 0.26776787638664246, "step": 2762, "token_acc": 0.9287671232876712 }, { "epoch": 0.5829113924050633, "grad_norm": 0.76171875, "learning_rate": 8.223713982371148e-07, "loss": 0.2697315514087677, "step": 2763, "token_acc": 0.9317216595883698 }, { "epoch": 0.5831223628691983, "grad_norm": 0.625, "learning_rate": 8.222421315761459e-07, "loss": 0.2476009577512741, "step": 2764, "token_acc": 0.9284880326911851 }, { "epoch": 0.5833333333333334, "grad_norm": 1.109375, "learning_rate": 8.221128280640617e-07, "loss": 0.310168981552124, "step": 2765, "token_acc": 0.9181847235538511 }, { "epoch": 0.5835443037974684, "grad_norm": 0.75390625, "learning_rate": 8.219834877156493e-07, "loss": 0.2492801398038864, "step": 2766, "token_acc": 0.9253561253561253 }, { "epoch": 0.5837552742616033, "grad_norm": 0.890625, "learning_rate": 8.218541105457e-07, "loss": 0.28547802567481995, "step": 2767, "token_acc": 0.9232415902140673 }, { "epoch": 0.5839662447257384, "grad_norm": 0.68359375, "learning_rate": 8.217246965690088e-07, "loss": 0.24955560266971588, "step": 2768, "token_acc": 0.9237458193979933 }, { "epoch": 0.5841772151898734, "grad_norm": 0.66015625, "learning_rate": 8.215952458003754e-07, "loss": 0.27297264337539673, "step": 2769, "token_acc": 0.9229847168115073 }, { "epoch": 0.5843881856540084, "grad_norm": 0.80859375, "learning_rate": 8.21465758254604e-07, "loss": 0.24587011337280273, "step": 2770, "token_acc": 0.9281472684085511 }, { "epoch": 0.5845991561181435, "grad_norm": 0.80859375, "learning_rate": 8.213362339465019e-07, "loss": 0.2849137783050537, "step": 2771, "token_acc": 0.9226420708122582 }, { "epoch": 0.5848101265822785, "grad_norm": 0.82421875, "learning_rate": 8.212066728908821e-07, "loss": 0.2952771782875061, "step": 2772, "token_acc": 0.9197303921568627 }, { "epoch": 0.5850210970464135, "grad_norm": 0.63671875, "learning_rate": 8.210770751025603e-07, "loss": 0.23787042498588562, "step": 2773, "token_acc": 0.9273936939542956 }, { "epoch": 0.5852320675105486, "grad_norm": 0.828125, "learning_rate": 8.209474405963574e-07, "loss": 0.25758111476898193, "step": 2774, "token_acc": 0.9265053128689492 }, { "epoch": 0.5854430379746836, "grad_norm": 0.66015625, "learning_rate": 8.208177693870983e-07, "loss": 0.2343241572380066, "step": 2775, "token_acc": 0.9327846364883402 }, { "epoch": 0.5856540084388185, "grad_norm": 0.8125, "learning_rate": 8.206880614896119e-07, "loss": 0.25258535146713257, "step": 2776, "token_acc": 0.9309597523219815 }, { "epoch": 0.5858649789029536, "grad_norm": 0.78515625, "learning_rate": 8.205583169187313e-07, "loss": 0.2897900938987732, "step": 2777, "token_acc": 0.9173038799207024 }, { "epoch": 0.5860759493670886, "grad_norm": 0.70703125, "learning_rate": 8.20428535689294e-07, "loss": 0.2542757987976074, "step": 2778, "token_acc": 0.9291091593475533 }, { "epoch": 0.5862869198312236, "grad_norm": 0.5390625, "learning_rate": 8.202987178161416e-07, "loss": 0.2111850082874298, "step": 2779, "token_acc": 0.9351383516810473 }, { "epoch": 0.5864978902953587, "grad_norm": 0.8046875, "learning_rate": 8.201688633141199e-07, "loss": 0.299186110496521, "step": 2780, "token_acc": 0.9141630901287554 }, { "epoch": 0.5867088607594937, "grad_norm": 0.953125, "learning_rate": 8.200389721980786e-07, "loss": 0.28198009729385376, "step": 2781, "token_acc": 0.9262023217247097 }, { "epoch": 0.5869198312236287, "grad_norm": 0.88671875, "learning_rate": 8.199090444828723e-07, "loss": 0.2553563117980957, "step": 2782, "token_acc": 0.9257318952234207 }, { "epoch": 0.5871308016877637, "grad_norm": 0.7109375, "learning_rate": 8.197790801833587e-07, "loss": 0.21473509073257446, "step": 2783, "token_acc": 0.9376337511464384 }, { "epoch": 0.5873417721518988, "grad_norm": 0.8046875, "learning_rate": 8.19649079314401e-07, "loss": 0.2005259394645691, "step": 2784, "token_acc": 0.9411402157164869 }, { "epoch": 0.5875527426160337, "grad_norm": 0.98046875, "learning_rate": 8.195190418908655e-07, "loss": 0.23495784401893616, "step": 2785, "token_acc": 0.9412708600770219 }, { "epoch": 0.5877637130801687, "grad_norm": 1.234375, "learning_rate": 8.193889679276231e-07, "loss": 0.24771223962306976, "step": 2786, "token_acc": 0.9306930693069307 }, { "epoch": 0.5879746835443038, "grad_norm": 0.58984375, "learning_rate": 8.19258857439549e-07, "loss": 0.245734304189682, "step": 2787, "token_acc": 0.9330876098667423 }, { "epoch": 0.5881856540084388, "grad_norm": 0.75, "learning_rate": 8.191287104415224e-07, "loss": 0.27598288655281067, "step": 2788, "token_acc": 0.9255742725880551 }, { "epoch": 0.5883966244725738, "grad_norm": 0.62109375, "learning_rate": 8.189985269484268e-07, "loss": 0.2514112591743469, "step": 2789, "token_acc": 0.932459390139641 }, { "epoch": 0.5886075949367089, "grad_norm": 0.7265625, "learning_rate": 8.188683069751493e-07, "loss": 0.27666395902633667, "step": 2790, "token_acc": 0.9286390906220398 }, { "epoch": 0.5888185654008439, "grad_norm": 0.78125, "learning_rate": 8.187380505365822e-07, "loss": 0.2573499381542206, "step": 2791, "token_acc": 0.9278236914600551 }, { "epoch": 0.5890295358649789, "grad_norm": 0.75, "learning_rate": 8.186077576476213e-07, "loss": 0.23622915148735046, "step": 2792, "token_acc": 0.9352045386682591 }, { "epoch": 0.589240506329114, "grad_norm": 0.703125, "learning_rate": 8.184774283231666e-07, "loss": 0.2501944601535797, "step": 2793, "token_acc": 0.9281658148779103 }, { "epoch": 0.5894514767932489, "grad_norm": 0.6328125, "learning_rate": 8.183470625781224e-07, "loss": 0.2235751450061798, "step": 2794, "token_acc": 0.9340195302190551 }, { "epoch": 0.5896624472573839, "grad_norm": 0.6875, "learning_rate": 8.182166604273972e-07, "loss": 0.24877938628196716, "step": 2795, "token_acc": 0.9226315789473685 }, { "epoch": 0.589873417721519, "grad_norm": 1.0078125, "learning_rate": 8.180862218859033e-07, "loss": 0.27491050958633423, "step": 2796, "token_acc": 0.9177910260433009 }, { "epoch": 0.590084388185654, "grad_norm": 0.76171875, "learning_rate": 8.17955746968558e-07, "loss": 0.2558938264846802, "step": 2797, "token_acc": 0.9245647969052224 }, { "epoch": 0.590295358649789, "grad_norm": 0.76171875, "learning_rate": 8.178252356902815e-07, "loss": 0.29133355617523193, "step": 2798, "token_acc": 0.9229564672721579 }, { "epoch": 0.5905063291139241, "grad_norm": 0.62890625, "learning_rate": 8.176946880659997e-07, "loss": 0.25243043899536133, "step": 2799, "token_acc": 0.926525198938992 }, { "epoch": 0.5907172995780591, "grad_norm": 0.6953125, "learning_rate": 8.17564104110641e-07, "loss": 0.2076282501220703, "step": 2800, "token_acc": 0.9396774193548387 }, { "epoch": 0.5907172995780591, "eval_loss": 0.43371495604515076, "eval_runtime": 245.7969, "eval_samples_per_second": 137.125, "eval_steps_per_second": 2.144, "eval_token_acc": 0.8991586646325047, "step": 2800 }, { "epoch": 0.5909282700421941, "grad_norm": 0.66015625, "learning_rate": 8.174334838391393e-07, "loss": 0.3168339431285858, "step": 2801, "token_acc": 0.9106666666666666 }, { "epoch": 0.5911392405063292, "grad_norm": 0.671875, "learning_rate": 8.17302827266432e-07, "loss": 0.2165396362543106, "step": 2802, "token_acc": 0.9353176874642244 }, { "epoch": 0.5913502109704641, "grad_norm": 0.58984375, "learning_rate": 8.171721344074606e-07, "loss": 0.23719705641269684, "step": 2803, "token_acc": 0.9348102877514642 }, { "epoch": 0.5915611814345991, "grad_norm": 0.69140625, "learning_rate": 8.170414052771712e-07, "loss": 0.267633855342865, "step": 2804, "token_acc": 0.9304932735426009 }, { "epoch": 0.5917721518987342, "grad_norm": 0.7109375, "learning_rate": 8.169106398905138e-07, "loss": 0.2688809037208557, "step": 2805, "token_acc": 0.9272727272727272 }, { "epoch": 0.5919831223628692, "grad_norm": 0.7265625, "learning_rate": 8.167798382624423e-07, "loss": 0.2561337947845459, "step": 2806, "token_acc": 0.9282814614343707 }, { "epoch": 0.5921940928270042, "grad_norm": 0.63671875, "learning_rate": 8.166490004079152e-07, "loss": 0.25608569383621216, "step": 2807, "token_acc": 0.9250201558720774 }, { "epoch": 0.5924050632911393, "grad_norm": 0.65625, "learning_rate": 8.165181263418946e-07, "loss": 0.2602469325065613, "step": 2808, "token_acc": 0.9254901960784314 }, { "epoch": 0.5926160337552743, "grad_norm": 0.79296875, "learning_rate": 8.163872160793474e-07, "loss": 0.2503916025161743, "step": 2809, "token_acc": 0.9252159827213823 }, { "epoch": 0.5928270042194093, "grad_norm": 0.57421875, "learning_rate": 8.162562696352442e-07, "loss": 0.1848316192626953, "step": 2810, "token_acc": 0.9460490463215259 }, { "epoch": 0.5930379746835444, "grad_norm": 0.7265625, "learning_rate": 8.161252870245599e-07, "loss": 0.2535549998283386, "step": 2811, "token_acc": 0.9260265700483091 }, { "epoch": 0.5932489451476793, "grad_norm": 0.80859375, "learning_rate": 8.159942682622731e-07, "loss": 0.26517677307128906, "step": 2812, "token_acc": 0.9287317620650954 }, { "epoch": 0.5934599156118143, "grad_norm": 0.81640625, "learning_rate": 8.158632133633674e-07, "loss": 0.2886946499347687, "step": 2813, "token_acc": 0.9204476709013915 }, { "epoch": 0.5936708860759494, "grad_norm": 0.640625, "learning_rate": 8.157321223428297e-07, "loss": 0.2592094838619232, "step": 2814, "token_acc": 0.9242755604155276 }, { "epoch": 0.5938818565400844, "grad_norm": 0.62109375, "learning_rate": 8.156009952156515e-07, "loss": 0.240816131234169, "step": 2815, "token_acc": 0.9230363732208751 }, { "epoch": 0.5940928270042194, "grad_norm": 0.6796875, "learning_rate": 8.154698319968283e-07, "loss": 0.23700900375843048, "step": 2816, "token_acc": 0.9275059665871122 }, { "epoch": 0.5943037974683544, "grad_norm": 0.7578125, "learning_rate": 8.153386327013596e-07, "loss": 0.2888278365135193, "step": 2817, "token_acc": 0.9207266721717589 }, { "epoch": 0.5945147679324895, "grad_norm": 0.640625, "learning_rate": 8.152073973442492e-07, "loss": 0.25595495104789734, "step": 2818, "token_acc": 0.928062015503876 }, { "epoch": 0.5947257383966245, "grad_norm": 0.8046875, "learning_rate": 8.150761259405051e-07, "loss": 0.22711777687072754, "step": 2819, "token_acc": 0.9298949969116739 }, { "epoch": 0.5949367088607594, "grad_norm": 1.0625, "learning_rate": 8.149448185051393e-07, "loss": 0.2565338611602783, "step": 2820, "token_acc": 0.9274907222380816 }, { "epoch": 0.5951476793248945, "grad_norm": 0.74609375, "learning_rate": 8.148134750531678e-07, "loss": 0.2613461911678314, "step": 2821, "token_acc": 0.9377554228230116 }, { "epoch": 0.5953586497890295, "grad_norm": 0.69140625, "learning_rate": 8.146820955996108e-07, "loss": 0.26925691962242126, "step": 2822, "token_acc": 0.926509186351706 }, { "epoch": 0.5955696202531645, "grad_norm": 0.71875, "learning_rate": 8.145506801594928e-07, "loss": 0.256938099861145, "step": 2823, "token_acc": 0.9338532640092432 }, { "epoch": 0.5957805907172996, "grad_norm": 0.7421875, "learning_rate": 8.144192287478421e-07, "loss": 0.23407740890979767, "step": 2824, "token_acc": 0.9348682217509845 }, { "epoch": 0.5959915611814346, "grad_norm": 0.6875, "learning_rate": 8.142877413796914e-07, "loss": 0.22536033391952515, "step": 2825, "token_acc": 0.9341120095837077 }, { "epoch": 0.5962025316455696, "grad_norm": 0.80859375, "learning_rate": 8.141562180700774e-07, "loss": 0.27405592799186707, "step": 2826, "token_acc": 0.9207175925925926 }, { "epoch": 0.5964135021097047, "grad_norm": 0.67578125, "learning_rate": 8.140246588340408e-07, "loss": 0.2534022331237793, "step": 2827, "token_acc": 0.9263485477178424 }, { "epoch": 0.5966244725738397, "grad_norm": 0.76953125, "learning_rate": 8.138930636866266e-07, "loss": 0.22176939249038696, "step": 2828, "token_acc": 0.9369255150554675 }, { "epoch": 0.5968354430379746, "grad_norm": 0.71484375, "learning_rate": 8.137614326428839e-07, "loss": 0.24852940440177917, "step": 2829, "token_acc": 0.9286105032822757 }, { "epoch": 0.5970464135021097, "grad_norm": 0.69140625, "learning_rate": 8.136297657178654e-07, "loss": 0.26572227478027344, "step": 2830, "token_acc": 0.9284530386740332 }, { "epoch": 0.5972573839662447, "grad_norm": 0.84375, "learning_rate": 8.13498062926629e-07, "loss": 0.33351415395736694, "step": 2831, "token_acc": 0.9135618479880775 }, { "epoch": 0.5974683544303797, "grad_norm": 0.7109375, "learning_rate": 8.133663242842356e-07, "loss": 0.24992308020591736, "step": 2832, "token_acc": 0.9255014326647565 }, { "epoch": 0.5976793248945148, "grad_norm": 0.69921875, "learning_rate": 8.132345498057506e-07, "loss": 0.2308383285999298, "step": 2833, "token_acc": 0.9356607039815349 }, { "epoch": 0.5978902953586498, "grad_norm": 0.8828125, "learning_rate": 8.131027395062437e-07, "loss": 0.3063763380050659, "step": 2834, "token_acc": 0.922509225092251 }, { "epoch": 0.5981012658227848, "grad_norm": 0.5390625, "learning_rate": 8.129708934007886e-07, "loss": 0.21880397200584412, "step": 2835, "token_acc": 0.9380556352165185 }, { "epoch": 0.5983122362869199, "grad_norm": 1.40625, "learning_rate": 8.128390115044628e-07, "loss": 0.2567862272262573, "step": 2836, "token_acc": 0.92561260210035 }, { "epoch": 0.5985232067510549, "grad_norm": 0.734375, "learning_rate": 8.12707093832348e-07, "loss": 0.24346551299095154, "step": 2837, "token_acc": 0.93359375 }, { "epoch": 0.5987341772151898, "grad_norm": 0.71484375, "learning_rate": 8.125751403995305e-07, "loss": 0.25235140323638916, "step": 2838, "token_acc": 0.9251465103889185 }, { "epoch": 0.5989451476793249, "grad_norm": 0.578125, "learning_rate": 8.124431512210998e-07, "loss": 0.21017123758792877, "step": 2839, "token_acc": 0.9385705847607797 }, { "epoch": 0.5991561181434599, "grad_norm": 0.6953125, "learning_rate": 8.123111263121508e-07, "loss": 0.25864100456237793, "step": 2840, "token_acc": 0.9276044732195409 }, { "epoch": 0.5993670886075949, "grad_norm": 0.74609375, "learning_rate": 8.121790656877805e-07, "loss": 0.2439347803592682, "step": 2841, "token_acc": 0.9316635745207174 }, { "epoch": 0.59957805907173, "grad_norm": 0.7421875, "learning_rate": 8.12046969363092e-07, "loss": 0.22959116101264954, "step": 2842, "token_acc": 0.9328051643192489 }, { "epoch": 0.599789029535865, "grad_norm": 0.7421875, "learning_rate": 8.119148373531917e-07, "loss": 0.2781035900115967, "step": 2843, "token_acc": 0.9259567387687188 }, { "epoch": 0.6, "grad_norm": 0.87109375, "learning_rate": 8.117826696731893e-07, "loss": 0.2499370276927948, "step": 2844, "token_acc": 0.9276846679081316 }, { "epoch": 0.6002109704641351, "grad_norm": 0.75, "learning_rate": 8.116504663382e-07, "loss": 0.2552322447299957, "step": 2845, "token_acc": 0.9324817518248175 }, { "epoch": 0.6004219409282701, "grad_norm": 0.68359375, "learning_rate": 8.11518227363342e-07, "loss": 0.26489153504371643, "step": 2846, "token_acc": 0.9245391705069125 }, { "epoch": 0.600632911392405, "grad_norm": 0.765625, "learning_rate": 8.11385952763738e-07, "loss": 0.3084232807159424, "step": 2847, "token_acc": 0.9194227300060133 }, { "epoch": 0.60084388185654, "grad_norm": 0.78125, "learning_rate": 8.112536425545148e-07, "loss": 0.26001930236816406, "step": 2848, "token_acc": 0.9261198371146016 }, { "epoch": 0.6010548523206751, "grad_norm": 0.83203125, "learning_rate": 8.11121296750803e-07, "loss": 0.27421990036964417, "step": 2849, "token_acc": 0.9236249643773154 }, { "epoch": 0.6012658227848101, "grad_norm": 0.6796875, "learning_rate": 8.109889153677378e-07, "loss": 0.2168891429901123, "step": 2850, "token_acc": 0.9404761904761905 }, { "epoch": 0.6014767932489451, "grad_norm": 1.265625, "learning_rate": 8.108564984204577e-07, "loss": 0.2261057198047638, "step": 2851, "token_acc": 0.9343873517786562 }, { "epoch": 0.6016877637130802, "grad_norm": 0.72265625, "learning_rate": 8.107240459241063e-07, "loss": 0.2726239562034607, "step": 2852, "token_acc": 0.9229781771501926 }, { "epoch": 0.6018987341772152, "grad_norm": 0.78125, "learning_rate": 8.1059155789383e-07, "loss": 0.29077231884002686, "step": 2853, "token_acc": 0.9213075060532687 }, { "epoch": 0.6021097046413502, "grad_norm": 0.71875, "learning_rate": 8.104590343447804e-07, "loss": 0.28699952363967896, "step": 2854, "token_acc": 0.9194651741293532 }, { "epoch": 0.6023206751054853, "grad_norm": 0.6640625, "learning_rate": 8.103264752921124e-07, "loss": 0.2611261010169983, "step": 2855, "token_acc": 0.9263189812007278 }, { "epoch": 0.6025316455696202, "grad_norm": 0.828125, "learning_rate": 8.101938807509855e-07, "loss": 0.2884120047092438, "step": 2856, "token_acc": 0.9230289809909629 }, { "epoch": 0.6027426160337552, "grad_norm": 0.6640625, "learning_rate": 8.100612507365627e-07, "loss": 0.2541283667087555, "step": 2857, "token_acc": 0.9266537503459729 }, { "epoch": 0.6029535864978903, "grad_norm": 0.75, "learning_rate": 8.099285852640116e-07, "loss": 0.24310517311096191, "step": 2858, "token_acc": 0.93341478313989 }, { "epoch": 0.6031645569620253, "grad_norm": 0.85546875, "learning_rate": 8.097958843485036e-07, "loss": 0.27362561225891113, "step": 2859, "token_acc": 0.9268063809821708 }, { "epoch": 0.6033755274261603, "grad_norm": 0.9453125, "learning_rate": 8.096631480052143e-07, "loss": 0.32928210496902466, "step": 2860, "token_acc": 0.9203296703296703 }, { "epoch": 0.6035864978902954, "grad_norm": 0.76953125, "learning_rate": 8.095303762493229e-07, "loss": 0.26055383682250977, "step": 2861, "token_acc": 0.9245122985581001 }, { "epoch": 0.6037974683544304, "grad_norm": 0.83984375, "learning_rate": 8.093975690960131e-07, "loss": 0.25679540634155273, "step": 2862, "token_acc": 0.9274478330658106 }, { "epoch": 0.6040084388185654, "grad_norm": 0.62890625, "learning_rate": 8.092647265604725e-07, "loss": 0.2469652146100998, "step": 2863, "token_acc": 0.9321005489742848 }, { "epoch": 0.6042194092827005, "grad_norm": 0.67578125, "learning_rate": 8.091318486578928e-07, "loss": 0.24178554117679596, "step": 2864, "token_acc": 0.9334797615312206 }, { "epoch": 0.6044303797468354, "grad_norm": 0.8359375, "learning_rate": 8.089989354034699e-07, "loss": 0.23334816098213196, "step": 2865, "token_acc": 0.9294189230301186 }, { "epoch": 0.6046413502109704, "grad_norm": 0.8984375, "learning_rate": 8.088659868124032e-07, "loss": 0.2601286768913269, "step": 2866, "token_acc": 0.9281496062992126 }, { "epoch": 0.6048523206751055, "grad_norm": 0.859375, "learning_rate": 8.08733002899897e-07, "loss": 0.27465569972991943, "step": 2867, "token_acc": 0.9268057784911717 }, { "epoch": 0.6050632911392405, "grad_norm": 0.73046875, "learning_rate": 8.085999836811586e-07, "loss": 0.2712014317512512, "step": 2868, "token_acc": 0.9239713774597496 }, { "epoch": 0.6052742616033755, "grad_norm": 0.60546875, "learning_rate": 8.084669291714002e-07, "loss": 0.24549713730812073, "step": 2869, "token_acc": 0.9293430656934306 }, { "epoch": 0.6054852320675106, "grad_norm": 0.87109375, "learning_rate": 8.083338393858375e-07, "loss": 0.2567541003227234, "step": 2870, "token_acc": 0.9278350515463918 }, { "epoch": 0.6056962025316456, "grad_norm": 0.8359375, "learning_rate": 8.082007143396905e-07, "loss": 0.2743789553642273, "step": 2871, "token_acc": 0.9221128021486124 }, { "epoch": 0.6059071729957806, "grad_norm": 1.0546875, "learning_rate": 8.080675540481833e-07, "loss": 0.2856948673725128, "step": 2872, "token_acc": 0.9199064600993861 }, { "epoch": 0.6061181434599157, "grad_norm": 0.7265625, "learning_rate": 8.079343585265439e-07, "loss": 0.24717864394187927, "step": 2873, "token_acc": 0.9277628032345013 }, { "epoch": 0.6063291139240506, "grad_norm": 0.9921875, "learning_rate": 8.078011277900041e-07, "loss": 0.2613459825515747, "step": 2874, "token_acc": 0.9289940828402367 }, { "epoch": 0.6065400843881856, "grad_norm": 0.734375, "learning_rate": 8.076678618538003e-07, "loss": 0.27766627073287964, "step": 2875, "token_acc": 0.9256432004523607 }, { "epoch": 0.6067510548523207, "grad_norm": 0.73828125, "learning_rate": 8.075345607331723e-07, "loss": 0.2768717408180237, "step": 2876, "token_acc": 0.9257455873402313 }, { "epoch": 0.6069620253164557, "grad_norm": 0.61328125, "learning_rate": 8.074012244433645e-07, "loss": 0.3454355001449585, "step": 2877, "token_acc": 0.9022056869519001 }, { "epoch": 0.6071729957805907, "grad_norm": 0.72265625, "learning_rate": 8.072678529996246e-07, "loss": 0.2939087152481079, "step": 2878, "token_acc": 0.9205620877545168 }, { "epoch": 0.6073839662447258, "grad_norm": 0.83203125, "learning_rate": 8.071344464172053e-07, "loss": 0.24933354556560516, "step": 2879, "token_acc": 0.9326952763198518 }, { "epoch": 0.6075949367088608, "grad_norm": 0.84765625, "learning_rate": 8.070010047113623e-07, "loss": 0.29458165168762207, "step": 2880, "token_acc": 0.9198669488962806 }, { "epoch": 0.6078059071729958, "grad_norm": 0.83203125, "learning_rate": 8.06867527897356e-07, "loss": 0.24690523743629456, "step": 2881, "token_acc": 0.928168130489335 }, { "epoch": 0.6080168776371307, "grad_norm": 0.7421875, "learning_rate": 8.067340159904505e-07, "loss": 0.24057328701019287, "step": 2882, "token_acc": 0.9318757921419518 }, { "epoch": 0.6082278481012658, "grad_norm": 0.73828125, "learning_rate": 8.066004690059142e-07, "loss": 0.2868915796279907, "step": 2883, "token_acc": 0.9187682505972923 }, { "epoch": 0.6084388185654008, "grad_norm": 0.6328125, "learning_rate": 8.064668869590195e-07, "loss": 0.25868356227874756, "step": 2884, "token_acc": 0.927484333034915 }, { "epoch": 0.6086497890295358, "grad_norm": 1.125, "learning_rate": 8.063332698650419e-07, "loss": 0.2431940734386444, "step": 2885, "token_acc": 0.9295824486907289 }, { "epoch": 0.6088607594936709, "grad_norm": 7.6875, "learning_rate": 8.061996177392623e-07, "loss": 0.2566065788269043, "step": 2886, "token_acc": 0.9323776877531942 }, { "epoch": 0.6090717299578059, "grad_norm": 0.60546875, "learning_rate": 8.060659305969649e-07, "loss": 0.2511722445487976, "step": 2887, "token_acc": 0.9285915890488287 }, { "epoch": 0.6092827004219409, "grad_norm": 0.546875, "learning_rate": 8.059322084534374e-07, "loss": 0.21672701835632324, "step": 2888, "token_acc": 0.9350443599493029 }, { "epoch": 0.609493670886076, "grad_norm": 0.66015625, "learning_rate": 8.057984513239728e-07, "loss": 0.25521618127822876, "step": 2889, "token_acc": 0.9278320874065555 }, { "epoch": 0.609704641350211, "grad_norm": 0.828125, "learning_rate": 8.056646592238671e-07, "loss": 0.2511005103588104, "step": 2890, "token_acc": 0.9312573443008225 }, { "epoch": 0.609915611814346, "grad_norm": 0.59765625, "learning_rate": 8.055308321684202e-07, "loss": 0.23858973383903503, "step": 2891, "token_acc": 0.928532460447354 }, { "epoch": 0.610126582278481, "grad_norm": 0.7109375, "learning_rate": 8.053969701729369e-07, "loss": 0.27097588777542114, "step": 2892, "token_acc": 0.9269886363636364 }, { "epoch": 0.610337552742616, "grad_norm": 0.71484375, "learning_rate": 8.05263073252725e-07, "loss": 0.2726849913597107, "step": 2893, "token_acc": 0.9237162362718908 }, { "epoch": 0.610548523206751, "grad_norm": 0.62890625, "learning_rate": 8.051291414230972e-07, "loss": 0.23818132281303406, "step": 2894, "token_acc": 0.9291611185086551 }, { "epoch": 0.6107594936708861, "grad_norm": 0.7109375, "learning_rate": 8.049951746993693e-07, "loss": 0.2583930790424347, "step": 2895, "token_acc": 0.9315027157513579 }, { "epoch": 0.6109704641350211, "grad_norm": 0.71484375, "learning_rate": 8.048611730968617e-07, "loss": 0.29945671558380127, "step": 2896, "token_acc": 0.9181071124964579 }, { "epoch": 0.6111814345991561, "grad_norm": 0.63671875, "learning_rate": 8.047271366308988e-07, "loss": 0.2513945698738098, "step": 2897, "token_acc": 0.9339108116778954 }, { "epoch": 0.6113924050632912, "grad_norm": 0.953125, "learning_rate": 8.045930653168085e-07, "loss": 0.27540773153305054, "step": 2898, "token_acc": 0.9186206896551724 }, { "epoch": 0.6116033755274262, "grad_norm": 0.76171875, "learning_rate": 8.044589591699233e-07, "loss": 0.25019872188568115, "step": 2899, "token_acc": 0.9214876033057852 }, { "epoch": 0.6118143459915611, "grad_norm": 0.70703125, "learning_rate": 8.043248182055789e-07, "loss": 0.26117125153541565, "step": 2900, "token_acc": 0.9298136645962732 }, { "epoch": 0.6120253164556962, "grad_norm": 0.80078125, "learning_rate": 8.041906424391161e-07, "loss": 0.294888973236084, "step": 2901, "token_acc": 0.919057686499547 }, { "epoch": 0.6122362869198312, "grad_norm": 0.79296875, "learning_rate": 8.040564318858784e-07, "loss": 0.288154274225235, "step": 2902, "token_acc": 0.9245022123893806 }, { "epoch": 0.6124472573839662, "grad_norm": 0.76953125, "learning_rate": 8.039221865612142e-07, "loss": 0.25859761238098145, "step": 2903, "token_acc": 0.9306698002350177 }, { "epoch": 0.6126582278481013, "grad_norm": 0.66015625, "learning_rate": 8.037879064804757e-07, "loss": 0.21650764346122742, "step": 2904, "token_acc": 0.9365029715358149 }, { "epoch": 0.6128691983122363, "grad_norm": 0.83203125, "learning_rate": 8.036535916590188e-07, "loss": 0.24210359156131744, "step": 2905, "token_acc": 0.9332953249714937 }, { "epoch": 0.6130801687763713, "grad_norm": 0.62890625, "learning_rate": 8.035192421122036e-07, "loss": 0.22828635573387146, "step": 2906, "token_acc": 0.9371936274509803 }, { "epoch": 0.6132911392405064, "grad_norm": 0.83203125, "learning_rate": 8.033848578553942e-07, "loss": 0.2999606430530548, "step": 2907, "token_acc": 0.9195064629847238 }, { "epoch": 0.6135021097046414, "grad_norm": 0.73828125, "learning_rate": 8.032504389039585e-07, "loss": 0.2634912431240082, "step": 2908, "token_acc": 0.9264793783622236 }, { "epoch": 0.6137130801687763, "grad_norm": 0.9140625, "learning_rate": 8.031159852732683e-07, "loss": 0.25779393315315247, "step": 2909, "token_acc": 0.9278204035237283 }, { "epoch": 0.6139240506329114, "grad_norm": 0.6484375, "learning_rate": 8.029814969786996e-07, "loss": 0.2512008547782898, "step": 2910, "token_acc": 0.9298881837413115 }, { "epoch": 0.6141350210970464, "grad_norm": 1.15625, "learning_rate": 8.028469740356326e-07, "loss": 0.24632331728935242, "step": 2911, "token_acc": 0.932068206820682 }, { "epoch": 0.6143459915611814, "grad_norm": 0.81640625, "learning_rate": 8.027124164594504e-07, "loss": 0.24754971265792847, "step": 2912, "token_acc": 0.9322731011790513 }, { "epoch": 0.6145569620253165, "grad_norm": 0.80078125, "learning_rate": 8.025778242655414e-07, "loss": 0.26508378982543945, "step": 2913, "token_acc": 0.9249329758713136 }, { "epoch": 0.6147679324894515, "grad_norm": 0.91796875, "learning_rate": 8.024431974692974e-07, "loss": 0.26550257205963135, "step": 2914, "token_acc": 0.9277905638665133 }, { "epoch": 0.6149789029535865, "grad_norm": 0.8671875, "learning_rate": 8.023085360861137e-07, "loss": 0.2582736015319824, "step": 2915, "token_acc": 0.9345001325908248 }, { "epoch": 0.6151898734177215, "grad_norm": 0.89453125, "learning_rate": 8.021738401313903e-07, "loss": 0.2738432288169861, "step": 2916, "token_acc": 0.9222222222222223 }, { "epoch": 0.6154008438818566, "grad_norm": 0.60546875, "learning_rate": 8.020391096205305e-07, "loss": 0.2440716177225113, "step": 2917, "token_acc": 0.9380748007357449 }, { "epoch": 0.6156118143459915, "grad_norm": 0.9140625, "learning_rate": 8.019043445689423e-07, "loss": 0.3113381862640381, "step": 2918, "token_acc": 0.9194048357098574 }, { "epoch": 0.6158227848101265, "grad_norm": 0.83984375, "learning_rate": 8.017695449920369e-07, "loss": 0.2257789969444275, "step": 2919, "token_acc": 0.9369565217391305 }, { "epoch": 0.6160337552742616, "grad_norm": 0.6953125, "learning_rate": 8.016347109052297e-07, "loss": 0.261264830827713, "step": 2920, "token_acc": 0.926440177252585 }, { "epoch": 0.6162447257383966, "grad_norm": 0.71875, "learning_rate": 8.014998423239406e-07, "loss": 0.2310861349105835, "step": 2921, "token_acc": 0.9353083434099153 }, { "epoch": 0.6164556962025316, "grad_norm": 0.71875, "learning_rate": 8.013649392635926e-07, "loss": 0.22576406598091125, "step": 2922, "token_acc": 0.9366448542534206 }, { "epoch": 0.6166666666666667, "grad_norm": 0.74609375, "learning_rate": 8.01230001739613e-07, "loss": 0.2691919505596161, "step": 2923, "token_acc": 0.927993301702484 }, { "epoch": 0.6168776371308017, "grad_norm": 0.578125, "learning_rate": 8.01095029767433e-07, "loss": 0.19360384345054626, "step": 2924, "token_acc": 0.9447159437996335 }, { "epoch": 0.6170886075949367, "grad_norm": 0.7265625, "learning_rate": 8.00960023362488e-07, "loss": 0.2845591604709625, "step": 2925, "token_acc": 0.9193345888261143 }, { "epoch": 0.6172995780590718, "grad_norm": 0.65625, "learning_rate": 8.008249825402171e-07, "loss": 0.2730807662010193, "step": 2926, "token_acc": 0.923326452373931 }, { "epoch": 0.6175105485232067, "grad_norm": 1.4375, "learning_rate": 8.006899073160632e-07, "loss": 0.26884108781814575, "step": 2927, "token_acc": 0.928904748833379 }, { "epoch": 0.6177215189873417, "grad_norm": 0.69140625, "learning_rate": 8.005547977054735e-07, "loss": 0.23920544981956482, "step": 2928, "token_acc": 0.9273231622746186 }, { "epoch": 0.6179324894514768, "grad_norm": 0.69921875, "learning_rate": 8.004196537238986e-07, "loss": 0.22443151473999023, "step": 2929, "token_acc": 0.9263542300669507 }, { "epoch": 0.6181434599156118, "grad_norm": 0.76953125, "learning_rate": 8.002844753867937e-07, "loss": 0.3260098993778229, "step": 2930, "token_acc": 0.9174484052532833 }, { "epoch": 0.6183544303797468, "grad_norm": 1.1015625, "learning_rate": 8.001492627096174e-07, "loss": 0.29421353340148926, "step": 2931, "token_acc": 0.9204864359214219 }, { "epoch": 0.6185654008438819, "grad_norm": 0.78515625, "learning_rate": 8.000140157078325e-07, "loss": 0.27302688360214233, "step": 2932, "token_acc": 0.9217061057551855 }, { "epoch": 0.6187763713080169, "grad_norm": 0.74609375, "learning_rate": 7.998787343969056e-07, "loss": 0.313492089509964, "step": 2933, "token_acc": 0.9253812636165577 }, { "epoch": 0.6189873417721519, "grad_norm": 0.89453125, "learning_rate": 7.997434187923072e-07, "loss": 0.27433139085769653, "step": 2934, "token_acc": 0.9291693496458467 }, { "epoch": 0.619198312236287, "grad_norm": 0.8671875, "learning_rate": 7.99608068909512e-07, "loss": 0.26900798082351685, "step": 2935, "token_acc": 0.9285083848190644 }, { "epoch": 0.619409282700422, "grad_norm": 0.76953125, "learning_rate": 7.994726847639981e-07, "loss": 0.2807154059410095, "step": 2936, "token_acc": 0.9188879335053024 }, { "epoch": 0.6196202531645569, "grad_norm": 0.65625, "learning_rate": 7.993372663712481e-07, "loss": 0.26509344577789307, "step": 2937, "token_acc": 0.9280864197530864 }, { "epoch": 0.619831223628692, "grad_norm": 0.65625, "learning_rate": 7.99201813746748e-07, "loss": 0.25157222151756287, "step": 2938, "token_acc": 0.9253026276941246 }, { "epoch": 0.620042194092827, "grad_norm": 0.68359375, "learning_rate": 7.990663269059882e-07, "loss": 0.27258867025375366, "step": 2939, "token_acc": 0.931076923076923 }, { "epoch": 0.620253164556962, "grad_norm": 0.62109375, "learning_rate": 7.989308058644626e-07, "loss": 0.23578310012817383, "step": 2940, "token_acc": 0.9328804347826087 }, { "epoch": 0.6204641350210971, "grad_norm": 0.66796875, "learning_rate": 7.987952506376692e-07, "loss": 0.24724677205085754, "step": 2941, "token_acc": 0.9330616996507567 }, { "epoch": 0.6206751054852321, "grad_norm": 0.7890625, "learning_rate": 7.986596612411098e-07, "loss": 0.23196649551391602, "step": 2942, "token_acc": 0.9348424594677271 }, { "epoch": 0.6208860759493671, "grad_norm": 0.625, "learning_rate": 7.985240376902906e-07, "loss": 0.21331241726875305, "step": 2943, "token_acc": 0.9378073474110501 }, { "epoch": 0.6210970464135022, "grad_norm": 0.69140625, "learning_rate": 7.983883800007208e-07, "loss": 0.2383914589881897, "step": 2944, "token_acc": 0.934235368156073 }, { "epoch": 0.6213080168776371, "grad_norm": 0.6796875, "learning_rate": 7.982526881879141e-07, "loss": 0.25219425559043884, "step": 2945, "token_acc": 0.9294973126778375 }, { "epoch": 0.6215189873417721, "grad_norm": 0.94140625, "learning_rate": 7.981169622673882e-07, "loss": 0.22276616096496582, "step": 2946, "token_acc": 0.9324121357765095 }, { "epoch": 0.6217299578059071, "grad_norm": 0.76953125, "learning_rate": 7.979812022546646e-07, "loss": 0.2541449964046478, "step": 2947, "token_acc": 0.9346384591095674 }, { "epoch": 0.6219409282700422, "grad_norm": 0.671875, "learning_rate": 7.978454081652683e-07, "loss": 0.2584494352340698, "step": 2948, "token_acc": 0.9306930693069307 }, { "epoch": 0.6221518987341772, "grad_norm": 1.078125, "learning_rate": 7.977095800147287e-07, "loss": 0.24379205703735352, "step": 2949, "token_acc": 0.9315268065268065 }, { "epoch": 0.6223628691983122, "grad_norm": 0.83203125, "learning_rate": 7.975737178185786e-07, "loss": 0.2582053542137146, "step": 2950, "token_acc": 0.9302593659942363 }, { "epoch": 0.6225738396624473, "grad_norm": 0.68359375, "learning_rate": 7.974378215923554e-07, "loss": 0.2912464141845703, "step": 2951, "token_acc": 0.9203062960862167 }, { "epoch": 0.6227848101265823, "grad_norm": 0.65234375, "learning_rate": 7.973018913515998e-07, "loss": 0.23205941915512085, "step": 2952, "token_acc": 0.930931744312026 }, { "epoch": 0.6229957805907173, "grad_norm": 0.69921875, "learning_rate": 7.971659271118566e-07, "loss": 0.26967668533325195, "step": 2953, "token_acc": 0.9291287386215865 }, { "epoch": 0.6232067510548523, "grad_norm": 0.7265625, "learning_rate": 7.970299288886743e-07, "loss": 0.30414605140686035, "step": 2954, "token_acc": 0.9235102282834272 }, { "epoch": 0.6234177215189873, "grad_norm": 0.6953125, "learning_rate": 7.968938966976058e-07, "loss": 0.23268762230873108, "step": 2955, "token_acc": 0.936819830713422 }, { "epoch": 0.6236286919831223, "grad_norm": 0.6953125, "learning_rate": 7.967578305542072e-07, "loss": 0.26854023337364197, "step": 2956, "token_acc": 0.9218106995884774 }, { "epoch": 0.6238396624472574, "grad_norm": 0.69921875, "learning_rate": 7.96621730474039e-07, "loss": 0.24191662669181824, "step": 2957, "token_acc": 0.9300518134715026 }, { "epoch": 0.6240506329113924, "grad_norm": 0.6640625, "learning_rate": 7.964855964726653e-07, "loss": 0.24472805857658386, "step": 2958, "token_acc": 0.9301605181638974 }, { "epoch": 0.6242616033755274, "grad_norm": 0.6328125, "learning_rate": 7.963494285656543e-07, "loss": 0.24828363955020905, "step": 2959, "token_acc": 0.9276206322795341 }, { "epoch": 0.6244725738396625, "grad_norm": 0.75390625, "learning_rate": 7.962132267685777e-07, "loss": 0.23892956972122192, "step": 2960, "token_acc": 0.9384513567174056 }, { "epoch": 0.6246835443037975, "grad_norm": 0.76953125, "learning_rate": 7.960769910970116e-07, "loss": 0.2803322374820709, "step": 2961, "token_acc": 0.9246009906439185 }, { "epoch": 0.6248945147679325, "grad_norm": 1.8984375, "learning_rate": 7.959407215665354e-07, "loss": 0.25415313243865967, "step": 2962, "token_acc": 0.9258143991240076 }, { "epoch": 0.6251054852320675, "grad_norm": 0.953125, "learning_rate": 7.958044181927331e-07, "loss": 0.252442866563797, "step": 2963, "token_acc": 0.9255874673629243 }, { "epoch": 0.6253164556962025, "grad_norm": 0.578125, "learning_rate": 7.956680809911917e-07, "loss": 0.21444612741470337, "step": 2964, "token_acc": 0.937809576224546 }, { "epoch": 0.6255274261603375, "grad_norm": 0.765625, "learning_rate": 7.95531709977503e-07, "loss": 0.2460983395576477, "step": 2965, "token_acc": 0.9302689180737961 }, { "epoch": 0.6257383966244726, "grad_norm": 0.8515625, "learning_rate": 7.953953051672617e-07, "loss": 0.2522473931312561, "step": 2966, "token_acc": 0.9277745155607751 }, { "epoch": 0.6259493670886076, "grad_norm": 0.62890625, "learning_rate": 7.952588665760671e-07, "loss": 0.2588465213775635, "step": 2967, "token_acc": 0.930835734870317 }, { "epoch": 0.6261603375527426, "grad_norm": 0.96875, "learning_rate": 7.951223942195221e-07, "loss": 0.27054277062416077, "step": 2968, "token_acc": 0.9243295019157088 }, { "epoch": 0.6263713080168777, "grad_norm": 0.703125, "learning_rate": 7.949858881132334e-07, "loss": 0.27497026324272156, "step": 2969, "token_acc": 0.924022346368715 }, { "epoch": 0.6265822784810127, "grad_norm": 0.76171875, "learning_rate": 7.948493482728116e-07, "loss": 0.27661532163619995, "step": 2970, "token_acc": 0.922992600712524 }, { "epoch": 0.6267932489451477, "grad_norm": 0.625, "learning_rate": 7.947127747138713e-07, "loss": 0.21763741970062256, "step": 2971, "token_acc": 0.9394031668696712 }, { "epoch": 0.6270042194092827, "grad_norm": 0.875, "learning_rate": 7.945761674520308e-07, "loss": 0.25178205966949463, "step": 2972, "token_acc": 0.9346582354712436 }, { "epoch": 0.6272151898734177, "grad_norm": 0.5625, "learning_rate": 7.944395265029122e-07, "loss": 0.21728010475635529, "step": 2973, "token_acc": 0.9347759829320329 }, { "epoch": 0.6274261603375527, "grad_norm": 0.77734375, "learning_rate": 7.943028518821418e-07, "loss": 0.3015125095844269, "step": 2974, "token_acc": 0.9165280088740987 }, { "epoch": 0.6276371308016878, "grad_norm": 0.90625, "learning_rate": 7.941661436053491e-07, "loss": 0.25528305768966675, "step": 2975, "token_acc": 0.9292230261088392 }, { "epoch": 0.6278481012658228, "grad_norm": 0.71484375, "learning_rate": 7.940294016881681e-07, "loss": 0.2644398808479309, "step": 2976, "token_acc": 0.928629579375848 }, { "epoch": 0.6280590717299578, "grad_norm": 0.77734375, "learning_rate": 7.938926261462365e-07, "loss": 0.2605530619621277, "step": 2977, "token_acc": 0.9233157404532754 }, { "epoch": 0.6282700421940929, "grad_norm": 1.0703125, "learning_rate": 7.937558169951957e-07, "loss": 0.28005075454711914, "step": 2978, "token_acc": 0.924408014571949 }, { "epoch": 0.6284810126582279, "grad_norm": 0.69921875, "learning_rate": 7.936189742506906e-07, "loss": 0.30343836545944214, "step": 2979, "token_acc": 0.9247437774524158 }, { "epoch": 0.6286919831223629, "grad_norm": 0.8125, "learning_rate": 7.934820979283708e-07, "loss": 0.25176873803138733, "step": 2980, "token_acc": 0.9264622373651334 }, { "epoch": 0.6289029535864978, "grad_norm": 0.83203125, "learning_rate": 7.93345188043889e-07, "loss": 0.28678590059280396, "step": 2981, "token_acc": 0.9224393132030787 }, { "epoch": 0.6291139240506329, "grad_norm": 0.640625, "learning_rate": 7.932082446129023e-07, "loss": 0.2724168002605438, "step": 2982, "token_acc": 0.9255561893896178 }, { "epoch": 0.6293248945147679, "grad_norm": 1.0078125, "learning_rate": 7.93071267651071e-07, "loss": 0.3193906545639038, "step": 2983, "token_acc": 0.91180086047941 }, { "epoch": 0.6295358649789029, "grad_norm": 0.90625, "learning_rate": 7.929342571740597e-07, "loss": 0.23172667622566223, "step": 2984, "token_acc": 0.9312537855844942 }, { "epoch": 0.629746835443038, "grad_norm": 0.875, "learning_rate": 7.927972131975367e-07, "loss": 0.2763270139694214, "step": 2985, "token_acc": 0.9168717528028438 }, { "epoch": 0.629957805907173, "grad_norm": 0.625, "learning_rate": 7.926601357371741e-07, "loss": 0.25127995014190674, "step": 2986, "token_acc": 0.9257846875916691 }, { "epoch": 0.630168776371308, "grad_norm": 0.59765625, "learning_rate": 7.925230248086481e-07, "loss": 0.2867969274520874, "step": 2987, "token_acc": 0.9224674589700057 }, { "epoch": 0.6303797468354431, "grad_norm": 0.60546875, "learning_rate": 7.923858804276382e-07, "loss": 0.23399406671524048, "step": 2988, "token_acc": 0.9348066298342541 }, { "epoch": 0.630590717299578, "grad_norm": 0.59765625, "learning_rate": 7.922487026098281e-07, "loss": 0.23724466562271118, "step": 2989, "token_acc": 0.9362715902322811 }, { "epoch": 0.630801687763713, "grad_norm": 0.82421875, "learning_rate": 7.921114913709055e-07, "loss": 0.230056032538414, "step": 2990, "token_acc": 0.9330911292764154 }, { "epoch": 0.6310126582278481, "grad_norm": 0.9609375, "learning_rate": 7.919742467265613e-07, "loss": 0.26069891452789307, "step": 2991, "token_acc": 0.9220674879485806 }, { "epoch": 0.6312236286919831, "grad_norm": 0.78515625, "learning_rate": 7.918369686924907e-07, "loss": 0.25934264063835144, "step": 2992, "token_acc": 0.922696890438826 }, { "epoch": 0.6314345991561181, "grad_norm": 0.7109375, "learning_rate": 7.916996572843926e-07, "loss": 0.28514841198921204, "step": 2993, "token_acc": 0.9227855477855478 }, { "epoch": 0.6316455696202532, "grad_norm": 0.6953125, "learning_rate": 7.915623125179699e-07, "loss": 0.2062632292509079, "step": 2994, "token_acc": 0.9397000306091216 }, { "epoch": 0.6318565400843882, "grad_norm": 0.69140625, "learning_rate": 7.914249344089289e-07, "loss": 0.27586013078689575, "step": 2995, "token_acc": 0.922019719151479 }, { "epoch": 0.6320675105485232, "grad_norm": 0.77734375, "learning_rate": 7.912875229729801e-07, "loss": 0.2746962904930115, "step": 2996, "token_acc": 0.9218166139695315 }, { "epoch": 0.6322784810126583, "grad_norm": 0.8046875, "learning_rate": 7.911500782258375e-07, "loss": 0.24974144995212555, "step": 2997, "token_acc": 0.9322493224932249 }, { "epoch": 0.6324894514767933, "grad_norm": 0.94921875, "learning_rate": 7.910126001832194e-07, "loss": 0.22028183937072754, "step": 2998, "token_acc": 0.9373426573426573 }, { "epoch": 0.6327004219409282, "grad_norm": 0.68359375, "learning_rate": 7.908750888608472e-07, "loss": 0.2803043723106384, "step": 2999, "token_acc": 0.924993443482822 }, { "epoch": 0.6329113924050633, "grad_norm": 1.078125, "learning_rate": 7.907375442744467e-07, "loss": 0.2976187467575073, "step": 3000, "token_acc": 0.9169163715608826 }, { "epoch": 0.6329113924050633, "eval_loss": 0.4337725341320038, "eval_runtime": 245.5976, "eval_samples_per_second": 137.237, "eval_steps_per_second": 2.146, "eval_token_acc": 0.899099210266535, "step": 3000 }, { "epoch": 0.6331223628691983, "grad_norm": 1.171875, "learning_rate": 7.905999664397471e-07, "loss": 0.25155290961265564, "step": 3001, "token_acc": 0.9299847792998478 }, { "epoch": 0.6333333333333333, "grad_norm": 0.7578125, "learning_rate": 7.904623553724818e-07, "loss": 0.2618260383605957, "step": 3002, "token_acc": 0.9296285110238599 }, { "epoch": 0.6335443037974684, "grad_norm": 0.70703125, "learning_rate": 7.903247110883877e-07, "loss": 0.2524300217628479, "step": 3003, "token_acc": 0.9275118947663028 }, { "epoch": 0.6337552742616034, "grad_norm": 0.80859375, "learning_rate": 7.901870336032057e-07, "loss": 0.26307445764541626, "step": 3004, "token_acc": 0.9297404063205418 }, { "epoch": 0.6339662447257384, "grad_norm": 0.70703125, "learning_rate": 7.900493229326802e-07, "loss": 0.3230569660663605, "step": 3005, "token_acc": 0.9201006605850897 }, { "epoch": 0.6341772151898735, "grad_norm": 1.0390625, "learning_rate": 7.899115790925598e-07, "loss": 0.3005709648132324, "step": 3006, "token_acc": 0.923180957533135 }, { "epoch": 0.6343881856540085, "grad_norm": 0.68359375, "learning_rate": 7.897738020985963e-07, "loss": 0.2491353154182434, "step": 3007, "token_acc": 0.9247626004382761 }, { "epoch": 0.6345991561181434, "grad_norm": 0.68359375, "learning_rate": 7.89635991966546e-07, "loss": 0.2842658758163452, "step": 3008, "token_acc": 0.9228238109482501 }, { "epoch": 0.6348101265822785, "grad_norm": 0.74609375, "learning_rate": 7.894981487121687e-07, "loss": 0.23799820244312286, "step": 3009, "token_acc": 0.9323912441119424 }, { "epoch": 0.6350210970464135, "grad_norm": 0.92578125, "learning_rate": 7.893602723512276e-07, "loss": 0.3062479794025421, "step": 3010, "token_acc": 0.9176538572666859 }, { "epoch": 0.6352320675105485, "grad_norm": 0.8359375, "learning_rate": 7.892223628994905e-07, "loss": 0.2699859142303467, "step": 3011, "token_acc": 0.9329062710721511 }, { "epoch": 0.6354430379746835, "grad_norm": 0.765625, "learning_rate": 7.89084420372728e-07, "loss": 0.2560504972934723, "step": 3012, "token_acc": 0.9287943262411348 }, { "epoch": 0.6356540084388186, "grad_norm": 0.77734375, "learning_rate": 7.889464447867153e-07, "loss": 0.298359751701355, "step": 3013, "token_acc": 0.9212765957446809 }, { "epoch": 0.6358649789029536, "grad_norm": 0.609375, "learning_rate": 7.888084361572309e-07, "loss": 0.24123089015483856, "step": 3014, "token_acc": 0.9314481576692374 }, { "epoch": 0.6360759493670886, "grad_norm": 0.6484375, "learning_rate": 7.886703945000575e-07, "loss": 0.22440260648727417, "step": 3015, "token_acc": 0.936334024281907 }, { "epoch": 0.6362869198312237, "grad_norm": 0.74609375, "learning_rate": 7.885323198309812e-07, "loss": 0.23181332647800446, "step": 3016, "token_acc": 0.9333973743195645 }, { "epoch": 0.6364978902953586, "grad_norm": 0.765625, "learning_rate": 7.883942121657921e-07, "loss": 0.2938428521156311, "step": 3017, "token_acc": 0.9230561555075594 }, { "epoch": 0.6367088607594936, "grad_norm": 1.109375, "learning_rate": 7.882560715202838e-07, "loss": 0.3309740126132965, "step": 3018, "token_acc": 0.914218258132214 }, { "epoch": 0.6369198312236287, "grad_norm": 0.76953125, "learning_rate": 7.881178979102538e-07, "loss": 0.30414843559265137, "step": 3019, "token_acc": 0.9177027827116637 }, { "epoch": 0.6371308016877637, "grad_norm": 0.90625, "learning_rate": 7.879796913515038e-07, "loss": 0.2283640205860138, "step": 3020, "token_acc": 0.9349835967790039 }, { "epoch": 0.6373417721518987, "grad_norm": 1.0859375, "learning_rate": 7.878414518598384e-07, "loss": 0.24317654967308044, "step": 3021, "token_acc": 0.9288663067640521 }, { "epoch": 0.6375527426160338, "grad_norm": 0.69140625, "learning_rate": 7.877031794510669e-07, "loss": 0.26068800687789917, "step": 3022, "token_acc": 0.9291653834308593 }, { "epoch": 0.6377637130801688, "grad_norm": 0.70703125, "learning_rate": 7.875648741410017e-07, "loss": 0.24604400992393494, "step": 3023, "token_acc": 0.9321126760563381 }, { "epoch": 0.6379746835443038, "grad_norm": 0.9375, "learning_rate": 7.874265359454593e-07, "loss": 0.25130170583724976, "step": 3024, "token_acc": 0.9305515861674765 }, { "epoch": 0.6381856540084389, "grad_norm": 0.703125, "learning_rate": 7.872881648802596e-07, "loss": 0.27274632453918457, "step": 3025, "token_acc": 0.9272727272727272 }, { "epoch": 0.6383966244725738, "grad_norm": 0.640625, "learning_rate": 7.871497609612268e-07, "loss": 0.22998088598251343, "step": 3026, "token_acc": 0.934956934956935 }, { "epoch": 0.6386075949367088, "grad_norm": 0.70703125, "learning_rate": 7.870113242041882e-07, "loss": 0.2632472813129425, "step": 3027, "token_acc": 0.9281565270559462 }, { "epoch": 0.6388185654008439, "grad_norm": 0.6796875, "learning_rate": 7.868728546249757e-07, "loss": 0.25073331594467163, "step": 3028, "token_acc": 0.9269570011025359 }, { "epoch": 0.6390295358649789, "grad_norm": 0.8046875, "learning_rate": 7.867343522394238e-07, "loss": 0.277784138917923, "step": 3029, "token_acc": 0.9242649242649242 }, { "epoch": 0.6392405063291139, "grad_norm": 0.5703125, "learning_rate": 7.865958170633722e-07, "loss": 0.21380919218063354, "step": 3030, "token_acc": 0.9405013591060103 }, { "epoch": 0.639451476793249, "grad_norm": 0.74609375, "learning_rate": 7.86457249112663e-07, "loss": 0.25967007875442505, "step": 3031, "token_acc": 0.9249568717653824 }, { "epoch": 0.639662447257384, "grad_norm": 0.96875, "learning_rate": 7.86318648403143e-07, "loss": 0.2457112967967987, "step": 3032, "token_acc": 0.9339454976303317 }, { "epoch": 0.639873417721519, "grad_norm": 0.625, "learning_rate": 7.861800149506621e-07, "loss": 0.22581438720226288, "step": 3033, "token_acc": 0.931454196028187 }, { "epoch": 0.640084388185654, "grad_norm": 0.61328125, "learning_rate": 7.860413487710742e-07, "loss": 0.24152715504169464, "step": 3034, "token_acc": 0.9340866290018832 }, { "epoch": 0.640295358649789, "grad_norm": 0.765625, "learning_rate": 7.859026498802371e-07, "loss": 0.23373782634735107, "step": 3035, "token_acc": 0.9316692667706709 }, { "epoch": 0.640506329113924, "grad_norm": 0.75, "learning_rate": 7.857639182940122e-07, "loss": 0.26505571603775024, "step": 3036, "token_acc": 0.9318323952470294 }, { "epoch": 0.6407172995780591, "grad_norm": 0.890625, "learning_rate": 7.856251540282643e-07, "loss": 0.3071167469024658, "step": 3037, "token_acc": 0.9146666666666666 }, { "epoch": 0.6409282700421941, "grad_norm": 1.1015625, "learning_rate": 7.854863570988628e-07, "loss": 0.25089165568351746, "step": 3038, "token_acc": 0.9252130877096508 }, { "epoch": 0.6411392405063291, "grad_norm": 0.78125, "learning_rate": 7.853475275216799e-07, "loss": 0.2748779058456421, "step": 3039, "token_acc": 0.9183131819522264 }, { "epoch": 0.6413502109704642, "grad_norm": 0.7109375, "learning_rate": 7.85208665312592e-07, "loss": 0.24846871197223663, "step": 3040, "token_acc": 0.9344936708860759 }, { "epoch": 0.6415611814345992, "grad_norm": 0.69140625, "learning_rate": 7.850697704874794e-07, "loss": 0.267638236284256, "step": 3041, "token_acc": 0.933028048082427 }, { "epoch": 0.6417721518987342, "grad_norm": 0.70703125, "learning_rate": 7.849308430622255e-07, "loss": 0.2612975239753723, "step": 3042, "token_acc": 0.9269924374636417 }, { "epoch": 0.6419831223628693, "grad_norm": 0.85546875, "learning_rate": 7.847918830527182e-07, "loss": 0.25324520468711853, "step": 3043, "token_acc": 0.932306705165223 }, { "epoch": 0.6421940928270042, "grad_norm": 0.82421875, "learning_rate": 7.846528904748488e-07, "loss": 0.33454716205596924, "step": 3044, "token_acc": 0.9150058616647128 }, { "epoch": 0.6424050632911392, "grad_norm": 0.78515625, "learning_rate": 7.845138653445118e-07, "loss": 0.28602325916290283, "step": 3045, "token_acc": 0.9296066252587992 }, { "epoch": 0.6426160337552742, "grad_norm": 0.8125, "learning_rate": 7.843748076776062e-07, "loss": 0.32718777656555176, "step": 3046, "token_acc": 0.9150912106135987 }, { "epoch": 0.6428270042194093, "grad_norm": 0.765625, "learning_rate": 7.842357174900347e-07, "loss": 0.27191758155822754, "step": 3047, "token_acc": 0.9263689526847422 }, { "epoch": 0.6430379746835443, "grad_norm": 0.7578125, "learning_rate": 7.84096594797703e-07, "loss": 0.24455609917640686, "step": 3048, "token_acc": 0.9323216995447648 }, { "epoch": 0.6432489451476793, "grad_norm": 0.87109375, "learning_rate": 7.83957439616521e-07, "loss": 0.29937100410461426, "step": 3049, "token_acc": 0.9179954441913439 }, { "epoch": 0.6434599156118144, "grad_norm": 0.67578125, "learning_rate": 7.838182519624025e-07, "loss": 0.21008306741714478, "step": 3050, "token_acc": 0.9374827871109888 }, { "epoch": 0.6436708860759494, "grad_norm": 0.765625, "learning_rate": 7.836790318512645e-07, "loss": 0.2814878821372986, "step": 3051, "token_acc": 0.9192145862552594 }, { "epoch": 0.6438818565400843, "grad_norm": 0.81640625, "learning_rate": 7.835397792990283e-07, "loss": 0.2927316427230835, "step": 3052, "token_acc": 0.9172693086491177 }, { "epoch": 0.6440928270042194, "grad_norm": 0.85546875, "learning_rate": 7.834004943216186e-07, "loss": 0.23839804530143738, "step": 3053, "token_acc": 0.9297544260422616 }, { "epoch": 0.6443037974683544, "grad_norm": 0.6640625, "learning_rate": 7.832611769349635e-07, "loss": 0.22734403610229492, "step": 3054, "token_acc": 0.9331713244228432 }, { "epoch": 0.6445147679324894, "grad_norm": 0.66796875, "learning_rate": 7.831218271549954e-07, "loss": 0.24664682149887085, "step": 3055, "token_acc": 0.9306748466257668 }, { "epoch": 0.6447257383966245, "grad_norm": 0.67578125, "learning_rate": 7.8298244499765e-07, "loss": 0.242615208029747, "step": 3056, "token_acc": 0.9309275444037215 }, { "epoch": 0.6449367088607595, "grad_norm": 0.55859375, "learning_rate": 7.828430304788669e-07, "loss": 0.20772212743759155, "step": 3057, "token_acc": 0.9379869118105328 }, { "epoch": 0.6451476793248945, "grad_norm": 0.84375, "learning_rate": 7.827035836145893e-07, "loss": 0.26444724202156067, "step": 3058, "token_acc": 0.9260188087774295 }, { "epoch": 0.6453586497890296, "grad_norm": 0.73828125, "learning_rate": 7.82564104420764e-07, "loss": 0.24940194189548492, "step": 3059, "token_acc": 0.9297265510144075 }, { "epoch": 0.6455696202531646, "grad_norm": 0.73828125, "learning_rate": 7.824245929133419e-07, "loss": 0.3330056667327881, "step": 3060, "token_acc": 0.9122247972190035 }, { "epoch": 0.6457805907172995, "grad_norm": 0.6796875, "learning_rate": 7.822850491082773e-07, "loss": 0.27055835723876953, "step": 3061, "token_acc": 0.926775956284153 }, { "epoch": 0.6459915611814346, "grad_norm": 0.625, "learning_rate": 7.82145473021528e-07, "loss": 0.26316219568252563, "step": 3062, "token_acc": 0.924924924924925 }, { "epoch": 0.6462025316455696, "grad_norm": 0.7578125, "learning_rate": 7.820058646690557e-07, "loss": 0.32788801193237305, "step": 3063, "token_acc": 0.9166666666666666 }, { "epoch": 0.6464135021097046, "grad_norm": 0.8125, "learning_rate": 7.818662240668259e-07, "loss": 0.29409509897232056, "step": 3064, "token_acc": 0.9208809135399674 }, { "epoch": 0.6466244725738397, "grad_norm": 0.9765625, "learning_rate": 7.817265512308077e-07, "loss": 0.27486085891723633, "step": 3065, "token_acc": 0.9250295159386068 }, { "epoch": 0.6468354430379747, "grad_norm": 0.6875, "learning_rate": 7.815868461769739e-07, "loss": 0.22071248292922974, "step": 3066, "token_acc": 0.9410557184750733 }, { "epoch": 0.6470464135021097, "grad_norm": 0.62109375, "learning_rate": 7.81447108921301e-07, "loss": 0.23185127973556519, "step": 3067, "token_acc": 0.9368115942028985 }, { "epoch": 0.6472573839662448, "grad_norm": 0.75, "learning_rate": 7.813073394797689e-07, "loss": 0.25843554735183716, "step": 3068, "token_acc": 0.9287245444801715 }, { "epoch": 0.6474683544303798, "grad_norm": 0.90234375, "learning_rate": 7.811675378683617e-07, "loss": 0.32872653007507324, "step": 3069, "token_acc": 0.9192018318613019 }, { "epoch": 0.6476793248945147, "grad_norm": 0.88671875, "learning_rate": 7.810277041030666e-07, "loss": 0.2778664231300354, "step": 3070, "token_acc": 0.9241071428571429 }, { "epoch": 0.6478902953586498, "grad_norm": 0.8671875, "learning_rate": 7.808878381998749e-07, "loss": 0.28107506036758423, "step": 3071, "token_acc": 0.9221802142407057 }, { "epoch": 0.6481012658227848, "grad_norm": 0.79296875, "learning_rate": 7.807479401747816e-07, "loss": 0.2725943922996521, "step": 3072, "token_acc": 0.9324280633688975 }, { "epoch": 0.6483122362869198, "grad_norm": 1.078125, "learning_rate": 7.806080100437851e-07, "loss": 0.2105576992034912, "step": 3073, "token_acc": 0.9428494041170098 }, { "epoch": 0.6485232067510549, "grad_norm": 0.58203125, "learning_rate": 7.804680478228876e-07, "loss": 0.21921232342720032, "step": 3074, "token_acc": 0.9386331938633193 }, { "epoch": 0.6487341772151899, "grad_norm": 0.73828125, "learning_rate": 7.80328053528095e-07, "loss": 0.21979068219661713, "step": 3075, "token_acc": 0.9364469405852793 }, { "epoch": 0.6489451476793249, "grad_norm": 0.62890625, "learning_rate": 7.801880271754166e-07, "loss": 0.2546820640563965, "step": 3076, "token_acc": 0.9253443526170799 }, { "epoch": 0.64915611814346, "grad_norm": 0.8984375, "learning_rate": 7.800479687808661e-07, "loss": 0.2715860605239868, "step": 3077, "token_acc": 0.9192629815745393 }, { "epoch": 0.649367088607595, "grad_norm": 0.98046875, "learning_rate": 7.799078783604599e-07, "loss": 0.27856212854385376, "step": 3078, "token_acc": 0.9242243436754176 }, { "epoch": 0.6495780590717299, "grad_norm": 0.9609375, "learning_rate": 7.797677559302188e-07, "loss": 0.2882682979106903, "step": 3079, "token_acc": 0.9254372506903958 }, { "epoch": 0.6497890295358649, "grad_norm": 0.8984375, "learning_rate": 7.796276015061666e-07, "loss": 0.2638348937034607, "step": 3080, "token_acc": 0.922069825436409 }, { "epoch": 0.65, "grad_norm": 0.82421875, "learning_rate": 7.794874151043318e-07, "loss": 0.2952510416507721, "step": 3081, "token_acc": 0.9261926192619262 }, { "epoch": 0.650210970464135, "grad_norm": 0.70703125, "learning_rate": 7.793471967407453e-07, "loss": 0.26452693343162537, "step": 3082, "token_acc": 0.9287925696594427 }, { "epoch": 0.65042194092827, "grad_norm": 0.64453125, "learning_rate": 7.792069464314426e-07, "loss": 0.2371031641960144, "step": 3083, "token_acc": 0.9286802804023163 }, { "epoch": 0.6506329113924051, "grad_norm": 0.77734375, "learning_rate": 7.790666641924623e-07, "loss": 0.3190723955631256, "step": 3084, "token_acc": 0.915893271461717 }, { "epoch": 0.6508438818565401, "grad_norm": 0.66015625, "learning_rate": 7.789263500398468e-07, "loss": 0.25250110030174255, "step": 3085, "token_acc": 0.9265940902021773 }, { "epoch": 0.6510548523206751, "grad_norm": 0.828125, "learning_rate": 7.787860039896426e-07, "loss": 0.2537957727909088, "step": 3086, "token_acc": 0.9322747893713546 }, { "epoch": 0.6512658227848102, "grad_norm": 0.77734375, "learning_rate": 7.786456260578991e-07, "loss": 0.2983219623565674, "step": 3087, "token_acc": 0.919376878928669 }, { "epoch": 0.6514767932489451, "grad_norm": 0.7109375, "learning_rate": 7.785052162606697e-07, "loss": 0.20377962291240692, "step": 3088, "token_acc": 0.9394107837687604 }, { "epoch": 0.6516877637130801, "grad_norm": 0.609375, "learning_rate": 7.783647746140118e-07, "loss": 0.22960101068019867, "step": 3089, "token_acc": 0.9318573893041978 }, { "epoch": 0.6518987341772152, "grad_norm": 0.84765625, "learning_rate": 7.782243011339858e-07, "loss": 0.2973095774650574, "step": 3090, "token_acc": 0.9217912330495112 }, { "epoch": 0.6521097046413502, "grad_norm": 0.671875, "learning_rate": 7.780837958366562e-07, "loss": 0.2449365258216858, "step": 3091, "token_acc": 0.9279442930669088 }, { "epoch": 0.6523206751054852, "grad_norm": 0.75, "learning_rate": 7.779432587380906e-07, "loss": 0.24572135508060455, "step": 3092, "token_acc": 0.927710843373494 }, { "epoch": 0.6525316455696203, "grad_norm": 0.71875, "learning_rate": 7.778026898543611e-07, "loss": 0.24269519746303558, "step": 3093, "token_acc": 0.9325554923164485 }, { "epoch": 0.6527426160337553, "grad_norm": 0.953125, "learning_rate": 7.776620892015426e-07, "loss": 0.25180885195732117, "step": 3094, "token_acc": 0.9318713450292397 }, { "epoch": 0.6529535864978903, "grad_norm": 0.76171875, "learning_rate": 7.775214567957141e-07, "loss": 0.28244489431381226, "step": 3095, "token_acc": 0.9211594202898551 }, { "epoch": 0.6531645569620254, "grad_norm": 0.71484375, "learning_rate": 7.773807926529581e-07, "loss": 0.26457691192626953, "step": 3096, "token_acc": 0.9237733644859814 }, { "epoch": 0.6533755274261603, "grad_norm": 0.734375, "learning_rate": 7.772400967893606e-07, "loss": 0.2756320536136627, "step": 3097, "token_acc": 0.9213057417662489 }, { "epoch": 0.6535864978902953, "grad_norm": 0.72265625, "learning_rate": 7.770993692210117e-07, "loss": 0.25129541754722595, "step": 3098, "token_acc": 0.930911256700417 }, { "epoch": 0.6537974683544304, "grad_norm": 0.98828125, "learning_rate": 7.769586099640045e-07, "loss": 0.27121663093566895, "step": 3099, "token_acc": 0.922074074074074 }, { "epoch": 0.6540084388185654, "grad_norm": 0.7578125, "learning_rate": 7.768178190344361e-07, "loss": 0.2529832720756531, "step": 3100, "token_acc": 0.9274744027303754 }, { "epoch": 0.6542194092827004, "grad_norm": 0.71875, "learning_rate": 7.766769964484071e-07, "loss": 0.2826771140098572, "step": 3101, "token_acc": 0.9231197771587744 }, { "epoch": 0.6544303797468355, "grad_norm": 0.78515625, "learning_rate": 7.765361422220218e-07, "loss": 0.29431024193763733, "step": 3102, "token_acc": 0.9194117647058824 }, { "epoch": 0.6546413502109705, "grad_norm": 0.671875, "learning_rate": 7.76395256371388e-07, "loss": 0.2853385806083679, "step": 3103, "token_acc": 0.9229147847034026 }, { "epoch": 0.6548523206751055, "grad_norm": 0.6796875, "learning_rate": 7.762543389126174e-07, "loss": 0.22982831299304962, "step": 3104, "token_acc": 0.9427129712080736 }, { "epoch": 0.6550632911392406, "grad_norm": 0.67578125, "learning_rate": 7.761133898618248e-07, "loss": 0.30975648760795593, "step": 3105, "token_acc": 0.9158653846153846 }, { "epoch": 0.6552742616033755, "grad_norm": 0.69921875, "learning_rate": 7.759724092351292e-07, "loss": 0.225993812084198, "step": 3106, "token_acc": 0.9345386533665836 }, { "epoch": 0.6554852320675105, "grad_norm": 0.75, "learning_rate": 7.758313970486526e-07, "loss": 0.25341343879699707, "step": 3107, "token_acc": 0.9296899968040908 }, { "epoch": 0.6556962025316456, "grad_norm": 1.0234375, "learning_rate": 7.756903533185213e-07, "loss": 0.26343226432800293, "step": 3108, "token_acc": 0.9296171171171171 }, { "epoch": 0.6559071729957806, "grad_norm": 0.7890625, "learning_rate": 7.755492780608646e-07, "loss": 0.2927061915397644, "step": 3109, "token_acc": 0.9187632615944226 }, { "epoch": 0.6561181434599156, "grad_norm": 0.79296875, "learning_rate": 7.754081712918157e-07, "loss": 0.2502285838127136, "step": 3110, "token_acc": 0.9318857822724569 }, { "epoch": 0.6563291139240506, "grad_norm": 0.79296875, "learning_rate": 7.752670330275113e-07, "loss": 0.2888762354850769, "step": 3111, "token_acc": 0.92436433859028 }, { "epoch": 0.6565400843881857, "grad_norm": 0.71484375, "learning_rate": 7.75125863284092e-07, "loss": 0.2523823082447052, "step": 3112, "token_acc": 0.932392710170488 }, { "epoch": 0.6567510548523207, "grad_norm": 0.69140625, "learning_rate": 7.749846620777016e-07, "loss": 0.20616665482521057, "step": 3113, "token_acc": 0.9421255699754472 }, { "epoch": 0.6569620253164556, "grad_norm": 0.6796875, "learning_rate": 7.748434294244875e-07, "loss": 0.25555679202079773, "step": 3114, "token_acc": 0.9315183833847881 }, { "epoch": 0.6571729957805907, "grad_norm": 0.59765625, "learning_rate": 7.747021653406009e-07, "loss": 0.23890173435211182, "step": 3115, "token_acc": 0.9364128885086969 }, { "epoch": 0.6573839662447257, "grad_norm": 0.67578125, "learning_rate": 7.745608698421969e-07, "loss": 0.24340923130512238, "step": 3116, "token_acc": 0.930379746835443 }, { "epoch": 0.6575949367088607, "grad_norm": 0.8046875, "learning_rate": 7.744195429454334e-07, "loss": 0.23826062679290771, "step": 3117, "token_acc": 0.9356948228882834 }, { "epoch": 0.6578059071729958, "grad_norm": 0.8828125, "learning_rate": 7.742781846664725e-07, "loss": 0.22302325069904327, "step": 3118, "token_acc": 0.9344894026974951 }, { "epoch": 0.6580168776371308, "grad_norm": 0.71484375, "learning_rate": 7.741367950214799e-07, "loss": 0.29790037870407104, "step": 3119, "token_acc": 0.9187582562747688 }, { "epoch": 0.6582278481012658, "grad_norm": 0.609375, "learning_rate": 7.739953740266243e-07, "loss": 0.23226679861545563, "step": 3120, "token_acc": 0.9324166179052077 }, { "epoch": 0.6584388185654009, "grad_norm": 1.09375, "learning_rate": 7.738539216980787e-07, "loss": 0.25283533334732056, "step": 3121, "token_acc": 0.9305389221556887 }, { "epoch": 0.6586497890295359, "grad_norm": 0.67578125, "learning_rate": 7.737124380520193e-07, "loss": 0.2053796947002411, "step": 3122, "token_acc": 0.9363001745200699 }, { "epoch": 0.6588607594936708, "grad_norm": 0.69921875, "learning_rate": 7.735709231046258e-07, "loss": 0.2517194151878357, "step": 3123, "token_acc": 0.937144567494762 }, { "epoch": 0.6590717299578059, "grad_norm": 0.8828125, "learning_rate": 7.734293768720819e-07, "loss": 0.23894651234149933, "step": 3124, "token_acc": 0.9386443944747832 }, { "epoch": 0.6592827004219409, "grad_norm": 0.62109375, "learning_rate": 7.732877993705744e-07, "loss": 0.24319568276405334, "step": 3125, "token_acc": 0.9246107620868615 }, { "epoch": 0.6594936708860759, "grad_norm": 0.79296875, "learning_rate": 7.73146190616294e-07, "loss": 0.26895684003829956, "step": 3126, "token_acc": 0.9297520661157025 }, { "epoch": 0.659704641350211, "grad_norm": 0.8046875, "learning_rate": 7.730045506254346e-07, "loss": 0.24235601723194122, "step": 3127, "token_acc": 0.9352678571428571 }, { "epoch": 0.659915611814346, "grad_norm": 0.94140625, "learning_rate": 7.728628794141944e-07, "loss": 0.2073080837726593, "step": 3128, "token_acc": 0.9392014519056261 }, { "epoch": 0.660126582278481, "grad_norm": 0.6953125, "learning_rate": 7.727211769987742e-07, "loss": 0.25979846715927124, "step": 3129, "token_acc": 0.9233004067402673 }, { "epoch": 0.6603375527426161, "grad_norm": 0.7734375, "learning_rate": 7.725794433953794e-07, "loss": 0.2912573516368866, "step": 3130, "token_acc": 0.9209395588656546 }, { "epoch": 0.6605485232067511, "grad_norm": 0.59765625, "learning_rate": 7.724376786202181e-07, "loss": 0.23400036990642548, "step": 3131, "token_acc": 0.9316846986089644 }, { "epoch": 0.660759493670886, "grad_norm": 0.69140625, "learning_rate": 7.722958826895021e-07, "loss": 0.2757851779460907, "step": 3132, "token_acc": 0.9274143302180685 }, { "epoch": 0.6609704641350211, "grad_norm": 0.66015625, "learning_rate": 7.721540556194475e-07, "loss": 0.24161680042743683, "step": 3133, "token_acc": 0.9329232571763327 }, { "epoch": 0.6611814345991561, "grad_norm": 0.640625, "learning_rate": 7.72012197426273e-07, "loss": 0.2144400030374527, "step": 3134, "token_acc": 0.9371106496588549 }, { "epoch": 0.6613924050632911, "grad_norm": 0.8359375, "learning_rate": 7.718703081262015e-07, "loss": 0.27659112215042114, "step": 3135, "token_acc": 0.925089179548157 }, { "epoch": 0.6616033755274262, "grad_norm": 0.78515625, "learning_rate": 7.717283877354591e-07, "loss": 0.3199487328529358, "step": 3136, "token_acc": 0.9124149659863946 }, { "epoch": 0.6618143459915612, "grad_norm": 0.71484375, "learning_rate": 7.715864362702758e-07, "loss": 0.2855612635612488, "step": 3137, "token_acc": 0.9222946544980444 }, { "epoch": 0.6620253164556962, "grad_norm": 0.89453125, "learning_rate": 7.714444537468847e-07, "loss": 0.28211283683776855, "step": 3138, "token_acc": 0.9264541387024608 }, { "epoch": 0.6622362869198313, "grad_norm": 0.796875, "learning_rate": 7.713024401815229e-07, "loss": 0.26146605610847473, "step": 3139, "token_acc": 0.924908424908425 }, { "epoch": 0.6624472573839663, "grad_norm": 0.94140625, "learning_rate": 7.711603955904308e-07, "loss": 0.24192358553409576, "step": 3140, "token_acc": 0.9351383516810473 }, { "epoch": 0.6626582278481012, "grad_norm": 0.69140625, "learning_rate": 7.710183199898522e-07, "loss": 0.2484784871339798, "step": 3141, "token_acc": 0.9318936877076412 }, { "epoch": 0.6628691983122363, "grad_norm": 0.828125, "learning_rate": 7.708762133960351e-07, "loss": 0.26737135648727417, "step": 3142, "token_acc": 0.9215922798552473 }, { "epoch": 0.6630801687763713, "grad_norm": 0.8125, "learning_rate": 7.707340758252301e-07, "loss": 0.2874600291252136, "step": 3143, "token_acc": 0.9173780487804878 }, { "epoch": 0.6632911392405063, "grad_norm": 0.796875, "learning_rate": 7.705919072936922e-07, "loss": 0.3105255961418152, "step": 3144, "token_acc": 0.9209792376820576 }, { "epoch": 0.6635021097046413, "grad_norm": 0.65625, "learning_rate": 7.704497078176793e-07, "loss": 0.2512602210044861, "step": 3145, "token_acc": 0.9300085009917823 }, { "epoch": 0.6637130801687764, "grad_norm": 0.97265625, "learning_rate": 7.703074774134533e-07, "loss": 0.30895164608955383, "step": 3146, "token_acc": 0.9127379209370424 }, { "epoch": 0.6639240506329114, "grad_norm": 0.71875, "learning_rate": 7.701652160972794e-07, "loss": 0.2553439438343048, "step": 3147, "token_acc": 0.927317523868186 }, { "epoch": 0.6641350210970464, "grad_norm": 0.765625, "learning_rate": 7.700229238854264e-07, "loss": 0.28922706842422485, "step": 3148, "token_acc": 0.9223676383105295 }, { "epoch": 0.6643459915611815, "grad_norm": 0.89453125, "learning_rate": 7.698806007941665e-07, "loss": 0.2813107371330261, "step": 3149, "token_acc": 0.9206204379562044 }, { "epoch": 0.6645569620253164, "grad_norm": 0.7109375, "learning_rate": 7.697382468397757e-07, "loss": 0.22922693192958832, "step": 3150, "token_acc": 0.9329159212880143 }, { "epoch": 0.6647679324894514, "grad_norm": 0.7109375, "learning_rate": 7.695958620385335e-07, "loss": 0.26751708984375, "step": 3151, "token_acc": 0.9225768321513003 }, { "epoch": 0.6649789029535865, "grad_norm": 0.65625, "learning_rate": 7.694534464067225e-07, "loss": 0.22117497026920319, "step": 3152, "token_acc": 0.9382972411747256 }, { "epoch": 0.6651898734177215, "grad_norm": 0.6796875, "learning_rate": 7.693109999606292e-07, "loss": 0.24099797010421753, "step": 3153, "token_acc": 0.935771632471008 }, { "epoch": 0.6654008438818565, "grad_norm": 0.86328125, "learning_rate": 7.691685227165436e-07, "loss": 0.25662070512771606, "step": 3154, "token_acc": 0.9281592613964224 }, { "epoch": 0.6656118143459916, "grad_norm": 0.7734375, "learning_rate": 7.690260146907594e-07, "loss": 0.23539036512374878, "step": 3155, "token_acc": 0.9282223579718998 }, { "epoch": 0.6658227848101266, "grad_norm": 0.62890625, "learning_rate": 7.688834758995733e-07, "loss": 0.24052363634109497, "step": 3156, "token_acc": 0.9332542272322752 }, { "epoch": 0.6660337552742616, "grad_norm": 0.7421875, "learning_rate": 7.687409063592857e-07, "loss": 0.22782741487026215, "step": 3157, "token_acc": 0.935761399483797 }, { "epoch": 0.6662447257383967, "grad_norm": 0.75, "learning_rate": 7.685983060862011e-07, "loss": 0.25636208057403564, "step": 3158, "token_acc": 0.9298299845440494 }, { "epoch": 0.6664556962025316, "grad_norm": 0.69921875, "learning_rate": 7.684556750966269e-07, "loss": 0.2227511703968048, "step": 3159, "token_acc": 0.9361763022323984 }, { "epoch": 0.6666666666666666, "grad_norm": 0.84375, "learning_rate": 7.68313013406874e-07, "loss": 0.2606823444366455, "step": 3160, "token_acc": 0.9255706252067483 }, { "epoch": 0.6668776371308017, "grad_norm": 0.78515625, "learning_rate": 7.681703210332569e-07, "loss": 0.293193519115448, "step": 3161, "token_acc": 0.9159318048206937 }, { "epoch": 0.6670886075949367, "grad_norm": 0.90234375, "learning_rate": 7.680275979920938e-07, "loss": 0.34923017024993896, "step": 3162, "token_acc": 0.903337169159954 }, { "epoch": 0.6672995780590717, "grad_norm": 0.69921875, "learning_rate": 7.678848442997064e-07, "loss": 0.22675806283950806, "step": 3163, "token_acc": 0.9358088658606794 }, { "epoch": 0.6675105485232068, "grad_norm": 0.8046875, "learning_rate": 7.677420599724198e-07, "loss": 0.25041741132736206, "step": 3164, "token_acc": 0.923265306122449 }, { "epoch": 0.6677215189873418, "grad_norm": 0.96484375, "learning_rate": 7.675992450265623e-07, "loss": 0.27693238854408264, "step": 3165, "token_acc": 0.9242325277596343 }, { "epoch": 0.6679324894514768, "grad_norm": 0.68359375, "learning_rate": 7.674563994784662e-07, "loss": 0.2447778284549713, "step": 3166, "token_acc": 0.9301297169811321 }, { "epoch": 0.6681434599156119, "grad_norm": 0.65625, "learning_rate": 7.673135233444672e-07, "loss": 0.2642607092857361, "step": 3167, "token_acc": 0.9263803680981595 }, { "epoch": 0.6683544303797468, "grad_norm": 0.625, "learning_rate": 7.671706166409042e-07, "loss": 0.2153715193271637, "step": 3168, "token_acc": 0.938475665748393 }, { "epoch": 0.6685654008438818, "grad_norm": 0.765625, "learning_rate": 7.670276793841199e-07, "loss": 0.30745744705200195, "step": 3169, "token_acc": 0.9186941792423776 }, { "epoch": 0.6687763713080169, "grad_norm": 0.7421875, "learning_rate": 7.668847115904602e-07, "loss": 0.2834430932998657, "step": 3170, "token_acc": 0.9242424242424242 }, { "epoch": 0.6689873417721519, "grad_norm": 0.6953125, "learning_rate": 7.667417132762751e-07, "loss": 0.2888275384902954, "step": 3171, "token_acc": 0.9191862107940096 }, { "epoch": 0.6691983122362869, "grad_norm": 0.6953125, "learning_rate": 7.665986844579173e-07, "loss": 0.2672565281391144, "step": 3172, "token_acc": 0.9227563212003335 }, { "epoch": 0.669409282700422, "grad_norm": 0.6640625, "learning_rate": 7.664556251517435e-07, "loss": 0.23572754859924316, "step": 3173, "token_acc": 0.9287709497206704 }, { "epoch": 0.669620253164557, "grad_norm": 0.7734375, "learning_rate": 7.663125353741135e-07, "loss": 0.25297826528549194, "step": 3174, "token_acc": 0.9297544260422616 }, { "epoch": 0.669831223628692, "grad_norm": 0.75, "learning_rate": 7.661694151413912e-07, "loss": 0.25871604681015015, "step": 3175, "token_acc": 0.9286495352651722 }, { "epoch": 0.6700421940928271, "grad_norm": 0.9375, "learning_rate": 7.660262644699436e-07, "loss": 0.28908419609069824, "step": 3176, "token_acc": 0.9220933690157598 }, { "epoch": 0.670253164556962, "grad_norm": 0.67578125, "learning_rate": 7.658830833761407e-07, "loss": 0.20533034205436707, "step": 3177, "token_acc": 0.9360516280434145 }, { "epoch": 0.670464135021097, "grad_norm": 1.6640625, "learning_rate": 7.657398718763569e-07, "loss": 0.26181352138519287, "step": 3178, "token_acc": 0.9317851959361393 }, { "epoch": 0.670675105485232, "grad_norm": 0.765625, "learning_rate": 7.655966299869696e-07, "loss": 0.2584003508090973, "step": 3179, "token_acc": 0.9225922592259226 }, { "epoch": 0.6708860759493671, "grad_norm": 0.83984375, "learning_rate": 7.654533577243597e-07, "loss": 0.246573805809021, "step": 3180, "token_acc": 0.9345168800931315 }, { "epoch": 0.6710970464135021, "grad_norm": 0.67578125, "learning_rate": 7.653100551049115e-07, "loss": 0.24330395460128784, "step": 3181, "token_acc": 0.9350497366881217 }, { "epoch": 0.6713080168776371, "grad_norm": 0.87890625, "learning_rate": 7.65166722145013e-07, "loss": 0.3328629732131958, "step": 3182, "token_acc": 0.9151607963246554 }, { "epoch": 0.6715189873417722, "grad_norm": 0.90234375, "learning_rate": 7.650233588610554e-07, "loss": 0.30630165338516235, "step": 3183, "token_acc": 0.9208549971114962 }, { "epoch": 0.6717299578059072, "grad_norm": 0.65234375, "learning_rate": 7.648799652694337e-07, "loss": 0.23914943635463715, "step": 3184, "token_acc": 0.9356072680905324 }, { "epoch": 0.6719409282700421, "grad_norm": 0.7421875, "learning_rate": 7.647365413865459e-07, "loss": 0.26040494441986084, "step": 3185, "token_acc": 0.9299173553719008 }, { "epoch": 0.6721518987341772, "grad_norm": 0.68359375, "learning_rate": 7.64593087228794e-07, "loss": 0.23518428206443787, "step": 3186, "token_acc": 0.936328473668551 }, { "epoch": 0.6723628691983122, "grad_norm": 0.64453125, "learning_rate": 7.644496028125828e-07, "loss": 0.2647557854652405, "step": 3187, "token_acc": 0.9272577545978589 }, { "epoch": 0.6725738396624472, "grad_norm": 0.64453125, "learning_rate": 7.643060881543216e-07, "loss": 0.2475782334804535, "step": 3188, "token_acc": 0.928305133352452 }, { "epoch": 0.6727848101265823, "grad_norm": 0.7734375, "learning_rate": 7.641625432704219e-07, "loss": 0.25520020723342896, "step": 3189, "token_acc": 0.9258297258297258 }, { "epoch": 0.6729957805907173, "grad_norm": 0.65625, "learning_rate": 7.640189681772996e-07, "loss": 0.24445882439613342, "step": 3190, "token_acc": 0.9292807703882034 }, { "epoch": 0.6732067510548523, "grad_norm": 0.671875, "learning_rate": 7.638753628913737e-07, "loss": 0.21792951226234436, "step": 3191, "token_acc": 0.9311385459533608 }, { "epoch": 0.6734177215189874, "grad_norm": 0.6953125, "learning_rate": 7.637317274290666e-07, "loss": 0.23328039050102234, "step": 3192, "token_acc": 0.9373848987108656 }, { "epoch": 0.6736286919831224, "grad_norm": 0.8203125, "learning_rate": 7.635880618068043e-07, "loss": 0.278873085975647, "step": 3193, "token_acc": 0.9272943980929678 }, { "epoch": 0.6738396624472573, "grad_norm": 0.59765625, "learning_rate": 7.634443660410162e-07, "loss": 0.21613413095474243, "step": 3194, "token_acc": 0.9387044967880086 }, { "epoch": 0.6740506329113924, "grad_norm": 0.74609375, "learning_rate": 7.633006401481348e-07, "loss": 0.25570034980773926, "step": 3195, "token_acc": 0.9299363057324841 }, { "epoch": 0.6742616033755274, "grad_norm": 0.80078125, "learning_rate": 7.63156884144597e-07, "loss": 0.2741197943687439, "step": 3196, "token_acc": 0.922756981580511 }, { "epoch": 0.6744725738396624, "grad_norm": 0.62890625, "learning_rate": 7.630130980468418e-07, "loss": 0.21763503551483154, "step": 3197, "token_acc": 0.9334818586887333 }, { "epoch": 0.6746835443037975, "grad_norm": 1.21875, "learning_rate": 7.628692818713129e-07, "loss": 0.21089525520801544, "step": 3198, "token_acc": 0.9399449035812673 }, { "epoch": 0.6748945147679325, "grad_norm": 0.7421875, "learning_rate": 7.627254356344568e-07, "loss": 0.26615262031555176, "step": 3199, "token_acc": 0.9312024353120244 }, { "epoch": 0.6751054852320675, "grad_norm": 0.85546875, "learning_rate": 7.625815593527232e-07, "loss": 0.2559445798397064, "step": 3200, "token_acc": 0.9226788432267884 }, { "epoch": 0.6751054852320675, "eval_loss": 0.43363630771636963, "eval_runtime": 245.5289, "eval_samples_per_second": 137.275, "eval_steps_per_second": 2.146, "eval_token_acc": 0.899120524095845, "step": 3200 }, { "epoch": 0.6753164556962026, "grad_norm": 0.6796875, "learning_rate": 7.62437653042566e-07, "loss": 0.22704821825027466, "step": 3201, "token_acc": 0.9374205844980941 }, { "epoch": 0.6755274261603376, "grad_norm": 0.66015625, "learning_rate": 7.622937167204417e-07, "loss": 0.23736219108104706, "step": 3202, "token_acc": 0.9349683108294295 }, { "epoch": 0.6757383966244725, "grad_norm": 0.73046875, "learning_rate": 7.621497504028112e-07, "loss": 0.23948010802268982, "step": 3203, "token_acc": 0.9320053120849934 }, { "epoch": 0.6759493670886076, "grad_norm": 0.70703125, "learning_rate": 7.620057541061378e-07, "loss": 0.23244325816631317, "step": 3204, "token_acc": 0.9356634704608723 }, { "epoch": 0.6761603375527426, "grad_norm": 0.6640625, "learning_rate": 7.618617278468887e-07, "loss": 0.260410338640213, "step": 3205, "token_acc": 0.926048898279505 }, { "epoch": 0.6763713080168776, "grad_norm": 0.67578125, "learning_rate": 7.617176716415348e-07, "loss": 0.2899959087371826, "step": 3206, "token_acc": 0.9259259259259259 }, { "epoch": 0.6765822784810127, "grad_norm": 0.73046875, "learning_rate": 7.615735855065497e-07, "loss": 0.23023764789104462, "step": 3207, "token_acc": 0.9306075439101641 }, { "epoch": 0.6767932489451477, "grad_norm": 0.76171875, "learning_rate": 7.614294694584116e-07, "loss": 0.26817238330841064, "step": 3208, "token_acc": 0.9285305889079474 }, { "epoch": 0.6770042194092827, "grad_norm": 0.82421875, "learning_rate": 7.612853235136006e-07, "loss": 0.30022916197776794, "step": 3209, "token_acc": 0.9253981559094719 }, { "epoch": 0.6772151898734177, "grad_norm": 0.7109375, "learning_rate": 7.611411476886013e-07, "loss": 0.24914616346359253, "step": 3210, "token_acc": 0.930921052631579 }, { "epoch": 0.6774261603375528, "grad_norm": 0.69140625, "learning_rate": 7.609969419999019e-07, "loss": 0.23310689628124237, "step": 3211, "token_acc": 0.9343463302752294 }, { "epoch": 0.6776371308016877, "grad_norm": 0.83203125, "learning_rate": 7.608527064639926e-07, "loss": 0.336066335439682, "step": 3212, "token_acc": 0.9126533054142986 }, { "epoch": 0.6778481012658227, "grad_norm": 0.78515625, "learning_rate": 7.607084410973688e-07, "loss": 0.26848989725112915, "step": 3213, "token_acc": 0.927437641723356 }, { "epoch": 0.6780590717299578, "grad_norm": 0.6484375, "learning_rate": 7.605641459165281e-07, "loss": 0.2375301867723465, "step": 3214, "token_acc": 0.9318181818181818 }, { "epoch": 0.6782700421940928, "grad_norm": 0.828125, "learning_rate": 7.60419820937972e-07, "loss": 0.2517338991165161, "step": 3215, "token_acc": 0.9278860133759814 }, { "epoch": 0.6784810126582278, "grad_norm": 0.75, "learning_rate": 7.60275466178205e-07, "loss": 0.3106905519962311, "step": 3216, "token_acc": 0.9157119476268413 }, { "epoch": 0.6786919831223629, "grad_norm": 0.59375, "learning_rate": 7.601310816537355e-07, "loss": 0.2285967618227005, "step": 3217, "token_acc": 0.9337641357027464 }, { "epoch": 0.6789029535864979, "grad_norm": 0.6171875, "learning_rate": 7.599866673810753e-07, "loss": 0.2275627851486206, "step": 3218, "token_acc": 0.9368780210406596 }, { "epoch": 0.6791139240506329, "grad_norm": 0.78515625, "learning_rate": 7.598422233767389e-07, "loss": 0.2256106436252594, "step": 3219, "token_acc": 0.932919621749409 }, { "epoch": 0.679324894514768, "grad_norm": 0.7265625, "learning_rate": 7.596977496572452e-07, "loss": 0.2990606725215912, "step": 3220, "token_acc": 0.9166429587482219 }, { "epoch": 0.679535864978903, "grad_norm": 0.62109375, "learning_rate": 7.595532462391156e-07, "loss": 0.2771087884902954, "step": 3221, "token_acc": 0.9275082690187431 }, { "epoch": 0.6797468354430379, "grad_norm": 1.703125, "learning_rate": 7.594087131388755e-07, "loss": 0.2527494430541992, "step": 3222, "token_acc": 0.9285506825442927 }, { "epoch": 0.679957805907173, "grad_norm": 0.7890625, "learning_rate": 7.592641503730535e-07, "loss": 0.30084139108657837, "step": 3223, "token_acc": 0.9187184811628597 }, { "epoch": 0.680168776371308, "grad_norm": 0.7265625, "learning_rate": 7.591195579581815e-07, "loss": 0.26312190294265747, "step": 3224, "token_acc": 0.9274447949526814 }, { "epoch": 0.680379746835443, "grad_norm": 0.58203125, "learning_rate": 7.589749359107948e-07, "loss": 0.21497726440429688, "step": 3225, "token_acc": 0.9374812143071837 }, { "epoch": 0.6805907172995781, "grad_norm": 0.71875, "learning_rate": 7.588302842474323e-07, "loss": 0.2509644031524658, "step": 3226, "token_acc": 0.9317792252314021 }, { "epoch": 0.6808016877637131, "grad_norm": 0.7265625, "learning_rate": 7.58685602984636e-07, "loss": 0.24224653840065002, "step": 3227, "token_acc": 0.9299587992937022 }, { "epoch": 0.6810126582278481, "grad_norm": 0.671875, "learning_rate": 7.585408921389516e-07, "loss": 0.2703883647918701, "step": 3228, "token_acc": 0.919431279620853 }, { "epoch": 0.6812236286919832, "grad_norm": 0.66796875, "learning_rate": 7.583961517269278e-07, "loss": 0.19926580786705017, "step": 3229, "token_acc": 0.9410906969962088 }, { "epoch": 0.6814345991561181, "grad_norm": 0.76171875, "learning_rate": 7.582513817651172e-07, "loss": 0.25744202733039856, "step": 3230, "token_acc": 0.9282563951778888 }, { "epoch": 0.6816455696202531, "grad_norm": 0.859375, "learning_rate": 7.58106582270075e-07, "loss": 0.3080776035785675, "step": 3231, "token_acc": 0.916396629941672 }, { "epoch": 0.6818565400843882, "grad_norm": 0.73046875, "learning_rate": 7.579617532583607e-07, "loss": 0.2766522765159607, "step": 3232, "token_acc": 0.9246448424953675 }, { "epoch": 0.6820675105485232, "grad_norm": 0.75, "learning_rate": 7.578168947465366e-07, "loss": 0.2910187840461731, "step": 3233, "token_acc": 0.9181047089792337 }, { "epoch": 0.6822784810126582, "grad_norm": 0.79296875, "learning_rate": 7.576720067511683e-07, "loss": 0.2771153450012207, "step": 3234, "token_acc": 0.9212649241690868 }, { "epoch": 0.6824894514767933, "grad_norm": 1.0625, "learning_rate": 7.575270892888253e-07, "loss": 0.31149500608444214, "step": 3235, "token_acc": 0.9173065564087419 }, { "epoch": 0.6827004219409283, "grad_norm": 0.71484375, "learning_rate": 7.573821423760796e-07, "loss": 0.23002135753631592, "step": 3236, "token_acc": 0.9342583415923357 }, { "epoch": 0.6829113924050633, "grad_norm": 0.70703125, "learning_rate": 7.572371660295077e-07, "loss": 0.22346250712871552, "step": 3237, "token_acc": 0.9367932734125833 }, { "epoch": 0.6831223628691984, "grad_norm": 0.6953125, "learning_rate": 7.570921602656886e-07, "loss": 0.25263550877571106, "step": 3238, "token_acc": 0.9255014326647565 }, { "epoch": 0.6833333333333333, "grad_norm": 0.78515625, "learning_rate": 7.569471251012049e-07, "loss": 0.27680644392967224, "step": 3239, "token_acc": 0.9221218961625283 }, { "epoch": 0.6835443037974683, "grad_norm": 0.828125, "learning_rate": 7.568020605526427e-07, "loss": 0.2889561653137207, "step": 3240, "token_acc": 0.9211808541129264 }, { "epoch": 0.6837552742616034, "grad_norm": 0.76953125, "learning_rate": 7.566569666365914e-07, "loss": 0.22194808721542358, "step": 3241, "token_acc": 0.938422247446084 }, { "epoch": 0.6839662447257384, "grad_norm": 0.76171875, "learning_rate": 7.565118433696435e-07, "loss": 0.2710520327091217, "step": 3242, "token_acc": 0.921152800435019 }, { "epoch": 0.6841772151898734, "grad_norm": 0.6484375, "learning_rate": 7.563666907683953e-07, "loss": 0.24523121118545532, "step": 3243, "token_acc": 0.9322484224510129 }, { "epoch": 0.6843881856540084, "grad_norm": 0.74609375, "learning_rate": 7.562215088494461e-07, "loss": 0.262067973613739, "step": 3244, "token_acc": 0.9256678281068524 }, { "epoch": 0.6845991561181435, "grad_norm": 0.74609375, "learning_rate": 7.560762976293989e-07, "loss": 0.2653249502182007, "step": 3245, "token_acc": 0.9230769230769231 }, { "epoch": 0.6848101265822785, "grad_norm": 0.734375, "learning_rate": 7.559310571248594e-07, "loss": 0.2775425314903259, "step": 3246, "token_acc": 0.9215035931453842 }, { "epoch": 0.6850210970464135, "grad_norm": 1.078125, "learning_rate": 7.557857873524375e-07, "loss": 0.2784033417701721, "step": 3247, "token_acc": 0.9258241758241759 }, { "epoch": 0.6852320675105485, "grad_norm": 0.71484375, "learning_rate": 7.556404883287457e-07, "loss": 0.2980385422706604, "step": 3248, "token_acc": 0.9246978617911373 }, { "epoch": 0.6854430379746835, "grad_norm": 0.68359375, "learning_rate": 7.554951600704005e-07, "loss": 0.22596167027950287, "step": 3249, "token_acc": 0.9325443786982248 }, { "epoch": 0.6856540084388185, "grad_norm": 0.7109375, "learning_rate": 7.55349802594021e-07, "loss": 0.267772912979126, "step": 3250, "token_acc": 0.9188911704312115 }, { "epoch": 0.6858649789029536, "grad_norm": 0.63671875, "learning_rate": 7.552044159162305e-07, "loss": 0.23564210534095764, "step": 3251, "token_acc": 0.9340725202277494 }, { "epoch": 0.6860759493670886, "grad_norm": 0.78125, "learning_rate": 7.550590000536551e-07, "loss": 0.3014252185821533, "step": 3252, "token_acc": 0.9253112033195021 }, { "epoch": 0.6862869198312236, "grad_norm": 0.76953125, "learning_rate": 7.549135550229241e-07, "loss": 0.30903488397598267, "step": 3253, "token_acc": 0.9134101771155468 }, { "epoch": 0.6864978902953587, "grad_norm": 0.6875, "learning_rate": 7.547680808406707e-07, "loss": 0.20883674919605255, "step": 3254, "token_acc": 0.9411431805319751 }, { "epoch": 0.6867088607594937, "grad_norm": 0.66796875, "learning_rate": 7.546225775235306e-07, "loss": 0.24962174892425537, "step": 3255, "token_acc": 0.9298662704309064 }, { "epoch": 0.6869198312236287, "grad_norm": 0.6171875, "learning_rate": 7.544770450881439e-07, "loss": 0.23468412458896637, "step": 3256, "token_acc": 0.9341983317886933 }, { "epoch": 0.6871308016877637, "grad_norm": 0.7578125, "learning_rate": 7.543314835511532e-07, "loss": 0.22160513699054718, "step": 3257, "token_acc": 0.9383259911894273 }, { "epoch": 0.6873417721518987, "grad_norm": 0.609375, "learning_rate": 7.541858929292045e-07, "loss": 0.2420615553855896, "step": 3258, "token_acc": 0.9325810185185185 }, { "epoch": 0.6875527426160337, "grad_norm": 0.69140625, "learning_rate": 7.540402732389478e-07, "loss": 0.25114914774894714, "step": 3259, "token_acc": 0.9274563820018366 }, { "epoch": 0.6877637130801688, "grad_norm": 0.73046875, "learning_rate": 7.538946244970354e-07, "loss": 0.2424725592136383, "step": 3260, "token_acc": 0.9329446064139941 }, { "epoch": 0.6879746835443038, "grad_norm": 0.671875, "learning_rate": 7.537489467201239e-07, "loss": 0.22012485563755035, "step": 3261, "token_acc": 0.9408740359897172 }, { "epoch": 0.6881856540084388, "grad_norm": 0.69921875, "learning_rate": 7.536032399248727e-07, "loss": 0.243309885263443, "step": 3262, "token_acc": 0.9273516642547033 }, { "epoch": 0.6883966244725739, "grad_norm": 0.66796875, "learning_rate": 7.534575041279445e-07, "loss": 0.2567460536956787, "step": 3263, "token_acc": 0.9306406685236769 }, { "epoch": 0.6886075949367089, "grad_norm": 0.68359375, "learning_rate": 7.533117393460053e-07, "loss": 0.2661284804344177, "step": 3264, "token_acc": 0.9224633056796426 }, { "epoch": 0.6888185654008439, "grad_norm": 0.89453125, "learning_rate": 7.531659455957249e-07, "loss": 0.26830020546913147, "step": 3265, "token_acc": 0.9246861924686193 }, { "epoch": 0.689029535864979, "grad_norm": 1.0078125, "learning_rate": 7.530201228937758e-07, "loss": 0.2615075409412384, "step": 3266, "token_acc": 0.9219526454253142 }, { "epoch": 0.6892405063291139, "grad_norm": 0.6875, "learning_rate": 7.528742712568341e-07, "loss": 0.2358209192752838, "step": 3267, "token_acc": 0.9352401511063141 }, { "epoch": 0.6894514767932489, "grad_norm": 0.74609375, "learning_rate": 7.527283907015789e-07, "loss": 0.21003782749176025, "step": 3268, "token_acc": 0.9377245508982036 }, { "epoch": 0.689662447257384, "grad_norm": 0.7109375, "learning_rate": 7.525824812446935e-07, "loss": 0.23090820014476776, "step": 3269, "token_acc": 0.933471318746304 }, { "epoch": 0.689873417721519, "grad_norm": 0.65234375, "learning_rate": 7.524365429028634e-07, "loss": 0.22380530834197998, "step": 3270, "token_acc": 0.9352371239163692 }, { "epoch": 0.690084388185654, "grad_norm": 0.76171875, "learning_rate": 7.522905756927781e-07, "loss": 0.24025195837020874, "step": 3271, "token_acc": 0.9292777944261312 }, { "epoch": 0.6902953586497891, "grad_norm": 0.71484375, "learning_rate": 7.521445796311299e-07, "loss": 0.28366726636886597, "step": 3272, "token_acc": 0.9176706827309237 }, { "epoch": 0.6905063291139241, "grad_norm": 0.9609375, "learning_rate": 7.519985547346151e-07, "loss": 0.24642866849899292, "step": 3273, "token_acc": 0.930057280675309 }, { "epoch": 0.690717299578059, "grad_norm": 0.68359375, "learning_rate": 7.518525010199326e-07, "loss": 0.25360894203186035, "step": 3274, "token_acc": 0.9344125034886966 }, { "epoch": 0.6909282700421941, "grad_norm": 0.73046875, "learning_rate": 7.517064185037848e-07, "loss": 0.2624084949493408, "step": 3275, "token_acc": 0.9256957579032694 }, { "epoch": 0.6911392405063291, "grad_norm": 0.69140625, "learning_rate": 7.515603072028778e-07, "loss": 0.2348766028881073, "step": 3276, "token_acc": 0.9324399776661083 }, { "epoch": 0.6913502109704641, "grad_norm": 0.79296875, "learning_rate": 7.514141671339202e-07, "loss": 0.27015817165374756, "step": 3277, "token_acc": 0.9279482127779342 }, { "epoch": 0.6915611814345991, "grad_norm": 0.87890625, "learning_rate": 7.512679983136249e-07, "loss": 0.28852468729019165, "step": 3278, "token_acc": 0.9254852849092048 }, { "epoch": 0.6917721518987342, "grad_norm": 0.609375, "learning_rate": 7.511218007587073e-07, "loss": 0.2707630395889282, "step": 3279, "token_acc": 0.9345979614949037 }, { "epoch": 0.6919831223628692, "grad_norm": 2.3125, "learning_rate": 7.509755744858862e-07, "loss": 0.2502117156982422, "step": 3280, "token_acc": 0.937192118226601 }, { "epoch": 0.6921940928270042, "grad_norm": 0.58203125, "learning_rate": 7.50829319511884e-07, "loss": 0.2181677520275116, "step": 3281, "token_acc": 0.9353661714881418 }, { "epoch": 0.6924050632911393, "grad_norm": 0.9453125, "learning_rate": 7.506830358534261e-07, "loss": 0.23385372757911682, "step": 3282, "token_acc": 0.9306206088992974 }, { "epoch": 0.6926160337552743, "grad_norm": 0.625, "learning_rate": 7.505367235272411e-07, "loss": 0.2261810153722763, "step": 3283, "token_acc": 0.9303396660909614 }, { "epoch": 0.6928270042194092, "grad_norm": 0.89453125, "learning_rate": 7.503903825500615e-07, "loss": 0.3221948742866516, "step": 3284, "token_acc": 0.9206730769230769 }, { "epoch": 0.6930379746835443, "grad_norm": 0.6953125, "learning_rate": 7.502440129386221e-07, "loss": 0.27527326345443726, "step": 3285, "token_acc": 0.9263059701492538 }, { "epoch": 0.6932489451476793, "grad_norm": 0.75, "learning_rate": 7.50097614709662e-07, "loss": 0.25186580419540405, "step": 3286, "token_acc": 0.9289139633286319 }, { "epoch": 0.6934599156118143, "grad_norm": 1.5, "learning_rate": 7.499511878799228e-07, "loss": 0.233280211687088, "step": 3287, "token_acc": 0.9342498505678422 }, { "epoch": 0.6936708860759494, "grad_norm": 0.625, "learning_rate": 7.498047324661498e-07, "loss": 0.23531746864318848, "step": 3288, "token_acc": 0.9350242234254773 }, { "epoch": 0.6938818565400844, "grad_norm": 0.7109375, "learning_rate": 7.496582484850911e-07, "loss": 0.2801533341407776, "step": 3289, "token_acc": 0.9203164735801074 }, { "epoch": 0.6940928270042194, "grad_norm": 0.7734375, "learning_rate": 7.495117359534986e-07, "loss": 0.26996666193008423, "step": 3290, "token_acc": 0.9294436906377205 }, { "epoch": 0.6943037974683545, "grad_norm": 0.765625, "learning_rate": 7.493651948881274e-07, "loss": 0.25945454835891724, "step": 3291, "token_acc": 0.924645390070922 }, { "epoch": 0.6945147679324895, "grad_norm": 0.68359375, "learning_rate": 7.492186253057355e-07, "loss": 0.22400985658168793, "step": 3292, "token_acc": 0.9333967649857279 }, { "epoch": 0.6947257383966244, "grad_norm": 0.73828125, "learning_rate": 7.490720272230844e-07, "loss": 0.24040983617305756, "step": 3293, "token_acc": 0.932527990571597 }, { "epoch": 0.6949367088607595, "grad_norm": 0.92578125, "learning_rate": 7.489254006569388e-07, "loss": 0.31589072942733765, "step": 3294, "token_acc": 0.9165617128463476 }, { "epoch": 0.6951476793248945, "grad_norm": 0.62890625, "learning_rate": 7.487787456240669e-07, "loss": 0.23786652088165283, "step": 3295, "token_acc": 0.9337226277372263 }, { "epoch": 0.6953586497890295, "grad_norm": 0.72265625, "learning_rate": 7.486320621412396e-07, "loss": 0.2557457685470581, "step": 3296, "token_acc": 0.930635838150289 }, { "epoch": 0.6955696202531646, "grad_norm": 0.79296875, "learning_rate": 7.484853502252316e-07, "loss": 0.2228940725326538, "step": 3297, "token_acc": 0.9356903383114904 }, { "epoch": 0.6957805907172996, "grad_norm": 0.93359375, "learning_rate": 7.483386098928209e-07, "loss": 0.3373339772224426, "step": 3298, "token_acc": 0.9101694915254237 }, { "epoch": 0.6959915611814346, "grad_norm": 0.62109375, "learning_rate": 7.481918411607881e-07, "loss": 0.21629446744918823, "step": 3299, "token_acc": 0.937422934648582 }, { "epoch": 0.6962025316455697, "grad_norm": 0.82421875, "learning_rate": 7.480450440459174e-07, "loss": 0.2676317095756531, "step": 3300, "token_acc": 0.9234379583455559 }, { "epoch": 0.6964135021097047, "grad_norm": 0.7890625, "learning_rate": 7.478982185649967e-07, "loss": 0.2761493921279907, "step": 3301, "token_acc": 0.9231438515081206 }, { "epoch": 0.6966244725738396, "grad_norm": 0.68359375, "learning_rate": 7.477513647348164e-07, "loss": 0.24874435365200043, "step": 3302, "token_acc": 0.9324551569506726 }, { "epoch": 0.6968354430379747, "grad_norm": 0.796875, "learning_rate": 7.476044825721705e-07, "loss": 0.311753511428833, "step": 3303, "token_acc": 0.9174553101997897 }, { "epoch": 0.6970464135021097, "grad_norm": 0.79296875, "learning_rate": 7.474575720938565e-07, "loss": 0.2517383098602295, "step": 3304, "token_acc": 0.9357523302263648 }, { "epoch": 0.6972573839662447, "grad_norm": 1.0703125, "learning_rate": 7.473106333166748e-07, "loss": 0.24386408925056458, "step": 3305, "token_acc": 0.9393463230672533 }, { "epoch": 0.6974683544303798, "grad_norm": 0.6171875, "learning_rate": 7.471636662574287e-07, "loss": 0.2450854778289795, "step": 3306, "token_acc": 0.9313017909649827 }, { "epoch": 0.6976793248945148, "grad_norm": 0.79296875, "learning_rate": 7.470166709329254e-07, "loss": 0.33244168758392334, "step": 3307, "token_acc": 0.9154801575280218 }, { "epoch": 0.6978902953586498, "grad_norm": 0.63671875, "learning_rate": 7.468696473599751e-07, "loss": 0.27744123339653015, "step": 3308, "token_acc": 0.9228441754916793 }, { "epoch": 0.6981012658227848, "grad_norm": 0.67578125, "learning_rate": 7.46722595555391e-07, "loss": 0.26290762424468994, "step": 3309, "token_acc": 0.9235578289286729 }, { "epoch": 0.6983122362869199, "grad_norm": 0.7578125, "learning_rate": 7.465755155359902e-07, "loss": 0.27858880162239075, "step": 3310, "token_acc": 0.9223327805417357 }, { "epoch": 0.6985232067510548, "grad_norm": 0.71484375, "learning_rate": 7.464284073185918e-07, "loss": 0.27536216378211975, "step": 3311, "token_acc": 0.9238011179758753 }, { "epoch": 0.6987341772151898, "grad_norm": 0.74609375, "learning_rate": 7.462812709200194e-07, "loss": 0.28444045782089233, "step": 3312, "token_acc": 0.9175691937424789 }, { "epoch": 0.6989451476793249, "grad_norm": 0.65625, "learning_rate": 7.461341063570993e-07, "loss": 0.25412771105766296, "step": 3313, "token_acc": 0.9323131253678635 }, { "epoch": 0.6991561181434599, "grad_norm": 0.7734375, "learning_rate": 7.459869136466608e-07, "loss": 0.24921976029872894, "step": 3314, "token_acc": 0.9332489718443531 }, { "epoch": 0.6993670886075949, "grad_norm": 1.0234375, "learning_rate": 7.458396928055368e-07, "loss": 0.3039208650588989, "step": 3315, "token_acc": 0.9219035997559487 }, { "epoch": 0.69957805907173, "grad_norm": 0.6796875, "learning_rate": 7.456924438505631e-07, "loss": 0.24910816550254822, "step": 3316, "token_acc": 0.928302977739231 }, { "epoch": 0.699789029535865, "grad_norm": 0.66015625, "learning_rate": 7.455451667985788e-07, "loss": 0.21517165005207062, "step": 3317, "token_acc": 0.9392047321721985 }, { "epoch": 0.7, "grad_norm": 0.671875, "learning_rate": 7.453978616664266e-07, "loss": 0.28196269273757935, "step": 3318, "token_acc": 0.917663421418637 }, { "epoch": 0.700210970464135, "grad_norm": 0.65625, "learning_rate": 7.452505284709517e-07, "loss": 0.24407142400741577, "step": 3319, "token_acc": 0.9341441708691783 }, { "epoch": 0.70042194092827, "grad_norm": 0.734375, "learning_rate": 7.451031672290031e-07, "loss": 0.264909029006958, "step": 3320, "token_acc": 0.9211346633416458 }, { "epoch": 0.700632911392405, "grad_norm": 0.80078125, "learning_rate": 7.44955777957433e-07, "loss": 0.3253895044326782, "step": 3321, "token_acc": 0.9121558923600371 }, { "epoch": 0.7008438818565401, "grad_norm": 0.7421875, "learning_rate": 7.448083606730963e-07, "loss": 0.2703359127044678, "step": 3322, "token_acc": 0.9170445452120566 }, { "epoch": 0.7010548523206751, "grad_norm": 0.609375, "learning_rate": 7.446609153928514e-07, "loss": 0.25644028186798096, "step": 3323, "token_acc": 0.9266702878870179 }, { "epoch": 0.7012658227848101, "grad_norm": 0.78125, "learning_rate": 7.445134421335599e-07, "loss": 0.31338074803352356, "step": 3324, "token_acc": 0.9161290322580645 }, { "epoch": 0.7014767932489452, "grad_norm": 1.2890625, "learning_rate": 7.443659409120871e-07, "loss": 0.2463836967945099, "step": 3325, "token_acc": 0.9260396315936367 }, { "epoch": 0.7016877637130802, "grad_norm": 0.55078125, "learning_rate": 7.442184117453006e-07, "loss": 0.22711589932441711, "step": 3326, "token_acc": 0.9325224442513756 }, { "epoch": 0.7018987341772152, "grad_norm": 0.86328125, "learning_rate": 7.440708546500715e-07, "loss": 0.2403079718351364, "step": 3327, "token_acc": 0.9337788578371811 }, { "epoch": 0.7021097046413503, "grad_norm": 0.8125, "learning_rate": 7.439232696432744e-07, "loss": 0.267782598733902, "step": 3328, "token_acc": 0.9209742895805142 }, { "epoch": 0.7023206751054852, "grad_norm": 0.75390625, "learning_rate": 7.437756567417869e-07, "loss": 0.26804906129837036, "step": 3329, "token_acc": 0.9222782557104716 }, { "epoch": 0.7025316455696202, "grad_norm": 0.68359375, "learning_rate": 7.436280159624898e-07, "loss": 0.26396381855010986, "step": 3330, "token_acc": 0.9230356663984982 }, { "epoch": 0.7027426160337553, "grad_norm": 0.71484375, "learning_rate": 7.434803473222669e-07, "loss": 0.21993085741996765, "step": 3331, "token_acc": 0.9355134474327629 }, { "epoch": 0.7029535864978903, "grad_norm": 0.78125, "learning_rate": 7.433326508380057e-07, "loss": 0.2584623694419861, "step": 3332, "token_acc": 0.9323511725796753 }, { "epoch": 0.7031645569620253, "grad_norm": 0.89453125, "learning_rate": 7.431849265265961e-07, "loss": 0.30114108324050903, "step": 3333, "token_acc": 0.9118492494410732 }, { "epoch": 0.7033755274261604, "grad_norm": 0.640625, "learning_rate": 7.430371744049319e-07, "loss": 0.26374220848083496, "step": 3334, "token_acc": 0.9310113864701942 }, { "epoch": 0.7035864978902954, "grad_norm": 0.64453125, "learning_rate": 7.428893944899099e-07, "loss": 0.28189247846603394, "step": 3335, "token_acc": 0.9258499413833529 }, { "epoch": 0.7037974683544304, "grad_norm": 0.7265625, "learning_rate": 7.427415867984296e-07, "loss": 0.2403988391160965, "step": 3336, "token_acc": 0.9329758713136729 }, { "epoch": 0.7040084388185655, "grad_norm": 0.7109375, "learning_rate": 7.425937513473947e-07, "loss": 0.2805175483226776, "step": 3337, "token_acc": 0.9237518910741301 }, { "epoch": 0.7042194092827004, "grad_norm": 0.97265625, "learning_rate": 7.424458881537106e-07, "loss": 0.1864641010761261, "step": 3338, "token_acc": 0.940152339499456 }, { "epoch": 0.7044303797468354, "grad_norm": 0.72265625, "learning_rate": 7.422979972342874e-07, "loss": 0.24845872819423676, "step": 3339, "token_acc": 0.9301139772045591 }, { "epoch": 0.7046413502109705, "grad_norm": 0.640625, "learning_rate": 7.421500786060373e-07, "loss": 0.26513051986694336, "step": 3340, "token_acc": 0.9286367795059469 }, { "epoch": 0.7048523206751055, "grad_norm": 0.87109375, "learning_rate": 7.420021322858762e-07, "loss": 0.28673577308654785, "step": 3341, "token_acc": 0.9184746877054569 }, { "epoch": 0.7050632911392405, "grad_norm": 0.65625, "learning_rate": 7.418541582907232e-07, "loss": 0.2266722023487091, "step": 3342, "token_acc": 0.940136476426799 }, { "epoch": 0.7052742616033755, "grad_norm": 0.87109375, "learning_rate": 7.417061566374998e-07, "loss": 0.2850743532180786, "step": 3343, "token_acc": 0.9285485421339315 }, { "epoch": 0.7054852320675106, "grad_norm": 0.63671875, "learning_rate": 7.415581273431318e-07, "loss": 0.24485686421394348, "step": 3344, "token_acc": 0.93140589569161 }, { "epoch": 0.7056962025316456, "grad_norm": 0.63671875, "learning_rate": 7.414100704245474e-07, "loss": 0.22903358936309814, "step": 3345, "token_acc": 0.9332566168009206 }, { "epoch": 0.7059071729957805, "grad_norm": 0.84375, "learning_rate": 7.412619858986782e-07, "loss": 0.30186450481414795, "step": 3346, "token_acc": 0.919414969888156 }, { "epoch": 0.7061181434599156, "grad_norm": 0.77734375, "learning_rate": 7.411138737824587e-07, "loss": 0.24068662524223328, "step": 3347, "token_acc": 0.9338303821062441 }, { "epoch": 0.7063291139240506, "grad_norm": 2.15625, "learning_rate": 7.409657340928271e-07, "loss": 0.24176135659217834, "step": 3348, "token_acc": 0.9312736443883984 }, { "epoch": 0.7065400843881856, "grad_norm": 0.6328125, "learning_rate": 7.408175668467244e-07, "loss": 0.24253857135772705, "step": 3349, "token_acc": 0.9339486587828093 }, { "epoch": 0.7067510548523207, "grad_norm": 0.7421875, "learning_rate": 7.406693720610945e-07, "loss": 0.25083667039871216, "step": 3350, "token_acc": 0.9261483038126689 }, { "epoch": 0.7069620253164557, "grad_norm": 0.77734375, "learning_rate": 7.405211497528848e-07, "loss": 0.3051280379295349, "step": 3351, "token_acc": 0.9205432937181663 }, { "epoch": 0.7071729957805907, "grad_norm": 0.9453125, "learning_rate": 7.403728999390461e-07, "loss": 0.2819688022136688, "step": 3352, "token_acc": 0.9256365740740741 }, { "epoch": 0.7073839662447258, "grad_norm": 0.62890625, "learning_rate": 7.402246226365317e-07, "loss": 0.258169025182724, "step": 3353, "token_acc": 0.9287765651924181 }, { "epoch": 0.7075949367088608, "grad_norm": 0.65625, "learning_rate": 7.400763178622987e-07, "loss": 0.23988346755504608, "step": 3354, "token_acc": 0.9323349972268441 }, { "epoch": 0.7078059071729957, "grad_norm": 0.69921875, "learning_rate": 7.399279856333063e-07, "loss": 0.2450937032699585, "step": 3355, "token_acc": 0.9234436343241728 }, { "epoch": 0.7080168776371308, "grad_norm": 0.765625, "learning_rate": 7.397796259665183e-07, "loss": 0.2500232458114624, "step": 3356, "token_acc": 0.9309791332263242 }, { "epoch": 0.7082278481012658, "grad_norm": 0.74609375, "learning_rate": 7.396312388789006e-07, "loss": 0.27502602338790894, "step": 3357, "token_acc": 0.9270743611828883 }, { "epoch": 0.7084388185654008, "grad_norm": 0.7109375, "learning_rate": 7.394828243874223e-07, "loss": 0.25553685426712036, "step": 3358, "token_acc": 0.9242155121373594 }, { "epoch": 0.7086497890295359, "grad_norm": 0.7734375, "learning_rate": 7.393343825090563e-07, "loss": 0.2624126672744751, "step": 3359, "token_acc": 0.9284636281316028 }, { "epoch": 0.7088607594936709, "grad_norm": 0.7578125, "learning_rate": 7.391859132607777e-07, "loss": 0.2519274353981018, "step": 3360, "token_acc": 0.928428285141712 }, { "epoch": 0.7090717299578059, "grad_norm": 0.828125, "learning_rate": 7.390374166595657e-07, "loss": 0.2473929077386856, "step": 3361, "token_acc": 0.9329208726799089 }, { "epoch": 0.709282700421941, "grad_norm": 0.71484375, "learning_rate": 7.388888927224019e-07, "loss": 0.24412378668785095, "step": 3362, "token_acc": 0.9379620700739312 }, { "epoch": 0.709493670886076, "grad_norm": 0.8046875, "learning_rate": 7.38740341466271e-07, "loss": 0.2687251567840576, "step": 3363, "token_acc": 0.9261862917398945 }, { "epoch": 0.7097046413502109, "grad_norm": 0.7890625, "learning_rate": 7.385917629081617e-07, "loss": 0.25928735733032227, "step": 3364, "token_acc": 0.9286926994906621 }, { "epoch": 0.709915611814346, "grad_norm": 0.65625, "learning_rate": 7.384431570650648e-07, "loss": 0.21984651684761047, "step": 3365, "token_acc": 0.937933025404157 }, { "epoch": 0.710126582278481, "grad_norm": 0.80859375, "learning_rate": 7.382945239539746e-07, "loss": 0.3065875768661499, "step": 3366, "token_acc": 0.921888170834504 }, { "epoch": 0.710337552742616, "grad_norm": 0.87890625, "learning_rate": 7.381458635918888e-07, "loss": 0.31620460748672485, "step": 3367, "token_acc": 0.9262782401902497 }, { "epoch": 0.7105485232067511, "grad_norm": 0.78125, "learning_rate": 7.379971759958078e-07, "loss": 0.2293989360332489, "step": 3368, "token_acc": 0.9340329835082459 }, { "epoch": 0.7107594936708861, "grad_norm": 0.74609375, "learning_rate": 7.378484611827354e-07, "loss": 0.2466340959072113, "step": 3369, "token_acc": 0.9325952458482579 }, { "epoch": 0.7109704641350211, "grad_norm": 0.7890625, "learning_rate": 7.376997191696784e-07, "loss": 0.3047744035720825, "step": 3370, "token_acc": 0.9098829294854343 }, { "epoch": 0.7111814345991562, "grad_norm": 0.703125, "learning_rate": 7.375509499736464e-07, "loss": 0.21767741441726685, "step": 3371, "token_acc": 0.9339181286549708 }, { "epoch": 0.7113924050632912, "grad_norm": 0.67578125, "learning_rate": 7.374021536116527e-07, "loss": 0.23855848610401154, "step": 3372, "token_acc": 0.9309623430962343 }, { "epoch": 0.7116033755274261, "grad_norm": 0.578125, "learning_rate": 7.372533301007134e-07, "loss": 0.2265334129333496, "step": 3373, "token_acc": 0.9338532640092432 }, { "epoch": 0.7118143459915611, "grad_norm": 0.73828125, "learning_rate": 7.371044794578478e-07, "loss": 0.25230637192726135, "step": 3374, "token_acc": 0.9270050125313283 }, { "epoch": 0.7120253164556962, "grad_norm": 0.69921875, "learning_rate": 7.369556017000781e-07, "loss": 0.2888968586921692, "step": 3375, "token_acc": 0.9227323628219485 }, { "epoch": 0.7122362869198312, "grad_norm": 0.94921875, "learning_rate": 7.368066968444298e-07, "loss": 0.2988494634628296, "step": 3376, "token_acc": 0.9203431372549019 }, { "epoch": 0.7124472573839662, "grad_norm": 0.78515625, "learning_rate": 7.366577649079314e-07, "loss": 0.23683826625347137, "step": 3377, "token_acc": 0.9335558252427184 }, { "epoch": 0.7126582278481013, "grad_norm": 0.6875, "learning_rate": 7.365088059076145e-07, "loss": 0.2521866261959076, "step": 3378, "token_acc": 0.9256700746062448 }, { "epoch": 0.7128691983122363, "grad_norm": 0.65234375, "learning_rate": 7.363598198605138e-07, "loss": 0.26697826385498047, "step": 3379, "token_acc": 0.9253088193047975 }, { "epoch": 0.7130801687763713, "grad_norm": 0.63671875, "learning_rate": 7.362108067836672e-07, "loss": 0.2645048499107361, "step": 3380, "token_acc": 0.9299516908212561 }, { "epoch": 0.7132911392405064, "grad_norm": 0.7265625, "learning_rate": 7.360617666941156e-07, "loss": 0.26569342613220215, "step": 3381, "token_acc": 0.9230528620581795 }, { "epoch": 0.7135021097046413, "grad_norm": 0.80859375, "learning_rate": 7.359126996089029e-07, "loss": 0.26770979166030884, "step": 3382, "token_acc": 0.9282456673761021 }, { "epoch": 0.7137130801687763, "grad_norm": 0.8125, "learning_rate": 7.357636055450763e-07, "loss": 0.25212687253952026, "step": 3383, "token_acc": 0.929081767663403 }, { "epoch": 0.7139240506329114, "grad_norm": 0.78125, "learning_rate": 7.356144845196859e-07, "loss": 0.23514798283576965, "step": 3384, "token_acc": 0.9295538409929553 }, { "epoch": 0.7141350210970464, "grad_norm": 0.68359375, "learning_rate": 7.354653365497851e-07, "loss": 0.2565401792526245, "step": 3385, "token_acc": 0.9235036496350365 }, { "epoch": 0.7143459915611814, "grad_norm": 0.84765625, "learning_rate": 7.353161616524299e-07, "loss": 0.3198130130767822, "step": 3386, "token_acc": 0.9202032277346085 }, { "epoch": 0.7145569620253165, "grad_norm": 0.73828125, "learning_rate": 7.3516695984468e-07, "loss": 0.28029710054397583, "step": 3387, "token_acc": 0.9204313611192072 }, { "epoch": 0.7147679324894515, "grad_norm": 0.75390625, "learning_rate": 7.350177311435979e-07, "loss": 0.251055508852005, "step": 3388, "token_acc": 0.9280453257790369 }, { "epoch": 0.7149789029535865, "grad_norm": 1.046875, "learning_rate": 7.348684755662489e-07, "loss": 0.2762995660305023, "step": 3389, "token_acc": 0.9220858895705522 }, { "epoch": 0.7151898734177216, "grad_norm": 1.015625, "learning_rate": 7.347191931297019e-07, "loss": 0.274197518825531, "step": 3390, "token_acc": 0.9272893215698205 }, { "epoch": 0.7154008438818565, "grad_norm": 0.7109375, "learning_rate": 7.345698838510284e-07, "loss": 0.2188062220811844, "step": 3391, "token_acc": 0.9328087167070218 }, { "epoch": 0.7156118143459915, "grad_norm": 0.8359375, "learning_rate": 7.344205477473034e-07, "loss": 0.2711675763130188, "step": 3392, "token_acc": 0.9274170683300255 }, { "epoch": 0.7158227848101266, "grad_norm": 0.69921875, "learning_rate": 7.342711848356048e-07, "loss": 0.29105037450790405, "step": 3393, "token_acc": 0.9208743842364532 }, { "epoch": 0.7160337552742616, "grad_norm": 0.6171875, "learning_rate": 7.34121795133013e-07, "loss": 0.2087700366973877, "step": 3394, "token_acc": 0.9365681756573597 }, { "epoch": 0.7162447257383966, "grad_norm": 0.671875, "learning_rate": 7.339723786566127e-07, "loss": 0.227100670337677, "step": 3395, "token_acc": 0.9341265235055136 }, { "epoch": 0.7164556962025317, "grad_norm": 0.85546875, "learning_rate": 7.338229354234904e-07, "loss": 0.2661236822605133, "step": 3396, "token_acc": 0.9316345556246115 }, { "epoch": 0.7166666666666667, "grad_norm": 0.6875, "learning_rate": 7.336734654507364e-07, "loss": 0.2545706033706665, "step": 3397, "token_acc": 0.9315230224321134 }, { "epoch": 0.7168776371308017, "grad_norm": 0.7421875, "learning_rate": 7.335239687554438e-07, "loss": 0.2594372034072876, "step": 3398, "token_acc": 0.9270497094899935 }, { "epoch": 0.7170886075949368, "grad_norm": 0.71875, "learning_rate": 7.333744453547088e-07, "loss": 0.22234737873077393, "step": 3399, "token_acc": 0.9344744584113399 }, { "epoch": 0.7172995780590717, "grad_norm": 0.7578125, "learning_rate": 7.332248952656309e-07, "loss": 0.2726156711578369, "step": 3400, "token_acc": 0.9225786551511412 }, { "epoch": 0.7172995780590717, "eval_loss": 0.4337516129016876, "eval_runtime": 245.6754, "eval_samples_per_second": 137.193, "eval_steps_per_second": 2.145, "eval_token_acc": 0.8991351072422148, "step": 3400 }, { "epoch": 0.7175105485232067, "grad_norm": 0.6953125, "learning_rate": 7.33075318505312e-07, "loss": 0.23455312848091125, "step": 3401, "token_acc": 0.9326805385556916 }, { "epoch": 0.7177215189873418, "grad_norm": 0.6875, "learning_rate": 7.329257150908577e-07, "loss": 0.22099569439888, "step": 3402, "token_acc": 0.9357442686470778 }, { "epoch": 0.7179324894514768, "grad_norm": 0.66015625, "learning_rate": 7.327760850393767e-07, "loss": 0.22732014954090118, "step": 3403, "token_acc": 0.9334895985936127 }, { "epoch": 0.7181434599156118, "grad_norm": 0.72265625, "learning_rate": 7.326264283679799e-07, "loss": 0.27537214756011963, "step": 3404, "token_acc": 0.9254278728606357 }, { "epoch": 0.7183544303797469, "grad_norm": 0.71484375, "learning_rate": 7.324767450937822e-07, "loss": 0.2849426865577698, "step": 3405, "token_acc": 0.9211045364891519 }, { "epoch": 0.7185654008438819, "grad_norm": 0.6875, "learning_rate": 7.323270352339009e-07, "loss": 0.2437846064567566, "step": 3406, "token_acc": 0.9278381642512077 }, { "epoch": 0.7187763713080169, "grad_norm": 0.67578125, "learning_rate": 7.321772988054568e-07, "loss": 0.2616499662399292, "step": 3407, "token_acc": 0.9253652058432935 }, { "epoch": 0.7189873417721518, "grad_norm": 0.80078125, "learning_rate": 7.320275358255736e-07, "loss": 0.29969626665115356, "step": 3408, "token_acc": 0.9191176470588235 }, { "epoch": 0.7191983122362869, "grad_norm": 0.734375, "learning_rate": 7.318777463113777e-07, "loss": 0.2836650013923645, "step": 3409, "token_acc": 0.9175605271222801 }, { "epoch": 0.7194092827004219, "grad_norm": 0.69140625, "learning_rate": 7.317279302799988e-07, "loss": 0.24275150895118713, "step": 3410, "token_acc": 0.928814446479979 }, { "epoch": 0.7196202531645569, "grad_norm": 0.63671875, "learning_rate": 7.315780877485697e-07, "loss": 0.2655457854270935, "step": 3411, "token_acc": 0.9225251076040172 }, { "epoch": 0.719831223628692, "grad_norm": 0.71875, "learning_rate": 7.314282187342264e-07, "loss": 0.25313666462898254, "step": 3412, "token_acc": 0.9281599457810912 }, { "epoch": 0.720042194092827, "grad_norm": 0.69140625, "learning_rate": 7.312783232541076e-07, "loss": 0.2617977559566498, "step": 3413, "token_acc": 0.9271700356718192 }, { "epoch": 0.720253164556962, "grad_norm": 0.69140625, "learning_rate": 7.311284013253547e-07, "loss": 0.2107054591178894, "step": 3414, "token_acc": 0.93561872909699 }, { "epoch": 0.7204641350210971, "grad_norm": 0.70703125, "learning_rate": 7.309784529651131e-07, "loss": 0.267084002494812, "step": 3415, "token_acc": 0.9256602543201826 }, { "epoch": 0.7206751054852321, "grad_norm": 0.69140625, "learning_rate": 7.308284781905302e-07, "loss": 0.2662098705768585, "step": 3416, "token_acc": 0.9273399014778325 }, { "epoch": 0.720886075949367, "grad_norm": 0.87109375, "learning_rate": 7.306784770187572e-07, "loss": 0.28366267681121826, "step": 3417, "token_acc": 0.9236514522821577 }, { "epoch": 0.7210970464135021, "grad_norm": 0.6953125, "learning_rate": 7.305284494669479e-07, "loss": 0.22313302755355835, "step": 3418, "token_acc": 0.9348341232227488 }, { "epoch": 0.7213080168776371, "grad_norm": 0.91015625, "learning_rate": 7.303783955522593e-07, "loss": 0.2925216853618622, "step": 3419, "token_acc": 0.9235255994815295 }, { "epoch": 0.7215189873417721, "grad_norm": 0.7421875, "learning_rate": 7.302283152918513e-07, "loss": 0.2905537784099579, "step": 3420, "token_acc": 0.9189526184538653 }, { "epoch": 0.7217299578059072, "grad_norm": 0.65234375, "learning_rate": 7.300782087028867e-07, "loss": 0.2569305896759033, "step": 3421, "token_acc": 0.929737609329446 }, { "epoch": 0.7219409282700422, "grad_norm": 0.63671875, "learning_rate": 7.299280758025315e-07, "loss": 0.30612248182296753, "step": 3422, "token_acc": 0.9201136722450268 }, { "epoch": 0.7221518987341772, "grad_norm": 0.72265625, "learning_rate": 7.297779166079549e-07, "loss": 0.21593360602855682, "step": 3423, "token_acc": 0.9365126676602087 }, { "epoch": 0.7223628691983123, "grad_norm": 0.76953125, "learning_rate": 7.296277311363286e-07, "loss": 0.23680874705314636, "step": 3424, "token_acc": 0.9292288162488099 }, { "epoch": 0.7225738396624473, "grad_norm": 0.69921875, "learning_rate": 7.294775194048277e-07, "loss": 0.2455701231956482, "step": 3425, "token_acc": 0.9248572287345957 }, { "epoch": 0.7227848101265822, "grad_norm": 0.828125, "learning_rate": 7.293272814306302e-07, "loss": 0.2871856689453125, "step": 3426, "token_acc": 0.9149499705709241 }, { "epoch": 0.7229957805907173, "grad_norm": 0.86328125, "learning_rate": 7.291770172309171e-07, "loss": 0.27855634689331055, "step": 3427, "token_acc": 0.9249557522123893 }, { "epoch": 0.7232067510548523, "grad_norm": 0.91015625, "learning_rate": 7.290267268228723e-07, "loss": 0.26882433891296387, "step": 3428, "token_acc": 0.9302746755206761 }, { "epoch": 0.7234177215189873, "grad_norm": 0.7734375, "learning_rate": 7.288764102236828e-07, "loss": 0.297696590423584, "step": 3429, "token_acc": 0.9210454669207732 }, { "epoch": 0.7236286919831224, "grad_norm": 0.76171875, "learning_rate": 7.287260674505385e-07, "loss": 0.2500598132610321, "step": 3430, "token_acc": 0.9316635745207174 }, { "epoch": 0.7238396624472574, "grad_norm": 0.69921875, "learning_rate": 7.285756985206327e-07, "loss": 0.25109484791755676, "step": 3431, "token_acc": 0.925201072386059 }, { "epoch": 0.7240506329113924, "grad_norm": 0.625, "learning_rate": 7.284253034511611e-07, "loss": 0.22874438762664795, "step": 3432, "token_acc": 0.9303537153825062 }, { "epoch": 0.7242616033755275, "grad_norm": 0.83984375, "learning_rate": 7.282748822593225e-07, "loss": 0.28008225560188293, "step": 3433, "token_acc": 0.9222462203023758 }, { "epoch": 0.7244725738396625, "grad_norm": 0.76171875, "learning_rate": 7.28124434962319e-07, "loss": 0.26574647426605225, "step": 3434, "token_acc": 0.9274028629856851 }, { "epoch": 0.7246835443037974, "grad_norm": 1.25, "learning_rate": 7.279739615773557e-07, "loss": 0.2846786379814148, "step": 3435, "token_acc": 0.9239462431276726 }, { "epoch": 0.7248945147679325, "grad_norm": 0.62109375, "learning_rate": 7.278234621216401e-07, "loss": 0.23861292004585266, "step": 3436, "token_acc": 0.9348377997179126 }, { "epoch": 0.7251054852320675, "grad_norm": 0.6171875, "learning_rate": 7.276729366123836e-07, "loss": 0.2573177218437195, "step": 3437, "token_acc": 0.9247089262613195 }, { "epoch": 0.7253164556962025, "grad_norm": 0.58203125, "learning_rate": 7.275223850667997e-07, "loss": 0.24252337217330933, "step": 3438, "token_acc": 0.937381404174573 }, { "epoch": 0.7255274261603376, "grad_norm": 0.87890625, "learning_rate": 7.273718075021054e-07, "loss": 0.2994224727153778, "step": 3439, "token_acc": 0.9204009433962265 }, { "epoch": 0.7257383966244726, "grad_norm": 0.6328125, "learning_rate": 7.272212039355202e-07, "loss": 0.24744462966918945, "step": 3440, "token_acc": 0.9312955692652832 }, { "epoch": 0.7259493670886076, "grad_norm": 0.66015625, "learning_rate": 7.270705743842674e-07, "loss": 0.22325459122657776, "step": 3441, "token_acc": 0.9352750809061489 }, { "epoch": 0.7261603375527426, "grad_norm": 0.6875, "learning_rate": 7.269199188655724e-07, "loss": 0.23546966910362244, "step": 3442, "token_acc": 0.931098696461825 }, { "epoch": 0.7263713080168777, "grad_norm": 1.2890625, "learning_rate": 7.267692373966639e-07, "loss": 0.2872110605239868, "step": 3443, "token_acc": 0.9239732012816778 }, { "epoch": 0.7265822784810126, "grad_norm": 0.65625, "learning_rate": 7.266185299947741e-07, "loss": 0.2730630338191986, "step": 3444, "token_acc": 0.9305689488910318 }, { "epoch": 0.7267932489451476, "grad_norm": 0.921875, "learning_rate": 7.264677966771371e-07, "loss": 0.2863925099372864, "step": 3445, "token_acc": 0.9295641187618446 }, { "epoch": 0.7270042194092827, "grad_norm": 0.69921875, "learning_rate": 7.263170374609906e-07, "loss": 0.2917223274707794, "step": 3446, "token_acc": 0.9238548483045806 }, { "epoch": 0.7272151898734177, "grad_norm": 0.7734375, "learning_rate": 7.261662523635757e-07, "loss": 0.27878227829933167, "step": 3447, "token_acc": 0.9217518248175183 }, { "epoch": 0.7274261603375527, "grad_norm": 0.78515625, "learning_rate": 7.260154414021353e-07, "loss": 0.21031001210212708, "step": 3448, "token_acc": 0.9406259140099444 }, { "epoch": 0.7276371308016878, "grad_norm": 1.0703125, "learning_rate": 7.258646045939161e-07, "loss": 0.26184573769569397, "step": 3449, "token_acc": 0.9314903846153846 }, { "epoch": 0.7278481012658228, "grad_norm": 0.82421875, "learning_rate": 7.257137419561678e-07, "loss": 0.2831631302833557, "step": 3450, "token_acc": 0.9312950875987633 }, { "epoch": 0.7280590717299578, "grad_norm": 0.6328125, "learning_rate": 7.255628535061429e-07, "loss": 0.24758580327033997, "step": 3451, "token_acc": 0.9322857142857143 }, { "epoch": 0.7282700421940929, "grad_norm": 0.60546875, "learning_rate": 7.254119392610963e-07, "loss": 0.22350794076919556, "step": 3452, "token_acc": 0.9354928790840548 }, { "epoch": 0.7284810126582278, "grad_norm": 1.3671875, "learning_rate": 7.252609992382866e-07, "loss": 0.2677094042301178, "step": 3453, "token_acc": 0.9247463359639233 }, { "epoch": 0.7286919831223628, "grad_norm": 0.8828125, "learning_rate": 7.251100334549751e-07, "loss": 0.2736966609954834, "step": 3454, "token_acc": 0.9266775777414076 }, { "epoch": 0.7289029535864979, "grad_norm": 0.64453125, "learning_rate": 7.24959041928426e-07, "loss": 0.24055925011634827, "step": 3455, "token_acc": 0.9353131431931785 }, { "epoch": 0.7291139240506329, "grad_norm": 0.72265625, "learning_rate": 7.248080246759064e-07, "loss": 0.2612450122833252, "step": 3456, "token_acc": 0.9307384087006296 }, { "epoch": 0.7293248945147679, "grad_norm": 1.078125, "learning_rate": 7.246569817146864e-07, "loss": 0.31522637605667114, "step": 3457, "token_acc": 0.9149286498353457 }, { "epoch": 0.729535864978903, "grad_norm": 0.67578125, "learning_rate": 7.245059130620389e-07, "loss": 0.28033196926116943, "step": 3458, "token_acc": 0.9221264367816092 }, { "epoch": 0.729746835443038, "grad_norm": 0.6484375, "learning_rate": 7.243548187352403e-07, "loss": 0.22926190495491028, "step": 3459, "token_acc": 0.9320474777448071 }, { "epoch": 0.729957805907173, "grad_norm": 0.66015625, "learning_rate": 7.242036987515692e-07, "loss": 0.24089321494102478, "step": 3460, "token_acc": 0.9263618943334245 }, { "epoch": 0.7301687763713081, "grad_norm": 0.90625, "learning_rate": 7.240525531283073e-07, "loss": 0.2575099468231201, "step": 3461, "token_acc": 0.9281752634498058 }, { "epoch": 0.730379746835443, "grad_norm": 0.72265625, "learning_rate": 7.239013818827397e-07, "loss": 0.22139139473438263, "step": 3462, "token_acc": 0.9355751099937146 }, { "epoch": 0.730590717299578, "grad_norm": 0.76953125, "learning_rate": 7.23750185032154e-07, "loss": 0.23429499566555023, "step": 3463, "token_acc": 0.9305210918114144 }, { "epoch": 0.7308016877637131, "grad_norm": 0.73828125, "learning_rate": 7.23598962593841e-07, "loss": 0.2782633304595947, "step": 3464, "token_acc": 0.9219402505621587 }, { "epoch": 0.7310126582278481, "grad_norm": 0.94140625, "learning_rate": 7.234477145850938e-07, "loss": 0.25028058886528015, "step": 3465, "token_acc": 0.930733569675484 }, { "epoch": 0.7312236286919831, "grad_norm": 0.77734375, "learning_rate": 7.232964410232094e-07, "loss": 0.24994972348213196, "step": 3466, "token_acc": 0.9296775831429297 }, { "epoch": 0.7314345991561182, "grad_norm": 0.71484375, "learning_rate": 7.231451419254869e-07, "loss": 0.21919870376586914, "step": 3467, "token_acc": 0.9379229185054427 }, { "epoch": 0.7316455696202532, "grad_norm": 0.6171875, "learning_rate": 7.229938173092288e-07, "loss": 0.2542004883289337, "step": 3468, "token_acc": 0.9229056498747564 }, { "epoch": 0.7318565400843882, "grad_norm": 0.734375, "learning_rate": 7.228424671917403e-07, "loss": 0.2660195827484131, "step": 3469, "token_acc": 0.9265447667087011 }, { "epoch": 0.7320675105485233, "grad_norm": 0.78515625, "learning_rate": 7.226910915903295e-07, "loss": 0.2356107532978058, "step": 3470, "token_acc": 0.9313457330415755 }, { "epoch": 0.7322784810126582, "grad_norm": 0.6875, "learning_rate": 7.225396905223076e-07, "loss": 0.28633275628089905, "step": 3471, "token_acc": 0.9195695270461626 }, { "epoch": 0.7324894514767932, "grad_norm": 0.5703125, "learning_rate": 7.223882640049885e-07, "loss": 0.23838719725608826, "step": 3472, "token_acc": 0.934750516681429 }, { "epoch": 0.7327004219409282, "grad_norm": 0.8359375, "learning_rate": 7.222368120556891e-07, "loss": 0.3123356103897095, "step": 3473, "token_acc": 0.9257457846952011 }, { "epoch": 0.7329113924050633, "grad_norm": 0.73828125, "learning_rate": 7.220853346917292e-07, "loss": 0.2334568202495575, "step": 3474, "token_acc": 0.9300087489063867 }, { "epoch": 0.7331223628691983, "grad_norm": 0.77734375, "learning_rate": 7.219338319304315e-07, "loss": 0.2716429829597473, "step": 3475, "token_acc": 0.9287688056493706 }, { "epoch": 0.7333333333333333, "grad_norm": 0.80078125, "learning_rate": 7.217823037891219e-07, "loss": 0.23722827434539795, "step": 3476, "token_acc": 0.9290396124251924 }, { "epoch": 0.7335443037974684, "grad_norm": 0.7109375, "learning_rate": 7.216307502851286e-07, "loss": 0.2630718946456909, "step": 3477, "token_acc": 0.9288326300984528 }, { "epoch": 0.7337552742616034, "grad_norm": 0.7578125, "learning_rate": 7.214791714357833e-07, "loss": 0.27332985401153564, "step": 3478, "token_acc": 0.9261538461538461 }, { "epoch": 0.7339662447257383, "grad_norm": 0.765625, "learning_rate": 7.213275672584199e-07, "loss": 0.29321518540382385, "step": 3479, "token_acc": 0.9234828496042217 }, { "epoch": 0.7341772151898734, "grad_norm": 0.76171875, "learning_rate": 7.21175937770376e-07, "loss": 0.25036734342575073, "step": 3480, "token_acc": 0.9299655568312285 }, { "epoch": 0.7343881856540084, "grad_norm": 0.75, "learning_rate": 7.210242829889917e-07, "loss": 0.26514938473701477, "step": 3481, "token_acc": 0.9223424570337365 }, { "epoch": 0.7345991561181434, "grad_norm": 0.8203125, "learning_rate": 7.208726029316097e-07, "loss": 0.30216705799102783, "step": 3482, "token_acc": 0.9154809334092202 }, { "epoch": 0.7348101265822785, "grad_norm": 0.65234375, "learning_rate": 7.207208976155764e-07, "loss": 0.24623897671699524, "step": 3483, "token_acc": 0.9292112106327651 }, { "epoch": 0.7350210970464135, "grad_norm": 0.69921875, "learning_rate": 7.2056916705824e-07, "loss": 0.26133692264556885, "step": 3484, "token_acc": 0.9263493751661792 }, { "epoch": 0.7352320675105485, "grad_norm": 0.609375, "learning_rate": 7.204174112769524e-07, "loss": 0.23377394676208496, "step": 3485, "token_acc": 0.9315716272600835 }, { "epoch": 0.7354430379746836, "grad_norm": 0.7734375, "learning_rate": 7.202656302890685e-07, "loss": 0.2723749577999115, "step": 3486, "token_acc": 0.9241649112247968 }, { "epoch": 0.7356540084388186, "grad_norm": 0.63671875, "learning_rate": 7.201138241119453e-07, "loss": 0.26528120040893555, "step": 3487, "token_acc": 0.9275613275613276 }, { "epoch": 0.7358649789029535, "grad_norm": 0.88671875, "learning_rate": 7.199619927629434e-07, "loss": 0.2876410186290741, "step": 3488, "token_acc": 0.9221946683199008 }, { "epoch": 0.7360759493670886, "grad_norm": 0.84375, "learning_rate": 7.198101362594258e-07, "loss": 0.29341617226600647, "step": 3489, "token_acc": 0.9196480938416423 }, { "epoch": 0.7362869198312236, "grad_norm": 0.765625, "learning_rate": 7.196582546187588e-07, "loss": 0.26697084307670593, "step": 3490, "token_acc": 0.9259369422962522 }, { "epoch": 0.7364978902953586, "grad_norm": 0.73828125, "learning_rate": 7.19506347858311e-07, "loss": 0.2466883510351181, "step": 3491, "token_acc": 0.9340590979782271 }, { "epoch": 0.7367088607594937, "grad_norm": 0.67578125, "learning_rate": 7.193544159954546e-07, "loss": 0.2871992886066437, "step": 3492, "token_acc": 0.92128445581131 }, { "epoch": 0.7369198312236287, "grad_norm": 0.82421875, "learning_rate": 7.192024590475639e-07, "loss": 0.26542118191719055, "step": 3493, "token_acc": 0.9291826497491885 }, { "epoch": 0.7371308016877637, "grad_norm": 0.6875, "learning_rate": 7.190504770320168e-07, "loss": 0.22193971276283264, "step": 3494, "token_acc": 0.9337330388135059 }, { "epoch": 0.7373417721518988, "grad_norm": 0.76171875, "learning_rate": 7.188984699661937e-07, "loss": 0.2929496765136719, "step": 3495, "token_acc": 0.9232015554115359 }, { "epoch": 0.7375527426160338, "grad_norm": 0.734375, "learning_rate": 7.187464378674779e-07, "loss": 0.24712993204593658, "step": 3496, "token_acc": 0.9345029239766082 }, { "epoch": 0.7377637130801687, "grad_norm": 0.79296875, "learning_rate": 7.185943807532553e-07, "loss": 0.29557907581329346, "step": 3497, "token_acc": 0.9189446916719644 }, { "epoch": 0.7379746835443038, "grad_norm": 0.70703125, "learning_rate": 7.184422986409152e-07, "loss": 0.23501691222190857, "step": 3498, "token_acc": 0.9306393244873341 }, { "epoch": 0.7381856540084388, "grad_norm": 1.1171875, "learning_rate": 7.182901915478493e-07, "loss": 0.2587708830833435, "step": 3499, "token_acc": 0.9330143540669856 }, { "epoch": 0.7383966244725738, "grad_norm": 0.6796875, "learning_rate": 7.181380594914524e-07, "loss": 0.267791748046875, "step": 3500, "token_acc": 0.9290060851926978 }, { "epoch": 0.7386075949367089, "grad_norm": 0.64453125, "learning_rate": 7.179859024891222e-07, "loss": 0.24980416893959045, "step": 3501, "token_acc": 0.9275545713381841 }, { "epoch": 0.7388185654008439, "grad_norm": 0.88671875, "learning_rate": 7.178337205582588e-07, "loss": 0.2491539567708969, "step": 3502, "token_acc": 0.9238320920785376 }, { "epoch": 0.7390295358649789, "grad_norm": 0.65234375, "learning_rate": 7.17681513716266e-07, "loss": 0.2715263366699219, "step": 3503, "token_acc": 0.9277247759468055 }, { "epoch": 0.739240506329114, "grad_norm": 0.765625, "learning_rate": 7.175292819805495e-07, "loss": 0.30731862783432007, "step": 3504, "token_acc": 0.9149692854833495 }, { "epoch": 0.739451476793249, "grad_norm": 0.5546875, "learning_rate": 7.173770253685185e-07, "loss": 0.23755382001399994, "step": 3505, "token_acc": 0.9290015052684395 }, { "epoch": 0.739662447257384, "grad_norm": 0.63671875, "learning_rate": 7.172247438975848e-07, "loss": 0.20931214094161987, "step": 3506, "token_acc": 0.937195296816748 }, { "epoch": 0.7398734177215189, "grad_norm": 0.7265625, "learning_rate": 7.170724375851631e-07, "loss": 0.24871158599853516, "step": 3507, "token_acc": 0.9266161910308678 }, { "epoch": 0.740084388185654, "grad_norm": 0.67578125, "learning_rate": 7.169201064486708e-07, "loss": 0.22883504629135132, "step": 3508, "token_acc": 0.9290760101723651 }, { "epoch": 0.740295358649789, "grad_norm": 0.69140625, "learning_rate": 7.167677505055283e-07, "loss": 0.2735205888748169, "step": 3509, "token_acc": 0.9263327082775248 }, { "epoch": 0.740506329113924, "grad_norm": 0.84765625, "learning_rate": 7.166153697731589e-07, "loss": 0.24035190045833588, "step": 3510, "token_acc": 0.9310667498440425 }, { "epoch": 0.7407172995780591, "grad_norm": 0.6640625, "learning_rate": 7.164629642689885e-07, "loss": 0.2558228671550751, "step": 3511, "token_acc": 0.9319765821020352 }, { "epoch": 0.7409282700421941, "grad_norm": 0.640625, "learning_rate": 7.16310534010446e-07, "loss": 0.21581366658210754, "step": 3512, "token_acc": 0.9423017587164455 }, { "epoch": 0.7411392405063291, "grad_norm": 0.63671875, "learning_rate": 7.16158079014963e-07, "loss": 0.2450946867465973, "step": 3513, "token_acc": 0.9277247759468055 }, { "epoch": 0.7413502109704642, "grad_norm": 0.63671875, "learning_rate": 7.160055992999743e-07, "loss": 0.25278109312057495, "step": 3514, "token_acc": 0.9312257348863006 }, { "epoch": 0.7415611814345991, "grad_norm": 0.71484375, "learning_rate": 7.158530948829171e-07, "loss": 0.28583824634552, "step": 3515, "token_acc": 0.9163424124513618 }, { "epoch": 0.7417721518987341, "grad_norm": 0.71484375, "learning_rate": 7.157005657812314e-07, "loss": 0.27128326892852783, "step": 3516, "token_acc": 0.9258917589175891 }, { "epoch": 0.7419831223628692, "grad_norm": 0.62890625, "learning_rate": 7.155480120123604e-07, "loss": 0.22974559664726257, "step": 3517, "token_acc": 0.9328859060402684 }, { "epoch": 0.7421940928270042, "grad_norm": 0.75390625, "learning_rate": 7.153954335937498e-07, "loss": 0.21082457900047302, "step": 3518, "token_acc": 0.934711964549483 }, { "epoch": 0.7424050632911392, "grad_norm": 0.890625, "learning_rate": 7.152428305428483e-07, "loss": 0.22847628593444824, "step": 3519, "token_acc": 0.9385630870779386 }, { "epoch": 0.7426160337552743, "grad_norm": 0.7734375, "learning_rate": 7.150902028771074e-07, "loss": 0.27989983558654785, "step": 3520, "token_acc": 0.9226044226044227 }, { "epoch": 0.7428270042194093, "grad_norm": 0.6171875, "learning_rate": 7.14937550613981e-07, "loss": 0.2581920027732849, "step": 3521, "token_acc": 0.9269679736605807 }, { "epoch": 0.7430379746835443, "grad_norm": 0.75, "learning_rate": 7.147848737709268e-07, "loss": 0.29302847385406494, "step": 3522, "token_acc": 0.922842329978652 }, { "epoch": 0.7432489451476794, "grad_norm": 0.734375, "learning_rate": 7.146321723654042e-07, "loss": 0.2533879280090332, "step": 3523, "token_acc": 0.9344645991808075 }, { "epoch": 0.7434599156118143, "grad_norm": 0.6953125, "learning_rate": 7.144794464148761e-07, "loss": 0.273163378238678, "step": 3524, "token_acc": 0.9212684527063969 }, { "epoch": 0.7436708860759493, "grad_norm": 0.61328125, "learning_rate": 7.143266959368079e-07, "loss": 0.2230093777179718, "step": 3525, "token_acc": 0.9378411158277744 }, { "epoch": 0.7438818565400844, "grad_norm": 0.703125, "learning_rate": 7.14173920948668e-07, "loss": 0.2582929730415344, "step": 3526, "token_acc": 0.9237704918032786 }, { "epoch": 0.7440928270042194, "grad_norm": 0.82421875, "learning_rate": 7.140211214679277e-07, "loss": 0.30780112743377686, "step": 3527, "token_acc": 0.9162699523942873 }, { "epoch": 0.7443037974683544, "grad_norm": 0.78515625, "learning_rate": 7.138682975120604e-07, "loss": 0.26564908027648926, "step": 3528, "token_acc": 0.9269679736605807 }, { "epoch": 0.7445147679324895, "grad_norm": 0.85546875, "learning_rate": 7.137154490985434e-07, "loss": 0.3046082854270935, "step": 3529, "token_acc": 0.9192225933798968 }, { "epoch": 0.7447257383966245, "grad_norm": 0.66796875, "learning_rate": 7.135625762448556e-07, "loss": 0.2720472812652588, "step": 3530, "token_acc": 0.9205593573341267 }, { "epoch": 0.7449367088607595, "grad_norm": 0.67578125, "learning_rate": 7.134096789684797e-07, "loss": 0.27744001150131226, "step": 3531, "token_acc": 0.9187632615944226 }, { "epoch": 0.7451476793248946, "grad_norm": 0.59765625, "learning_rate": 7.132567572869008e-07, "loss": 0.2300909161567688, "step": 3532, "token_acc": 0.934716459197787 }, { "epoch": 0.7453586497890295, "grad_norm": 0.7578125, "learning_rate": 7.131038112176067e-07, "loss": 0.2688423991203308, "step": 3533, "token_acc": 0.9294330518697226 }, { "epoch": 0.7455696202531645, "grad_norm": 0.76171875, "learning_rate": 7.129508407780882e-07, "loss": 0.25981172919273376, "step": 3534, "token_acc": 0.9300866447564984 }, { "epoch": 0.7457805907172996, "grad_norm": 0.73046875, "learning_rate": 7.127978459858386e-07, "loss": 0.2795941233634949, "step": 3535, "token_acc": 0.9207097310784585 }, { "epoch": 0.7459915611814346, "grad_norm": 0.828125, "learning_rate": 7.126448268583541e-07, "loss": 0.25526419281959534, "step": 3536, "token_acc": 0.9247104247104247 }, { "epoch": 0.7462025316455696, "grad_norm": 0.6796875, "learning_rate": 7.124917834131339e-07, "loss": 0.2330908179283142, "step": 3537, "token_acc": 0.9349419670128284 }, { "epoch": 0.7464135021097047, "grad_norm": 0.8203125, "learning_rate": 7.123387156676797e-07, "loss": 0.25593847036361694, "step": 3538, "token_acc": 0.9323104693140795 }, { "epoch": 0.7466244725738397, "grad_norm": 1.4453125, "learning_rate": 7.121856236394963e-07, "loss": 0.21583817899227142, "step": 3539, "token_acc": 0.9380061435353253 }, { "epoch": 0.7468354430379747, "grad_norm": 1.53125, "learning_rate": 7.120325073460907e-07, "loss": 0.26078498363494873, "step": 3540, "token_acc": 0.9325547445255474 }, { "epoch": 0.7470464135021097, "grad_norm": 0.59765625, "learning_rate": 7.118793668049734e-07, "loss": 0.19006752967834473, "step": 3541, "token_acc": 0.9430793157076205 }, { "epoch": 0.7472573839662447, "grad_norm": 0.671875, "learning_rate": 7.117262020336571e-07, "loss": 0.2612856924533844, "step": 3542, "token_acc": 0.9203612479474549 }, { "epoch": 0.7474683544303797, "grad_norm": 0.796875, "learning_rate": 7.115730130496577e-07, "loss": 0.2688848376274109, "step": 3543, "token_acc": 0.9302475088396014 }, { "epoch": 0.7476793248945147, "grad_norm": 0.640625, "learning_rate": 7.114197998704933e-07, "loss": 0.23038744926452637, "step": 3544, "token_acc": 0.9311090515909793 }, { "epoch": 0.7478902953586498, "grad_norm": 0.6640625, "learning_rate": 7.112665625136856e-07, "loss": 0.18869724869728088, "step": 3545, "token_acc": 0.9492774992627544 }, { "epoch": 0.7481012658227848, "grad_norm": 0.72265625, "learning_rate": 7.111133009967582e-07, "loss": 0.2565477788448334, "step": 3546, "token_acc": 0.9254714157437893 }, { "epoch": 0.7483122362869198, "grad_norm": 0.77734375, "learning_rate": 7.10960015337238e-07, "loss": 0.26111316680908203, "step": 3547, "token_acc": 0.9256044637321761 }, { "epoch": 0.7485232067510549, "grad_norm": 0.76171875, "learning_rate": 7.108067055526543e-07, "loss": 0.2826637625694275, "step": 3548, "token_acc": 0.9211292200232829 }, { "epoch": 0.7487341772151899, "grad_norm": 0.70703125, "learning_rate": 7.106533716605399e-07, "loss": 0.27930188179016113, "step": 3549, "token_acc": 0.9240506329113924 }, { "epoch": 0.7489451476793249, "grad_norm": 0.6875, "learning_rate": 7.105000136784293e-07, "loss": 0.25697407126426697, "step": 3550, "token_acc": 0.9318866787221217 }, { "epoch": 0.74915611814346, "grad_norm": 0.6640625, "learning_rate": 7.103466316238606e-07, "loss": 0.21250073611736298, "step": 3551, "token_acc": 0.9401049706699599 }, { "epoch": 0.7493670886075949, "grad_norm": 0.71484375, "learning_rate": 7.101932255143739e-07, "loss": 0.2584548890590668, "step": 3552, "token_acc": 0.9262717321313587 }, { "epoch": 0.7495780590717299, "grad_norm": 0.6953125, "learning_rate": 7.10039795367513e-07, "loss": 0.260434627532959, "step": 3553, "token_acc": 0.9307210031347962 }, { "epoch": 0.749789029535865, "grad_norm": 0.6328125, "learning_rate": 7.098863412008239e-07, "loss": 0.2433972805738449, "step": 3554, "token_acc": 0.9266537503459729 }, { "epoch": 0.75, "grad_norm": 0.56640625, "learning_rate": 7.097328630318551e-07, "loss": 0.22033482789993286, "step": 3555, "token_acc": 0.9384853168469861 }, { "epoch": 0.750210970464135, "grad_norm": 2.390625, "learning_rate": 7.095793608781582e-07, "loss": 0.34649893641471863, "step": 3556, "token_acc": 0.9165919282511211 }, { "epoch": 0.7504219409282701, "grad_norm": 0.76953125, "learning_rate": 7.094258347572874e-07, "loss": 0.29037395119667053, "step": 3557, "token_acc": 0.9247279322853688 }, { "epoch": 0.7506329113924051, "grad_norm": 0.703125, "learning_rate": 7.092722846868001e-07, "loss": 0.2765369117259979, "step": 3558, "token_acc": 0.9246338215712383 }, { "epoch": 0.75084388185654, "grad_norm": 0.6796875, "learning_rate": 7.091187106842558e-07, "loss": 0.27818629145622253, "step": 3559, "token_acc": 0.9182371701942592 }, { "epoch": 0.7510548523206751, "grad_norm": 0.76953125, "learning_rate": 7.089651127672166e-07, "loss": 0.24161913990974426, "step": 3560, "token_acc": 0.9316715542521994 }, { "epoch": 0.7512658227848101, "grad_norm": 0.765625, "learning_rate": 7.088114909532484e-07, "loss": 0.2660974860191345, "step": 3561, "token_acc": 0.9266568483063329 }, { "epoch": 0.7514767932489451, "grad_norm": 0.67578125, "learning_rate": 7.086578452599188e-07, "loss": 0.2507850229740143, "step": 3562, "token_acc": 0.9287349742415569 }, { "epoch": 0.7516877637130802, "grad_norm": 0.70703125, "learning_rate": 7.085041757047985e-07, "loss": 0.22469371557235718, "step": 3563, "token_acc": 0.9381771829190567 }, { "epoch": 0.7518987341772152, "grad_norm": 0.765625, "learning_rate": 7.083504823054607e-07, "loss": 0.26905137300491333, "step": 3564, "token_acc": 0.9288444575124963 }, { "epoch": 0.7521097046413502, "grad_norm": 1.34375, "learning_rate": 7.081967650794822e-07, "loss": 0.2733200788497925, "step": 3565, "token_acc": 0.9256469046839174 }, { "epoch": 0.7523206751054853, "grad_norm": 0.734375, "learning_rate": 7.080430240444413e-07, "loss": 0.2558225691318512, "step": 3566, "token_acc": 0.9288224956063269 }, { "epoch": 0.7525316455696203, "grad_norm": 0.66015625, "learning_rate": 7.078892592179197e-07, "loss": 0.2642616033554077, "step": 3567, "token_acc": 0.9258820041506077 }, { "epoch": 0.7527426160337553, "grad_norm": 0.9296875, "learning_rate": 7.077354706175017e-07, "loss": 0.2921859622001648, "step": 3568, "token_acc": 0.9237755433019786 }, { "epoch": 0.7529535864978903, "grad_norm": 0.70703125, "learning_rate": 7.075816582607744e-07, "loss": 0.2635979652404785, "step": 3569, "token_acc": 0.9235163440177722 }, { "epoch": 0.7531645569620253, "grad_norm": 0.71484375, "learning_rate": 7.074278221653275e-07, "loss": 0.24422971904277802, "step": 3570, "token_acc": 0.9297912713472486 }, { "epoch": 0.7533755274261603, "grad_norm": 0.796875, "learning_rate": 7.072739623487536e-07, "loss": 0.31625938415527344, "step": 3571, "token_acc": 0.9163441500446562 }, { "epoch": 0.7535864978902953, "grad_norm": 0.80078125, "learning_rate": 7.071200788286475e-07, "loss": 0.25472718477249146, "step": 3572, "token_acc": 0.9297086212075698 }, { "epoch": 0.7537974683544304, "grad_norm": 0.5625, "learning_rate": 7.069661716226076e-07, "loss": 0.2336064875125885, "step": 3573, "token_acc": 0.9268774703557312 }, { "epoch": 0.7540084388185654, "grad_norm": 0.7109375, "learning_rate": 7.068122407482342e-07, "loss": 0.278248131275177, "step": 3574, "token_acc": 0.9274220032840722 }, { "epoch": 0.7542194092827004, "grad_norm": 0.8828125, "learning_rate": 7.066582862231305e-07, "loss": 0.24679693579673767, "step": 3575, "token_acc": 0.9312262218320164 }, { "epoch": 0.7544303797468355, "grad_norm": 0.8046875, "learning_rate": 7.065043080649027e-07, "loss": 0.30121713876724243, "step": 3576, "token_acc": 0.9174441617189709 }, { "epoch": 0.7546413502109705, "grad_norm": 0.78125, "learning_rate": 7.063503062911594e-07, "loss": 0.26413196325302124, "step": 3577, "token_acc": 0.9267580974613364 }, { "epoch": 0.7548523206751054, "grad_norm": 0.765625, "learning_rate": 7.061962809195122e-07, "loss": 0.2629912793636322, "step": 3578, "token_acc": 0.9266568483063329 }, { "epoch": 0.7550632911392405, "grad_norm": 0.9453125, "learning_rate": 7.060422319675749e-07, "loss": 0.2774139642715454, "step": 3579, "token_acc": 0.927485380116959 }, { "epoch": 0.7552742616033755, "grad_norm": 0.6328125, "learning_rate": 7.058881594529643e-07, "loss": 0.23301386833190918, "step": 3580, "token_acc": 0.9320872274143303 }, { "epoch": 0.7554852320675105, "grad_norm": 0.7421875, "learning_rate": 7.057340633933003e-07, "loss": 0.26888787746429443, "step": 3581, "token_acc": 0.9204260651629073 }, { "epoch": 0.7556962025316456, "grad_norm": 0.82421875, "learning_rate": 7.055799438062047e-07, "loss": 0.2563207447528839, "step": 3582, "token_acc": 0.9224785407725322 }, { "epoch": 0.7559071729957806, "grad_norm": 0.62890625, "learning_rate": 7.054258007093026e-07, "loss": 0.2346584051847458, "step": 3583, "token_acc": 0.935052827843381 }, { "epoch": 0.7561181434599156, "grad_norm": 0.6796875, "learning_rate": 7.052716341202214e-07, "loss": 0.2905561625957489, "step": 3584, "token_acc": 0.9212417361310722 }, { "epoch": 0.7563291139240507, "grad_norm": 0.80078125, "learning_rate": 7.051174440565915e-07, "loss": 0.25182196497917175, "step": 3585, "token_acc": 0.923332327195895 }, { "epoch": 0.7565400843881857, "grad_norm": 0.60546875, "learning_rate": 7.049632305360458e-07, "loss": 0.21153762936592102, "step": 3586, "token_acc": 0.9364291035510306 }, { "epoch": 0.7567510548523206, "grad_norm": 0.7578125, "learning_rate": 7.048089935762198e-07, "loss": 0.2465967983007431, "step": 3587, "token_acc": 0.9290181363352095 }, { "epoch": 0.7569620253164557, "grad_norm": 0.8671875, "learning_rate": 7.046547331947518e-07, "loss": 0.32247695326805115, "step": 3588, "token_acc": 0.9171901770416905 }, { "epoch": 0.7571729957805907, "grad_norm": 0.92578125, "learning_rate": 7.04500449409283e-07, "loss": 0.231221541762352, "step": 3589, "token_acc": 0.9297281665702718 }, { "epoch": 0.7573839662447257, "grad_norm": 0.984375, "learning_rate": 7.043461422374571e-07, "loss": 0.25836244225502014, "step": 3590, "token_acc": 0.9286784537009289 }, { "epoch": 0.7575949367088608, "grad_norm": 0.7109375, "learning_rate": 7.041918116969199e-07, "loss": 0.2843126654624939, "step": 3591, "token_acc": 0.9241316270566727 }, { "epoch": 0.7578059071729958, "grad_norm": 0.87890625, "learning_rate": 7.04037457805321e-07, "loss": 0.28008750081062317, "step": 3592, "token_acc": 0.9181768259198243 }, { "epoch": 0.7580168776371308, "grad_norm": 1.1875, "learning_rate": 7.038830805803119e-07, "loss": 0.2706752419471741, "step": 3593, "token_acc": 0.9221065909807632 }, { "epoch": 0.7582278481012659, "grad_norm": 0.91796875, "learning_rate": 7.037286800395468e-07, "loss": 0.25725018978118896, "step": 3594, "token_acc": 0.9288178112260896 }, { "epoch": 0.7584388185654009, "grad_norm": 0.625, "learning_rate": 7.035742562006827e-07, "loss": 0.21777284145355225, "step": 3595, "token_acc": 0.9417696811971373 }, { "epoch": 0.7586497890295358, "grad_norm": 0.66796875, "learning_rate": 7.034198090813794e-07, "loss": 0.26400330662727356, "step": 3596, "token_acc": 0.9276393831553974 }, { "epoch": 0.7588607594936709, "grad_norm": 0.67578125, "learning_rate": 7.032653386992992e-07, "loss": 0.20463129878044128, "step": 3597, "token_acc": 0.9382901866345575 }, { "epoch": 0.7590717299578059, "grad_norm": 0.80078125, "learning_rate": 7.031108450721072e-07, "loss": 0.23509961366653442, "step": 3598, "token_acc": 0.9291815927252687 }, { "epoch": 0.7592827004219409, "grad_norm": 0.70703125, "learning_rate": 7.029563282174709e-07, "loss": 0.27354031801223755, "step": 3599, "token_acc": 0.9278163696768348 }, { "epoch": 0.759493670886076, "grad_norm": 0.734375, "learning_rate": 7.028017881530606e-07, "loss": 0.2453666776418686, "step": 3600, "token_acc": 0.9301139772045591 }, { "epoch": 0.759493670886076, "eval_loss": 0.43366512656211853, "eval_runtime": 245.6513, "eval_samples_per_second": 137.207, "eval_steps_per_second": 2.145, "eval_token_acc": 0.8991407161446648, "step": 3600 }, { "epoch": 0.759704641350211, "grad_norm": 0.67578125, "learning_rate": 7.026472248965494e-07, "loss": 0.2591482996940613, "step": 3601, "token_acc": 0.9343163538873994 }, { "epoch": 0.759915611814346, "grad_norm": 0.82421875, "learning_rate": 7.024926384656129e-07, "loss": 0.2736390233039856, "step": 3602, "token_acc": 0.9346564885496184 }, { "epoch": 0.7601265822784811, "grad_norm": 0.64453125, "learning_rate": 7.023380288779291e-07, "loss": 0.2669934034347534, "step": 3603, "token_acc": 0.9282339043118725 }, { "epoch": 0.760337552742616, "grad_norm": 0.71484375, "learning_rate": 7.021833961511791e-07, "loss": 0.23906216025352478, "step": 3604, "token_acc": 0.9384478144513827 }, { "epoch": 0.760548523206751, "grad_norm": 0.75390625, "learning_rate": 7.020287403030468e-07, "loss": 0.2721901834011078, "step": 3605, "token_acc": 0.9253154777234097 }, { "epoch": 0.760759493670886, "grad_norm": 0.63671875, "learning_rate": 7.018740613512179e-07, "loss": 0.20786064863204956, "step": 3606, "token_acc": 0.9367973260407171 }, { "epoch": 0.7609704641350211, "grad_norm": 0.6640625, "learning_rate": 7.017193593133815e-07, "loss": 0.23600813746452332, "step": 3607, "token_acc": 0.9306303452644916 }, { "epoch": 0.7611814345991561, "grad_norm": 0.70703125, "learning_rate": 7.015646342072289e-07, "loss": 0.24893473088741302, "step": 3608, "token_acc": 0.9349881796690307 }, { "epoch": 0.7613924050632911, "grad_norm": 0.77734375, "learning_rate": 7.014098860504545e-07, "loss": 0.2684192359447479, "step": 3609, "token_acc": 0.9240299967394848 }, { "epoch": 0.7616033755274262, "grad_norm": 0.765625, "learning_rate": 7.012551148607549e-07, "loss": 0.2189260870218277, "step": 3610, "token_acc": 0.9325600215807931 }, { "epoch": 0.7618143459915612, "grad_norm": 0.8515625, "learning_rate": 7.011003206558293e-07, "loss": 0.25713759660720825, "step": 3611, "token_acc": 0.9258224705152079 }, { "epoch": 0.7620253164556962, "grad_norm": 0.75390625, "learning_rate": 7.009455034533801e-07, "loss": 0.282659113407135, "step": 3612, "token_acc": 0.926094890510949 }, { "epoch": 0.7622362869198313, "grad_norm": 0.640625, "learning_rate": 7.007906632711118e-07, "loss": 0.23984509706497192, "step": 3613, "token_acc": 0.9296123178443773 }, { "epoch": 0.7624472573839662, "grad_norm": 1.0234375, "learning_rate": 7.006358001267317e-07, "loss": 0.263349711894989, "step": 3614, "token_acc": 0.9250379362670713 }, { "epoch": 0.7626582278481012, "grad_norm": 0.66796875, "learning_rate": 7.004809140379495e-07, "loss": 0.27596715092658997, "step": 3615, "token_acc": 0.9216757741347905 }, { "epoch": 0.7628691983122363, "grad_norm": 0.84375, "learning_rate": 7.003260050224779e-07, "loss": 0.28400570154190063, "step": 3616, "token_acc": 0.9237312163311596 }, { "epoch": 0.7630801687763713, "grad_norm": 0.73828125, "learning_rate": 7.001710730980325e-07, "loss": 0.2688075304031372, "step": 3617, "token_acc": 0.9250824093497153 }, { "epoch": 0.7632911392405063, "grad_norm": 0.78125, "learning_rate": 7.000161182823304e-07, "loss": 0.2644561529159546, "step": 3618, "token_acc": 0.9248488338612151 }, { "epoch": 0.7635021097046414, "grad_norm": 0.79296875, "learning_rate": 6.998611405930921e-07, "loss": 0.28558480739593506, "step": 3619, "token_acc": 0.9211788896504455 }, { "epoch": 0.7637130801687764, "grad_norm": 0.703125, "learning_rate": 6.997061400480408e-07, "loss": 0.28770703077316284, "step": 3620, "token_acc": 0.924766777008727 }, { "epoch": 0.7639240506329114, "grad_norm": 0.7578125, "learning_rate": 6.995511166649023e-07, "loss": 0.2408161610364914, "step": 3621, "token_acc": 0.9320719602977667 }, { "epoch": 0.7641350210970465, "grad_norm": 0.62109375, "learning_rate": 6.993960704614045e-07, "loss": 0.2704365849494934, "step": 3622, "token_acc": 0.9290216497670595 }, { "epoch": 0.7643459915611814, "grad_norm": 0.828125, "learning_rate": 6.992410014552783e-07, "loss": 0.24723555147647858, "step": 3623, "token_acc": 0.9284712482468443 }, { "epoch": 0.7645569620253164, "grad_norm": 0.7734375, "learning_rate": 6.990859096642574e-07, "loss": 0.307455837726593, "step": 3624, "token_acc": 0.9201006605850897 }, { "epoch": 0.7647679324894515, "grad_norm": 0.73828125, "learning_rate": 6.989307951060775e-07, "loss": 0.3087320327758789, "step": 3625, "token_acc": 0.9176661264181524 }, { "epoch": 0.7649789029535865, "grad_norm": 0.765625, "learning_rate": 6.987756577984776e-07, "loss": 0.23372602462768555, "step": 3626, "token_acc": 0.9351211072664359 }, { "epoch": 0.7651898734177215, "grad_norm": 0.5625, "learning_rate": 6.986204977591987e-07, "loss": 0.23592683672904968, "step": 3627, "token_acc": 0.9317174876205369 }, { "epoch": 0.7654008438818566, "grad_norm": 0.59765625, "learning_rate": 6.984653150059847e-07, "loss": 0.2503807842731476, "step": 3628, "token_acc": 0.9283987915407855 }, { "epoch": 0.7656118143459916, "grad_norm": 0.7578125, "learning_rate": 6.983101095565825e-07, "loss": 0.26628175377845764, "step": 3629, "token_acc": 0.9256174612291787 }, { "epoch": 0.7658227848101266, "grad_norm": 0.6640625, "learning_rate": 6.981548814287404e-07, "loss": 0.28135985136032104, "step": 3630, "token_acc": 0.9161997563946407 }, { "epoch": 0.7660337552742617, "grad_norm": 0.6328125, "learning_rate": 6.979996306402104e-07, "loss": 0.24709215760231018, "step": 3631, "token_acc": 0.9287979244739117 }, { "epoch": 0.7662447257383966, "grad_norm": 0.625, "learning_rate": 6.97844357208747e-07, "loss": 0.2622273564338684, "step": 3632, "token_acc": 0.9240862230552952 }, { "epoch": 0.7664556962025316, "grad_norm": 1.2890625, "learning_rate": 6.976890611521068e-07, "loss": 0.25957703590393066, "step": 3633, "token_acc": 0.9301219149568838 }, { "epoch": 0.7666666666666667, "grad_norm": 0.82421875, "learning_rate": 6.975337424880492e-07, "loss": 0.2642722725868225, "step": 3634, "token_acc": 0.9256304683479156 }, { "epoch": 0.7668776371308017, "grad_norm": 0.84375, "learning_rate": 6.973784012343362e-07, "loss": 0.3275147080421448, "step": 3635, "token_acc": 0.906888102368636 }, { "epoch": 0.7670886075949367, "grad_norm": 0.72265625, "learning_rate": 6.972230374087324e-07, "loss": 0.23659901320934296, "step": 3636, "token_acc": 0.930314613349015 }, { "epoch": 0.7672995780590718, "grad_norm": 0.6640625, "learning_rate": 6.970676510290051e-07, "loss": 0.2730189561843872, "step": 3637, "token_acc": 0.9282241921647126 }, { "epoch": 0.7675105485232068, "grad_norm": 0.87890625, "learning_rate": 6.969122421129239e-07, "loss": 0.2830978035926819, "step": 3638, "token_acc": 0.9242331288343558 }, { "epoch": 0.7677215189873418, "grad_norm": 0.78515625, "learning_rate": 6.967568106782611e-07, "loss": 0.24627569317817688, "step": 3639, "token_acc": 0.929819770617149 }, { "epoch": 0.7679324894514767, "grad_norm": 0.65234375, "learning_rate": 6.966013567427916e-07, "loss": 0.2465059459209442, "step": 3640, "token_acc": 0.9306728808622197 }, { "epoch": 0.7681434599156118, "grad_norm": 0.609375, "learning_rate": 6.964458803242932e-07, "loss": 0.24307966232299805, "step": 3641, "token_acc": 0.9302820649281532 }, { "epoch": 0.7683544303797468, "grad_norm": 0.70703125, "learning_rate": 6.962903814405454e-07, "loss": 0.2552075982093811, "step": 3642, "token_acc": 0.9285506825442927 }, { "epoch": 0.7685654008438818, "grad_norm": 0.75390625, "learning_rate": 6.961348601093311e-07, "loss": 0.290314644575119, "step": 3643, "token_acc": 0.9199160167966407 }, { "epoch": 0.7687763713080169, "grad_norm": 0.78125, "learning_rate": 6.959793163484356e-07, "loss": 0.298772931098938, "step": 3644, "token_acc": 0.917153183208229 }, { "epoch": 0.7689873417721519, "grad_norm": 0.66015625, "learning_rate": 6.958237501756465e-07, "loss": 0.22932755947113037, "step": 3645, "token_acc": 0.9371993586317477 }, { "epoch": 0.7691983122362869, "grad_norm": 0.5625, "learning_rate": 6.956681616087542e-07, "loss": 0.2145199179649353, "step": 3646, "token_acc": 0.934135009565455 }, { "epoch": 0.769409282700422, "grad_norm": 0.6640625, "learning_rate": 6.955125506655513e-07, "loss": 0.295622855424881, "step": 3647, "token_acc": 0.9203589455973079 }, { "epoch": 0.769620253164557, "grad_norm": 0.71875, "learning_rate": 6.953569173638336e-07, "loss": 0.2703492045402527, "step": 3648, "token_acc": 0.9267840593141798 }, { "epoch": 0.7698312236286919, "grad_norm": 0.60546875, "learning_rate": 6.95201261721399e-07, "loss": 0.22849828004837036, "step": 3649, "token_acc": 0.9332378223495702 }, { "epoch": 0.770042194092827, "grad_norm": 0.7578125, "learning_rate": 6.950455837560477e-07, "loss": 0.24727970361709595, "step": 3650, "token_acc": 0.9298298906439855 }, { "epoch": 0.770253164556962, "grad_norm": 0.74609375, "learning_rate": 6.948898834855832e-07, "loss": 0.2926967144012451, "step": 3651, "token_acc": 0.9221418234442836 }, { "epoch": 0.770464135021097, "grad_norm": 0.83984375, "learning_rate": 6.947341609278109e-07, "loss": 0.23877976834774017, "step": 3652, "token_acc": 0.9263373282665888 }, { "epoch": 0.7706751054852321, "grad_norm": 0.734375, "learning_rate": 6.945784161005391e-07, "loss": 0.2415345460176468, "step": 3653, "token_acc": 0.931095406360424 }, { "epoch": 0.7708860759493671, "grad_norm": 0.734375, "learning_rate": 6.944226490215786e-07, "loss": 0.29380980134010315, "step": 3654, "token_acc": 0.9279084158415841 }, { "epoch": 0.7710970464135021, "grad_norm": 1.234375, "learning_rate": 6.942668597087425e-07, "loss": 0.2600228190422058, "step": 3655, "token_acc": 0.9252130877096508 }, { "epoch": 0.7713080168776372, "grad_norm": 0.67578125, "learning_rate": 6.941110481798469e-07, "loss": 0.21745869517326355, "step": 3656, "token_acc": 0.9345714285714286 }, { "epoch": 0.7715189873417722, "grad_norm": 0.80859375, "learning_rate": 6.939552144527098e-07, "loss": 0.22252723574638367, "step": 3657, "token_acc": 0.9347447795823666 }, { "epoch": 0.7717299578059071, "grad_norm": 0.890625, "learning_rate": 6.937993585451523e-07, "loss": 0.2583829462528229, "step": 3658, "token_acc": 0.9348329474032419 }, { "epoch": 0.7719409282700422, "grad_norm": 0.69140625, "learning_rate": 6.93643480474998e-07, "loss": 0.282559871673584, "step": 3659, "token_acc": 0.9236476043276661 }, { "epoch": 0.7721518987341772, "grad_norm": 0.66796875, "learning_rate": 6.934875802600726e-07, "loss": 0.27186816930770874, "step": 3660, "token_acc": 0.9252309468822171 }, { "epoch": 0.7723628691983122, "grad_norm": 1.0078125, "learning_rate": 6.93331657918205e-07, "loss": 0.22208011150360107, "step": 3661, "token_acc": 0.9354427149783108 }, { "epoch": 0.7725738396624473, "grad_norm": 0.85546875, "learning_rate": 6.931757134672256e-07, "loss": 0.27613455057144165, "step": 3662, "token_acc": 0.9187145557655955 }, { "epoch": 0.7727848101265823, "grad_norm": 0.7734375, "learning_rate": 6.930197469249686e-07, "loss": 0.29514437913894653, "step": 3663, "token_acc": 0.9176677409903311 }, { "epoch": 0.7729957805907173, "grad_norm": 0.76953125, "learning_rate": 6.928637583092696e-07, "loss": 0.2356572449207306, "step": 3664, "token_acc": 0.930993099309931 }, { "epoch": 0.7732067510548524, "grad_norm": 0.76953125, "learning_rate": 6.927077476379675e-07, "loss": 0.28277188539505005, "step": 3665, "token_acc": 0.919921875 }, { "epoch": 0.7734177215189874, "grad_norm": 0.828125, "learning_rate": 6.925517149289036e-07, "loss": 0.24720638990402222, "step": 3666, "token_acc": 0.927591706539075 }, { "epoch": 0.7736286919831223, "grad_norm": 0.5703125, "learning_rate": 6.923956601999211e-07, "loss": 0.24515727162361145, "step": 3667, "token_acc": 0.9275599128540305 }, { "epoch": 0.7738396624472574, "grad_norm": 0.99609375, "learning_rate": 6.922395834688667e-07, "loss": 0.33189499378204346, "step": 3668, "token_acc": 0.9139679424272162 }, { "epoch": 0.7740506329113924, "grad_norm": 0.703125, "learning_rate": 6.920834847535887e-07, "loss": 0.27018362283706665, "step": 3669, "token_acc": 0.929162072767365 }, { "epoch": 0.7742616033755274, "grad_norm": 0.7421875, "learning_rate": 6.919273640719383e-07, "loss": 0.2942999303340912, "step": 3670, "token_acc": 0.9221336603310852 }, { "epoch": 0.7744725738396624, "grad_norm": 0.6484375, "learning_rate": 6.917712214417695e-07, "loss": 0.2464093267917633, "step": 3671, "token_acc": 0.9295774647887324 }, { "epoch": 0.7746835443037975, "grad_norm": 0.55078125, "learning_rate": 6.916150568809384e-07, "loss": 0.21692651510238647, "step": 3672, "token_acc": 0.9383410814778567 }, { "epoch": 0.7748945147679325, "grad_norm": 0.84765625, "learning_rate": 6.914588704073038e-07, "loss": 0.29441776871681213, "step": 3673, "token_acc": 0.9213418160786582 }, { "epoch": 0.7751054852320675, "grad_norm": 0.6796875, "learning_rate": 6.913026620387265e-07, "loss": 0.2425011694431305, "step": 3674, "token_acc": 0.9298490776970374 }, { "epoch": 0.7753164556962026, "grad_norm": 0.58984375, "learning_rate": 6.91146431793071e-07, "loss": 0.22949694097042084, "step": 3675, "token_acc": 0.9349736379613357 }, { "epoch": 0.7755274261603375, "grad_norm": 0.74609375, "learning_rate": 6.909901796882031e-07, "loss": 0.29254043102264404, "step": 3676, "token_acc": 0.9219556635286973 }, { "epoch": 0.7757383966244725, "grad_norm": 0.625, "learning_rate": 6.908339057419916e-07, "loss": 0.24154436588287354, "step": 3677, "token_acc": 0.9288924134284959 }, { "epoch": 0.7759493670886076, "grad_norm": 0.70703125, "learning_rate": 6.906776099723077e-07, "loss": 0.24134990572929382, "step": 3678, "token_acc": 0.9338391502276177 }, { "epoch": 0.7761603375527426, "grad_norm": 0.6328125, "learning_rate": 6.905212923970254e-07, "loss": 0.25612080097198486, "step": 3679, "token_acc": 0.9270534175761057 }, { "epoch": 0.7763713080168776, "grad_norm": 0.875, "learning_rate": 6.903649530340207e-07, "loss": 0.25917917490005493, "step": 3680, "token_acc": 0.9305734121882232 }, { "epoch": 0.7765822784810127, "grad_norm": 0.66796875, "learning_rate": 6.902085919011722e-07, "loss": 0.24981045722961426, "step": 3681, "token_acc": 0.9256567915036333 }, { "epoch": 0.7767932489451477, "grad_norm": 0.625, "learning_rate": 6.900522090163614e-07, "loss": 0.2547079920768738, "step": 3682, "token_acc": 0.9300958861738323 }, { "epoch": 0.7770042194092827, "grad_norm": 0.6875, "learning_rate": 6.89895804397472e-07, "loss": 0.258309543132782, "step": 3683, "token_acc": 0.9258748966657482 }, { "epoch": 0.7772151898734178, "grad_norm": 0.72265625, "learning_rate": 6.897393780623901e-07, "loss": 0.29223310947418213, "step": 3684, "token_acc": 0.9189650270116576 }, { "epoch": 0.7774261603375527, "grad_norm": 0.828125, "learning_rate": 6.895829300290044e-07, "loss": 0.24945858120918274, "step": 3685, "token_acc": 0.9299905392620624 }, { "epoch": 0.7776371308016877, "grad_norm": 0.67578125, "learning_rate": 6.894264603152058e-07, "loss": 0.2516825795173645, "step": 3686, "token_acc": 0.9305816135084428 }, { "epoch": 0.7778481012658228, "grad_norm": 0.7890625, "learning_rate": 6.892699689388882e-07, "loss": 0.2643257975578308, "step": 3687, "token_acc": 0.9284436493738819 }, { "epoch": 0.7780590717299578, "grad_norm": 0.828125, "learning_rate": 6.891134559179479e-07, "loss": 0.2585337162017822, "step": 3688, "token_acc": 0.9275722723033882 }, { "epoch": 0.7782700421940928, "grad_norm": 0.69140625, "learning_rate": 6.889569212702832e-07, "loss": 0.2545027732849121, "step": 3689, "token_acc": 0.9330612244897959 }, { "epoch": 0.7784810126582279, "grad_norm": 0.83984375, "learning_rate": 6.888003650137952e-07, "loss": 0.2585119605064392, "step": 3690, "token_acc": 0.9273984442523768 }, { "epoch": 0.7786919831223629, "grad_norm": 0.83984375, "learning_rate": 6.886437871663875e-07, "loss": 0.2718811631202698, "step": 3691, "token_acc": 0.9168405365126676 }, { "epoch": 0.7789029535864979, "grad_norm": 1.046875, "learning_rate": 6.884871877459659e-07, "loss": 0.26976239681243896, "step": 3692, "token_acc": 0.9228754365541327 }, { "epoch": 0.779113924050633, "grad_norm": 0.7578125, "learning_rate": 6.883305667704393e-07, "loss": 0.2598419785499573, "step": 3693, "token_acc": 0.9351763584366063 }, { "epoch": 0.7793248945147679, "grad_norm": 0.80078125, "learning_rate": 6.881739242577182e-07, "loss": 0.2603619694709778, "step": 3694, "token_acc": 0.9280426413976902 }, { "epoch": 0.7795358649789029, "grad_norm": 0.99609375, "learning_rate": 6.880172602257163e-07, "loss": 0.3399237096309662, "step": 3695, "token_acc": 0.9132574592433097 }, { "epoch": 0.779746835443038, "grad_norm": 0.58984375, "learning_rate": 6.878605746923493e-07, "loss": 0.23321382701396942, "step": 3696, "token_acc": 0.9413333333333334 }, { "epoch": 0.779957805907173, "grad_norm": 0.71484375, "learning_rate": 6.877038676755356e-07, "loss": 0.29135560989379883, "step": 3697, "token_acc": 0.9198230577826928 }, { "epoch": 0.780168776371308, "grad_norm": 0.6875, "learning_rate": 6.875471391931956e-07, "loss": 0.2538648843765259, "step": 3698, "token_acc": 0.9273529411764706 }, { "epoch": 0.7803797468354431, "grad_norm": 0.734375, "learning_rate": 6.873903892632531e-07, "loss": 0.23047593235969543, "step": 3699, "token_acc": 0.9357509994288977 }, { "epoch": 0.7805907172995781, "grad_norm": 0.73828125, "learning_rate": 6.872336179036335e-07, "loss": 0.25847840309143066, "step": 3700, "token_acc": 0.9268596327419857 }, { "epoch": 0.7808016877637131, "grad_norm": 0.6484375, "learning_rate": 6.87076825132265e-07, "loss": 0.254151850938797, "step": 3701, "token_acc": 0.9283555018137848 }, { "epoch": 0.7810126582278482, "grad_norm": 0.5859375, "learning_rate": 6.86920010967078e-07, "loss": 0.25956016778945923, "step": 3702, "token_acc": 0.9284578696343402 }, { "epoch": 0.7812236286919831, "grad_norm": 1.03125, "learning_rate": 6.867631754260058e-07, "loss": 0.2644844651222229, "step": 3703, "token_acc": 0.9262440758293838 }, { "epoch": 0.7814345991561181, "grad_norm": 1.0546875, "learning_rate": 6.866063185269837e-07, "loss": 0.25173264741897583, "step": 3704, "token_acc": 0.9312344656172328 }, { "epoch": 0.7816455696202531, "grad_norm": 0.73828125, "learning_rate": 6.864494402879497e-07, "loss": 0.27664461731910706, "step": 3705, "token_acc": 0.9237738206810372 }, { "epoch": 0.7818565400843882, "grad_norm": 0.73828125, "learning_rate": 6.862925407268441e-07, "loss": 0.22745180130004883, "step": 3706, "token_acc": 0.9310443490701001 }, { "epoch": 0.7820675105485232, "grad_norm": 0.8046875, "learning_rate": 6.861356198616098e-07, "loss": 0.31877458095550537, "step": 3707, "token_acc": 0.9229214780600462 }, { "epoch": 0.7822784810126582, "grad_norm": 0.62109375, "learning_rate": 6.859786777101917e-07, "loss": 0.2266487032175064, "step": 3708, "token_acc": 0.937035937035937 }, { "epoch": 0.7824894514767933, "grad_norm": 0.65625, "learning_rate": 6.858217142905377e-07, "loss": 0.2469361126422882, "step": 3709, "token_acc": 0.9355296790684464 }, { "epoch": 0.7827004219409283, "grad_norm": 0.828125, "learning_rate": 6.85664729620598e-07, "loss": 0.26468029618263245, "step": 3710, "token_acc": 0.9284158107687519 }, { "epoch": 0.7829113924050632, "grad_norm": 0.609375, "learning_rate": 6.855077237183249e-07, "loss": 0.2680419683456421, "step": 3711, "token_acc": 0.9326779862972893 }, { "epoch": 0.7831223628691983, "grad_norm": 0.58203125, "learning_rate": 6.853506966016737e-07, "loss": 0.24280305206775665, "step": 3712, "token_acc": 0.9312252964426877 }, { "epoch": 0.7833333333333333, "grad_norm": 0.81640625, "learning_rate": 6.851936482886013e-07, "loss": 0.22377482056617737, "step": 3713, "token_acc": 0.9360879904875149 }, { "epoch": 0.7835443037974683, "grad_norm": 0.6328125, "learning_rate": 6.850365787970679e-07, "loss": 0.2574831247329712, "step": 3714, "token_acc": 0.9239263803680982 }, { "epoch": 0.7837552742616034, "grad_norm": 1.4296875, "learning_rate": 6.848794881450355e-07, "loss": 0.27529558539390564, "step": 3715, "token_acc": 0.9279279279279279 }, { "epoch": 0.7839662447257384, "grad_norm": 0.75390625, "learning_rate": 6.847223763504688e-07, "loss": 0.2677040100097656, "step": 3716, "token_acc": 0.9243645381277124 }, { "epoch": 0.7841772151898734, "grad_norm": 0.9296875, "learning_rate": 6.84565243431335e-07, "loss": 0.2547934055328369, "step": 3717, "token_acc": 0.934720908230842 }, { "epoch": 0.7843881856540085, "grad_norm": 0.65234375, "learning_rate": 6.844080894056034e-07, "loss": 0.2479507476091385, "step": 3718, "token_acc": 0.9263784461152882 }, { "epoch": 0.7845991561181435, "grad_norm": 0.83984375, "learning_rate": 6.84250914291246e-07, "loss": 0.3337711691856384, "step": 3719, "token_acc": 0.9096079018258006 }, { "epoch": 0.7848101265822784, "grad_norm": 0.80859375, "learning_rate": 6.84093718106237e-07, "loss": 0.2677861154079437, "step": 3720, "token_acc": 0.9250606796116505 }, { "epoch": 0.7850210970464135, "grad_norm": 0.7578125, "learning_rate": 6.839365008685533e-07, "loss": 0.27862823009490967, "step": 3721, "token_acc": 0.9265269991147831 }, { "epoch": 0.7852320675105485, "grad_norm": 0.76953125, "learning_rate": 6.837792625961738e-07, "loss": 0.24211342632770538, "step": 3722, "token_acc": 0.9306846999154691 }, { "epoch": 0.7854430379746835, "grad_norm": 0.68359375, "learning_rate": 6.836220033070801e-07, "loss": 0.23772498965263367, "step": 3723, "token_acc": 0.9348281016442451 }, { "epoch": 0.7856540084388186, "grad_norm": 0.7734375, "learning_rate": 6.834647230192563e-07, "loss": 0.2653692662715912, "step": 3724, "token_acc": 0.9205181042095967 }, { "epoch": 0.7858649789029536, "grad_norm": 0.94921875, "learning_rate": 6.833074217506885e-07, "loss": 0.29411494731903076, "step": 3725, "token_acc": 0.9190246839253462 }, { "epoch": 0.7860759493670886, "grad_norm": 1.3671875, "learning_rate": 6.831500995193654e-07, "loss": 0.2011893093585968, "step": 3726, "token_acc": 0.9432314410480349 }, { "epoch": 0.7862869198312237, "grad_norm": 0.72265625, "learning_rate": 6.829927563432784e-07, "loss": 0.277072012424469, "step": 3727, "token_acc": 0.9289354473386183 }, { "epoch": 0.7864978902953587, "grad_norm": 0.6953125, "learning_rate": 6.828353922404208e-07, "loss": 0.23608897626399994, "step": 3728, "token_acc": 0.9289171203871749 }, { "epoch": 0.7867088607594936, "grad_norm": 0.68359375, "learning_rate": 6.826780072287886e-07, "loss": 0.2978522777557373, "step": 3729, "token_acc": 0.9211718514044095 }, { "epoch": 0.7869198312236287, "grad_norm": 0.75, "learning_rate": 6.8252060132638e-07, "loss": 0.2764698266983032, "step": 3730, "token_acc": 0.9259259259259259 }, { "epoch": 0.7871308016877637, "grad_norm": 0.80078125, "learning_rate": 6.82363174551196e-07, "loss": 0.2706848382949829, "step": 3731, "token_acc": 0.9229039511725025 }, { "epoch": 0.7873417721518987, "grad_norm": 1.2578125, "learning_rate": 6.822057269212394e-07, "loss": 0.23842565715312958, "step": 3732, "token_acc": 0.9342105263157895 }, { "epoch": 0.7875527426160338, "grad_norm": 0.765625, "learning_rate": 6.820482584545158e-07, "loss": 0.308233380317688, "step": 3733, "token_acc": 0.914965034965035 }, { "epoch": 0.7877637130801688, "grad_norm": 0.6640625, "learning_rate": 6.81890769169033e-07, "loss": 0.32188481092453003, "step": 3734, "token_acc": 0.9183273440052271 }, { "epoch": 0.7879746835443038, "grad_norm": 0.74609375, "learning_rate": 6.817332590828011e-07, "loss": 0.24351143836975098, "step": 3735, "token_acc": 0.9289340101522843 }, { "epoch": 0.7881856540084389, "grad_norm": 0.65234375, "learning_rate": 6.81575728213833e-07, "loss": 0.2406502217054367, "step": 3736, "token_acc": 0.9296083264858943 }, { "epoch": 0.7883966244725739, "grad_norm": 0.7265625, "learning_rate": 6.814181765801434e-07, "loss": 0.25740504264831543, "step": 3737, "token_acc": 0.9356691556576681 }, { "epoch": 0.7886075949367088, "grad_norm": 0.87890625, "learning_rate": 6.812606041997497e-07, "loss": 0.30852043628692627, "step": 3738, "token_acc": 0.9169415292353823 }, { "epoch": 0.7888185654008438, "grad_norm": 0.69140625, "learning_rate": 6.811030110906721e-07, "loss": 0.24373894929885864, "step": 3739, "token_acc": 0.9300859598853868 }, { "epoch": 0.7890295358649789, "grad_norm": 0.7265625, "learning_rate": 6.80945397270932e-07, "loss": 0.26416531205177307, "step": 3740, "token_acc": 0.9294306335204491 }, { "epoch": 0.7892405063291139, "grad_norm": 0.70703125, "learning_rate": 6.807877627585544e-07, "loss": 0.2439078539609909, "step": 3741, "token_acc": 0.9268617021276596 }, { "epoch": 0.7894514767932489, "grad_norm": 0.6484375, "learning_rate": 6.806301075715659e-07, "loss": 0.22207039594650269, "step": 3742, "token_acc": 0.9373143196672609 }, { "epoch": 0.789662447257384, "grad_norm": 0.64453125, "learning_rate": 6.804724317279957e-07, "loss": 0.24652144312858582, "step": 3743, "token_acc": 0.9312638580931264 }, { "epoch": 0.789873417721519, "grad_norm": 0.66796875, "learning_rate": 6.803147352458756e-07, "loss": 0.23358792066574097, "step": 3744, "token_acc": 0.931804465902233 }, { "epoch": 0.790084388185654, "grad_norm": 0.6875, "learning_rate": 6.801570181432391e-07, "loss": 0.2671646177768707, "step": 3745, "token_acc": 0.9274924471299094 }, { "epoch": 0.7902953586497891, "grad_norm": 0.68359375, "learning_rate": 6.799992804381229e-07, "loss": 0.21967467665672302, "step": 3746, "token_acc": 0.9336861768368617 }, { "epoch": 0.790506329113924, "grad_norm": 0.78125, "learning_rate": 6.798415221485654e-07, "loss": 0.26587140560150146, "step": 3747, "token_acc": 0.9269366197183099 }, { "epoch": 0.790717299578059, "grad_norm": 0.6328125, "learning_rate": 6.796837432926078e-07, "loss": 0.24114859104156494, "step": 3748, "token_acc": 0.9327935222672065 }, { "epoch": 0.7909282700421941, "grad_norm": 0.76171875, "learning_rate": 6.795259438882932e-07, "loss": 0.2353094220161438, "step": 3749, "token_acc": 0.9295232454841575 }, { "epoch": 0.7911392405063291, "grad_norm": 0.7421875, "learning_rate": 6.793681239536675e-07, "loss": 0.26283562183380127, "step": 3750, "token_acc": 0.9245469522240527 }, { "epoch": 0.7913502109704641, "grad_norm": 1.71875, "learning_rate": 6.792102835067788e-07, "loss": 0.3247092664241791, "step": 3751, "token_acc": 0.9168437025796662 }, { "epoch": 0.7915611814345992, "grad_norm": 0.77734375, "learning_rate": 6.790524225656771e-07, "loss": 0.31272995471954346, "step": 3752, "token_acc": 0.9183922046285018 }, { "epoch": 0.7917721518987342, "grad_norm": 0.578125, "learning_rate": 6.788945411484155e-07, "loss": 0.2380940169095993, "step": 3753, "token_acc": 0.9293862031504617 }, { "epoch": 0.7919831223628692, "grad_norm": 1.125, "learning_rate": 6.787366392730489e-07, "loss": 0.2898922562599182, "step": 3754, "token_acc": 0.9248507867607162 }, { "epoch": 0.7921940928270043, "grad_norm": 0.60546875, "learning_rate": 6.785787169576348e-07, "loss": 0.20603176951408386, "step": 3755, "token_acc": 0.9449512425290972 }, { "epoch": 0.7924050632911392, "grad_norm": 0.80078125, "learning_rate": 6.784207742202331e-07, "loss": 0.29567432403564453, "step": 3756, "token_acc": 0.9191220685508118 }, { "epoch": 0.7926160337552742, "grad_norm": 0.640625, "learning_rate": 6.782628110789055e-07, "loss": 0.22451366484165192, "step": 3757, "token_acc": 0.9361590790162219 }, { "epoch": 0.7928270042194093, "grad_norm": 0.7265625, "learning_rate": 6.781048275517169e-07, "loss": 0.24609267711639404, "step": 3758, "token_acc": 0.9333952912019826 }, { "epoch": 0.7930379746835443, "grad_norm": 0.8515625, "learning_rate": 6.779468236567336e-07, "loss": 0.23488013446331024, "step": 3759, "token_acc": 0.938985228002569 }, { "epoch": 0.7932489451476793, "grad_norm": 0.8203125, "learning_rate": 6.777887994120248e-07, "loss": 0.270779550075531, "step": 3760, "token_acc": 0.92277450154451 }, { "epoch": 0.7934599156118144, "grad_norm": 0.66796875, "learning_rate": 6.776307548356622e-07, "loss": 0.22133612632751465, "step": 3761, "token_acc": 0.9343291995490417 }, { "epoch": 0.7936708860759494, "grad_norm": 0.734375, "learning_rate": 6.774726899457193e-07, "loss": 0.23177753388881683, "step": 3762, "token_acc": 0.9321937321937321 }, { "epoch": 0.7938818565400844, "grad_norm": 0.66796875, "learning_rate": 6.773146047602722e-07, "loss": 0.22609542310237885, "step": 3763, "token_acc": 0.9370337212772307 }, { "epoch": 0.7940928270042195, "grad_norm": 0.7265625, "learning_rate": 6.771564992973992e-07, "loss": 0.27038583159446716, "step": 3764, "token_acc": 0.9299937772246422 }, { "epoch": 0.7943037974683544, "grad_norm": 0.6640625, "learning_rate": 6.76998373575181e-07, "loss": 0.2481411099433899, "step": 3765, "token_acc": 0.9272930648769575 }, { "epoch": 0.7945147679324894, "grad_norm": 0.69140625, "learning_rate": 6.768402276117008e-07, "loss": 0.2473355382680893, "step": 3766, "token_acc": 0.9334862385321101 }, { "epoch": 0.7947257383966245, "grad_norm": 0.8203125, "learning_rate": 6.766820614250438e-07, "loss": 0.2826572060585022, "step": 3767, "token_acc": 0.9208045254556882 }, { "epoch": 0.7949367088607595, "grad_norm": 0.73046875, "learning_rate": 6.765238750332977e-07, "loss": 0.2550674080848694, "step": 3768, "token_acc": 0.9254587155963303 }, { "epoch": 0.7951476793248945, "grad_norm": 0.7734375, "learning_rate": 6.763656684545523e-07, "loss": 0.28944841027259827, "step": 3769, "token_acc": 0.9179033785917272 }, { "epoch": 0.7953586497890295, "grad_norm": 0.70703125, "learning_rate": 6.762074417068999e-07, "loss": 0.26555052399635315, "step": 3770, "token_acc": 0.9232456140350878 }, { "epoch": 0.7955696202531646, "grad_norm": 0.6875, "learning_rate": 6.760491948084353e-07, "loss": 0.2669599950313568, "step": 3771, "token_acc": 0.9282269503546099 }, { "epoch": 0.7957805907172996, "grad_norm": 0.671875, "learning_rate": 6.758909277772551e-07, "loss": 0.23314353823661804, "step": 3772, "token_acc": 0.9303218187186301 }, { "epoch": 0.7959915611814345, "grad_norm": 0.80078125, "learning_rate": 6.757326406314586e-07, "loss": 0.25445854663848877, "step": 3773, "token_acc": 0.931960049937578 }, { "epoch": 0.7962025316455696, "grad_norm": 0.58984375, "learning_rate": 6.755743333891473e-07, "loss": 0.23404361307621002, "step": 3774, "token_acc": 0.9415059116365899 }, { "epoch": 0.7964135021097046, "grad_norm": 0.6953125, "learning_rate": 6.75416006068425e-07, "loss": 0.3183133602142334, "step": 3775, "token_acc": 0.9195888754534461 }, { "epoch": 0.7966244725738396, "grad_norm": 0.92578125, "learning_rate": 6.752576586873975e-07, "loss": 0.28258395195007324, "step": 3776, "token_acc": 0.9191134139320668 }, { "epoch": 0.7968354430379747, "grad_norm": 0.8125, "learning_rate": 6.750992912641733e-07, "loss": 0.2961618900299072, "step": 3777, "token_acc": 0.9268722466960353 }, { "epoch": 0.7970464135021097, "grad_norm": 0.78125, "learning_rate": 6.749409038168635e-07, "loss": 0.27102768421173096, "step": 3778, "token_acc": 0.9203438395415473 }, { "epoch": 0.7972573839662447, "grad_norm": 0.82421875, "learning_rate": 6.747824963635803e-07, "loss": 0.26760005950927734, "step": 3779, "token_acc": 0.9296402419611589 }, { "epoch": 0.7974683544303798, "grad_norm": 0.640625, "learning_rate": 6.746240689224393e-07, "loss": 0.25048884749412537, "step": 3780, "token_acc": 0.9281846679965802 }, { "epoch": 0.7976793248945148, "grad_norm": 0.6171875, "learning_rate": 6.744656215115583e-07, "loss": 0.21889421343803406, "step": 3781, "token_acc": 0.9337485843714609 }, { "epoch": 0.7978902953586497, "grad_norm": 0.97265625, "learning_rate": 6.743071541490566e-07, "loss": 0.24408601224422455, "step": 3782, "token_acc": 0.9314951677089255 }, { "epoch": 0.7981012658227848, "grad_norm": 0.77734375, "learning_rate": 6.741486668530566e-07, "loss": 0.28509223461151123, "step": 3783, "token_acc": 0.9250783699059562 }, { "epoch": 0.7983122362869198, "grad_norm": 0.76953125, "learning_rate": 6.739901596416825e-07, "loss": 0.28772521018981934, "step": 3784, "token_acc": 0.9173999432302016 }, { "epoch": 0.7985232067510548, "grad_norm": 0.73046875, "learning_rate": 6.73831632533061e-07, "loss": 0.2744446396827698, "step": 3785, "token_acc": 0.9269513991163476 }, { "epoch": 0.7987341772151899, "grad_norm": 1.0625, "learning_rate": 6.73673085545321e-07, "loss": 0.24417513608932495, "step": 3786, "token_acc": 0.924702380952381 }, { "epoch": 0.7989451476793249, "grad_norm": 0.70703125, "learning_rate": 6.735145186965938e-07, "loss": 0.28194305300712585, "step": 3787, "token_acc": 0.9195402298850575 }, { "epoch": 0.7991561181434599, "grad_norm": 0.82421875, "learning_rate": 6.733559320050127e-07, "loss": 0.24639712274074554, "step": 3788, "token_acc": 0.9284467713787086 }, { "epoch": 0.799367088607595, "grad_norm": 0.8828125, "learning_rate": 6.731973254887135e-07, "loss": 0.26459747552871704, "step": 3789, "token_acc": 0.9247740563530037 }, { "epoch": 0.79957805907173, "grad_norm": 0.8515625, "learning_rate": 6.730386991658343e-07, "loss": 0.291557639837265, "step": 3790, "token_acc": 0.9218335983041865 }, { "epoch": 0.799789029535865, "grad_norm": 0.62890625, "learning_rate": 6.728800530545152e-07, "loss": 0.22183871269226074, "step": 3791, "token_acc": 0.939875604699378 }, { "epoch": 0.8, "grad_norm": 0.625, "learning_rate": 6.727213871728987e-07, "loss": 0.1998809278011322, "step": 3792, "token_acc": 0.9422594142259414 }, { "epoch": 0.800210970464135, "grad_norm": 0.609375, "learning_rate": 6.725627015391297e-07, "loss": 0.2742140591144562, "step": 3793, "token_acc": 0.9239473684210526 }, { "epoch": 0.80042194092827, "grad_norm": 0.66796875, "learning_rate": 6.724039961713552e-07, "loss": 0.2113466113805771, "step": 3794, "token_acc": 0.9414310444316215 }, { "epoch": 0.8006329113924051, "grad_norm": 0.703125, "learning_rate": 6.722452710877247e-07, "loss": 0.2576555013656616, "step": 3795, "token_acc": 0.9300265721877768 }, { "epoch": 0.8008438818565401, "grad_norm": 0.6796875, "learning_rate": 6.720865263063895e-07, "loss": 0.250874787569046, "step": 3796, "token_acc": 0.9283405172413793 }, { "epoch": 0.8010548523206751, "grad_norm": 0.69921875, "learning_rate": 6.719277618455035e-07, "loss": 0.26921847462654114, "step": 3797, "token_acc": 0.9220338983050848 }, { "epoch": 0.8012658227848102, "grad_norm": 0.6484375, "learning_rate": 6.717689777232227e-07, "loss": 0.22328831255435944, "step": 3798, "token_acc": 0.9373125374925015 }, { "epoch": 0.8014767932489452, "grad_norm": 0.796875, "learning_rate": 6.716101739577053e-07, "loss": 0.24966508150100708, "step": 3799, "token_acc": 0.9317491019618679 }, { "epoch": 0.8016877637130801, "grad_norm": 0.6796875, "learning_rate": 6.714513505671122e-07, "loss": 0.2438233196735382, "step": 3800, "token_acc": 0.9293731179852176 }, { "epoch": 0.8016877637130801, "eval_loss": 0.43370315432548523, "eval_runtime": 245.5039, "eval_samples_per_second": 137.289, "eval_steps_per_second": 2.147, "eval_token_acc": 0.8991407161446648, "step": 3800 }, { "epoch": 0.8018987341772152, "grad_norm": 1.0234375, "learning_rate": 6.71292507569606e-07, "loss": 0.24866743385791779, "step": 3801, "token_acc": 0.9305511309334182 }, { "epoch": 0.8021097046413502, "grad_norm": 0.8359375, "learning_rate": 6.711336449833518e-07, "loss": 0.28139013051986694, "step": 3802, "token_acc": 0.9287363906301551 }, { "epoch": 0.8023206751054852, "grad_norm": 0.77734375, "learning_rate": 6.709747628265168e-07, "loss": 0.2816872000694275, "step": 3803, "token_acc": 0.9188156638013372 }, { "epoch": 0.8025316455696202, "grad_norm": 0.56640625, "learning_rate": 6.708158611172704e-07, "loss": 0.22397735714912415, "step": 3804, "token_acc": 0.9378238341968912 }, { "epoch": 0.8027426160337553, "grad_norm": 0.6875, "learning_rate": 6.706569398737846e-07, "loss": 0.25848016142845154, "step": 3805, "token_acc": 0.9222052067381317 }, { "epoch": 0.8029535864978903, "grad_norm": 0.69921875, "learning_rate": 6.704979991142333e-07, "loss": 0.24762225151062012, "step": 3806, "token_acc": 0.9295605057194462 }, { "epoch": 0.8031645569620253, "grad_norm": 0.7109375, "learning_rate": 6.703390388567929e-07, "loss": 0.20117399096488953, "step": 3807, "token_acc": 0.9378161261529306 }, { "epoch": 0.8033755274261604, "grad_norm": 0.91796875, "learning_rate": 6.701800591196413e-07, "loss": 0.22435054183006287, "step": 3808, "token_acc": 0.9379426644182125 }, { "epoch": 0.8035864978902953, "grad_norm": 0.6796875, "learning_rate": 6.7002105992096e-07, "loss": 0.2498970776796341, "step": 3809, "token_acc": 0.9316557937247593 }, { "epoch": 0.8037974683544303, "grad_norm": 0.71875, "learning_rate": 6.698620412789311e-07, "loss": 0.27398955821990967, "step": 3810, "token_acc": 0.9339274738783037 }, { "epoch": 0.8040084388185654, "grad_norm": 0.7734375, "learning_rate": 6.697030032117401e-07, "loss": 0.2316502183675766, "step": 3811, "token_acc": 0.9354642313546423 }, { "epoch": 0.8042194092827004, "grad_norm": 1.0078125, "learning_rate": 6.695439457375744e-07, "loss": 0.26577842235565186, "step": 3812, "token_acc": 0.93198127925117 }, { "epoch": 0.8044303797468354, "grad_norm": 0.7578125, "learning_rate": 6.693848688746235e-07, "loss": 0.32031378149986267, "step": 3813, "token_acc": 0.9126360338573156 }, { "epoch": 0.8046413502109705, "grad_norm": 0.60546875, "learning_rate": 6.692257726410793e-07, "loss": 0.25163471698760986, "step": 3814, "token_acc": 0.9282622139764997 }, { "epoch": 0.8048523206751055, "grad_norm": 0.7578125, "learning_rate": 6.690666570551355e-07, "loss": 0.25846314430236816, "step": 3815, "token_acc": 0.930965909090909 }, { "epoch": 0.8050632911392405, "grad_norm": 0.77734375, "learning_rate": 6.689075221349887e-07, "loss": 0.252838671207428, "step": 3816, "token_acc": 0.9208185053380783 }, { "epoch": 0.8052742616033756, "grad_norm": 0.68359375, "learning_rate": 6.687483678988369e-07, "loss": 0.23083123564720154, "step": 3817, "token_acc": 0.9360933592664629 }, { "epoch": 0.8054852320675105, "grad_norm": 0.71484375, "learning_rate": 6.685891943648811e-07, "loss": 0.2751753330230713, "step": 3818, "token_acc": 0.9243331518780621 }, { "epoch": 0.8056962025316455, "grad_norm": 0.68359375, "learning_rate": 6.68430001551324e-07, "loss": 0.23713719844818115, "step": 3819, "token_acc": 0.9321893163575976 }, { "epoch": 0.8059071729957806, "grad_norm": 0.65625, "learning_rate": 6.682707894763705e-07, "loss": 0.2415693700313568, "step": 3820, "token_acc": 0.9353092049016813 }, { "epoch": 0.8061181434599156, "grad_norm": 0.7265625, "learning_rate": 6.681115581582281e-07, "loss": 0.25185084342956543, "step": 3821, "token_acc": 0.9327757125154895 }, { "epoch": 0.8063291139240506, "grad_norm": 0.6640625, "learning_rate": 6.679523076151061e-07, "loss": 0.22767677903175354, "step": 3822, "token_acc": 0.9352498656636217 }, { "epoch": 0.8065400843881857, "grad_norm": 0.6796875, "learning_rate": 6.677930378652162e-07, "loss": 0.24404624104499817, "step": 3823, "token_acc": 0.9269949066213922 }, { "epoch": 0.8067510548523207, "grad_norm": 0.8046875, "learning_rate": 6.676337489267723e-07, "loss": 0.3289371728897095, "step": 3824, "token_acc": 0.9154366543665436 }, { "epoch": 0.8069620253164557, "grad_norm": 0.7421875, "learning_rate": 6.674744408179902e-07, "loss": 0.23183780908584595, "step": 3825, "token_acc": 0.9335997540731632 }, { "epoch": 0.8071729957805908, "grad_norm": 0.6015625, "learning_rate": 6.673151135570885e-07, "loss": 0.2832763195037842, "step": 3826, "token_acc": 0.924255082149819 }, { "epoch": 0.8073839662447257, "grad_norm": 0.88671875, "learning_rate": 6.671557671622874e-07, "loss": 0.281197726726532, "step": 3827, "token_acc": 0.9222346996069624 }, { "epoch": 0.8075949367088607, "grad_norm": 0.69921875, "learning_rate": 6.669964016518092e-07, "loss": 0.2679712176322937, "step": 3828, "token_acc": 0.9254788769351876 }, { "epoch": 0.8078059071729958, "grad_norm": 0.80078125, "learning_rate": 6.668370170438795e-07, "loss": 0.2317182719707489, "step": 3829, "token_acc": 0.9374828626268166 }, { "epoch": 0.8080168776371308, "grad_norm": 0.73046875, "learning_rate": 6.666776133567245e-07, "loss": 0.2268115133047104, "step": 3830, "token_acc": 0.9338954468802698 }, { "epoch": 0.8082278481012658, "grad_norm": 0.71484375, "learning_rate": 6.665181906085738e-07, "loss": 0.25034910440444946, "step": 3831, "token_acc": 0.9336720293536551 }, { "epoch": 0.8084388185654009, "grad_norm": 0.6875, "learning_rate": 6.663587488176585e-07, "loss": 0.25004035234451294, "step": 3832, "token_acc": 0.9244186046511628 }, { "epoch": 0.8086497890295359, "grad_norm": 0.66015625, "learning_rate": 6.661992880022123e-07, "loss": 0.29401612281799316, "step": 3833, "token_acc": 0.9240657698056801 }, { "epoch": 0.8088607594936709, "grad_norm": 1.515625, "learning_rate": 6.660398081804709e-07, "loss": 0.24970433115959167, "step": 3834, "token_acc": 0.9292814541323487 }, { "epoch": 0.8090717299578059, "grad_norm": 1.421875, "learning_rate": 6.65880309370672e-07, "loss": 0.2733500003814697, "step": 3835, "token_acc": 0.9193548387096774 }, { "epoch": 0.809282700421941, "grad_norm": 0.625, "learning_rate": 6.657207915910556e-07, "loss": 0.24261116981506348, "step": 3836, "token_acc": 0.9274809160305344 }, { "epoch": 0.8094936708860759, "grad_norm": 0.6796875, "learning_rate": 6.655612548598641e-07, "loss": 0.24547317624092102, "step": 3837, "token_acc": 0.9277411247251021 }, { "epoch": 0.8097046413502109, "grad_norm": 0.57421875, "learning_rate": 6.654016991953418e-07, "loss": 0.1914728581905365, "step": 3838, "token_acc": 0.9396673244995771 }, { "epoch": 0.809915611814346, "grad_norm": 0.87109375, "learning_rate": 6.652421246157355e-07, "loss": 0.24703305959701538, "step": 3839, "token_acc": 0.9255384615384615 }, { "epoch": 0.810126582278481, "grad_norm": 0.79296875, "learning_rate": 6.650825311392932e-07, "loss": 0.29354432225227356, "step": 3840, "token_acc": 0.9223088923556942 }, { "epoch": 0.810337552742616, "grad_norm": 0.71484375, "learning_rate": 6.649229187842666e-07, "loss": 0.26330727338790894, "step": 3841, "token_acc": 0.9265576052906369 }, { "epoch": 0.8105485232067511, "grad_norm": 0.8046875, "learning_rate": 6.647632875689082e-07, "loss": 0.2815566062927246, "step": 3842, "token_acc": 0.9252569293055123 }, { "epoch": 0.8107594936708861, "grad_norm": 0.69921875, "learning_rate": 6.646036375114733e-07, "loss": 0.2549968361854553, "step": 3843, "token_acc": 0.9295249922384353 }, { "epoch": 0.810970464135021, "grad_norm": 0.7421875, "learning_rate": 6.644439686302194e-07, "loss": 0.2554401755332947, "step": 3844, "token_acc": 0.9222963177732676 }, { "epoch": 0.8111814345991561, "grad_norm": 0.703125, "learning_rate": 6.642842809434056e-07, "loss": 0.2644885778427124, "step": 3845, "token_acc": 0.9204704564547745 }, { "epoch": 0.8113924050632911, "grad_norm": 0.6015625, "learning_rate": 6.641245744692941e-07, "loss": 0.21364383399486542, "step": 3846, "token_acc": 0.9382157123834887 }, { "epoch": 0.8116033755274261, "grad_norm": 1.0859375, "learning_rate": 6.63964849226148e-07, "loss": 0.25603336095809937, "step": 3847, "token_acc": 0.9297854555586514 }, { "epoch": 0.8118143459915612, "grad_norm": 0.734375, "learning_rate": 6.638051052322339e-07, "loss": 0.2295062094926834, "step": 3848, "token_acc": 0.9351057747284163 }, { "epoch": 0.8120253164556962, "grad_norm": 1.1171875, "learning_rate": 6.636453425058194e-07, "loss": 0.2455746978521347, "step": 3849, "token_acc": 0.9323979591836735 }, { "epoch": 0.8122362869198312, "grad_norm": 0.5703125, "learning_rate": 6.63485561065175e-07, "loss": 0.24265459179878235, "step": 3850, "token_acc": 0.9336397586900316 }, { "epoch": 0.8124472573839663, "grad_norm": 0.65625, "learning_rate": 6.633257609285729e-07, "loss": 0.2472209930419922, "step": 3851, "token_acc": 0.9316851008458035 }, { "epoch": 0.8126582278481013, "grad_norm": 0.58984375, "learning_rate": 6.631659421142877e-07, "loss": 0.2446625828742981, "step": 3852, "token_acc": 0.9308526223263991 }, { "epoch": 0.8128691983122363, "grad_norm": 3.21875, "learning_rate": 6.63006104640596e-07, "loss": 0.24848181009292603, "step": 3853, "token_acc": 0.9299533022014677 }, { "epoch": 0.8130801687763713, "grad_norm": 0.70703125, "learning_rate": 6.628462485257765e-07, "loss": 0.2545608878135681, "step": 3854, "token_acc": 0.9258653584082857 }, { "epoch": 0.8132911392405063, "grad_norm": 0.69921875, "learning_rate": 6.626863737881102e-07, "loss": 0.24015453457832336, "step": 3855, "token_acc": 0.9273325358851675 }, { "epoch": 0.8135021097046413, "grad_norm": 0.828125, "learning_rate": 6.625264804458801e-07, "loss": 0.23762840032577515, "step": 3856, "token_acc": 0.9321086261980831 }, { "epoch": 0.8137130801687764, "grad_norm": 0.78515625, "learning_rate": 6.623665685173714e-07, "loss": 0.3068075180053711, "step": 3857, "token_acc": 0.918010752688172 }, { "epoch": 0.8139240506329114, "grad_norm": 0.84375, "learning_rate": 6.622066380208712e-07, "loss": 0.3409074544906616, "step": 3858, "token_acc": 0.9071274298056156 }, { "epoch": 0.8141350210970464, "grad_norm": 0.69921875, "learning_rate": 6.620466889746692e-07, "loss": 0.25454872846603394, "step": 3859, "token_acc": 0.9332171893147503 }, { "epoch": 0.8143459915611815, "grad_norm": 0.70703125, "learning_rate": 6.618867213970567e-07, "loss": 0.2751002609729767, "step": 3860, "token_acc": 0.9186144747958321 }, { "epoch": 0.8145569620253165, "grad_norm": 0.66796875, "learning_rate": 6.617267353063273e-07, "loss": 0.26624971628189087, "step": 3861, "token_acc": 0.9239701410008294 }, { "epoch": 0.8147679324894515, "grad_norm": 0.8203125, "learning_rate": 6.615667307207769e-07, "loss": 0.2987936735153198, "step": 3862, "token_acc": 0.917902725077613 }, { "epoch": 0.8149789029535865, "grad_norm": 0.68359375, "learning_rate": 6.614067076587034e-07, "loss": 0.2832443118095398, "step": 3863, "token_acc": 0.9249581239530988 }, { "epoch": 0.8151898734177215, "grad_norm": 0.5859375, "learning_rate": 6.612466661384069e-07, "loss": 0.22179633378982544, "step": 3864, "token_acc": 0.9341674113792077 }, { "epoch": 0.8154008438818565, "grad_norm": 0.734375, "learning_rate": 6.610866061781893e-07, "loss": 0.33024394512176514, "step": 3865, "token_acc": 0.9145654108573124 }, { "epoch": 0.8156118143459916, "grad_norm": 0.76953125, "learning_rate": 6.609265277963546e-07, "loss": 0.2833813428878784, "step": 3866, "token_acc": 0.9177489177489178 }, { "epoch": 0.8158227848101266, "grad_norm": 0.7109375, "learning_rate": 6.607664310112095e-07, "loss": 0.28233611583709717, "step": 3867, "token_acc": 0.9201467268623025 }, { "epoch": 0.8160337552742616, "grad_norm": 1.390625, "learning_rate": 6.606063158410624e-07, "loss": 0.26586490869522095, "step": 3868, "token_acc": 0.9266169154228856 }, { "epoch": 0.8162447257383966, "grad_norm": 0.78515625, "learning_rate": 6.604461823042235e-07, "loss": 0.27420341968536377, "step": 3869, "token_acc": 0.9233628824706892 }, { "epoch": 0.8164556962025317, "grad_norm": 0.67578125, "learning_rate": 6.60286030419006e-07, "loss": 0.3113558888435364, "step": 3870, "token_acc": 0.9224441833137486 }, { "epoch": 0.8166666666666667, "grad_norm": 0.73046875, "learning_rate": 6.60125860203724e-07, "loss": 0.24610458314418793, "step": 3871, "token_acc": 0.9296600234466589 }, { "epoch": 0.8168776371308016, "grad_norm": 0.73828125, "learning_rate": 6.599656716766946e-07, "loss": 0.23708760738372803, "step": 3872, "token_acc": 0.9305512301463719 }, { "epoch": 0.8170886075949367, "grad_norm": 0.80859375, "learning_rate": 6.598054648562367e-07, "loss": 0.25672951340675354, "step": 3873, "token_acc": 0.9287025703794369 }, { "epoch": 0.8172995780590717, "grad_norm": 0.99609375, "learning_rate": 6.596452397606716e-07, "loss": 0.23325884342193604, "step": 3874, "token_acc": 0.9304985337243402 }, { "epoch": 0.8175105485232067, "grad_norm": 0.625, "learning_rate": 6.594849964083218e-07, "loss": 0.24925515055656433, "step": 3875, "token_acc": 0.9246088193456614 }, { "epoch": 0.8177215189873418, "grad_norm": 0.67578125, "learning_rate": 6.593247348175128e-07, "loss": 0.2375839352607727, "step": 3876, "token_acc": 0.9302114803625378 }, { "epoch": 0.8179324894514768, "grad_norm": 0.73046875, "learning_rate": 6.59164455006572e-07, "loss": 0.3156859874725342, "step": 3877, "token_acc": 0.9176300578034682 }, { "epoch": 0.8181434599156118, "grad_norm": 0.70703125, "learning_rate": 6.590041569938286e-07, "loss": 0.1921265423297882, "step": 3878, "token_acc": 0.9424910607866508 }, { "epoch": 0.8183544303797469, "grad_norm": 0.8046875, "learning_rate": 6.58843840797614e-07, "loss": 0.26523125171661377, "step": 3879, "token_acc": 0.9298245614035088 }, { "epoch": 0.8185654008438819, "grad_norm": 0.87890625, "learning_rate": 6.586835064362617e-07, "loss": 0.24606674909591675, "step": 3880, "token_acc": 0.927725478287276 }, { "epoch": 0.8187763713080168, "grad_norm": 0.65625, "learning_rate": 6.585231539281074e-07, "loss": 0.23699648678302765, "step": 3881, "token_acc": 0.9321022727272728 }, { "epoch": 0.8189873417721519, "grad_norm": 0.89453125, "learning_rate": 6.583627832914888e-07, "loss": 0.30540046095848083, "step": 3882, "token_acc": 0.9202112871837642 }, { "epoch": 0.8191983122362869, "grad_norm": 0.87890625, "learning_rate": 6.582023945447454e-07, "loss": 0.32462117075920105, "step": 3883, "token_acc": 0.9127962085308057 }, { "epoch": 0.8194092827004219, "grad_norm": 0.73828125, "learning_rate": 6.580419877062194e-07, "loss": 0.26315778493881226, "step": 3884, "token_acc": 0.9240506329113924 }, { "epoch": 0.819620253164557, "grad_norm": 0.73046875, "learning_rate": 6.578815627942542e-07, "loss": 0.24795284867286682, "step": 3885, "token_acc": 0.9381266112861644 }, { "epoch": 0.819831223628692, "grad_norm": 0.734375, "learning_rate": 6.577211198271961e-07, "loss": 0.2541196644306183, "step": 3886, "token_acc": 0.9287869643934822 }, { "epoch": 0.820042194092827, "grad_norm": 0.95703125, "learning_rate": 6.575606588233931e-07, "loss": 0.2700772285461426, "step": 3887, "token_acc": 0.9251141552511416 }, { "epoch": 0.8202531645569621, "grad_norm": 0.8515625, "learning_rate": 6.57400179801195e-07, "loss": 0.2850884795188904, "step": 3888, "token_acc": 0.922990758891067 }, { "epoch": 0.820464135021097, "grad_norm": 0.64453125, "learning_rate": 6.572396827789542e-07, "loss": 0.25250065326690674, "step": 3889, "token_acc": 0.9327235172617291 }, { "epoch": 0.820675105485232, "grad_norm": 0.74609375, "learning_rate": 6.570791677750249e-07, "loss": 0.25007957220077515, "step": 3890, "token_acc": 0.9261142686209991 }, { "epoch": 0.8208860759493671, "grad_norm": 1.0078125, "learning_rate": 6.569186348077631e-07, "loss": 0.2537238597869873, "step": 3891, "token_acc": 0.9259365994236312 }, { "epoch": 0.8210970464135021, "grad_norm": 0.77734375, "learning_rate": 6.567580838955274e-07, "loss": 0.24524493515491486, "step": 3892, "token_acc": 0.9259150805270864 }, { "epoch": 0.8213080168776371, "grad_norm": 0.7421875, "learning_rate": 6.565975150566779e-07, "loss": 0.272413432598114, "step": 3893, "token_acc": 0.9166395310973624 }, { "epoch": 0.8215189873417722, "grad_norm": 0.83203125, "learning_rate": 6.564369283095772e-07, "loss": 0.26138147711753845, "step": 3894, "token_acc": 0.9254302103250478 }, { "epoch": 0.8217299578059072, "grad_norm": 0.68359375, "learning_rate": 6.562763236725896e-07, "loss": 0.24631807208061218, "step": 3895, "token_acc": 0.9333701962952723 }, { "epoch": 0.8219409282700422, "grad_norm": 0.6328125, "learning_rate": 6.561157011640818e-07, "loss": 0.2188456952571869, "step": 3896, "token_acc": 0.9359370609721832 }, { "epoch": 0.8221518987341773, "grad_norm": 0.71875, "learning_rate": 6.559550608024221e-07, "loss": 0.22681105136871338, "step": 3897, "token_acc": 0.9311657879320445 }, { "epoch": 0.8223628691983123, "grad_norm": 0.71484375, "learning_rate": 6.557944026059813e-07, "loss": 0.23474553227424622, "step": 3898, "token_acc": 0.9352104147744474 }, { "epoch": 0.8225738396624472, "grad_norm": 0.9453125, "learning_rate": 6.556337265931319e-07, "loss": 0.31797876954078674, "step": 3899, "token_acc": 0.9230009871668312 }, { "epoch": 0.8227848101265823, "grad_norm": 0.83984375, "learning_rate": 6.554730327822485e-07, "loss": 0.2675440013408661, "step": 3900, "token_acc": 0.925753339546443 }, { "epoch": 0.8229957805907173, "grad_norm": 0.87890625, "learning_rate": 6.55312321191708e-07, "loss": 0.244090273976326, "step": 3901, "token_acc": 0.928956228956229 }, { "epoch": 0.8232067510548523, "grad_norm": 0.6796875, "learning_rate": 6.55151591839889e-07, "loss": 0.2744724154472351, "step": 3902, "token_acc": 0.923686527737012 }, { "epoch": 0.8234177215189873, "grad_norm": 0.72265625, "learning_rate": 6.549908447451722e-07, "loss": 0.26983070373535156, "step": 3903, "token_acc": 0.924871168972064 }, { "epoch": 0.8236286919831224, "grad_norm": 0.70703125, "learning_rate": 6.548300799259405e-07, "loss": 0.25931838154792786, "step": 3904, "token_acc": 0.9237923792379238 }, { "epoch": 0.8238396624472574, "grad_norm": 0.58203125, "learning_rate": 6.546692974005788e-07, "loss": 0.22445806860923767, "step": 3905, "token_acc": 0.9373854612095296 }, { "epoch": 0.8240506329113924, "grad_norm": 0.66015625, "learning_rate": 6.545084971874736e-07, "loss": 0.23917442560195923, "step": 3906, "token_acc": 0.9317343173431735 }, { "epoch": 0.8242616033755275, "grad_norm": 1.140625, "learning_rate": 6.543476793050142e-07, "loss": 0.2863171696662903, "step": 3907, "token_acc": 0.926166568222091 }, { "epoch": 0.8244725738396624, "grad_norm": 0.83984375, "learning_rate": 6.541868437715913e-07, "loss": 0.27904772758483887, "step": 3908, "token_acc": 0.9217772215269087 }, { "epoch": 0.8246835443037974, "grad_norm": 0.92578125, "learning_rate": 6.540259906055978e-07, "loss": 0.22931741178035736, "step": 3909, "token_acc": 0.9357903829542182 }, { "epoch": 0.8248945147679325, "grad_norm": 0.703125, "learning_rate": 6.538651198254285e-07, "loss": 0.23171097040176392, "step": 3910, "token_acc": 0.9338391502276177 }, { "epoch": 0.8251054852320675, "grad_norm": 0.7265625, "learning_rate": 6.537042314494802e-07, "loss": 0.2497771680355072, "step": 3911, "token_acc": 0.9305879305879305 }, { "epoch": 0.8253164556962025, "grad_norm": 0.76171875, "learning_rate": 6.535433254961525e-07, "loss": 0.30600184202194214, "step": 3912, "token_acc": 0.9182446648632402 }, { "epoch": 0.8255274261603376, "grad_norm": 0.92578125, "learning_rate": 6.533824019838459e-07, "loss": 0.2714044749736786, "step": 3913, "token_acc": 0.9179349447925992 }, { "epoch": 0.8257383966244726, "grad_norm": 0.83984375, "learning_rate": 6.532214609309634e-07, "loss": 0.2965072989463806, "step": 3914, "token_acc": 0.9242149758454107 }, { "epoch": 0.8259493670886076, "grad_norm": 0.6796875, "learning_rate": 6.530605023559097e-07, "loss": 0.2105378657579422, "step": 3915, "token_acc": 0.9412673879443586 }, { "epoch": 0.8261603375527427, "grad_norm": 0.7421875, "learning_rate": 6.528995262770924e-07, "loss": 0.2803211212158203, "step": 3916, "token_acc": 0.925280199252802 }, { "epoch": 0.8263713080168776, "grad_norm": 0.67578125, "learning_rate": 6.5273853271292e-07, "loss": 0.24325792491436005, "step": 3917, "token_acc": 0.9304019422713785 }, { "epoch": 0.8265822784810126, "grad_norm": 0.671875, "learning_rate": 6.525775216818037e-07, "loss": 0.24964752793312073, "step": 3918, "token_acc": 0.9241958041958042 }, { "epoch": 0.8267932489451477, "grad_norm": 0.78515625, "learning_rate": 6.524164932021563e-07, "loss": 0.23925700783729553, "step": 3919, "token_acc": 0.9349670122525919 }, { "epoch": 0.8270042194092827, "grad_norm": 0.66796875, "learning_rate": 6.522554472923927e-07, "loss": 0.25324904918670654, "step": 3920, "token_acc": 0.926482645531566 }, { "epoch": 0.8272151898734177, "grad_norm": 0.75, "learning_rate": 6.520943839709304e-07, "loss": 0.3274841904640198, "step": 3921, "token_acc": 0.9202209266646211 }, { "epoch": 0.8274261603375528, "grad_norm": 0.67578125, "learning_rate": 6.519333032561878e-07, "loss": 0.2633184790611267, "step": 3922, "token_acc": 0.9288164665523156 }, { "epoch": 0.8276371308016878, "grad_norm": 0.8125, "learning_rate": 6.517722051665858e-07, "loss": 0.24954834580421448, "step": 3923, "token_acc": 0.9327659574468085 }, { "epoch": 0.8278481012658228, "grad_norm": 0.75, "learning_rate": 6.516110897205479e-07, "loss": 0.2670617997646332, "step": 3924, "token_acc": 0.9292543021032504 }, { "epoch": 0.8280590717299579, "grad_norm": 0.73828125, "learning_rate": 6.514499569364984e-07, "loss": 0.30051189661026, "step": 3925, "token_acc": 0.9186323880201431 }, { "epoch": 0.8282700421940928, "grad_norm": 0.6796875, "learning_rate": 6.512888068328646e-07, "loss": 0.283771276473999, "step": 3926, "token_acc": 0.9243208279430789 }, { "epoch": 0.8284810126582278, "grad_norm": 0.796875, "learning_rate": 6.511276394280752e-07, "loss": 0.3061697483062744, "step": 3927, "token_acc": 0.9117250673854448 }, { "epoch": 0.8286919831223629, "grad_norm": 0.671875, "learning_rate": 6.509664547405611e-07, "loss": 0.22393658757209778, "step": 3928, "token_acc": 0.9349112426035503 }, { "epoch": 0.8289029535864979, "grad_norm": 0.7890625, "learning_rate": 6.508052527887554e-07, "loss": 0.2411932647228241, "step": 3929, "token_acc": 0.9296655204751485 }, { "epoch": 0.8291139240506329, "grad_norm": 0.65234375, "learning_rate": 6.506440335910922e-07, "loss": 0.236104816198349, "step": 3930, "token_acc": 0.9346779048765804 }, { "epoch": 0.829324894514768, "grad_norm": 0.5625, "learning_rate": 6.504827971660088e-07, "loss": 0.2544421851634979, "step": 3931, "token_acc": 0.9273385300668151 }, { "epoch": 0.829535864978903, "grad_norm": 0.73828125, "learning_rate": 6.50321543531944e-07, "loss": 0.25943803787231445, "step": 3932, "token_acc": 0.9285714285714286 }, { "epoch": 0.829746835443038, "grad_norm": 0.94921875, "learning_rate": 6.501602727073383e-07, "loss": 0.2832021117210388, "step": 3933, "token_acc": 0.9200488251449497 }, { "epoch": 0.8299578059071729, "grad_norm": 0.81640625, "learning_rate": 6.499989847106346e-07, "loss": 0.34506621956825256, "step": 3934, "token_acc": 0.9123024480942051 }, { "epoch": 0.830168776371308, "grad_norm": 1.5078125, "learning_rate": 6.498376795602774e-07, "loss": 0.2370583862066269, "step": 3935, "token_acc": 0.9299007444168734 }, { "epoch": 0.830379746835443, "grad_norm": 0.765625, "learning_rate": 6.496763572747132e-07, "loss": 0.23779910802841187, "step": 3936, "token_acc": 0.9311216429699842 }, { "epoch": 0.830590717299578, "grad_norm": 0.671875, "learning_rate": 6.495150178723908e-07, "loss": 0.2785772383213043, "step": 3937, "token_acc": 0.9267110841913991 }, { "epoch": 0.8308016877637131, "grad_norm": 0.66015625, "learning_rate": 6.493536613717606e-07, "loss": 0.25046637654304504, "step": 3938, "token_acc": 0.9304375196726472 }, { "epoch": 0.8310126582278481, "grad_norm": 0.859375, "learning_rate": 6.49192287791275e-07, "loss": 0.2880856692790985, "step": 3939, "token_acc": 0.9268661226956785 }, { "epoch": 0.8312236286919831, "grad_norm": 0.92578125, "learning_rate": 6.490308971493887e-07, "loss": 0.3179314136505127, "step": 3940, "token_acc": 0.9074973031283711 }, { "epoch": 0.8314345991561182, "grad_norm": 0.640625, "learning_rate": 6.488694894645579e-07, "loss": 0.252625435590744, "step": 3941, "token_acc": 0.9264032073310424 }, { "epoch": 0.8316455696202532, "grad_norm": 0.98046875, "learning_rate": 6.487080647552409e-07, "loss": 0.27672988176345825, "step": 3942, "token_acc": 0.9236596736596736 }, { "epoch": 0.8318565400843881, "grad_norm": 0.69921875, "learning_rate": 6.48546623039898e-07, "loss": 0.23155945539474487, "step": 3943, "token_acc": 0.9358766233766234 }, { "epoch": 0.8320675105485232, "grad_norm": 0.6484375, "learning_rate": 6.483851643369915e-07, "loss": 0.24412915110588074, "step": 3944, "token_acc": 0.9290898939140145 }, { "epoch": 0.8322784810126582, "grad_norm": 0.55859375, "learning_rate": 6.482236886649854e-07, "loss": 0.19203822314739227, "step": 3945, "token_acc": 0.9428994082840236 }, { "epoch": 0.8324894514767932, "grad_norm": 0.6328125, "learning_rate": 6.480621960423462e-07, "loss": 0.22454287111759186, "step": 3946, "token_acc": 0.938048090523338 }, { "epoch": 0.8327004219409283, "grad_norm": 0.71875, "learning_rate": 6.479006864875416e-07, "loss": 0.2593839168548584, "step": 3947, "token_acc": 0.9289251383629479 }, { "epoch": 0.8329113924050633, "grad_norm": 0.72265625, "learning_rate": 6.477391600190417e-07, "loss": 0.2662603557109833, "step": 3948, "token_acc": 0.9284441805225653 }, { "epoch": 0.8331223628691983, "grad_norm": 1.0234375, "learning_rate": 6.475776166553184e-07, "loss": 0.28308922052383423, "step": 3949, "token_acc": 0.9240596167494677 }, { "epoch": 0.8333333333333334, "grad_norm": 0.671875, "learning_rate": 6.474160564148454e-07, "loss": 0.26084619760513306, "step": 3950, "token_acc": 0.9276620370370371 }, { "epoch": 0.8335443037974684, "grad_norm": 0.65625, "learning_rate": 6.472544793160988e-07, "loss": 0.25631803274154663, "step": 3951, "token_acc": 0.9278646664538781 }, { "epoch": 0.8337552742616033, "grad_norm": 0.85546875, "learning_rate": 6.47092885377556e-07, "loss": 0.2739582657814026, "step": 3952, "token_acc": 0.9194953439471313 }, { "epoch": 0.8339662447257384, "grad_norm": 0.578125, "learning_rate": 6.469312746176969e-07, "loss": 0.23267674446105957, "step": 3953, "token_acc": 0.933768656716418 }, { "epoch": 0.8341772151898734, "grad_norm": 0.8203125, "learning_rate": 6.467696470550028e-07, "loss": 0.2560337781906128, "step": 3954, "token_acc": 0.9290530636177075 }, { "epoch": 0.8343881856540084, "grad_norm": 0.7578125, "learning_rate": 6.466080027079575e-07, "loss": 0.2677566409111023, "step": 3955, "token_acc": 0.9251925192519251 }, { "epoch": 0.8345991561181435, "grad_norm": 0.765625, "learning_rate": 6.464463415950461e-07, "loss": 0.25645899772644043, "step": 3956, "token_acc": 0.9324828873677661 }, { "epoch": 0.8348101265822785, "grad_norm": 0.734375, "learning_rate": 6.462846637347561e-07, "loss": 0.24950836598873138, "step": 3957, "token_acc": 0.9276018099547512 }, { "epoch": 0.8350210970464135, "grad_norm": 0.828125, "learning_rate": 6.461229691455765e-07, "loss": 0.27104687690734863, "step": 3958, "token_acc": 0.9280798348245011 }, { "epoch": 0.8352320675105486, "grad_norm": 0.71484375, "learning_rate": 6.459612578459987e-07, "loss": 0.2055513560771942, "step": 3959, "token_acc": 0.9366342469790746 }, { "epoch": 0.8354430379746836, "grad_norm": 0.6171875, "learning_rate": 6.457995298545156e-07, "loss": 0.21447184681892395, "step": 3960, "token_acc": 0.9356951473652872 }, { "epoch": 0.8356540084388185, "grad_norm": 0.69140625, "learning_rate": 6.456377851896224e-07, "loss": 0.24882611632347107, "step": 3961, "token_acc": 0.9324612108305446 }, { "epoch": 0.8358649789029536, "grad_norm": 0.66796875, "learning_rate": 6.454760238698155e-07, "loss": 0.26918721199035645, "step": 3962, "token_acc": 0.9250288350634371 }, { "epoch": 0.8360759493670886, "grad_norm": 0.7578125, "learning_rate": 6.453142459135943e-07, "loss": 0.26664412021636963, "step": 3963, "token_acc": 0.9265785609397944 }, { "epoch": 0.8362869198312236, "grad_norm": 0.88671875, "learning_rate": 6.451524513394589e-07, "loss": 0.2968097925186157, "step": 3964, "token_acc": 0.9287539936102236 }, { "epoch": 0.8364978902953587, "grad_norm": 0.65625, "learning_rate": 6.449906401659125e-07, "loss": 0.27162426710128784, "step": 3965, "token_acc": 0.9238375200427579 }, { "epoch": 0.8367088607594937, "grad_norm": 0.80078125, "learning_rate": 6.448288124114587e-07, "loss": 0.2802884578704834, "step": 3966, "token_acc": 0.9211630695443646 }, { "epoch": 0.8369198312236287, "grad_norm": 0.625, "learning_rate": 6.446669680946046e-07, "loss": 0.277859628200531, "step": 3967, "token_acc": 0.9259823332318002 }, { "epoch": 0.8371308016877637, "grad_norm": 0.69921875, "learning_rate": 6.445051072338583e-07, "loss": 0.21875625848770142, "step": 3968, "token_acc": 0.9410931747380346 }, { "epoch": 0.8373417721518988, "grad_norm": 0.703125, "learning_rate": 6.443432298477299e-07, "loss": 0.2868942618370056, "step": 3969, "token_acc": 0.922421661089139 }, { "epoch": 0.8375527426160337, "grad_norm": 0.671875, "learning_rate": 6.441813359547313e-07, "loss": 0.22499072551727295, "step": 3970, "token_acc": 0.9321218599679316 }, { "epoch": 0.8377637130801687, "grad_norm": 0.66015625, "learning_rate": 6.440194255733768e-07, "loss": 0.2296089380979538, "step": 3971, "token_acc": 0.9370567375886525 }, { "epoch": 0.8379746835443038, "grad_norm": 0.78125, "learning_rate": 6.43857498722182e-07, "loss": 0.2627893388271332, "step": 3972, "token_acc": 0.9245115452930728 }, { "epoch": 0.8381856540084388, "grad_norm": 0.73046875, "learning_rate": 6.436955554196644e-07, "loss": 0.24663309752941132, "step": 3973, "token_acc": 0.9267788603483909 }, { "epoch": 0.8383966244725738, "grad_norm": 0.76171875, "learning_rate": 6.435335956843439e-07, "loss": 0.27100545167922974, "step": 3974, "token_acc": 0.927940757618912 }, { "epoch": 0.8386075949367089, "grad_norm": 0.69921875, "learning_rate": 6.433716195347421e-07, "loss": 0.2786221504211426, "step": 3975, "token_acc": 0.9308885754583921 }, { "epoch": 0.8388185654008439, "grad_norm": 0.703125, "learning_rate": 6.432096269893818e-07, "loss": 0.23809830844402313, "step": 3976, "token_acc": 0.9304645198062126 }, { "epoch": 0.8390295358649789, "grad_norm": 0.734375, "learning_rate": 6.430476180667888e-07, "loss": 0.26453155279159546, "step": 3977, "token_acc": 0.9257698541329011 }, { "epoch": 0.839240506329114, "grad_norm": 0.75, "learning_rate": 6.428855927854897e-07, "loss": 0.27957895398139954, "step": 3978, "token_acc": 0.9273451870018393 }, { "epoch": 0.8394514767932489, "grad_norm": 1.6875, "learning_rate": 6.427235511640137e-07, "loss": 0.23777717351913452, "step": 3979, "token_acc": 0.9330232558139535 }, { "epoch": 0.8396624472573839, "grad_norm": 0.80078125, "learning_rate": 6.42561493220892e-07, "loss": 0.28891676664352417, "step": 3980, "token_acc": 0.9276375798114929 }, { "epoch": 0.839873417721519, "grad_norm": 0.83203125, "learning_rate": 6.423994189746566e-07, "loss": 0.29886090755462646, "step": 3981, "token_acc": 0.9226925338036449 }, { "epoch": 0.840084388185654, "grad_norm": 0.7109375, "learning_rate": 6.422373284438424e-07, "loss": 0.24945876002311707, "step": 3982, "token_acc": 0.933082271147161 }, { "epoch": 0.840295358649789, "grad_norm": 0.66015625, "learning_rate": 6.420752216469858e-07, "loss": 0.2574917674064636, "step": 3983, "token_acc": 0.9284598562949078 }, { "epoch": 0.8405063291139241, "grad_norm": 0.66796875, "learning_rate": 6.419130986026251e-07, "loss": 0.2568361461162567, "step": 3984, "token_acc": 0.9307210031347962 }, { "epoch": 0.8407172995780591, "grad_norm": 1.140625, "learning_rate": 6.417509593293006e-07, "loss": 0.23255518078804016, "step": 3985, "token_acc": 0.9300341296928327 }, { "epoch": 0.8409282700421941, "grad_norm": 0.6875, "learning_rate": 6.415888038455538e-07, "loss": 0.2272457331418991, "step": 3986, "token_acc": 0.9336861768368617 }, { "epoch": 0.8411392405063292, "grad_norm": 0.67578125, "learning_rate": 6.414266321699291e-07, "loss": 0.23200049996376038, "step": 3987, "token_acc": 0.9322139303482587 }, { "epoch": 0.8413502109704641, "grad_norm": 0.671875, "learning_rate": 6.41264444320972e-07, "loss": 0.2394973784685135, "step": 3988, "token_acc": 0.9310046189376443 }, { "epoch": 0.8415611814345991, "grad_norm": 0.58984375, "learning_rate": 6.411022403172299e-07, "loss": 0.2145586907863617, "step": 3989, "token_acc": 0.9423522595596755 }, { "epoch": 0.8417721518987342, "grad_norm": 0.8203125, "learning_rate": 6.409400201772524e-07, "loss": 0.25166648626327515, "step": 3990, "token_acc": 0.9234129295282469 }, { "epoch": 0.8419831223628692, "grad_norm": 0.65234375, "learning_rate": 6.407777839195908e-07, "loss": 0.21799296140670776, "step": 3991, "token_acc": 0.9328051643192489 }, { "epoch": 0.8421940928270042, "grad_norm": 0.66796875, "learning_rate": 6.40615531562798e-07, "loss": 0.22338657081127167, "step": 3992, "token_acc": 0.9342629482071713 }, { "epoch": 0.8424050632911393, "grad_norm": 0.6953125, "learning_rate": 6.40453263125429e-07, "loss": 0.2609310746192932, "step": 3993, "token_acc": 0.9242424242424242 }, { "epoch": 0.8426160337552743, "grad_norm": 0.88671875, "learning_rate": 6.402909786260406e-07, "loss": 0.2577947974205017, "step": 3994, "token_acc": 0.9258555133079848 }, { "epoch": 0.8428270042194093, "grad_norm": 0.62890625, "learning_rate": 6.401286780831914e-07, "loss": 0.2324633002281189, "step": 3995, "token_acc": 0.930834059866318 }, { "epoch": 0.8430379746835444, "grad_norm": 0.87109375, "learning_rate": 6.399663615154417e-07, "loss": 0.27545166015625, "step": 3996, "token_acc": 0.9233870967741935 }, { "epoch": 0.8432489451476793, "grad_norm": 0.76171875, "learning_rate": 6.39804028941354e-07, "loss": 0.2462514042854309, "step": 3997, "token_acc": 0.9251863684771033 }, { "epoch": 0.8434599156118143, "grad_norm": 0.69140625, "learning_rate": 6.396416803794921e-07, "loss": 0.2465493083000183, "step": 3998, "token_acc": 0.9317258195610946 }, { "epoch": 0.8436708860759494, "grad_norm": 0.69921875, "learning_rate": 6.394793158484225e-07, "loss": 0.2563038468360901, "step": 3999, "token_acc": 0.9270976616231087 }, { "epoch": 0.8438818565400844, "grad_norm": 0.65234375, "learning_rate": 6.393169353667124e-07, "loss": 0.25324690341949463, "step": 4000, "token_acc": 0.9291569086651054 }, { "epoch": 0.8438818565400844, "eval_loss": 0.43371912837028503, "eval_runtime": 245.4646, "eval_samples_per_second": 137.311, "eval_steps_per_second": 2.147, "eval_token_acc": 0.8990801399982051, "step": 4000 }, { "epoch": 0.8440928270042194, "grad_norm": 0.70703125, "learning_rate": 6.391545389529314e-07, "loss": 0.2749151587486267, "step": 4001, "token_acc": 0.9299941417691857 }, { "epoch": 0.8443037974683544, "grad_norm": 0.70703125, "learning_rate": 6.389921266256513e-07, "loss": 0.24990534782409668, "step": 4002, "token_acc": 0.9284128745837957 }, { "epoch": 0.8445147679324895, "grad_norm": 0.8515625, "learning_rate": 6.38829698403445e-07, "loss": 0.2420434057712555, "step": 4003, "token_acc": 0.9279331219371577 }, { "epoch": 0.8447257383966245, "grad_norm": 0.6171875, "learning_rate": 6.386672543048878e-07, "loss": 0.2700849771499634, "step": 4004, "token_acc": 0.9280892280598767 }, { "epoch": 0.8449367088607594, "grad_norm": 0.6875, "learning_rate": 6.385047943485563e-07, "loss": 0.2698262929916382, "step": 4005, "token_acc": 0.9224806201550387 }, { "epoch": 0.8451476793248945, "grad_norm": 0.7109375, "learning_rate": 6.383423185530292e-07, "loss": 0.2685922384262085, "step": 4006, "token_acc": 0.9281609195402298 }, { "epoch": 0.8453586497890295, "grad_norm": 1.03125, "learning_rate": 6.381798269368871e-07, "loss": 0.28427624702453613, "step": 4007, "token_acc": 0.9218662169758292 }, { "epoch": 0.8455696202531645, "grad_norm": 0.71484375, "learning_rate": 6.380173195187122e-07, "loss": 0.229273721575737, "step": 4008, "token_acc": 0.9344210856604958 }, { "epoch": 0.8457805907172996, "grad_norm": 0.6796875, "learning_rate": 6.378547963170887e-07, "loss": 0.24727031588554382, "step": 4009, "token_acc": 0.9304625199362041 }, { "epoch": 0.8459915611814346, "grad_norm": 0.73828125, "learning_rate": 6.376922573506025e-07, "loss": 0.2529195547103882, "step": 4010, "token_acc": 0.9266955266955267 }, { "epoch": 0.8462025316455696, "grad_norm": 0.82421875, "learning_rate": 6.375297026378412e-07, "loss": 0.25183016061782837, "step": 4011, "token_acc": 0.9292958562735439 }, { "epoch": 0.8464135021097047, "grad_norm": 0.71875, "learning_rate": 6.373671321973944e-07, "loss": 0.2100679874420166, "step": 4012, "token_acc": 0.9414098260604211 }, { "epoch": 0.8466244725738397, "grad_norm": 0.765625, "learning_rate": 6.372045460478532e-07, "loss": 0.28501707315444946, "step": 4013, "token_acc": 0.9220743958729297 }, { "epoch": 0.8468354430379746, "grad_norm": 0.62109375, "learning_rate": 6.37041944207811e-07, "loss": 0.22804370522499084, "step": 4014, "token_acc": 0.9354936402180497 }, { "epoch": 0.8470464135021097, "grad_norm": 0.6640625, "learning_rate": 6.368793266958625e-07, "loss": 0.2854589819908142, "step": 4015, "token_acc": 0.9223911903513372 }, { "epoch": 0.8472573839662447, "grad_norm": 0.6328125, "learning_rate": 6.367166935306048e-07, "loss": 0.24029400944709778, "step": 4016, "token_acc": 0.9272829763246899 }, { "epoch": 0.8474683544303797, "grad_norm": 0.6953125, "learning_rate": 6.365540447306355e-07, "loss": 0.23028738796710968, "step": 4017, "token_acc": 0.9340396445659603 }, { "epoch": 0.8476793248945148, "grad_norm": 0.87890625, "learning_rate": 6.363913803145557e-07, "loss": 0.2532397210597992, "step": 4018, "token_acc": 0.9285287081339713 }, { "epoch": 0.8478902953586498, "grad_norm": 0.625, "learning_rate": 6.362287003009672e-07, "loss": 0.22738203406333923, "step": 4019, "token_acc": 0.9304767309875142 }, { "epoch": 0.8481012658227848, "grad_norm": 0.72265625, "learning_rate": 6.360660047084736e-07, "loss": 0.2883397936820984, "step": 4020, "token_acc": 0.9223065250379363 }, { "epoch": 0.8483122362869199, "grad_norm": 0.65234375, "learning_rate": 6.359032935556808e-07, "loss": 0.21398988366127014, "step": 4021, "token_acc": 0.9430516867842773 }, { "epoch": 0.8485232067510549, "grad_norm": 0.68359375, "learning_rate": 6.357405668611962e-07, "loss": 0.2394147515296936, "step": 4022, "token_acc": 0.9319629415170817 }, { "epoch": 0.8487341772151898, "grad_norm": 0.67578125, "learning_rate": 6.355778246436286e-07, "loss": 0.2525699734687805, "step": 4023, "token_acc": 0.9300316729052692 }, { "epoch": 0.8489451476793249, "grad_norm": 0.703125, "learning_rate": 6.354150669215896e-07, "loss": 0.23859259486198425, "step": 4024, "token_acc": 0.9332210998877666 }, { "epoch": 0.8491561181434599, "grad_norm": 0.7265625, "learning_rate": 6.352522937136913e-07, "loss": 0.28634679317474365, "step": 4025, "token_acc": 0.9241084165477889 }, { "epoch": 0.8493670886075949, "grad_norm": 0.76171875, "learning_rate": 6.350895050385485e-07, "loss": 0.24875682592391968, "step": 4026, "token_acc": 0.9309080654258319 }, { "epoch": 0.84957805907173, "grad_norm": 0.640625, "learning_rate": 6.349267009147774e-07, "loss": 0.22554785013198853, "step": 4027, "token_acc": 0.933997509339975 }, { "epoch": 0.849789029535865, "grad_norm": 0.80078125, "learning_rate": 6.347638813609962e-07, "loss": 0.3481491208076477, "step": 4028, "token_acc": 0.9139344262295082 }, { "epoch": 0.85, "grad_norm": 0.8984375, "learning_rate": 6.346010463958242e-07, "loss": 0.21887174248695374, "step": 4029, "token_acc": 0.9430670339761249 }, { "epoch": 0.8502109704641351, "grad_norm": 0.6484375, "learning_rate": 6.344381960378835e-07, "loss": 0.23873837292194366, "step": 4030, "token_acc": 0.928354584092289 }, { "epoch": 0.8504219409282701, "grad_norm": 0.62890625, "learning_rate": 6.342753303057974e-07, "loss": 0.24963217973709106, "step": 4031, "token_acc": 0.9254255606592813 }, { "epoch": 0.850632911392405, "grad_norm": 0.671875, "learning_rate": 6.341124492181905e-07, "loss": 0.217873215675354, "step": 4032, "token_acc": 0.9429594929732709 }, { "epoch": 0.85084388185654, "grad_norm": 0.6328125, "learning_rate": 6.3394955279369e-07, "loss": 0.2454797625541687, "step": 4033, "token_acc": 0.9345002823263693 }, { "epoch": 0.8510548523206751, "grad_norm": 0.76171875, "learning_rate": 6.337866410509244e-07, "loss": 0.2986906170845032, "step": 4034, "token_acc": 0.9169600938967136 }, { "epoch": 0.8512658227848101, "grad_norm": 0.625, "learning_rate": 6.33623714008524e-07, "loss": 0.25454509258270264, "step": 4035, "token_acc": 0.9271683288002418 }, { "epoch": 0.8514767932489451, "grad_norm": 0.80078125, "learning_rate": 6.334607716851211e-07, "loss": 0.26961055397987366, "step": 4036, "token_acc": 0.926006528835691 }, { "epoch": 0.8516877637130802, "grad_norm": 0.6953125, "learning_rate": 6.33297814099349e-07, "loss": 0.295684278011322, "step": 4037, "token_acc": 0.9138373751783166 }, { "epoch": 0.8518987341772152, "grad_norm": 0.9140625, "learning_rate": 6.331348412698439e-07, "loss": 0.28541135787963867, "step": 4038, "token_acc": 0.9265569917743831 }, { "epoch": 0.8521097046413502, "grad_norm": 0.6953125, "learning_rate": 6.329718532152428e-07, "loss": 0.2573990225791931, "step": 4039, "token_acc": 0.9276573517889789 }, { "epoch": 0.8523206751054853, "grad_norm": 0.71484375, "learning_rate": 6.328088499541849e-07, "loss": 0.2427927553653717, "step": 4040, "token_acc": 0.9306480920654149 }, { "epoch": 0.8525316455696202, "grad_norm": 0.60546875, "learning_rate": 6.326458315053107e-07, "loss": 0.24568727612495422, "step": 4041, "token_acc": 0.9323737099402498 }, { "epoch": 0.8527426160337552, "grad_norm": 0.64453125, "learning_rate": 6.324827978872631e-07, "loss": 0.24748145043849945, "step": 4042, "token_acc": 0.9305789769533446 }, { "epoch": 0.8529535864978903, "grad_norm": 0.71484375, "learning_rate": 6.323197491186864e-07, "loss": 0.27441835403442383, "step": 4043, "token_acc": 0.9257857974388825 }, { "epoch": 0.8531645569620253, "grad_norm": 0.87890625, "learning_rate": 6.321566852182261e-07, "loss": 0.27532127499580383, "step": 4044, "token_acc": 0.9201210287443268 }, { "epoch": 0.8533755274261603, "grad_norm": 0.68359375, "learning_rate": 6.319936062045304e-07, "loss": 0.24706800282001495, "step": 4045, "token_acc": 0.9343106850859881 }, { "epoch": 0.8535864978902954, "grad_norm": 0.72265625, "learning_rate": 6.318305120962488e-07, "loss": 0.27837932109832764, "step": 4046, "token_acc": 0.9215258855585831 }, { "epoch": 0.8537974683544304, "grad_norm": 2.203125, "learning_rate": 6.316674029120323e-07, "loss": 0.2541462182998657, "step": 4047, "token_acc": 0.92520035618878 }, { "epoch": 0.8540084388185654, "grad_norm": 0.63671875, "learning_rate": 6.315042786705338e-07, "loss": 0.2086646556854248, "step": 4048, "token_acc": 0.9423808131032466 }, { "epoch": 0.8542194092827005, "grad_norm": 1.890625, "learning_rate": 6.313411393904079e-07, "loss": 0.24628953635692596, "step": 4049, "token_acc": 0.9371544951993017 }, { "epoch": 0.8544303797468354, "grad_norm": 0.765625, "learning_rate": 6.311779850903112e-07, "loss": 0.2605888843536377, "step": 4050, "token_acc": 0.9255493604460479 }, { "epoch": 0.8546413502109704, "grad_norm": 0.66015625, "learning_rate": 6.310148157889016e-07, "loss": 0.2797889709472656, "step": 4051, "token_acc": 0.9228395061728395 }, { "epoch": 0.8548523206751055, "grad_norm": 0.55859375, "learning_rate": 6.308516315048389e-07, "loss": 0.24685141444206238, "step": 4052, "token_acc": 0.9282934963868816 }, { "epoch": 0.8550632911392405, "grad_norm": 0.73828125, "learning_rate": 6.306884322567845e-07, "loss": 0.23584289848804474, "step": 4053, "token_acc": 0.9354463506833381 }, { "epoch": 0.8552742616033755, "grad_norm": 0.671875, "learning_rate": 6.305252180634018e-07, "loss": 0.25034475326538086, "step": 4054, "token_acc": 0.9323731997495304 }, { "epoch": 0.8554852320675106, "grad_norm": 0.75390625, "learning_rate": 6.303619889433558e-07, "loss": 0.2421070635318756, "step": 4055, "token_acc": 0.9346833998859099 }, { "epoch": 0.8556962025316456, "grad_norm": 0.67578125, "learning_rate": 6.301987449153129e-07, "loss": 0.2173137664794922, "step": 4056, "token_acc": 0.9392056201026749 }, { "epoch": 0.8559071729957806, "grad_norm": 0.72265625, "learning_rate": 6.300354859979413e-07, "loss": 0.28232231736183167, "step": 4057, "token_acc": 0.918398533007335 }, { "epoch": 0.8561181434599157, "grad_norm": 0.953125, "learning_rate": 6.298722122099116e-07, "loss": 0.30284732580184937, "step": 4058, "token_acc": 0.9167449139280125 }, { "epoch": 0.8563291139240506, "grad_norm": 0.6953125, "learning_rate": 6.29708923569895e-07, "loss": 0.2498965859413147, "step": 4059, "token_acc": 0.9313209637219607 }, { "epoch": 0.8565400843881856, "grad_norm": 0.7109375, "learning_rate": 6.295456200965652e-07, "loss": 0.2922337055206299, "step": 4060, "token_acc": 0.9208219178082192 }, { "epoch": 0.8567510548523207, "grad_norm": 0.92578125, "learning_rate": 6.293823018085973e-07, "loss": 0.2691463232040405, "step": 4061, "token_acc": 0.9242501270971022 }, { "epoch": 0.8569620253164557, "grad_norm": 0.734375, "learning_rate": 6.292189687246681e-07, "loss": 0.23617780208587646, "step": 4062, "token_acc": 0.9292988070992144 }, { "epoch": 0.8571729957805907, "grad_norm": 0.66796875, "learning_rate": 6.290556208634562e-07, "loss": 0.22488857805728912, "step": 4063, "token_acc": 0.9367311072056239 }, { "epoch": 0.8573839662447258, "grad_norm": 0.71875, "learning_rate": 6.288922582436415e-07, "loss": 0.25236254930496216, "step": 4064, "token_acc": 0.9259367681498829 }, { "epoch": 0.8575949367088608, "grad_norm": 0.8515625, "learning_rate": 6.287288808839064e-07, "loss": 0.28328806161880493, "step": 4065, "token_acc": 0.9219088937093276 }, { "epoch": 0.8578059071729958, "grad_norm": 1.1171875, "learning_rate": 6.28565488802934e-07, "loss": 0.22331370413303375, "step": 4066, "token_acc": 0.9407265774378585 }, { "epoch": 0.8580168776371307, "grad_norm": 0.7265625, "learning_rate": 6.2840208201941e-07, "loss": 0.3170815110206604, "step": 4067, "token_acc": 0.9153687922421393 }, { "epoch": 0.8582278481012658, "grad_norm": 0.70703125, "learning_rate": 6.282386605520211e-07, "loss": 0.2890605926513672, "step": 4068, "token_acc": 0.9237190558434082 }, { "epoch": 0.8584388185654008, "grad_norm": 0.8203125, "learning_rate": 6.280752244194557e-07, "loss": 0.30230003595352173, "step": 4069, "token_acc": 0.9161448407296487 }, { "epoch": 0.8586497890295358, "grad_norm": 0.609375, "learning_rate": 6.279117736404048e-07, "loss": 0.22874580323696136, "step": 4070, "token_acc": 0.9360783184566657 }, { "epoch": 0.8588607594936709, "grad_norm": 0.9453125, "learning_rate": 6.277483082335595e-07, "loss": 0.24095237255096436, "step": 4071, "token_acc": 0.9326676176890157 }, { "epoch": 0.8590717299578059, "grad_norm": 0.73828125, "learning_rate": 6.275848282176141e-07, "loss": 0.27633145451545715, "step": 4072, "token_acc": 0.9181870011402509 }, { "epoch": 0.8592827004219409, "grad_norm": 0.7578125, "learning_rate": 6.274213336112637e-07, "loss": 0.2621336579322815, "step": 4073, "token_acc": 0.9301169590643275 }, { "epoch": 0.859493670886076, "grad_norm": 0.8359375, "learning_rate": 6.272578244332054e-07, "loss": 0.3243217468261719, "step": 4074, "token_acc": 0.9198167239404352 }, { "epoch": 0.859704641350211, "grad_norm": 0.79296875, "learning_rate": 6.270943007021378e-07, "loss": 0.26706814765930176, "step": 4075, "token_acc": 0.9287080032777929 }, { "epoch": 0.859915611814346, "grad_norm": 0.75390625, "learning_rate": 6.269307624367611e-07, "loss": 0.22295701503753662, "step": 4076, "token_acc": 0.9379194630872483 }, { "epoch": 0.860126582278481, "grad_norm": 0.6640625, "learning_rate": 6.267672096557774e-07, "loss": 0.22589023411273956, "step": 4077, "token_acc": 0.9374149659863945 }, { "epoch": 0.860337552742616, "grad_norm": 0.72265625, "learning_rate": 6.266036423778903e-07, "loss": 0.23850886523723602, "step": 4078, "token_acc": 0.9335260115606936 }, { "epoch": 0.860548523206751, "grad_norm": 0.76171875, "learning_rate": 6.264400606218051e-07, "loss": 0.2906739413738251, "step": 4079, "token_acc": 0.9235970250169033 }, { "epoch": 0.8607594936708861, "grad_norm": 0.82421875, "learning_rate": 6.262764644062289e-07, "loss": 0.27760887145996094, "step": 4080, "token_acc": 0.9202920596288409 }, { "epoch": 0.8609704641350211, "grad_norm": 0.87109375, "learning_rate": 6.261128537498701e-07, "loss": 0.29703396558761597, "step": 4081, "token_acc": 0.9251968503937008 }, { "epoch": 0.8611814345991561, "grad_norm": 0.7890625, "learning_rate": 6.259492286714392e-07, "loss": 0.24846400320529938, "step": 4082, "token_acc": 0.9291163382072473 }, { "epoch": 0.8613924050632912, "grad_norm": 0.59765625, "learning_rate": 6.257855891896479e-07, "loss": 0.26180729269981384, "step": 4083, "token_acc": 0.9316396018858041 }, { "epoch": 0.8616033755274262, "grad_norm": 0.62890625, "learning_rate": 6.256219353232097e-07, "loss": 0.2206679731607437, "step": 4084, "token_acc": 0.9381009615384616 }, { "epoch": 0.8618143459915611, "grad_norm": 0.57421875, "learning_rate": 6.2545826709084e-07, "loss": 0.23405548930168152, "step": 4085, "token_acc": 0.9296895728309896 }, { "epoch": 0.8620253164556962, "grad_norm": 0.62109375, "learning_rate": 6.252945845112558e-07, "loss": 0.23339158296585083, "step": 4086, "token_acc": 0.9363073749355337 }, { "epoch": 0.8622362869198312, "grad_norm": 0.64453125, "learning_rate": 6.251308876031752e-07, "loss": 0.2242586463689804, "step": 4087, "token_acc": 0.9317455707232065 }, { "epoch": 0.8624472573839662, "grad_norm": 0.65234375, "learning_rate": 6.249671763853183e-07, "loss": 0.2569422125816345, "step": 4088, "token_acc": 0.9347557204700062 }, { "epoch": 0.8626582278481013, "grad_norm": 0.625, "learning_rate": 6.248034508764075e-07, "loss": 0.22514408826828003, "step": 4089, "token_acc": 0.9332932692307693 }, { "epoch": 0.8628691983122363, "grad_norm": 0.7265625, "learning_rate": 6.246397110951656e-07, "loss": 0.25419875979423523, "step": 4090, "token_acc": 0.928718703976436 }, { "epoch": 0.8630801687763713, "grad_norm": 0.8359375, "learning_rate": 6.244759570603177e-07, "loss": 0.250669002532959, "step": 4091, "token_acc": 0.9290869155946031 }, { "epoch": 0.8632911392405064, "grad_norm": 0.625, "learning_rate": 6.243121887905905e-07, "loss": 0.2381683588027954, "step": 4092, "token_acc": 0.937048801617095 }, { "epoch": 0.8635021097046414, "grad_norm": 0.73828125, "learning_rate": 6.241484063047124e-07, "loss": 0.24533361196517944, "step": 4093, "token_acc": 0.929968812021548 }, { "epoch": 0.8637130801687763, "grad_norm": 0.703125, "learning_rate": 6.239846096214134e-07, "loss": 0.2539099454879761, "step": 4094, "token_acc": 0.9323753169907016 }, { "epoch": 0.8639240506329114, "grad_norm": 0.84375, "learning_rate": 6.238207987594249e-07, "loss": 0.29992061853408813, "step": 4095, "token_acc": 0.9155581260844419 }, { "epoch": 0.8641350210970464, "grad_norm": 0.69921875, "learning_rate": 6.236569737374799e-07, "loss": 0.21126480400562286, "step": 4096, "token_acc": 0.9335508751112429 }, { "epoch": 0.8643459915611814, "grad_norm": 1.0078125, "learning_rate": 6.234931345743133e-07, "loss": 0.24169303476810455, "step": 4097, "token_acc": 0.9324667985306584 }, { "epoch": 0.8645569620253165, "grad_norm": 0.64453125, "learning_rate": 6.233292812886616e-07, "loss": 0.23757483065128326, "step": 4098, "token_acc": 0.9276901004304161 }, { "epoch": 0.8647679324894515, "grad_norm": 0.75390625, "learning_rate": 6.231654138992628e-07, "loss": 0.23772116005420685, "step": 4099, "token_acc": 0.9333144956202317 }, { "epoch": 0.8649789029535865, "grad_norm": 0.72265625, "learning_rate": 6.230015324248563e-07, "loss": 0.21935242414474487, "step": 4100, "token_acc": 0.940925700365408 }, { "epoch": 0.8651898734177215, "grad_norm": 0.78515625, "learning_rate": 6.228376368841835e-07, "loss": 0.32475346326828003, "step": 4101, "token_acc": 0.9176262178919398 }, { "epoch": 0.8654008438818566, "grad_norm": 0.6875, "learning_rate": 6.226737272959875e-07, "loss": 0.23204733431339264, "step": 4102, "token_acc": 0.9330396475770925 }, { "epoch": 0.8656118143459915, "grad_norm": 0.796875, "learning_rate": 6.225098036790122e-07, "loss": 0.2298852503299713, "step": 4103, "token_acc": 0.9341109709962169 }, { "epoch": 0.8658227848101265, "grad_norm": 0.7109375, "learning_rate": 6.22345866052004e-07, "loss": 0.2577894330024719, "step": 4104, "token_acc": 0.925531914893617 }, { "epoch": 0.8660337552742616, "grad_norm": 0.6328125, "learning_rate": 6.221819144337105e-07, "loss": 0.23886854946613312, "step": 4105, "token_acc": 0.9325384374019454 }, { "epoch": 0.8662447257383966, "grad_norm": 0.69921875, "learning_rate": 6.22017948842881e-07, "loss": 0.24876929819583893, "step": 4106, "token_acc": 0.9355296790684464 }, { "epoch": 0.8664556962025316, "grad_norm": 0.7109375, "learning_rate": 6.218539692982663e-07, "loss": 0.2213858813047409, "step": 4107, "token_acc": 0.9365609348914858 }, { "epoch": 0.8666666666666667, "grad_norm": 0.87890625, "learning_rate": 6.216899758186187e-07, "loss": 0.3000623285770416, "step": 4108, "token_acc": 0.9200603318250377 }, { "epoch": 0.8668776371308017, "grad_norm": 0.6484375, "learning_rate": 6.215259684226926e-07, "loss": 0.24768386781215668, "step": 4109, "token_acc": 0.9304527966854099 }, { "epoch": 0.8670886075949367, "grad_norm": 1.3046875, "learning_rate": 6.213619471292433e-07, "loss": 0.29407799243927, "step": 4110, "token_acc": 0.9195084485407066 }, { "epoch": 0.8672995780590718, "grad_norm": 0.66015625, "learning_rate": 6.211979119570284e-07, "loss": 0.23583140969276428, "step": 4111, "token_acc": 0.9303761931499158 }, { "epoch": 0.8675105485232067, "grad_norm": 0.66015625, "learning_rate": 6.21033862924806e-07, "loss": 0.28180259466171265, "step": 4112, "token_acc": 0.9215627796003579 }, { "epoch": 0.8677215189873417, "grad_norm": 0.74609375, "learning_rate": 6.208698000513372e-07, "loss": 0.27399757504463196, "step": 4113, "token_acc": 0.927065527065527 }, { "epoch": 0.8679324894514768, "grad_norm": 0.69921875, "learning_rate": 6.207057233553837e-07, "loss": 0.2254963219165802, "step": 4114, "token_acc": 0.9318809450751764 }, { "epoch": 0.8681434599156118, "grad_norm": 1.1796875, "learning_rate": 6.205416328557092e-07, "loss": 0.257060706615448, "step": 4115, "token_acc": 0.9296116504854369 }, { "epoch": 0.8683544303797468, "grad_norm": 0.79296875, "learning_rate": 6.203775285710785e-07, "loss": 0.2546718418598175, "step": 4116, "token_acc": 0.931855233126834 }, { "epoch": 0.8685654008438819, "grad_norm": 0.671875, "learning_rate": 6.202134105202587e-07, "loss": 0.25170835852622986, "step": 4117, "token_acc": 0.9231692677070829 }, { "epoch": 0.8687763713080169, "grad_norm": 0.89453125, "learning_rate": 6.200492787220178e-07, "loss": 0.24876196682453156, "step": 4118, "token_acc": 0.9244532803180915 }, { "epoch": 0.8689873417721519, "grad_norm": 0.67578125, "learning_rate": 6.198851331951261e-07, "loss": 0.2943386137485504, "step": 4119, "token_acc": 0.9190295235311312 }, { "epoch": 0.869198312236287, "grad_norm": 0.85546875, "learning_rate": 6.197209739583542e-07, "loss": 0.27560490369796753, "step": 4120, "token_acc": 0.9269821372610467 }, { "epoch": 0.869409282700422, "grad_norm": 0.67578125, "learning_rate": 6.195568010304761e-07, "loss": 0.238186314702034, "step": 4121, "token_acc": 0.9350782653323069 }, { "epoch": 0.8696202531645569, "grad_norm": 0.734375, "learning_rate": 6.193926144302657e-07, "loss": 0.2685512900352478, "step": 4122, "token_acc": 0.9218492019812878 }, { "epoch": 0.869831223628692, "grad_norm": 0.73046875, "learning_rate": 6.192284141764993e-07, "loss": 0.2898721992969513, "step": 4123, "token_acc": 0.9181845326963273 }, { "epoch": 0.870042194092827, "grad_norm": 0.63671875, "learning_rate": 6.190642002879545e-07, "loss": 0.21817117929458618, "step": 4124, "token_acc": 0.9383025367992484 }, { "epoch": 0.870253164556962, "grad_norm": 0.59375, "learning_rate": 6.188999727834107e-07, "loss": 0.2810812294483185, "step": 4125, "token_acc": 0.9233682983682984 }, { "epoch": 0.8704641350210971, "grad_norm": 0.87109375, "learning_rate": 6.187357316816488e-07, "loss": 0.28047841787338257, "step": 4126, "token_acc": 0.9229984701682815 }, { "epoch": 0.8706751054852321, "grad_norm": 0.8984375, "learning_rate": 6.185714770014509e-07, "loss": 0.24389293789863586, "step": 4127, "token_acc": 0.927810650887574 }, { "epoch": 0.8708860759493671, "grad_norm": 0.65234375, "learning_rate": 6.184072087616012e-07, "loss": 0.24977731704711914, "step": 4128, "token_acc": 0.9366090385198665 }, { "epoch": 0.8710970464135022, "grad_norm": 0.65625, "learning_rate": 6.182429269808846e-07, "loss": 0.27298176288604736, "step": 4129, "token_acc": 0.9266666666666666 }, { "epoch": 0.8713080168776371, "grad_norm": 0.70703125, "learning_rate": 6.180786316780888e-07, "loss": 0.25833290815353394, "step": 4130, "token_acc": 0.9311475409836065 }, { "epoch": 0.8715189873417721, "grad_norm": 0.6484375, "learning_rate": 6.179143228720021e-07, "loss": 0.2458360344171524, "step": 4131, "token_acc": 0.9304062126642771 }, { "epoch": 0.8717299578059071, "grad_norm": 0.67578125, "learning_rate": 6.177500005814143e-07, "loss": 0.23436272144317627, "step": 4132, "token_acc": 0.9314420803782506 }, { "epoch": 0.8719409282700422, "grad_norm": 0.8125, "learning_rate": 6.175856648251176e-07, "loss": 0.2540491819381714, "step": 4133, "token_acc": 0.9280426413976902 }, { "epoch": 0.8721518987341772, "grad_norm": 0.734375, "learning_rate": 6.174213156219047e-07, "loss": 0.27026164531707764, "step": 4134, "token_acc": 0.9261418853255587 }, { "epoch": 0.8723628691983122, "grad_norm": 0.61328125, "learning_rate": 6.172569529905704e-07, "loss": 0.2616317868232727, "step": 4135, "token_acc": 0.928118975488846 }, { "epoch": 0.8725738396624473, "grad_norm": 0.6171875, "learning_rate": 6.170925769499113e-07, "loss": 0.20361138880252838, "step": 4136, "token_acc": 0.9358663433036917 }, { "epoch": 0.8727848101265823, "grad_norm": 0.6875, "learning_rate": 6.169281875187249e-07, "loss": 0.2533760070800781, "step": 4137, "token_acc": 0.9307185800171772 }, { "epoch": 0.8729957805907173, "grad_norm": 0.7109375, "learning_rate": 6.167637847158107e-07, "loss": 0.2632371485233307, "step": 4138, "token_acc": 0.926261319534282 }, { "epoch": 0.8732067510548523, "grad_norm": 0.68359375, "learning_rate": 6.165993685599692e-07, "loss": 0.22066712379455566, "step": 4139, "token_acc": 0.9332612807349365 }, { "epoch": 0.8734177215189873, "grad_norm": 0.875, "learning_rate": 6.164349390700031e-07, "loss": 0.3172043561935425, "step": 4140, "token_acc": 0.9124893797790994 }, { "epoch": 0.8736286919831223, "grad_norm": 0.671875, "learning_rate": 6.162704962647163e-07, "loss": 0.22815823554992676, "step": 4141, "token_acc": 0.9368836291913215 }, { "epoch": 0.8738396624472574, "grad_norm": 0.98046875, "learning_rate": 6.16106040162914e-07, "loss": 0.3069443702697754, "step": 4142, "token_acc": 0.9129970643181211 }, { "epoch": 0.8740506329113924, "grad_norm": 0.74609375, "learning_rate": 6.159415707834033e-07, "loss": 0.24851232767105103, "step": 4143, "token_acc": 0.9312745988495307 }, { "epoch": 0.8742616033755274, "grad_norm": 0.98828125, "learning_rate": 6.157770881449929e-07, "loss": 0.2722988724708557, "step": 4144, "token_acc": 0.9251101321585903 }, { "epoch": 0.8744725738396625, "grad_norm": 0.65234375, "learning_rate": 6.156125922664925e-07, "loss": 0.23378847539424896, "step": 4145, "token_acc": 0.9338280766852195 }, { "epoch": 0.8746835443037975, "grad_norm": 0.67578125, "learning_rate": 6.154480831667133e-07, "loss": 0.22901234030723572, "step": 4146, "token_acc": 0.9359899592092877 }, { "epoch": 0.8748945147679325, "grad_norm": 1.1015625, "learning_rate": 6.152835608644689e-07, "loss": 0.251006156206131, "step": 4147, "token_acc": 0.9290629962943356 }, { "epoch": 0.8751054852320675, "grad_norm": 0.76953125, "learning_rate": 6.151190253785737e-07, "loss": 0.2572343349456787, "step": 4148, "token_acc": 0.926514943789416 }, { "epoch": 0.8753164556962025, "grad_norm": 0.7109375, "learning_rate": 6.149544767278433e-07, "loss": 0.23926880955696106, "step": 4149, "token_acc": 0.9337175792507204 }, { "epoch": 0.8755274261603375, "grad_norm": 0.6796875, "learning_rate": 6.147899149310956e-07, "loss": 0.28901904821395874, "step": 4150, "token_acc": 0.9175174637291779 }, { "epoch": 0.8757383966244726, "grad_norm": 0.7421875, "learning_rate": 6.146253400071496e-07, "loss": 0.23873598873615265, "step": 4151, "token_acc": 0.9260677466863034 }, { "epoch": 0.8759493670886076, "grad_norm": 0.6875, "learning_rate": 6.144607519748257e-07, "loss": 0.2708508372306824, "step": 4152, "token_acc": 0.9204545454545454 }, { "epoch": 0.8761603375527426, "grad_norm": 0.7734375, "learning_rate": 6.142961508529463e-07, "loss": 0.2670045495033264, "step": 4153, "token_acc": 0.9204577968526466 }, { "epoch": 0.8763713080168777, "grad_norm": 0.625, "learning_rate": 6.141315366603343e-07, "loss": 0.2225058674812317, "step": 4154, "token_acc": 0.9336358479863869 }, { "epoch": 0.8765822784810127, "grad_norm": 1.15625, "learning_rate": 6.139669094158154e-07, "loss": 0.28319936990737915, "step": 4155, "token_acc": 0.9232848935451166 }, { "epoch": 0.8767932489451477, "grad_norm": 0.62109375, "learning_rate": 6.138022691382156e-07, "loss": 0.22440600395202637, "step": 4156, "token_acc": 0.9360156468287231 }, { "epoch": 0.8770042194092827, "grad_norm": 0.7265625, "learning_rate": 6.136376158463632e-07, "loss": 0.2605544328689575, "step": 4157, "token_acc": 0.9335807050092765 }, { "epoch": 0.8772151898734177, "grad_norm": 0.71875, "learning_rate": 6.134729495590874e-07, "loss": 0.24485601484775543, "step": 4158, "token_acc": 0.9389752446747266 }, { "epoch": 0.8774261603375527, "grad_norm": 0.703125, "learning_rate": 6.133082702952196e-07, "loss": 0.24484291672706604, "step": 4159, "token_acc": 0.9349706949483673 }, { "epoch": 0.8776371308016878, "grad_norm": 0.65234375, "learning_rate": 6.13143578073592e-07, "loss": 0.29455679655075073, "step": 4160, "token_acc": 0.9226074895977808 }, { "epoch": 0.8778481012658228, "grad_norm": 0.66015625, "learning_rate": 6.129788729130386e-07, "loss": 0.25719529390335083, "step": 4161, "token_acc": 0.9313212195873114 }, { "epoch": 0.8780590717299578, "grad_norm": 0.65625, "learning_rate": 6.128141548323949e-07, "loss": 0.2703285813331604, "step": 4162, "token_acc": 0.9200779727095516 }, { "epoch": 0.8782700421940929, "grad_norm": 0.62890625, "learning_rate": 6.126494238504975e-07, "loss": 0.2466617375612259, "step": 4163, "token_acc": 0.9294532627865961 }, { "epoch": 0.8784810126582279, "grad_norm": 0.6875, "learning_rate": 6.124846799861854e-07, "loss": 0.26350271701812744, "step": 4164, "token_acc": 0.927405247813411 }, { "epoch": 0.8786919831223629, "grad_norm": 0.65625, "learning_rate": 6.123199232582979e-07, "loss": 0.27094197273254395, "step": 4165, "token_acc": 0.9232914923291492 }, { "epoch": 0.8789029535864978, "grad_norm": 0.68359375, "learning_rate": 6.121551536856764e-07, "loss": 0.24841900169849396, "step": 4166, "token_acc": 0.926261319534282 }, { "epoch": 0.8791139240506329, "grad_norm": 0.71484375, "learning_rate": 6.11990371287164e-07, "loss": 0.2515864372253418, "step": 4167, "token_acc": 0.926829268292683 }, { "epoch": 0.8793248945147679, "grad_norm": 0.6875, "learning_rate": 6.118255760816047e-07, "loss": 0.27919894456863403, "step": 4168, "token_acc": 0.9240174672489083 }, { "epoch": 0.8795358649789029, "grad_norm": 0.70703125, "learning_rate": 6.116607680878442e-07, "loss": 0.30026260018348694, "step": 4169, "token_acc": 0.9152833428734974 }, { "epoch": 0.879746835443038, "grad_norm": 0.5859375, "learning_rate": 6.1149594732473e-07, "loss": 0.23216193914413452, "step": 4170, "token_acc": 0.9358869872317305 }, { "epoch": 0.879957805907173, "grad_norm": 0.984375, "learning_rate": 6.113311138111101e-07, "loss": 0.2964291572570801, "step": 4171, "token_acc": 0.9222126188418324 }, { "epoch": 0.880168776371308, "grad_norm": 0.7109375, "learning_rate": 6.111662675658355e-07, "loss": 0.29114317893981934, "step": 4172, "token_acc": 0.917890320647288 }, { "epoch": 0.8803797468354431, "grad_norm": 0.79296875, "learning_rate": 6.11001408607757e-07, "loss": 0.27105581760406494, "step": 4173, "token_acc": 0.9239344262295082 }, { "epoch": 0.880590717299578, "grad_norm": 0.8671875, "learning_rate": 6.108365369557281e-07, "loss": 0.3158109188079834, "step": 4174, "token_acc": 0.9118942731277533 }, { "epoch": 0.880801687763713, "grad_norm": 0.70703125, "learning_rate": 6.10671652628603e-07, "loss": 0.2518154978752136, "step": 4175, "token_acc": 0.9320155735250075 }, { "epoch": 0.8810126582278481, "grad_norm": 0.78125, "learning_rate": 6.105067556452377e-07, "loss": 0.302077978849411, "step": 4176, "token_acc": 0.9133034379671151 }, { "epoch": 0.8812236286919831, "grad_norm": 0.671875, "learning_rate": 6.103418460244897e-07, "loss": 0.20054301619529724, "step": 4177, "token_acc": 0.9361442102047052 }, { "epoch": 0.8814345991561181, "grad_norm": 0.8046875, "learning_rate": 6.101769237852176e-07, "loss": 0.258273720741272, "step": 4178, "token_acc": 0.9312004530011325 }, { "epoch": 0.8816455696202532, "grad_norm": 0.8125, "learning_rate": 6.100119889462816e-07, "loss": 0.2696981132030487, "step": 4179, "token_acc": 0.9246376811594202 }, { "epoch": 0.8818565400843882, "grad_norm": 0.765625, "learning_rate": 6.098470415265437e-07, "loss": 0.26163631677627563, "step": 4180, "token_acc": 0.9287454323995128 }, { "epoch": 0.8820675105485232, "grad_norm": 0.6796875, "learning_rate": 6.096820815448666e-07, "loss": 0.23252522945404053, "step": 4181, "token_acc": 0.9277423099245502 }, { "epoch": 0.8822784810126583, "grad_norm": 0.75, "learning_rate": 6.095171090201155e-07, "loss": 0.2892453074455261, "step": 4182, "token_acc": 0.9190966266437964 }, { "epoch": 0.8824894514767933, "grad_norm": 0.86328125, "learning_rate": 6.093521239711558e-07, "loss": 0.29943525791168213, "step": 4183, "token_acc": 0.9188945629318114 }, { "epoch": 0.8827004219409282, "grad_norm": 0.59765625, "learning_rate": 6.091871264168553e-07, "loss": 0.2400483638048172, "step": 4184, "token_acc": 0.93239901071723 }, { "epoch": 0.8829113924050633, "grad_norm": 0.61328125, "learning_rate": 6.090221163760827e-07, "loss": 0.24968990683555603, "step": 4185, "token_acc": 0.9295023696682464 }, { "epoch": 0.8831223628691983, "grad_norm": 0.78125, "learning_rate": 6.088570938677084e-07, "loss": 0.2953101098537445, "step": 4186, "token_acc": 0.9210682492581602 }, { "epoch": 0.8833333333333333, "grad_norm": 0.66015625, "learning_rate": 6.086920589106038e-07, "loss": 0.2590201199054718, "step": 4187, "token_acc": 0.9268945022288262 }, { "epoch": 0.8835443037974684, "grad_norm": 0.984375, "learning_rate": 6.085270115236425e-07, "loss": 0.3109322190284729, "step": 4188, "token_acc": 0.9183551847437426 }, { "epoch": 0.8837552742616034, "grad_norm": 0.7109375, "learning_rate": 6.083619517256989e-07, "loss": 0.2562364935874939, "step": 4189, "token_acc": 0.9321928460342146 }, { "epoch": 0.8839662447257384, "grad_norm": 0.6875, "learning_rate": 6.081968795356489e-07, "loss": 0.2528485655784607, "step": 4190, "token_acc": 0.9289060347203086 }, { "epoch": 0.8841772151898735, "grad_norm": 0.66796875, "learning_rate": 6.080317949723699e-07, "loss": 0.21761058270931244, "step": 4191, "token_acc": 0.9312645011600929 }, { "epoch": 0.8843881856540085, "grad_norm": 0.79296875, "learning_rate": 6.078666980547409e-07, "loss": 0.266113817691803, "step": 4192, "token_acc": 0.9250795487416835 }, { "epoch": 0.8845991561181434, "grad_norm": 0.57421875, "learning_rate": 6.077015888016419e-07, "loss": 0.23291605710983276, "step": 4193, "token_acc": 0.9373501199040767 }, { "epoch": 0.8848101265822785, "grad_norm": 0.63671875, "learning_rate": 6.075364672319547e-07, "loss": 0.23121441900730133, "step": 4194, "token_acc": 0.934440818748146 }, { "epoch": 0.8850210970464135, "grad_norm": 0.80859375, "learning_rate": 6.073713333645622e-07, "loss": 0.2666401267051697, "step": 4195, "token_acc": 0.9267554479418886 }, { "epoch": 0.8852320675105485, "grad_norm": 0.796875, "learning_rate": 6.072061872183492e-07, "loss": 0.29241669178009033, "step": 4196, "token_acc": 0.9234354194407457 }, { "epoch": 0.8854430379746835, "grad_norm": 0.62109375, "learning_rate": 6.07041028812201e-07, "loss": 0.26576101779937744, "step": 4197, "token_acc": 0.926395173453997 }, { "epoch": 0.8856540084388186, "grad_norm": 0.6796875, "learning_rate": 6.068758581650054e-07, "loss": 0.30285555124282837, "step": 4198, "token_acc": 0.9184645558980107 }, { "epoch": 0.8858649789029536, "grad_norm": 0.7578125, "learning_rate": 6.067106752956505e-07, "loss": 0.23448467254638672, "step": 4199, "token_acc": 0.934085510688836 }, { "epoch": 0.8860759493670886, "grad_norm": 0.65234375, "learning_rate": 6.06545480223027e-07, "loss": 0.200068399310112, "step": 4200, "token_acc": 0.9435604923446412 }, { "epoch": 0.8860759493670886, "eval_loss": 0.43362924456596375, "eval_runtime": 245.8722, "eval_samples_per_second": 137.083, "eval_steps_per_second": 2.143, "eval_token_acc": 0.8990947231445751, "step": 4200 }, { "epoch": 0.8862869198312237, "grad_norm": 0.546875, "learning_rate": 6.06380272966026e-07, "loss": 0.21078279614448547, "step": 4201, "token_acc": 0.9344827586206896 }, { "epoch": 0.8864978902953586, "grad_norm": 0.7578125, "learning_rate": 6.062150535435403e-07, "loss": 0.2715229392051697, "step": 4202, "token_acc": 0.92718313714755 }, { "epoch": 0.8867088607594936, "grad_norm": 0.81640625, "learning_rate": 6.060498219744641e-07, "loss": 0.2983436584472656, "step": 4203, "token_acc": 0.9193199381761978 }, { "epoch": 0.8869198312236287, "grad_norm": 0.68359375, "learning_rate": 6.058845782776935e-07, "loss": 0.23465782403945923, "step": 4204, "token_acc": 0.9343685906826839 }, { "epoch": 0.8871308016877637, "grad_norm": 0.68359375, "learning_rate": 6.057193224721249e-07, "loss": 0.2636641561985016, "step": 4205, "token_acc": 0.9289781240635301 }, { "epoch": 0.8873417721518987, "grad_norm": 0.75, "learning_rate": 6.055540545766571e-07, "loss": 0.2622556686401367, "step": 4206, "token_acc": 0.9293568810396534 }, { "epoch": 0.8875527426160338, "grad_norm": 0.7421875, "learning_rate": 6.053887746101897e-07, "loss": 0.24544410407543182, "step": 4207, "token_acc": 0.9265861027190332 }, { "epoch": 0.8877637130801688, "grad_norm": 0.72265625, "learning_rate": 6.052234825916239e-07, "loss": 0.2868709862232208, "step": 4208, "token_acc": 0.9217668972857903 }, { "epoch": 0.8879746835443038, "grad_norm": 1.0546875, "learning_rate": 6.050581785398624e-07, "loss": 0.32044923305511475, "step": 4209, "token_acc": 0.914238592633315 }, { "epoch": 0.8881856540084389, "grad_norm": 0.75390625, "learning_rate": 6.048928624738089e-07, "loss": 0.2640608251094818, "step": 4210, "token_acc": 0.9259809750297265 }, { "epoch": 0.8883966244725738, "grad_norm": 0.9296875, "learning_rate": 6.047275344123687e-07, "loss": 0.2653833031654358, "step": 4211, "token_acc": 0.9246769618657422 }, { "epoch": 0.8886075949367088, "grad_norm": 0.7421875, "learning_rate": 6.045621943744486e-07, "loss": 0.28444695472717285, "step": 4212, "token_acc": 0.9214157168566287 }, { "epoch": 0.8888185654008439, "grad_norm": 0.78125, "learning_rate": 6.043968423789566e-07, "loss": 0.26767757534980774, "step": 4213, "token_acc": 0.9318742293464858 }, { "epoch": 0.8890295358649789, "grad_norm": 1.671875, "learning_rate": 6.042314784448019e-07, "loss": 0.28254497051239014, "step": 4214, "token_acc": 0.9181126331811263 }, { "epoch": 0.8892405063291139, "grad_norm": 0.640625, "learning_rate": 6.040661025908955e-07, "loss": 0.24974443018436432, "step": 4215, "token_acc": 0.9303778637310325 }, { "epoch": 0.889451476793249, "grad_norm": 1.1015625, "learning_rate": 6.039007148361497e-07, "loss": 0.23202915489673615, "step": 4216, "token_acc": 0.9343167269691066 }, { "epoch": 0.889662447257384, "grad_norm": 0.62890625, "learning_rate": 6.037353151994776e-07, "loss": 0.2691342532634735, "step": 4217, "token_acc": 0.9249177385581813 }, { "epoch": 0.889873417721519, "grad_norm": 0.76171875, "learning_rate": 6.035699036997941e-07, "loss": 0.24880748987197876, "step": 4218, "token_acc": 0.9292565947242206 }, { "epoch": 0.890084388185654, "grad_norm": 0.63671875, "learning_rate": 6.034044803560154e-07, "loss": 0.22268301248550415, "step": 4219, "token_acc": 0.9379027533684827 }, { "epoch": 0.890295358649789, "grad_norm": 1.0703125, "learning_rate": 6.032390451870594e-07, "loss": 0.25534525513648987, "step": 4220, "token_acc": 0.9259986902423052 }, { "epoch": 0.890506329113924, "grad_norm": 0.765625, "learning_rate": 6.030735982118448e-07, "loss": 0.26995018124580383, "step": 4221, "token_acc": 0.9282994923857868 }, { "epoch": 0.8907172995780591, "grad_norm": 0.93359375, "learning_rate": 6.029081394492918e-07, "loss": 0.3268905580043793, "step": 4222, "token_acc": 0.9139150943396226 }, { "epoch": 0.8909282700421941, "grad_norm": 0.6484375, "learning_rate": 6.027426689183219e-07, "loss": 0.2795318067073822, "step": 4223, "token_acc": 0.9274217149546385 }, { "epoch": 0.8911392405063291, "grad_norm": 0.72265625, "learning_rate": 6.025771866378583e-07, "loss": 0.237565279006958, "step": 4224, "token_acc": 0.9287935323383084 }, { "epoch": 0.8913502109704642, "grad_norm": 1.046875, "learning_rate": 6.024116926268251e-07, "loss": 0.2869594693183899, "step": 4225, "token_acc": 0.9152490886998785 }, { "epoch": 0.8915611814345992, "grad_norm": 0.6484375, "learning_rate": 6.022461869041482e-07, "loss": 0.25012633204460144, "step": 4226, "token_acc": 0.9271349460798601 }, { "epoch": 0.8917721518987342, "grad_norm": 1.046875, "learning_rate": 6.020806694887542e-07, "loss": 0.2637191712856293, "step": 4227, "token_acc": 0.925160163075131 }, { "epoch": 0.8919831223628693, "grad_norm": 0.91015625, "learning_rate": 6.019151403995719e-07, "loss": 0.3079901933670044, "step": 4228, "token_acc": 0.921990171990172 }, { "epoch": 0.8921940928270042, "grad_norm": 0.64453125, "learning_rate": 6.017495996555306e-07, "loss": 0.23182697594165802, "step": 4229, "token_acc": 0.9302525832376579 }, { "epoch": 0.8924050632911392, "grad_norm": 0.91015625, "learning_rate": 6.015840472755613e-07, "loss": 0.25371140241622925, "step": 4230, "token_acc": 0.9307820299500832 }, { "epoch": 0.8926160337552742, "grad_norm": 0.62109375, "learning_rate": 6.014184832785964e-07, "loss": 0.23902302980422974, "step": 4231, "token_acc": 0.9287733182589033 }, { "epoch": 0.8928270042194093, "grad_norm": 0.640625, "learning_rate": 6.012529076835697e-07, "loss": 0.23583605885505676, "step": 4232, "token_acc": 0.93906588308073 }, { "epoch": 0.8930379746835443, "grad_norm": 0.64453125, "learning_rate": 6.01087320509416e-07, "loss": 0.26277485489845276, "step": 4233, "token_acc": 0.9222683264177041 }, { "epoch": 0.8932489451476793, "grad_norm": 0.703125, "learning_rate": 6.009217217750714e-07, "loss": 0.2474956512451172, "step": 4234, "token_acc": 0.9318518518518518 }, { "epoch": 0.8934599156118144, "grad_norm": 1.0703125, "learning_rate": 6.007561114994739e-07, "loss": 0.26536452770233154, "step": 4235, "token_acc": 0.9293462327763119 }, { "epoch": 0.8936708860759494, "grad_norm": 0.6875, "learning_rate": 6.005904897015622e-07, "loss": 0.28018704056739807, "step": 4236, "token_acc": 0.9228001308472359 }, { "epoch": 0.8938818565400843, "grad_norm": 0.76953125, "learning_rate": 6.004248564002765e-07, "loss": 0.22079084813594818, "step": 4237, "token_acc": 0.9371713257680598 }, { "epoch": 0.8940928270042194, "grad_norm": 0.765625, "learning_rate": 6.002592116145587e-07, "loss": 0.27228283882141113, "step": 4238, "token_acc": 0.9252103459021502 }, { "epoch": 0.8943037974683544, "grad_norm": 0.7578125, "learning_rate": 6.000935553633513e-07, "loss": 0.24015696346759796, "step": 4239, "token_acc": 0.9338327091136079 }, { "epoch": 0.8945147679324894, "grad_norm": 0.69140625, "learning_rate": 5.999278876655986e-07, "loss": 0.2547394037246704, "step": 4240, "token_acc": 0.9251944943147815 }, { "epoch": 0.8947257383966245, "grad_norm": 0.6328125, "learning_rate": 5.997622085402464e-07, "loss": 0.26425132155418396, "step": 4241, "token_acc": 0.927822701407607 }, { "epoch": 0.8949367088607595, "grad_norm": 11.875, "learning_rate": 5.99596518006241e-07, "loss": 0.27949339151382446, "step": 4242, "token_acc": 0.9208772903942254 }, { "epoch": 0.8951476793248945, "grad_norm": 0.6796875, "learning_rate": 5.99430816082531e-07, "loss": 0.22526085376739502, "step": 4243, "token_acc": 0.9352769679300291 }, { "epoch": 0.8953586497890296, "grad_norm": 0.84375, "learning_rate": 5.992651027880655e-07, "loss": 0.2722879648208618, "step": 4244, "token_acc": 0.9278131634819533 }, { "epoch": 0.8955696202531646, "grad_norm": 1.078125, "learning_rate": 5.990993781417954e-07, "loss": 0.2544936537742615, "step": 4245, "token_acc": 0.9300526152893841 }, { "epoch": 0.8957805907172995, "grad_norm": 0.76171875, "learning_rate": 5.989336421626725e-07, "loss": 0.23678705096244812, "step": 4246, "token_acc": 0.9278801123946301 }, { "epoch": 0.8959915611814346, "grad_norm": 0.734375, "learning_rate": 5.987678948696503e-07, "loss": 0.25578171014785767, "step": 4247, "token_acc": 0.9313725490196079 }, { "epoch": 0.8962025316455696, "grad_norm": 0.828125, "learning_rate": 5.986021362816836e-07, "loss": 0.28365084528923035, "step": 4248, "token_acc": 0.9187675070028011 }, { "epoch": 0.8964135021097046, "grad_norm": 0.71484375, "learning_rate": 5.984363664177278e-07, "loss": 0.25368574261665344, "step": 4249, "token_acc": 0.9259506337558372 }, { "epoch": 0.8966244725738397, "grad_norm": 0.76953125, "learning_rate": 5.982705852967404e-07, "loss": 0.2846065163612366, "step": 4250, "token_acc": 0.9225526641883519 }, { "epoch": 0.8968354430379747, "grad_norm": 0.828125, "learning_rate": 5.981047929376797e-07, "loss": 0.24003265798091888, "step": 4251, "token_acc": 0.9336574206462682 }, { "epoch": 0.8970464135021097, "grad_norm": 0.64453125, "learning_rate": 5.979389893595058e-07, "loss": 0.24135062098503113, "step": 4252, "token_acc": 0.9311967068509261 }, { "epoch": 0.8972573839662448, "grad_norm": 0.69921875, "learning_rate": 5.977731745811794e-07, "loss": 0.28301721811294556, "step": 4253, "token_acc": 0.9250374812593704 }, { "epoch": 0.8974683544303798, "grad_norm": 0.609375, "learning_rate": 5.976073486216629e-07, "loss": 0.22836647927761078, "step": 4254, "token_acc": 0.9341637010676157 }, { "epoch": 0.8976793248945147, "grad_norm": 0.83984375, "learning_rate": 5.9744151149992e-07, "loss": 0.269849568605423, "step": 4255, "token_acc": 0.9243566415388614 }, { "epoch": 0.8978902953586498, "grad_norm": 0.91796875, "learning_rate": 5.972756632349155e-07, "loss": 0.24670813977718353, "step": 4256, "token_acc": 0.9325254750757367 }, { "epoch": 0.8981012658227848, "grad_norm": 0.72265625, "learning_rate": 5.971098038456156e-07, "loss": 0.27588987350463867, "step": 4257, "token_acc": 0.9230769230769231 }, { "epoch": 0.8983122362869198, "grad_norm": 0.65625, "learning_rate": 5.969439333509876e-07, "loss": 0.24122093617916107, "step": 4258, "token_acc": 0.9377420315758117 }, { "epoch": 0.8985232067510549, "grad_norm": 0.796875, "learning_rate": 5.967780517700004e-07, "loss": 0.3517974019050598, "step": 4259, "token_acc": 0.9076151121605667 }, { "epoch": 0.8987341772151899, "grad_norm": 0.6875, "learning_rate": 5.96612159121624e-07, "loss": 0.23520660400390625, "step": 4260, "token_acc": 0.9336206896551724 }, { "epoch": 0.8989451476793249, "grad_norm": 0.7265625, "learning_rate": 5.964462554248293e-07, "loss": 0.24624879658222198, "step": 4261, "token_acc": 0.9268867924528302 }, { "epoch": 0.89915611814346, "grad_norm": 0.7265625, "learning_rate": 5.962803406985889e-07, "loss": 0.2517179846763611, "step": 4262, "token_acc": 0.9359830097087378 }, { "epoch": 0.899367088607595, "grad_norm": 0.7421875, "learning_rate": 5.961144149618767e-07, "loss": 0.2511524558067322, "step": 4263, "token_acc": 0.9274737125909949 }, { "epoch": 0.8995780590717299, "grad_norm": 0.63671875, "learning_rate": 5.959484782336677e-07, "loss": 0.2458001971244812, "step": 4264, "token_acc": 0.9324217722850381 }, { "epoch": 0.8997890295358649, "grad_norm": 0.55078125, "learning_rate": 5.957825305329381e-07, "loss": 0.25005120038986206, "step": 4265, "token_acc": 0.930932545014781 }, { "epoch": 0.9, "grad_norm": 0.6875, "learning_rate": 5.956165718786654e-07, "loss": 0.27909934520721436, "step": 4266, "token_acc": 0.9232015554115359 }, { "epoch": 0.900210970464135, "grad_norm": 0.578125, "learning_rate": 5.954506022898284e-07, "loss": 0.20023342967033386, "step": 4267, "token_acc": 0.9454933008526187 }, { "epoch": 0.90042194092827, "grad_norm": 0.703125, "learning_rate": 5.952846217854073e-07, "loss": 0.29464536905288696, "step": 4268, "token_acc": 0.9183216783216783 }, { "epoch": 0.9006329113924051, "grad_norm": 0.875, "learning_rate": 5.951186303843831e-07, "loss": 0.33223938941955566, "step": 4269, "token_acc": 0.9164596273291925 }, { "epoch": 0.9008438818565401, "grad_norm": 0.75, "learning_rate": 5.949526281057383e-07, "loss": 0.24782004952430725, "step": 4270, "token_acc": 0.9336206896551724 }, { "epoch": 0.9010548523206751, "grad_norm": 0.7109375, "learning_rate": 5.94786614968457e-07, "loss": 0.352505624294281, "step": 4271, "token_acc": 0.9169139465875371 }, { "epoch": 0.9012658227848102, "grad_norm": 0.59375, "learning_rate": 5.946205909915241e-07, "loss": 0.22753530740737915, "step": 4272, "token_acc": 0.9302587176602924 }, { "epoch": 0.9014767932489451, "grad_norm": 0.73046875, "learning_rate": 5.944545561939255e-07, "loss": 0.2651301622390747, "step": 4273, "token_acc": 0.9242381884260553 }, { "epoch": 0.9016877637130801, "grad_norm": 0.75390625, "learning_rate": 5.94288510594649e-07, "loss": 0.2688208818435669, "step": 4274, "token_acc": 0.9251575630252101 }, { "epoch": 0.9018987341772152, "grad_norm": 0.6953125, "learning_rate": 5.941224542126833e-07, "loss": 0.24615304172039032, "step": 4275, "token_acc": 0.9286157666045934 }, { "epoch": 0.9021097046413502, "grad_norm": 0.87109375, "learning_rate": 5.939563870670183e-07, "loss": 0.27068641781806946, "step": 4276, "token_acc": 0.9235854045478583 }, { "epoch": 0.9023206751054852, "grad_norm": 1.109375, "learning_rate": 5.937903091766452e-07, "loss": 0.21461528539657593, "step": 4277, "token_acc": 0.9345738295318127 }, { "epoch": 0.9025316455696203, "grad_norm": 0.734375, "learning_rate": 5.936242205605563e-07, "loss": 0.25337833166122437, "step": 4278, "token_acc": 0.9245041586692259 }, { "epoch": 0.9027426160337553, "grad_norm": 0.81640625, "learning_rate": 5.934581212377458e-07, "loss": 0.2462354600429535, "step": 4279, "token_acc": 0.9321766561514195 }, { "epoch": 0.9029535864978903, "grad_norm": 0.875, "learning_rate": 5.932920112272077e-07, "loss": 0.2497931867837906, "step": 4280, "token_acc": 0.9275851488186561 }, { "epoch": 0.9031645569620254, "grad_norm": 0.74609375, "learning_rate": 5.931258905479386e-07, "loss": 0.22000060975551605, "step": 4281, "token_acc": 0.9386559802712701 }, { "epoch": 0.9033755274261603, "grad_norm": 0.6484375, "learning_rate": 5.929597592189357e-07, "loss": 0.2436477392911911, "step": 4282, "token_acc": 0.9343106850859881 }, { "epoch": 0.9035864978902953, "grad_norm": 0.94140625, "learning_rate": 5.927936172591974e-07, "loss": 0.2814447283744812, "step": 4283, "token_acc": 0.9261970112839281 }, { "epoch": 0.9037974683544304, "grad_norm": 0.76953125, "learning_rate": 5.926274646877239e-07, "loss": 0.2620810866355896, "step": 4284, "token_acc": 0.9305732484076433 }, { "epoch": 0.9040084388185654, "grad_norm": 0.75, "learning_rate": 5.924613015235155e-07, "loss": 0.25361499190330505, "step": 4285, "token_acc": 0.9276871131119865 }, { "epoch": 0.9042194092827004, "grad_norm": 0.6328125, "learning_rate": 5.922951277855748e-07, "loss": 0.2799944579601288, "step": 4286, "token_acc": 0.9265978230533073 }, { "epoch": 0.9044303797468355, "grad_norm": 0.73046875, "learning_rate": 5.921289434929051e-07, "loss": 0.23529410362243652, "step": 4287, "token_acc": 0.9284247579935465 }, { "epoch": 0.9046413502109705, "grad_norm": 0.62109375, "learning_rate": 5.919627486645109e-07, "loss": 0.27917373180389404, "step": 4288, "token_acc": 0.9291873530180298 }, { "epoch": 0.9048523206751055, "grad_norm": 0.7421875, "learning_rate": 5.91796543319398e-07, "loss": 0.27981898188591003, "step": 4289, "token_acc": 0.923728813559322 }, { "epoch": 0.9050632911392406, "grad_norm": 0.703125, "learning_rate": 5.916303274765733e-07, "loss": 0.2564546465873718, "step": 4290, "token_acc": 0.9315031258494156 }, { "epoch": 0.9052742616033755, "grad_norm": 0.734375, "learning_rate": 5.914641011550454e-07, "loss": 0.2651863396167755, "step": 4291, "token_acc": 0.9274669665448412 }, { "epoch": 0.9054852320675105, "grad_norm": 0.73828125, "learning_rate": 5.912978643738232e-07, "loss": 0.23847214877605438, "step": 4292, "token_acc": 0.9309271935283137 }, { "epoch": 0.9056962025316456, "grad_norm": 0.76171875, "learning_rate": 5.911316171519175e-07, "loss": 0.2515287399291992, "step": 4293, "token_acc": 0.9318490361118653 }, { "epoch": 0.9059071729957806, "grad_norm": 0.9296875, "learning_rate": 5.9096535950834e-07, "loss": 0.25541579723358154, "step": 4294, "token_acc": 0.927124183006536 }, { "epoch": 0.9061181434599156, "grad_norm": 0.6796875, "learning_rate": 5.907990914621037e-07, "loss": 0.2656506299972534, "step": 4295, "token_acc": 0.9255287459040811 }, { "epoch": 0.9063291139240506, "grad_norm": 0.72265625, "learning_rate": 5.906328130322229e-07, "loss": 0.2446029633283615, "step": 4296, "token_acc": 0.9245230078563412 }, { "epoch": 0.9065400843881857, "grad_norm": 0.66015625, "learning_rate": 5.904665242377127e-07, "loss": 0.25557592511177063, "step": 4297, "token_acc": 0.931024531024531 }, { "epoch": 0.9067510548523207, "grad_norm": 0.6875, "learning_rate": 5.903002250975897e-07, "loss": 0.27644652128219604, "step": 4298, "token_acc": 0.9281907433380084 }, { "epoch": 0.9069620253164556, "grad_norm": 0.97265625, "learning_rate": 5.901339156308719e-07, "loss": 0.3008362650871277, "step": 4299, "token_acc": 0.9210349462365591 }, { "epoch": 0.9071729957805907, "grad_norm": 0.77734375, "learning_rate": 5.899675958565778e-07, "loss": 0.2380152940750122, "step": 4300, "token_acc": 0.9316217590238942 }, { "epoch": 0.9073839662447257, "grad_norm": 0.75390625, "learning_rate": 5.898012657937277e-07, "loss": 0.289250910282135, "step": 4301, "token_acc": 0.917357910906298 }, { "epoch": 0.9075949367088607, "grad_norm": 0.86328125, "learning_rate": 5.896349254613426e-07, "loss": 0.2536957859992981, "step": 4302, "token_acc": 0.93625 }, { "epoch": 0.9078059071729958, "grad_norm": 0.71484375, "learning_rate": 5.894685748784452e-07, "loss": 0.24898754060268402, "step": 4303, "token_acc": 0.9329758713136729 }, { "epoch": 0.9080168776371308, "grad_norm": 0.72265625, "learning_rate": 5.893022140640592e-07, "loss": 0.24103891849517822, "step": 4304, "token_acc": 0.9341899091193983 }, { "epoch": 0.9082278481012658, "grad_norm": 0.6875, "learning_rate": 5.89135843037209e-07, "loss": 0.2636047601699829, "step": 4305, "token_acc": 0.9236526946107785 }, { "epoch": 0.9084388185654009, "grad_norm": 0.79296875, "learning_rate": 5.889694618169208e-07, "loss": 0.2893732190132141, "step": 4306, "token_acc": 0.9223269418264544 }, { "epoch": 0.9086497890295359, "grad_norm": 0.6875, "learning_rate": 5.888030704222216e-07, "loss": 0.2498936951160431, "step": 4307, "token_acc": 0.934936786925686 }, { "epoch": 0.9088607594936708, "grad_norm": 0.69921875, "learning_rate": 5.886366688721396e-07, "loss": 0.2481892704963684, "step": 4308, "token_acc": 0.9331574318381706 }, { "epoch": 0.9090717299578059, "grad_norm": 0.73828125, "learning_rate": 5.884702571857042e-07, "loss": 0.29478487372398376, "step": 4309, "token_acc": 0.9194890077243019 }, { "epoch": 0.9092827004219409, "grad_norm": 1.7109375, "learning_rate": 5.883038353819462e-07, "loss": 0.31906357407569885, "step": 4310, "token_acc": 0.9152490886998785 }, { "epoch": 0.9094936708860759, "grad_norm": 0.5234375, "learning_rate": 5.881374034798974e-07, "loss": 0.2200336754322052, "step": 4311, "token_acc": 0.9378268448576409 }, { "epoch": 0.909704641350211, "grad_norm": 0.68359375, "learning_rate": 5.879709614985903e-07, "loss": 0.2540716826915741, "step": 4312, "token_acc": 0.9332084893882646 }, { "epoch": 0.909915611814346, "grad_norm": 0.64453125, "learning_rate": 5.878045094570591e-07, "loss": 0.25512492656707764, "step": 4313, "token_acc": 0.9351487928130264 }, { "epoch": 0.910126582278481, "grad_norm": 0.65625, "learning_rate": 5.876380473743393e-07, "loss": 0.1943981945514679, "step": 4314, "token_acc": 0.9372146118721462 }, { "epoch": 0.9103375527426161, "grad_norm": 0.73828125, "learning_rate": 5.874715752694669e-07, "loss": 0.22304001450538635, "step": 4315, "token_acc": 0.9341317365269461 }, { "epoch": 0.9105485232067511, "grad_norm": 0.73046875, "learning_rate": 5.873050931614795e-07, "loss": 0.24393633008003235, "step": 4316, "token_acc": 0.9307958477508651 }, { "epoch": 0.910759493670886, "grad_norm": 0.74609375, "learning_rate": 5.871386010694158e-07, "loss": 0.28218936920166016, "step": 4317, "token_acc": 0.9245441795231416 }, { "epoch": 0.9109704641350211, "grad_norm": 0.67578125, "learning_rate": 5.869720990123156e-07, "loss": 0.2685447931289673, "step": 4318, "token_acc": 0.9258045292014303 }, { "epoch": 0.9111814345991561, "grad_norm": 0.75390625, "learning_rate": 5.868055870092197e-07, "loss": 0.3118164539337158, "step": 4319, "token_acc": 0.9167918293781917 }, { "epoch": 0.9113924050632911, "grad_norm": 0.63671875, "learning_rate": 5.866390650791701e-07, "loss": 0.2651423513889313, "step": 4320, "token_acc": 0.926305353602115 }, { "epoch": 0.9116033755274262, "grad_norm": 0.69921875, "learning_rate": 5.864725332412101e-07, "loss": 0.2547246813774109, "step": 4321, "token_acc": 0.9271873165002936 }, { "epoch": 0.9118143459915612, "grad_norm": 0.6328125, "learning_rate": 5.863059915143842e-07, "loss": 0.2535017728805542, "step": 4322, "token_acc": 0.9315707620528771 }, { "epoch": 0.9120253164556962, "grad_norm": 0.8125, "learning_rate": 5.861394399177377e-07, "loss": 0.27815091609954834, "step": 4323, "token_acc": 0.926205694363742 }, { "epoch": 0.9122362869198313, "grad_norm": 0.80859375, "learning_rate": 5.859728784703169e-07, "loss": 0.31020891666412354, "step": 4324, "token_acc": 0.9144025708442887 }, { "epoch": 0.9124472573839663, "grad_norm": 0.859375, "learning_rate": 5.8580630719117e-07, "loss": 0.2836688756942749, "step": 4325, "token_acc": 0.9201741654571843 }, { "epoch": 0.9126582278481012, "grad_norm": 0.69140625, "learning_rate": 5.856397260993455e-07, "loss": 0.25703415274620056, "step": 4326, "token_acc": 0.925575101488498 }, { "epoch": 0.9128691983122363, "grad_norm": 0.74609375, "learning_rate": 5.854731352138935e-07, "loss": 0.27324724197387695, "step": 4327, "token_acc": 0.9280742459396751 }, { "epoch": 0.9130801687763713, "grad_norm": 0.875, "learning_rate": 5.853065345538652e-07, "loss": 0.27953433990478516, "step": 4328, "token_acc": 0.9243526331102706 }, { "epoch": 0.9132911392405063, "grad_norm": 0.69140625, "learning_rate": 5.851399241383123e-07, "loss": 0.2497098445892334, "step": 4329, "token_acc": 0.9404161412358134 }, { "epoch": 0.9135021097046413, "grad_norm": 0.77734375, "learning_rate": 5.849733039862888e-07, "loss": 0.22470033168792725, "step": 4330, "token_acc": 0.9348112958564265 }, { "epoch": 0.9137130801687764, "grad_norm": 0.91796875, "learning_rate": 5.848066741168487e-07, "loss": 0.2677520513534546, "step": 4331, "token_acc": 0.9259146341463415 }, { "epoch": 0.9139240506329114, "grad_norm": 0.65234375, "learning_rate": 5.846400345490475e-07, "loss": 0.24836644530296326, "step": 4332, "token_acc": 0.9265281541704306 }, { "epoch": 0.9141350210970464, "grad_norm": 0.73828125, "learning_rate": 5.844733853019421e-07, "loss": 0.26700273156166077, "step": 4333, "token_acc": 0.928140270192584 }, { "epoch": 0.9143459915611815, "grad_norm": 0.671875, "learning_rate": 5.8430672639459e-07, "loss": 0.2631256878376007, "step": 4334, "token_acc": 0.9230080572963295 }, { "epoch": 0.9145569620253164, "grad_norm": 0.8671875, "learning_rate": 5.841400578460504e-07, "loss": 0.22541800141334534, "step": 4335, "token_acc": 0.933947284852334 }, { "epoch": 0.9147679324894514, "grad_norm": 0.7265625, "learning_rate": 5.839733796753827e-07, "loss": 0.2818663716316223, "step": 4336, "token_acc": 0.925125313283208 }, { "epoch": 0.9149789029535865, "grad_norm": 0.84375, "learning_rate": 5.838066919016483e-07, "loss": 0.2500406801700592, "step": 4337, "token_acc": 0.9361179361179361 }, { "epoch": 0.9151898734177215, "grad_norm": 0.57421875, "learning_rate": 5.836399945439096e-07, "loss": 0.24172435700893402, "step": 4338, "token_acc": 0.9317180616740088 }, { "epoch": 0.9154008438818565, "grad_norm": 0.6484375, "learning_rate": 5.834732876212295e-07, "loss": 0.29422229528427124, "step": 4339, "token_acc": 0.9290861919861632 }, { "epoch": 0.9156118143459916, "grad_norm": 0.734375, "learning_rate": 5.833065711526725e-07, "loss": 0.23004946112632751, "step": 4340, "token_acc": 0.9309275444037215 }, { "epoch": 0.9158227848101266, "grad_norm": 0.6796875, "learning_rate": 5.831398451573039e-07, "loss": 0.29581427574157715, "step": 4341, "token_acc": 0.9195910472506217 }, { "epoch": 0.9160337552742616, "grad_norm": 1.0390625, "learning_rate": 5.829731096541903e-07, "loss": 0.3001316487789154, "step": 4342, "token_acc": 0.9158444573418456 }, { "epoch": 0.9162447257383967, "grad_norm": 1.109375, "learning_rate": 5.828063646623994e-07, "loss": 0.3060131072998047, "step": 4343, "token_acc": 0.9223837209302326 }, { "epoch": 0.9164556962025316, "grad_norm": 0.8984375, "learning_rate": 5.826396102009998e-07, "loss": 0.3282131552696228, "step": 4344, "token_acc": 0.9138251704897706 }, { "epoch": 0.9166666666666666, "grad_norm": 1.1640625, "learning_rate": 5.824728462890613e-07, "loss": 0.21289582550525665, "step": 4345, "token_acc": 0.9385194479297365 }, { "epoch": 0.9168776371308017, "grad_norm": 0.86328125, "learning_rate": 5.823060729456548e-07, "loss": 0.2250448316335678, "step": 4346, "token_acc": 0.9359781121751026 }, { "epoch": 0.9170886075949367, "grad_norm": 0.74609375, "learning_rate": 5.821392901898523e-07, "loss": 0.2401316612958908, "step": 4347, "token_acc": 0.9354749376903905 }, { "epoch": 0.9172995780590717, "grad_norm": 0.62890625, "learning_rate": 5.819724980407266e-07, "loss": 0.22959178686141968, "step": 4348, "token_acc": 0.9421694279462332 }, { "epoch": 0.9175105485232068, "grad_norm": 0.59375, "learning_rate": 5.818056965173519e-07, "loss": 0.19914600253105164, "step": 4349, "token_acc": 0.9420582300374748 }, { "epoch": 0.9177215189873418, "grad_norm": 0.7265625, "learning_rate": 5.816388856388037e-07, "loss": 0.28049570322036743, "step": 4350, "token_acc": 0.9260785930200605 }, { "epoch": 0.9179324894514768, "grad_norm": 0.6328125, "learning_rate": 5.814720654241577e-07, "loss": 0.22338229417800903, "step": 4351, "token_acc": 0.9355594610427651 }, { "epoch": 0.9181434599156119, "grad_norm": 0.65625, "learning_rate": 5.813052358924915e-07, "loss": 0.2036553919315338, "step": 4352, "token_acc": 0.9396495781959766 }, { "epoch": 0.9183544303797468, "grad_norm": 0.74609375, "learning_rate": 5.811383970628834e-07, "loss": 0.26292625069618225, "step": 4353, "token_acc": 0.926591052299937 }, { "epoch": 0.9185654008438818, "grad_norm": 0.70703125, "learning_rate": 5.809715489544128e-07, "loss": 0.2776236832141876, "step": 4354, "token_acc": 0.9202192448233861 }, { "epoch": 0.9187763713080169, "grad_norm": 0.76953125, "learning_rate": 5.808046915861604e-07, "loss": 0.2814843952655792, "step": 4355, "token_acc": 0.9247928616953474 }, { "epoch": 0.9189873417721519, "grad_norm": 0.828125, "learning_rate": 5.806378249772075e-07, "loss": 0.33691608905792236, "step": 4356, "token_acc": 0.9069436539556062 }, { "epoch": 0.9191983122362869, "grad_norm": 0.73046875, "learning_rate": 5.804709491466367e-07, "loss": 0.3071545362472534, "step": 4357, "token_acc": 0.9176769100648435 }, { "epoch": 0.919409282700422, "grad_norm": 0.75, "learning_rate": 5.80304064113532e-07, "loss": 0.27650636434555054, "step": 4358, "token_acc": 0.931191270081843 }, { "epoch": 0.919620253164557, "grad_norm": 0.69921875, "learning_rate": 5.801371698969777e-07, "loss": 0.2669373154640198, "step": 4359, "token_acc": 0.9276410998552822 }, { "epoch": 0.919831223628692, "grad_norm": 0.76953125, "learning_rate": 5.799702665160598e-07, "loss": 0.2686125636100769, "step": 4360, "token_acc": 0.9245231607629428 }, { "epoch": 0.9200421940928271, "grad_norm": 0.6875, "learning_rate": 5.798033539898649e-07, "loss": 0.28443431854248047, "step": 4361, "token_acc": 0.9253056884635832 }, { "epoch": 0.920253164556962, "grad_norm": 0.70703125, "learning_rate": 5.796364323374813e-07, "loss": 0.29887712001800537, "step": 4362, "token_acc": 0.9190462172505152 }, { "epoch": 0.920464135021097, "grad_norm": 0.8046875, "learning_rate": 5.794695015779974e-07, "loss": 0.29330581426620483, "step": 4363, "token_acc": 0.9191044776119403 }, { "epoch": 0.920675105485232, "grad_norm": 0.9375, "learning_rate": 5.793025617305035e-07, "loss": 0.2725869417190552, "step": 4364, "token_acc": 0.9233501035809412 }, { "epoch": 0.9208860759493671, "grad_norm": 0.81640625, "learning_rate": 5.791356128140904e-07, "loss": 0.24950778484344482, "step": 4365, "token_acc": 0.9295774647887324 }, { "epoch": 0.9210970464135021, "grad_norm": 0.94921875, "learning_rate": 5.789686548478502e-07, "loss": 0.32874253392219543, "step": 4366, "token_acc": 0.9105998835177636 }, { "epoch": 0.9213080168776371, "grad_norm": 0.7109375, "learning_rate": 5.78801687850876e-07, "loss": 0.26994308829307556, "step": 4367, "token_acc": 0.9221253865617093 }, { "epoch": 0.9215189873417722, "grad_norm": 0.71484375, "learning_rate": 5.786347118422617e-07, "loss": 0.2387690246105194, "step": 4368, "token_acc": 0.9312796208530806 }, { "epoch": 0.9217299578059072, "grad_norm": 0.9453125, "learning_rate": 5.784677268411026e-07, "loss": 0.21991507709026337, "step": 4369, "token_acc": 0.9399875621890548 }, { "epoch": 0.9219409282700421, "grad_norm": 0.5859375, "learning_rate": 5.783007328664948e-07, "loss": 0.24474632740020752, "step": 4370, "token_acc": 0.9295274515159793 }, { "epoch": 0.9221518987341772, "grad_norm": 0.8359375, "learning_rate": 5.781337299375354e-07, "loss": 0.30890393257141113, "step": 4371, "token_acc": 0.9124959638359703 }, { "epoch": 0.9223628691983122, "grad_norm": 0.70703125, "learning_rate": 5.779667180733228e-07, "loss": 0.2354089468717575, "step": 4372, "token_acc": 0.9342227738132175 }, { "epoch": 0.9225738396624472, "grad_norm": 0.72265625, "learning_rate": 5.777996972929558e-07, "loss": 0.2502990961074829, "step": 4373, "token_acc": 0.9255172413793104 }, { "epoch": 0.9227848101265823, "grad_norm": 0.640625, "learning_rate": 5.776326676155354e-07, "loss": 0.2764410972595215, "step": 4374, "token_acc": 0.932156982050502 }, { "epoch": 0.9229957805907173, "grad_norm": 0.7109375, "learning_rate": 5.774656290601619e-07, "loss": 0.2341441810131073, "step": 4375, "token_acc": 0.937481826112242 }, { "epoch": 0.9232067510548523, "grad_norm": 0.8671875, "learning_rate": 5.772985816459383e-07, "loss": 0.26842278242111206, "step": 4376, "token_acc": 0.9248611564848089 }, { "epoch": 0.9234177215189874, "grad_norm": 0.9140625, "learning_rate": 5.771315253919675e-07, "loss": 0.2973787188529968, "step": 4377, "token_acc": 0.9202143495087823 }, { "epoch": 0.9236286919831224, "grad_norm": 1.203125, "learning_rate": 5.769644603173541e-07, "loss": 0.25652506947517395, "step": 4378, "token_acc": 0.9253650621957815 }, { "epoch": 0.9238396624472573, "grad_norm": 0.6953125, "learning_rate": 5.767973864412032e-07, "loss": 0.28217506408691406, "step": 4379, "token_acc": 0.9253685503685504 }, { "epoch": 0.9240506329113924, "grad_norm": 0.6875, "learning_rate": 5.76630303782621e-07, "loss": 0.23677222430706024, "step": 4380, "token_acc": 0.9367588932806324 }, { "epoch": 0.9242616033755274, "grad_norm": 0.75390625, "learning_rate": 5.764632123607152e-07, "loss": 0.28718435764312744, "step": 4381, "token_acc": 0.9202111145606955 }, { "epoch": 0.9244725738396624, "grad_norm": 0.6640625, "learning_rate": 5.762961121945937e-07, "loss": 0.21630658209323883, "step": 4382, "token_acc": 0.9423316708229427 }, { "epoch": 0.9246835443037975, "grad_norm": 0.88671875, "learning_rate": 5.761290033033661e-07, "loss": 0.27299344539642334, "step": 4383, "token_acc": 0.923572228443449 }, { "epoch": 0.9248945147679325, "grad_norm": 1.1953125, "learning_rate": 5.759618857061426e-07, "loss": 0.2751486897468567, "step": 4384, "token_acc": 0.9229675952245594 }, { "epoch": 0.9251054852320675, "grad_norm": 0.74609375, "learning_rate": 5.757947594220345e-07, "loss": 0.23068107664585114, "step": 4385, "token_acc": 0.9298566636169564 }, { "epoch": 0.9253164556962026, "grad_norm": 0.78125, "learning_rate": 5.756276244701543e-07, "loss": 0.31037017703056335, "step": 4386, "token_acc": 0.9201399009035267 }, { "epoch": 0.9255274261603376, "grad_norm": 0.859375, "learning_rate": 5.75460480869615e-07, "loss": 0.2433011382818222, "step": 4387, "token_acc": 0.9303699098539011 }, { "epoch": 0.9257383966244725, "grad_norm": 0.65234375, "learning_rate": 5.752933286395308e-07, "loss": 0.259957492351532, "step": 4388, "token_acc": 0.923989054423837 }, { "epoch": 0.9259493670886076, "grad_norm": 0.65234375, "learning_rate": 5.751261677990176e-07, "loss": 0.2294989824295044, "step": 4389, "token_acc": 0.9342417061611374 }, { "epoch": 0.9261603375527426, "grad_norm": 0.61328125, "learning_rate": 5.74958998367191e-07, "loss": 0.23396210372447968, "step": 4390, "token_acc": 0.938360450563204 }, { "epoch": 0.9263713080168776, "grad_norm": 0.78125, "learning_rate": 5.747918203631687e-07, "loss": 0.3152061104774475, "step": 4391, "token_acc": 0.9197436679890143 }, { "epoch": 0.9265822784810127, "grad_norm": 0.640625, "learning_rate": 5.746246338060684e-07, "loss": 0.22632479667663574, "step": 4392, "token_acc": 0.9338485316846986 }, { "epoch": 0.9267932489451477, "grad_norm": 0.578125, "learning_rate": 5.744574387150099e-07, "loss": 0.22443917393684387, "step": 4393, "token_acc": 0.9375363160952934 }, { "epoch": 0.9270042194092827, "grad_norm": 0.828125, "learning_rate": 5.742902351091129e-07, "loss": 0.26028645038604736, "step": 4394, "token_acc": 0.9281721632196758 }, { "epoch": 0.9272151898734177, "grad_norm": 0.81640625, "learning_rate": 5.741230230074988e-07, "loss": 0.2659928500652313, "step": 4395, "token_acc": 0.9308943089430894 }, { "epoch": 0.9274261603375528, "grad_norm": 0.68359375, "learning_rate": 5.739558024292896e-07, "loss": 0.2444857954978943, "step": 4396, "token_acc": 0.9300760678759509 }, { "epoch": 0.9276371308016877, "grad_norm": 0.80859375, "learning_rate": 5.737885733936084e-07, "loss": 0.27358317375183105, "step": 4397, "token_acc": 0.9273311897106109 }, { "epoch": 0.9278481012658227, "grad_norm": 0.73046875, "learning_rate": 5.736213359195794e-07, "loss": 0.26992809772491455, "step": 4398, "token_acc": 0.9255110613273593 }, { "epoch": 0.9280590717299578, "grad_norm": 0.69140625, "learning_rate": 5.734540900263276e-07, "loss": 0.23264504969120026, "step": 4399, "token_acc": 0.9305392731535757 }, { "epoch": 0.9282700421940928, "grad_norm": 0.76171875, "learning_rate": 5.732868357329786e-07, "loss": 0.25294071435928345, "step": 4400, "token_acc": 0.9271844660194175 }, { "epoch": 0.9282700421940928, "eval_loss": 0.4336411952972412, "eval_runtime": 245.6138, "eval_samples_per_second": 137.228, "eval_steps_per_second": 2.146, "eval_token_acc": 0.8990891142421251, "step": 4400 }, { "epoch": 0.9284810126582278, "grad_norm": 0.87109375, "learning_rate": 5.731195730586599e-07, "loss": 0.2387223243713379, "step": 4401, "token_acc": 0.928896473265074 }, { "epoch": 0.9286919831223629, "grad_norm": 0.62890625, "learning_rate": 5.72952302022499e-07, "loss": 0.20042628049850464, "step": 4402, "token_acc": 0.9409064830751578 }, { "epoch": 0.9289029535864979, "grad_norm": 0.69140625, "learning_rate": 5.727850226436249e-07, "loss": 0.24429546296596527, "step": 4403, "token_acc": 0.9309400669303316 }, { "epoch": 0.9291139240506329, "grad_norm": 0.61328125, "learning_rate": 5.726177349411675e-07, "loss": 0.2671322822570801, "step": 4404, "token_acc": 0.9287661895023859 }, { "epoch": 0.929324894514768, "grad_norm": 0.734375, "learning_rate": 5.724504389342574e-07, "loss": 0.2952882647514343, "step": 4405, "token_acc": 0.9272605939272606 }, { "epoch": 0.929535864978903, "grad_norm": 0.71875, "learning_rate": 5.722831346420264e-07, "loss": 0.2784864902496338, "step": 4406, "token_acc": 0.9252472367655614 }, { "epoch": 0.9297468354430379, "grad_norm": 0.61328125, "learning_rate": 5.721158220836073e-07, "loss": 0.241103857755661, "step": 4407, "token_acc": 0.9311475409836065 }, { "epoch": 0.929957805907173, "grad_norm": 0.5859375, "learning_rate": 5.719485012781333e-07, "loss": 0.214087575674057, "step": 4408, "token_acc": 0.9434190620272315 }, { "epoch": 0.930168776371308, "grad_norm": 0.83984375, "learning_rate": 5.717811722447394e-07, "loss": 0.26888540387153625, "step": 4409, "token_acc": 0.9235364396654719 }, { "epoch": 0.930379746835443, "grad_norm": 0.5546875, "learning_rate": 5.716138350025609e-07, "loss": 0.2050468921661377, "step": 4410, "token_acc": 0.9407171775592829 }, { "epoch": 0.9305907172995781, "grad_norm": 0.921875, "learning_rate": 5.714464895707342e-07, "loss": 0.24155902862548828, "step": 4411, "token_acc": 0.9271047227926078 }, { "epoch": 0.9308016877637131, "grad_norm": 0.55859375, "learning_rate": 5.712791359683966e-07, "loss": 0.24656759202480316, "step": 4412, "token_acc": 0.931350114416476 }, { "epoch": 0.9310126582278481, "grad_norm": 0.734375, "learning_rate": 5.711117742146867e-07, "loss": 0.23491117358207703, "step": 4413, "token_acc": 0.9306873741731377 }, { "epoch": 0.9312236286919832, "grad_norm": 0.7578125, "learning_rate": 5.709444043287434e-07, "loss": 0.3030080199241638, "step": 4414, "token_acc": 0.9209542972118425 }, { "epoch": 0.9314345991561181, "grad_norm": 0.69140625, "learning_rate": 5.707770263297069e-07, "loss": 0.29689499735832214, "step": 4415, "token_acc": 0.9226294357184409 }, { "epoch": 0.9316455696202531, "grad_norm": 0.734375, "learning_rate": 5.706096402367185e-07, "loss": 0.29218047857284546, "step": 4416, "token_acc": 0.9198075380914194 }, { "epoch": 0.9318565400843882, "grad_norm": 0.71875, "learning_rate": 5.704422460689202e-07, "loss": 0.22949926555156708, "step": 4417, "token_acc": 0.9363689433741973 }, { "epoch": 0.9320675105485232, "grad_norm": 0.96484375, "learning_rate": 5.702748438454548e-07, "loss": 0.2992064356803894, "step": 4418, "token_acc": 0.9126103045829775 }, { "epoch": 0.9322784810126582, "grad_norm": 0.66796875, "learning_rate": 5.701074335854661e-07, "loss": 0.2624170184135437, "step": 4419, "token_acc": 0.9264793783622236 }, { "epoch": 0.9324894514767933, "grad_norm": 0.75390625, "learning_rate": 5.699400153080991e-07, "loss": 0.2625029385089874, "step": 4420, "token_acc": 0.9222992489890237 }, { "epoch": 0.9327004219409283, "grad_norm": 0.69921875, "learning_rate": 5.697725890324993e-07, "loss": 0.24633285403251648, "step": 4421, "token_acc": 0.9270348837209302 }, { "epoch": 0.9329113924050633, "grad_norm": 0.671875, "learning_rate": 5.696051547778134e-07, "loss": 0.25819751620292664, "step": 4422, "token_acc": 0.923963133640553 }, { "epoch": 0.9331223628691984, "grad_norm": 0.79296875, "learning_rate": 5.694377125631888e-07, "loss": 0.2231207638978958, "step": 4423, "token_acc": 0.9346073454762616 }, { "epoch": 0.9333333333333333, "grad_norm": 0.74609375, "learning_rate": 5.692702624077743e-07, "loss": 0.2712147831916809, "step": 4424, "token_acc": 0.9189665296535525 }, { "epoch": 0.9335443037974683, "grad_norm": 0.8203125, "learning_rate": 5.691028043307189e-07, "loss": 0.23542457818984985, "step": 4425, "token_acc": 0.932806324110672 }, { "epoch": 0.9337552742616034, "grad_norm": 0.7734375, "learning_rate": 5.689353383511729e-07, "loss": 0.2597743570804596, "step": 4426, "token_acc": 0.9267817371937639 }, { "epoch": 0.9339662447257384, "grad_norm": 0.6953125, "learning_rate": 5.687678644882876e-07, "loss": 0.2489052712917328, "step": 4427, "token_acc": 0.9330922242314648 }, { "epoch": 0.9341772151898734, "grad_norm": 0.84765625, "learning_rate": 5.686003827612148e-07, "loss": 0.32635870575904846, "step": 4428, "token_acc": 0.9164983164983165 }, { "epoch": 0.9343881856540084, "grad_norm": 0.68359375, "learning_rate": 5.684328931891077e-07, "loss": 0.27935248613357544, "step": 4429, "token_acc": 0.9231200897867564 }, { "epoch": 0.9345991561181435, "grad_norm": 0.59375, "learning_rate": 5.682653957911201e-07, "loss": 0.24132739007472992, "step": 4430, "token_acc": 0.9333529066353494 }, { "epoch": 0.9348101265822785, "grad_norm": 0.84765625, "learning_rate": 5.680978905864064e-07, "loss": 0.28540828824043274, "step": 4431, "token_acc": 0.9202506408430646 }, { "epoch": 0.9350210970464135, "grad_norm": 0.65625, "learning_rate": 5.679303775941227e-07, "loss": 0.22704532742500305, "step": 4432, "token_acc": 0.9334095455844527 }, { "epoch": 0.9352320675105485, "grad_norm": 0.74609375, "learning_rate": 5.677628568334256e-07, "loss": 0.2513170838356018, "step": 4433, "token_acc": 0.9317460317460318 }, { "epoch": 0.9354430379746835, "grad_norm": 0.62109375, "learning_rate": 5.675953283234721e-07, "loss": 0.2438969761133194, "step": 4434, "token_acc": 0.9318303811057435 }, { "epoch": 0.9356540084388185, "grad_norm": 0.77734375, "learning_rate": 5.674277920834208e-07, "loss": 0.25183093547821045, "step": 4435, "token_acc": 0.9264043256233103 }, { "epoch": 0.9358649789029536, "grad_norm": 0.63671875, "learning_rate": 5.672602481324306e-07, "loss": 0.22969092428684235, "step": 4436, "token_acc": 0.9334310850439883 }, { "epoch": 0.9360759493670886, "grad_norm": 0.71484375, "learning_rate": 5.670926964896618e-07, "loss": 0.24348701536655426, "step": 4437, "token_acc": 0.9297520661157025 }, { "epoch": 0.9362869198312236, "grad_norm": 0.8125, "learning_rate": 5.669251371742753e-07, "loss": 0.31419461965560913, "step": 4438, "token_acc": 0.9234507897934386 }, { "epoch": 0.9364978902953587, "grad_norm": 0.69921875, "learning_rate": 5.667575702054329e-07, "loss": 0.25972098112106323, "step": 4439, "token_acc": 0.9279442930669088 }, { "epoch": 0.9367088607594937, "grad_norm": 0.69140625, "learning_rate": 5.665899956022972e-07, "loss": 0.2403879165649414, "step": 4440, "token_acc": 0.928448275862069 }, { "epoch": 0.9369198312236287, "grad_norm": 0.703125, "learning_rate": 5.664224133840321e-07, "loss": 0.2959568500518799, "step": 4441, "token_acc": 0.9194589609591146 }, { "epoch": 0.9371308016877637, "grad_norm": 0.72265625, "learning_rate": 5.662548235698018e-07, "loss": 0.20678211748600006, "step": 4442, "token_acc": 0.941753171856978 }, { "epoch": 0.9373417721518987, "grad_norm": 0.76171875, "learning_rate": 5.660872261787715e-07, "loss": 0.31633493304252625, "step": 4443, "token_acc": 0.9133333333333333 }, { "epoch": 0.9375527426160337, "grad_norm": 0.8046875, "learning_rate": 5.659196212301075e-07, "loss": 0.24892787635326385, "step": 4444, "token_acc": 0.9286567164179105 }, { "epoch": 0.9377637130801688, "grad_norm": 0.6015625, "learning_rate": 5.657520087429771e-07, "loss": 0.21799276769161224, "step": 4445, "token_acc": 0.936183395291202 }, { "epoch": 0.9379746835443038, "grad_norm": 0.68359375, "learning_rate": 5.655843887365479e-07, "loss": 0.2528685927391052, "step": 4446, "token_acc": 0.9292840375586855 }, { "epoch": 0.9381856540084388, "grad_norm": 0.65234375, "learning_rate": 5.654167612299888e-07, "loss": 0.24617847800254822, "step": 4447, "token_acc": 0.9317235636969192 }, { "epoch": 0.9383966244725739, "grad_norm": 0.76171875, "learning_rate": 5.652491262424691e-07, "loss": 0.2610861361026764, "step": 4448, "token_acc": 0.9341379310344827 }, { "epoch": 0.9386075949367089, "grad_norm": 0.609375, "learning_rate": 5.650814837931598e-07, "loss": 0.22153803706169128, "step": 4449, "token_acc": 0.9324359306238674 }, { "epoch": 0.9388185654008439, "grad_norm": 0.60546875, "learning_rate": 5.64913833901232e-07, "loss": 0.24263890087604523, "step": 4450, "token_acc": 0.9335504885993485 }, { "epoch": 0.939029535864979, "grad_norm": 0.70703125, "learning_rate": 5.647461765858576e-07, "loss": 0.23150673508644104, "step": 4451, "token_acc": 0.9314301707248811 }, { "epoch": 0.9392405063291139, "grad_norm": 0.73828125, "learning_rate": 5.645785118662102e-07, "loss": 0.27059465646743774, "step": 4452, "token_acc": 0.9310673443456162 }, { "epoch": 0.9394514767932489, "grad_norm": 0.64453125, "learning_rate": 5.644108397614633e-07, "loss": 0.21945405006408691, "step": 4453, "token_acc": 0.9339953271028038 }, { "epoch": 0.939662447257384, "grad_norm": 0.828125, "learning_rate": 5.642431602907917e-07, "loss": 0.2873837947845459, "step": 4454, "token_acc": 0.9206260480715484 }, { "epoch": 0.939873417721519, "grad_norm": 0.64453125, "learning_rate": 5.640754734733708e-07, "loss": 0.26824724674224854, "step": 4455, "token_acc": 0.9237082066869301 }, { "epoch": 0.940084388185654, "grad_norm": 0.71484375, "learning_rate": 5.639077793283774e-07, "loss": 0.28945863246917725, "step": 4456, "token_acc": 0.9223159732324702 }, { "epoch": 0.9402953586497891, "grad_norm": 0.8828125, "learning_rate": 5.637400778749884e-07, "loss": 0.3003215789794922, "step": 4457, "token_acc": 0.9238828967642527 }, { "epoch": 0.9405063291139241, "grad_norm": 0.62109375, "learning_rate": 5.635723691323822e-07, "loss": 0.21447455883026123, "step": 4458, "token_acc": 0.936085626911315 }, { "epoch": 0.940717299578059, "grad_norm": 0.86328125, "learning_rate": 5.634046531197372e-07, "loss": 0.25178882479667664, "step": 4459, "token_acc": 0.9363905325443787 }, { "epoch": 0.9409282700421941, "grad_norm": 0.8125, "learning_rate": 5.632369298562337e-07, "loss": 0.243199422955513, "step": 4460, "token_acc": 0.935552193645991 }, { "epoch": 0.9411392405063291, "grad_norm": 0.890625, "learning_rate": 5.63069199361052e-07, "loss": 0.2883370518684387, "step": 4461, "token_acc": 0.9253489126906849 }, { "epoch": 0.9413502109704641, "grad_norm": 0.71484375, "learning_rate": 5.629014616533735e-07, "loss": 0.2576502561569214, "step": 4462, "token_acc": 0.9307671005261701 }, { "epoch": 0.9415611814345991, "grad_norm": 0.828125, "learning_rate": 5.627337167523803e-07, "loss": 0.2582550048828125, "step": 4463, "token_acc": 0.9265222482435597 }, { "epoch": 0.9417721518987342, "grad_norm": 0.7734375, "learning_rate": 5.625659646772559e-07, "loss": 0.24630007147789001, "step": 4464, "token_acc": 0.9251587959127313 }, { "epoch": 0.9419831223628692, "grad_norm": 0.734375, "learning_rate": 5.623982054471839e-07, "loss": 0.24558809399604797, "step": 4465, "token_acc": 0.9355692850838482 }, { "epoch": 0.9421940928270042, "grad_norm": 0.64453125, "learning_rate": 5.622304390813487e-07, "loss": 0.237171933054924, "step": 4466, "token_acc": 0.9323715058611362 }, { "epoch": 0.9424050632911393, "grad_norm": 0.69140625, "learning_rate": 5.620626655989362e-07, "loss": 0.2891885042190552, "step": 4467, "token_acc": 0.9238852598693553 }, { "epoch": 0.9426160337552743, "grad_norm": 0.80859375, "learning_rate": 5.618948850191326e-07, "loss": 0.2498980313539505, "step": 4468, "token_acc": 0.9261083743842364 }, { "epoch": 0.9428270042194092, "grad_norm": 0.62890625, "learning_rate": 5.617270973611252e-07, "loss": 0.23183225095272064, "step": 4469, "token_acc": 0.9325218311722678 }, { "epoch": 0.9430379746835443, "grad_norm": 0.64453125, "learning_rate": 5.615593026441016e-07, "loss": 0.24455231428146362, "step": 4470, "token_acc": 0.9322079314040729 }, { "epoch": 0.9432489451476793, "grad_norm": 0.8125, "learning_rate": 5.613915008872505e-07, "loss": 0.26692163944244385, "step": 4471, "token_acc": 0.9249931563098823 }, { "epoch": 0.9434599156118143, "grad_norm": 0.7421875, "learning_rate": 5.612236921097619e-07, "loss": 0.2726472318172455, "step": 4472, "token_acc": 0.9241071428571429 }, { "epoch": 0.9436708860759494, "grad_norm": 0.78125, "learning_rate": 5.610558763308259e-07, "loss": 0.258222758769989, "step": 4473, "token_acc": 0.9294154228855721 }, { "epoch": 0.9438818565400844, "grad_norm": 0.62109375, "learning_rate": 5.608880535696338e-07, "loss": 0.22604089975357056, "step": 4474, "token_acc": 0.9377548246806198 }, { "epoch": 0.9440928270042194, "grad_norm": 0.7265625, "learning_rate": 5.607202238453772e-07, "loss": 0.2615683078765869, "step": 4475, "token_acc": 0.9242085065558043 }, { "epoch": 0.9443037974683545, "grad_norm": 0.76953125, "learning_rate": 5.605523871772492e-07, "loss": 0.260919451713562, "step": 4476, "token_acc": 0.9264305177111717 }, { "epoch": 0.9445147679324895, "grad_norm": 0.7421875, "learning_rate": 5.603845435844432e-07, "loss": 0.2392599880695343, "step": 4477, "token_acc": 0.9295320064550834 }, { "epoch": 0.9447257383966244, "grad_norm": 0.62890625, "learning_rate": 5.602166930861535e-07, "loss": 0.22516842186450958, "step": 4478, "token_acc": 0.9347217904880323 }, { "epoch": 0.9449367088607595, "grad_norm": 0.6640625, "learning_rate": 5.600488357015752e-07, "loss": 0.25472259521484375, "step": 4479, "token_acc": 0.9258493353028066 }, { "epoch": 0.9451476793248945, "grad_norm": 0.75390625, "learning_rate": 5.598809714499044e-07, "loss": 0.24524132907390594, "step": 4480, "token_acc": 0.931585292344786 }, { "epoch": 0.9453586497890295, "grad_norm": 0.62890625, "learning_rate": 5.597131003503377e-07, "loss": 0.2743436098098755, "step": 4481, "token_acc": 0.9243498817966903 }, { "epoch": 0.9455696202531646, "grad_norm": 0.5546875, "learning_rate": 5.595452224220725e-07, "loss": 0.19892990589141846, "step": 4482, "token_acc": 0.938169164882227 }, { "epoch": 0.9457805907172996, "grad_norm": 1.9375, "learning_rate": 5.593773376843071e-07, "loss": 0.219451904296875, "step": 4483, "token_acc": 0.9368088467614534 }, { "epoch": 0.9459915611814346, "grad_norm": 0.85546875, "learning_rate": 5.592094461562407e-07, "loss": 0.2814572751522064, "step": 4484, "token_acc": 0.9229160442187033 }, { "epoch": 0.9462025316455697, "grad_norm": 0.490234375, "learning_rate": 5.590415478570729e-07, "loss": 0.20449310541152954, "step": 4485, "token_acc": 0.9420576596947428 }, { "epoch": 0.9464135021097047, "grad_norm": 0.89453125, "learning_rate": 5.588736428060043e-07, "loss": 0.2367660105228424, "step": 4486, "token_acc": 0.9287757437070938 }, { "epoch": 0.9466244725738396, "grad_norm": 0.69140625, "learning_rate": 5.587057310222365e-07, "loss": 0.2612887918949127, "step": 4487, "token_acc": 0.9313976658126957 }, { "epoch": 0.9468354430379747, "grad_norm": 0.75, "learning_rate": 5.585378125249714e-07, "loss": 0.24243563413619995, "step": 4488, "token_acc": 0.9304647160068846 }, { "epoch": 0.9470464135021097, "grad_norm": 0.70703125, "learning_rate": 5.58369887333412e-07, "loss": 0.2630927562713623, "step": 4489, "token_acc": 0.9290342486886763 }, { "epoch": 0.9472573839662447, "grad_norm": 0.8984375, "learning_rate": 5.58201955466762e-07, "loss": 0.2807869613170624, "step": 4490, "token_acc": 0.9242382651660719 }, { "epoch": 0.9474683544303798, "grad_norm": 0.58984375, "learning_rate": 5.580340169442257e-07, "loss": 0.2375790774822235, "step": 4491, "token_acc": 0.9327422806481198 }, { "epoch": 0.9476793248945148, "grad_norm": 0.84765625, "learning_rate": 5.578660717850084e-07, "loss": 0.24225102365016937, "step": 4492, "token_acc": 0.9280413036463375 }, { "epoch": 0.9478902953586498, "grad_norm": 0.984375, "learning_rate": 5.576981200083161e-07, "loss": 0.31735312938690186, "step": 4493, "token_acc": 0.916794674859191 }, { "epoch": 0.9481012658227848, "grad_norm": 0.6328125, "learning_rate": 5.575301616333552e-07, "loss": 0.22917918860912323, "step": 4494, "token_acc": 0.9321026282853567 }, { "epoch": 0.9483122362869199, "grad_norm": 0.8125, "learning_rate": 5.573621966793335e-07, "loss": 0.2600996494293213, "step": 4495, "token_acc": 0.9278294149808639 }, { "epoch": 0.9485232067510548, "grad_norm": 0.94921875, "learning_rate": 5.571942251654592e-07, "loss": 0.27852770686149597, "step": 4496, "token_acc": 0.9301075268817204 }, { "epoch": 0.9487341772151898, "grad_norm": 0.74609375, "learning_rate": 5.57026247110941e-07, "loss": 0.2728361487388611, "step": 4497, "token_acc": 0.9278915488895298 }, { "epoch": 0.9489451476793249, "grad_norm": 0.73046875, "learning_rate": 5.568582625349888e-07, "loss": 0.26195797324180603, "step": 4498, "token_acc": 0.9260162601626016 }, { "epoch": 0.9491561181434599, "grad_norm": 0.8046875, "learning_rate": 5.566902714568131e-07, "loss": 0.29690706729888916, "step": 4499, "token_acc": 0.9223135809469805 }, { "epoch": 0.9493670886075949, "grad_norm": 0.81640625, "learning_rate": 5.56522273895625e-07, "loss": 0.2410832643508911, "step": 4500, "token_acc": 0.9290875033449291 }, { "epoch": 0.94957805907173, "grad_norm": 3.140625, "learning_rate": 5.563542698706366e-07, "loss": 0.26895982027053833, "step": 4501, "token_acc": 0.9287671232876712 }, { "epoch": 0.949789029535865, "grad_norm": 0.703125, "learning_rate": 5.561862594010602e-07, "loss": 0.2245972752571106, "step": 4502, "token_acc": 0.9340277777777778 }, { "epoch": 0.95, "grad_norm": 0.765625, "learning_rate": 5.560182425061098e-07, "loss": 0.2794555723667145, "step": 4503, "token_acc": 0.9208435207823961 }, { "epoch": 0.950210970464135, "grad_norm": 0.76953125, "learning_rate": 5.55850219204999e-07, "loss": 0.4048742651939392, "step": 4504, "token_acc": 0.9008746355685131 }, { "epoch": 0.95042194092827, "grad_norm": 1.15625, "learning_rate": 5.55682189516943e-07, "loss": 0.2508431375026703, "step": 4505, "token_acc": 0.9317331670822943 }, { "epoch": 0.950632911392405, "grad_norm": 0.796875, "learning_rate": 5.555141534611572e-07, "loss": 0.2880476415157318, "step": 4506, "token_acc": 0.9220099450405653 }, { "epoch": 0.9508438818565401, "grad_norm": 0.75, "learning_rate": 5.553461110568582e-07, "loss": 0.25603699684143066, "step": 4507, "token_acc": 0.9285509325681492 }, { "epoch": 0.9510548523206751, "grad_norm": 0.6953125, "learning_rate": 5.551780623232632e-07, "loss": 0.23154105246067047, "step": 4508, "token_acc": 0.9344059405940595 }, { "epoch": 0.9512658227848101, "grad_norm": 0.7890625, "learning_rate": 5.550100072795894e-07, "loss": 0.26158180832862854, "step": 4509, "token_acc": 0.9283958205285802 }, { "epoch": 0.9514767932489452, "grad_norm": 0.77734375, "learning_rate": 5.548419459450558e-07, "loss": 0.27546802163124084, "step": 4510, "token_acc": 0.9300173510699826 }, { "epoch": 0.9516877637130802, "grad_norm": 0.61328125, "learning_rate": 5.546738783388814e-07, "loss": 0.23769918084144592, "step": 4511, "token_acc": 0.9326468132479822 }, { "epoch": 0.9518987341772152, "grad_norm": 0.68359375, "learning_rate": 5.545058044802863e-07, "loss": 0.25574854016304016, "step": 4512, "token_acc": 0.9293193717277487 }, { "epoch": 0.9521097046413503, "grad_norm": 0.734375, "learning_rate": 5.543377243884913e-07, "loss": 0.2485152781009674, "step": 4513, "token_acc": 0.9308906761989872 }, { "epoch": 0.9523206751054852, "grad_norm": 0.6640625, "learning_rate": 5.541696380827174e-07, "loss": 0.2500949800014496, "step": 4514, "token_acc": 0.9288679767506227 }, { "epoch": 0.9525316455696202, "grad_norm": 0.84765625, "learning_rate": 5.540015455821871e-07, "loss": 0.27296584844589233, "step": 4515, "token_acc": 0.9282437745740498 }, { "epoch": 0.9527426160337553, "grad_norm": 0.6484375, "learning_rate": 5.538334469061229e-07, "loss": 0.25283387303352356, "step": 4516, "token_acc": 0.9275568181818182 }, { "epoch": 0.9529535864978903, "grad_norm": 0.796875, "learning_rate": 5.536653420737484e-07, "loss": 0.25005894899368286, "step": 4517, "token_acc": 0.9285094066570189 }, { "epoch": 0.9531645569620253, "grad_norm": 0.67578125, "learning_rate": 5.534972311042877e-07, "loss": 0.2732360363006592, "step": 4518, "token_acc": 0.93125 }, { "epoch": 0.9533755274261604, "grad_norm": 0.7734375, "learning_rate": 5.533291140169659e-07, "loss": 0.23741638660430908, "step": 4519, "token_acc": 0.9338762214983714 }, { "epoch": 0.9535864978902954, "grad_norm": 1.25, "learning_rate": 5.531609908310087e-07, "loss": 0.2566979229450226, "step": 4520, "token_acc": 0.9266686620772224 }, { "epoch": 0.9537974683544304, "grad_norm": 0.65625, "learning_rate": 5.52992861565642e-07, "loss": 0.2523461580276489, "step": 4521, "token_acc": 0.9261396422388921 }, { "epoch": 0.9540084388185655, "grad_norm": 0.72265625, "learning_rate": 5.52824726240093e-07, "loss": 0.2820254862308502, "step": 4522, "token_acc": 0.92124959323137 }, { "epoch": 0.9542194092827004, "grad_norm": 1.0859375, "learning_rate": 5.526565848735898e-07, "loss": 0.29494422674179077, "step": 4523, "token_acc": 0.919265520256485 }, { "epoch": 0.9544303797468354, "grad_norm": 0.625, "learning_rate": 5.524884374853602e-07, "loss": 0.22038781642913818, "step": 4524, "token_acc": 0.9332919640086875 }, { "epoch": 0.9546413502109705, "grad_norm": 0.6484375, "learning_rate": 5.523202840946336e-07, "loss": 0.23139828443527222, "step": 4525, "token_acc": 0.9369797859690844 }, { "epoch": 0.9548523206751055, "grad_norm": 0.671875, "learning_rate": 5.521521247206396e-07, "loss": 0.22154146432876587, "step": 4526, "token_acc": 0.936716243802858 }, { "epoch": 0.9550632911392405, "grad_norm": 0.72265625, "learning_rate": 5.519839593826087e-07, "loss": 0.2781516909599304, "step": 4527, "token_acc": 0.9177995573822321 }, { "epoch": 0.9552742616033755, "grad_norm": 0.875, "learning_rate": 5.518157880997722e-07, "loss": 0.25957539677619934, "step": 4528, "token_acc": 0.9302584051125312 }, { "epoch": 0.9554852320675106, "grad_norm": 0.859375, "learning_rate": 5.516476108913617e-07, "loss": 0.25193172693252563, "step": 4529, "token_acc": 0.9301775147928995 }, { "epoch": 0.9556962025316456, "grad_norm": 0.7421875, "learning_rate": 5.514794277766097e-07, "loss": 0.24580858647823334, "step": 4530, "token_acc": 0.9284821986258588 }, { "epoch": 0.9559071729957805, "grad_norm": 0.765625, "learning_rate": 5.513112387747494e-07, "loss": 0.27907225489616394, "step": 4531, "token_acc": 0.9276879162702188 }, { "epoch": 0.9561181434599156, "grad_norm": 0.72265625, "learning_rate": 5.511430439050148e-07, "loss": 0.24184714257717133, "step": 4532, "token_acc": 0.9278551532033427 }, { "epoch": 0.9563291139240506, "grad_norm": 0.84765625, "learning_rate": 5.509748431866401e-07, "loss": 0.23217004537582397, "step": 4533, "token_acc": 0.9340300457217505 }, { "epoch": 0.9565400843881856, "grad_norm": 0.625, "learning_rate": 5.508066366388606e-07, "loss": 0.270810067653656, "step": 4534, "token_acc": 0.9215233698788229 }, { "epoch": 0.9567510548523207, "grad_norm": 0.79296875, "learning_rate": 5.506384242809123e-07, "loss": 0.3111891746520996, "step": 4535, "token_acc": 0.9160493827160494 }, { "epoch": 0.9569620253164557, "grad_norm": 0.89453125, "learning_rate": 5.504702061320317e-07, "loss": 0.2772868871688843, "step": 4536, "token_acc": 0.9237738206810372 }, { "epoch": 0.9571729957805907, "grad_norm": 0.87109375, "learning_rate": 5.503019822114557e-07, "loss": 0.2294209599494934, "step": 4537, "token_acc": 0.9345403899721448 }, { "epoch": 0.9573839662447258, "grad_norm": 1.1640625, "learning_rate": 5.501337525384222e-07, "loss": 0.26090940833091736, "step": 4538, "token_acc": 0.9297163995067818 }, { "epoch": 0.9575949367088608, "grad_norm": 0.7421875, "learning_rate": 5.499655171321697e-07, "loss": 0.24615783989429474, "step": 4539, "token_acc": 0.9314868804664723 }, { "epoch": 0.9578059071729957, "grad_norm": 0.734375, "learning_rate": 5.497972760119378e-07, "loss": 0.2629927396774292, "step": 4540, "token_acc": 0.9314456035767511 }, { "epoch": 0.9580168776371308, "grad_norm": 0.7109375, "learning_rate": 5.496290291969656e-07, "loss": 0.2640964090824127, "step": 4541, "token_acc": 0.9270988945324171 }, { "epoch": 0.9582278481012658, "grad_norm": 0.75390625, "learning_rate": 5.494607767064939e-07, "loss": 0.27108538150787354, "step": 4542, "token_acc": 0.9235614085313484 }, { "epoch": 0.9584388185654008, "grad_norm": 0.8515625, "learning_rate": 5.492925185597638e-07, "loss": 0.29417771100997925, "step": 4543, "token_acc": 0.9162178336325554 }, { "epoch": 0.9586497890295359, "grad_norm": 0.765625, "learning_rate": 5.49124254776017e-07, "loss": 0.26923102140426636, "step": 4544, "token_acc": 0.9241001564945227 }, { "epoch": 0.9588607594936709, "grad_norm": 2.703125, "learning_rate": 5.48955985374496e-07, "loss": 0.21483464539051056, "step": 4545, "token_acc": 0.9322242760320394 }, { "epoch": 0.9590717299578059, "grad_norm": 1.0, "learning_rate": 5.487877103744433e-07, "loss": 0.28801995515823364, "step": 4546, "token_acc": 0.9249355670103093 }, { "epoch": 0.959282700421941, "grad_norm": 0.62890625, "learning_rate": 5.486194297951034e-07, "loss": 0.2749537229537964, "step": 4547, "token_acc": 0.9279199764636658 }, { "epoch": 0.959493670886076, "grad_norm": 0.61328125, "learning_rate": 5.4845114365572e-07, "loss": 0.23160047829151154, "step": 4548, "token_acc": 0.9346747519294377 }, { "epoch": 0.9597046413502109, "grad_norm": 1.2734375, "learning_rate": 5.482828519755383e-07, "loss": 0.30999720096588135, "step": 4549, "token_acc": 0.9151478936360921 }, { "epoch": 0.959915611814346, "grad_norm": 0.6796875, "learning_rate": 5.481145547738037e-07, "loss": 0.2422141134738922, "step": 4550, "token_acc": 0.9368131868131868 }, { "epoch": 0.960126582278481, "grad_norm": 0.65234375, "learning_rate": 5.479462520697626e-07, "loss": 0.2835991084575653, "step": 4551, "token_acc": 0.9252569293055123 }, { "epoch": 0.960337552742616, "grad_norm": 0.76953125, "learning_rate": 5.47777943882662e-07, "loss": 0.25183773040771484, "step": 4552, "token_acc": 0.9330974589900289 }, { "epoch": 0.9605485232067511, "grad_norm": 0.6640625, "learning_rate": 5.476096302317488e-07, "loss": 0.24012866616249084, "step": 4553, "token_acc": 0.9315467521118556 }, { "epoch": 0.9607594936708861, "grad_norm": 1.0, "learning_rate": 5.474413111362715e-07, "loss": 0.25767773389816284, "step": 4554, "token_acc": 0.9278963002970564 }, { "epoch": 0.9609704641350211, "grad_norm": 0.7109375, "learning_rate": 5.472729866154787e-07, "loss": 0.2730708122253418, "step": 4555, "token_acc": 0.9232101616628176 }, { "epoch": 0.9611814345991562, "grad_norm": 0.66015625, "learning_rate": 5.471046566886199e-07, "loss": 0.20969294011592865, "step": 4556, "token_acc": 0.9380922299431459 }, { "epoch": 0.9613924050632912, "grad_norm": 0.70703125, "learning_rate": 5.469363213749447e-07, "loss": 0.2700268030166626, "step": 4557, "token_acc": 0.9293317563571851 }, { "epoch": 0.9616033755274261, "grad_norm": 0.65625, "learning_rate": 5.467679806937041e-07, "loss": 0.2399936020374298, "step": 4558, "token_acc": 0.9272572884234361 }, { "epoch": 0.9618143459915611, "grad_norm": 0.86328125, "learning_rate": 5.46599634664149e-07, "loss": 0.295928955078125, "step": 4559, "token_acc": 0.9191111111111111 }, { "epoch": 0.9620253164556962, "grad_norm": 0.80859375, "learning_rate": 5.464312833055313e-07, "loss": 0.2584003806114197, "step": 4560, "token_acc": 0.9273114355231143 }, { "epoch": 0.9622362869198312, "grad_norm": 0.7890625, "learning_rate": 5.462629266371033e-07, "loss": 0.2600945830345154, "step": 4561, "token_acc": 0.9288779889638259 }, { "epoch": 0.9624472573839662, "grad_norm": 0.7109375, "learning_rate": 5.460945646781181e-07, "loss": 0.281633198261261, "step": 4562, "token_acc": 0.9250285062713797 }, { "epoch": 0.9626582278481013, "grad_norm": 0.85546875, "learning_rate": 5.459261974478292e-07, "loss": 0.2578030824661255, "step": 4563, "token_acc": 0.9280642173510343 }, { "epoch": 0.9628691983122363, "grad_norm": 0.65625, "learning_rate": 5.457578249654911e-07, "loss": 0.2571215331554413, "step": 4564, "token_acc": 0.926487414187643 }, { "epoch": 0.9630801687763713, "grad_norm": 0.66015625, "learning_rate": 5.455894472503581e-07, "loss": 0.28572890162467957, "step": 4565, "token_acc": 0.9271164863144494 }, { "epoch": 0.9632911392405064, "grad_norm": 0.80078125, "learning_rate": 5.454210643216863e-07, "loss": 0.25047290325164795, "step": 4566, "token_acc": 0.9301459174169513 }, { "epoch": 0.9635021097046413, "grad_norm": 0.8046875, "learning_rate": 5.452526761987311e-07, "loss": 0.27517592906951904, "step": 4567, "token_acc": 0.9207501512401693 }, { "epoch": 0.9637130801687763, "grad_norm": 0.80859375, "learning_rate": 5.450842829007495e-07, "loss": 0.29314619302749634, "step": 4568, "token_acc": 0.9214376590330788 }, { "epoch": 0.9639240506329114, "grad_norm": 0.6484375, "learning_rate": 5.449158844469985e-07, "loss": 0.2798030972480774, "step": 4569, "token_acc": 0.9231426131511529 }, { "epoch": 0.9641350210970464, "grad_norm": 0.66015625, "learning_rate": 5.447474808567359e-07, "loss": 0.2903270125389099, "step": 4570, "token_acc": 0.9234317343173432 }, { "epoch": 0.9643459915611814, "grad_norm": 0.7421875, "learning_rate": 5.445790721492204e-07, "loss": 0.3074112832546234, "step": 4571, "token_acc": 0.9153153153153153 }, { "epoch": 0.9645569620253165, "grad_norm": 0.75390625, "learning_rate": 5.444106583437103e-07, "loss": 0.28375330567359924, "step": 4572, "token_acc": 0.9202618268372508 }, { "epoch": 0.9647679324894515, "grad_norm": 0.6796875, "learning_rate": 5.442422394594657e-07, "loss": 0.22980284690856934, "step": 4573, "token_acc": 0.9351418771437481 }, { "epoch": 0.9649789029535865, "grad_norm": 1.5078125, "learning_rate": 5.440738155157466e-07, "loss": 0.2722877860069275, "step": 4574, "token_acc": 0.9211165048543689 }, { "epoch": 0.9651898734177216, "grad_norm": 0.78515625, "learning_rate": 5.439053865318136e-07, "loss": 0.2513143718242645, "step": 4575, "token_acc": 0.9340069466371961 }, { "epoch": 0.9654008438818565, "grad_norm": 0.90234375, "learning_rate": 5.437369525269282e-07, "loss": 0.26087453961372375, "step": 4576, "token_acc": 0.9244614125988546 }, { "epoch": 0.9656118143459915, "grad_norm": 0.78515625, "learning_rate": 5.435685135203518e-07, "loss": 0.2540352940559387, "step": 4577, "token_acc": 0.9240793201133144 }, { "epoch": 0.9658227848101266, "grad_norm": 1.2109375, "learning_rate": 5.434000695313473e-07, "loss": 0.25643736124038696, "step": 4578, "token_acc": 0.9282414536495226 }, { "epoch": 0.9660337552742616, "grad_norm": 0.73046875, "learning_rate": 5.432316205791775e-07, "loss": 0.2644897699356079, "step": 4579, "token_acc": 0.9243650572146246 }, { "epoch": 0.9662447257383966, "grad_norm": 0.6875, "learning_rate": 5.43063166683106e-07, "loss": 0.25990933179855347, "step": 4580, "token_acc": 0.9283930058284763 }, { "epoch": 0.9664556962025317, "grad_norm": 0.703125, "learning_rate": 5.428947078623967e-07, "loss": 0.2273510843515396, "step": 4581, "token_acc": 0.9312933025404158 }, { "epoch": 0.9666666666666667, "grad_norm": 0.7734375, "learning_rate": 5.427262441363147e-07, "loss": 0.27779555320739746, "step": 4582, "token_acc": 0.9220452640402347 }, { "epoch": 0.9668776371308017, "grad_norm": 0.8359375, "learning_rate": 5.425577755241251e-07, "loss": 0.26986002922058105, "step": 4583, "token_acc": 0.9250637755102041 }, { "epoch": 0.9670886075949368, "grad_norm": 0.84765625, "learning_rate": 5.423893020450936e-07, "loss": 0.24137385189533234, "step": 4584, "token_acc": 0.9310146593848807 }, { "epoch": 0.9672995780590717, "grad_norm": 0.671875, "learning_rate": 5.422208237184865e-07, "loss": 0.2770204544067383, "step": 4585, "token_acc": 0.9212934716290421 }, { "epoch": 0.9675105485232067, "grad_norm": 1.296875, "learning_rate": 5.420523405635711e-07, "loss": 0.24688522517681122, "step": 4586, "token_acc": 0.9308493842753395 }, { "epoch": 0.9677215189873418, "grad_norm": 0.796875, "learning_rate": 5.418838525996144e-07, "loss": 0.29166823625564575, "step": 4587, "token_acc": 0.9222495390288875 }, { "epoch": 0.9679324894514768, "grad_norm": 0.734375, "learning_rate": 5.417153598458849e-07, "loss": 0.2672678828239441, "step": 4588, "token_acc": 0.9256474519632414 }, { "epoch": 0.9681434599156118, "grad_norm": 0.875, "learning_rate": 5.415468623216506e-07, "loss": 0.3124541938304901, "step": 4589, "token_acc": 0.9235406091370558 }, { "epoch": 0.9683544303797469, "grad_norm": 0.6015625, "learning_rate": 5.413783600461811e-07, "loss": 0.24338483810424805, "step": 4590, "token_acc": 0.9307126696832579 }, { "epoch": 0.9685654008438819, "grad_norm": 0.73046875, "learning_rate": 5.41209853038746e-07, "loss": 0.23556509613990784, "step": 4591, "token_acc": 0.9287270463741052 }, { "epoch": 0.9687763713080169, "grad_norm": 0.68359375, "learning_rate": 5.410413413186154e-07, "loss": 0.24140600860118866, "step": 4592, "token_acc": 0.9325337331334332 }, { "epoch": 0.9689873417721518, "grad_norm": 0.63671875, "learning_rate": 5.4087282490506e-07, "loss": 0.2033865749835968, "step": 4593, "token_acc": 0.9374384236453202 }, { "epoch": 0.9691983122362869, "grad_norm": 0.7265625, "learning_rate": 5.40704303817351e-07, "loss": 0.21952706575393677, "step": 4594, "token_acc": 0.9379673220714484 }, { "epoch": 0.9694092827004219, "grad_norm": 0.8828125, "learning_rate": 5.405357780747603e-07, "loss": 0.35412973165512085, "step": 4595, "token_acc": 0.9164420485175202 }, { "epoch": 0.9696202531645569, "grad_norm": 0.703125, "learning_rate": 5.403672476965606e-07, "loss": 0.2758987247943878, "step": 4596, "token_acc": 0.9232289020604008 }, { "epoch": 0.969831223628692, "grad_norm": 0.69140625, "learning_rate": 5.401987127020241e-07, "loss": 0.22954927384853363, "step": 4597, "token_acc": 0.9328984156570364 }, { "epoch": 0.970042194092827, "grad_norm": 0.8515625, "learning_rate": 5.400301731104248e-07, "loss": 0.2545018196105957, "step": 4598, "token_acc": 0.9287776708373436 }, { "epoch": 0.970253164556962, "grad_norm": 0.671875, "learning_rate": 5.398616289410364e-07, "loss": 0.26331737637519836, "step": 4599, "token_acc": 0.9322289156626506 }, { "epoch": 0.9704641350210971, "grad_norm": 0.64453125, "learning_rate": 5.396930802131333e-07, "loss": 0.22781367599964142, "step": 4600, "token_acc": 0.9337906352520131 }, { "epoch": 0.9704641350210971, "eval_loss": 0.43364349007606506, "eval_runtime": 245.9481, "eval_samples_per_second": 137.041, "eval_steps_per_second": 2.143, "eval_token_acc": 0.8990509737054653, "step": 4600 }, { "epoch": 0.9706751054852321, "grad_norm": 0.7265625, "learning_rate": 5.395245269459904e-07, "loss": 0.26024794578552246, "step": 4601, "token_acc": 0.928102429415627 }, { "epoch": 0.970886075949367, "grad_norm": 0.87890625, "learning_rate": 5.393559691588835e-07, "loss": 0.23836085200309753, "step": 4602, "token_acc": 0.9278832116788321 }, { "epoch": 0.9710970464135021, "grad_norm": 0.7265625, "learning_rate": 5.391874068710885e-07, "loss": 0.28010326623916626, "step": 4603, "token_acc": 0.9265056141544743 }, { "epoch": 0.9713080168776371, "grad_norm": 0.71484375, "learning_rate": 5.390188401018815e-07, "loss": 0.24762186408042908, "step": 4604, "token_acc": 0.9299648225135912 }, { "epoch": 0.9715189873417721, "grad_norm": 0.68359375, "learning_rate": 5.388502688705401e-07, "loss": 0.25501543283462524, "step": 4605, "token_acc": 0.929002586950273 }, { "epoch": 0.9717299578059072, "grad_norm": 0.765625, "learning_rate": 5.386816931963416e-07, "loss": 0.27469927072525024, "step": 4606, "token_acc": 0.9222536984576645 }, { "epoch": 0.9719409282700422, "grad_norm": 0.51171875, "learning_rate": 5.385131130985641e-07, "loss": 0.21256129443645477, "step": 4607, "token_acc": 0.9382648898558608 }, { "epoch": 0.9721518987341772, "grad_norm": 0.9140625, "learning_rate": 5.383445285964862e-07, "loss": 0.24301084876060486, "step": 4608, "token_acc": 0.93158953722334 }, { "epoch": 0.9723628691983123, "grad_norm": 0.65625, "learning_rate": 5.381759397093867e-07, "loss": 0.22984328866004944, "step": 4609, "token_acc": 0.9361638060825053 }, { "epoch": 0.9725738396624473, "grad_norm": 0.64453125, "learning_rate": 5.380073464565455e-07, "loss": 0.2654673457145691, "step": 4610, "token_acc": 0.9285511766373689 }, { "epoch": 0.9727848101265822, "grad_norm": 0.5546875, "learning_rate": 5.378387488572426e-07, "loss": 0.22662626206874847, "step": 4611, "token_acc": 0.9327777777777778 }, { "epoch": 0.9729957805907173, "grad_norm": 0.6875, "learning_rate": 5.376701469307584e-07, "loss": 0.2520908713340759, "step": 4612, "token_acc": 0.9325554923164485 }, { "epoch": 0.9732067510548523, "grad_norm": 0.79296875, "learning_rate": 5.375015406963741e-07, "loss": 0.2625930905342102, "step": 4613, "token_acc": 0.9279620853080569 }, { "epoch": 0.9734177215189873, "grad_norm": 0.65234375, "learning_rate": 5.373329301733712e-07, "loss": 0.19575364887714386, "step": 4614, "token_acc": 0.9449086969978335 }, { "epoch": 0.9736286919831224, "grad_norm": 0.75390625, "learning_rate": 5.371643153810319e-07, "loss": 0.25921961665153503, "step": 4615, "token_acc": 0.9294469357249626 }, { "epoch": 0.9738396624472574, "grad_norm": 0.9609375, "learning_rate": 5.369956963386384e-07, "loss": 0.29474306106567383, "step": 4616, "token_acc": 0.9134877384196185 }, { "epoch": 0.9740506329113924, "grad_norm": 0.703125, "learning_rate": 5.368270730654738e-07, "loss": 0.2450411319732666, "step": 4617, "token_acc": 0.927870744373918 }, { "epoch": 0.9742616033755275, "grad_norm": 0.5703125, "learning_rate": 5.366584455808219e-07, "loss": 0.22273120284080505, "step": 4618, "token_acc": 0.9361111111111111 }, { "epoch": 0.9744725738396625, "grad_norm": 0.7265625, "learning_rate": 5.364898139039664e-07, "loss": 0.23401491343975067, "step": 4619, "token_acc": 0.9332706766917294 }, { "epoch": 0.9746835443037974, "grad_norm": 0.6484375, "learning_rate": 5.363211780541919e-07, "loss": 0.2445870041847229, "step": 4620, "token_acc": 0.9327217125382263 }, { "epoch": 0.9748945147679325, "grad_norm": 0.62109375, "learning_rate": 5.361525380507832e-07, "loss": 0.23069533705711365, "step": 4621, "token_acc": 0.9359737939249553 }, { "epoch": 0.9751054852320675, "grad_norm": 1.0390625, "learning_rate": 5.359838939130259e-07, "loss": 0.30274248123168945, "step": 4622, "token_acc": 0.9201716738197425 }, { "epoch": 0.9753164556962025, "grad_norm": 0.80078125, "learning_rate": 5.358152456602056e-07, "loss": 0.279882550239563, "step": 4623, "token_acc": 0.9296212549462973 }, { "epoch": 0.9755274261603376, "grad_norm": 1.15625, "learning_rate": 5.356465933116088e-07, "loss": 0.2554342746734619, "step": 4624, "token_acc": 0.9260679079956189 }, { "epoch": 0.9757383966244726, "grad_norm": 0.80078125, "learning_rate": 5.354779368865225e-07, "loss": 0.2824174165725708, "step": 4625, "token_acc": 0.9192260442260443 }, { "epoch": 0.9759493670886076, "grad_norm": 0.765625, "learning_rate": 5.353092764042337e-07, "loss": 0.2861934006214142, "step": 4626, "token_acc": 0.9181547619047619 }, { "epoch": 0.9761603375527426, "grad_norm": 0.62109375, "learning_rate": 5.351406118840305e-07, "loss": 0.2533130645751953, "step": 4627, "token_acc": 0.9309068476249229 }, { "epoch": 0.9763713080168777, "grad_norm": 0.83203125, "learning_rate": 5.349719433452006e-07, "loss": 0.26481419801712036, "step": 4628, "token_acc": 0.9229881742140179 }, { "epoch": 0.9765822784810126, "grad_norm": 0.78515625, "learning_rate": 5.348032708070331e-07, "loss": 0.25153979659080505, "step": 4629, "token_acc": 0.9281045751633987 }, { "epoch": 0.9767932489451476, "grad_norm": 0.7265625, "learning_rate": 5.346345942888171e-07, "loss": 0.25580132007598877, "step": 4630, "token_acc": 0.9290820703460109 }, { "epoch": 0.9770042194092827, "grad_norm": 0.74609375, "learning_rate": 5.344659138098421e-07, "loss": 0.25385791063308716, "step": 4631, "token_acc": 0.9269823160296634 }, { "epoch": 0.9772151898734177, "grad_norm": 0.671875, "learning_rate": 5.34297229389398e-07, "loss": 0.24629075825214386, "step": 4632, "token_acc": 0.9296703296703297 }, { "epoch": 0.9774261603375527, "grad_norm": 0.6484375, "learning_rate": 5.341285410467755e-07, "loss": 0.22976957261562347, "step": 4633, "token_acc": 0.9416469194312796 }, { "epoch": 0.9776371308016878, "grad_norm": 0.80859375, "learning_rate": 5.339598488012655e-07, "loss": 0.2812723219394684, "step": 4634, "token_acc": 0.9229197080291971 }, { "epoch": 0.9778481012658228, "grad_norm": 0.80859375, "learning_rate": 5.337911526721594e-07, "loss": 0.27089154720306396, "step": 4635, "token_acc": 0.9259374031608305 }, { "epoch": 0.9780590717299578, "grad_norm": 0.8046875, "learning_rate": 5.336224526787489e-07, "loss": 0.23445311188697815, "step": 4636, "token_acc": 0.9351256575102279 }, { "epoch": 0.9782700421940929, "grad_norm": 0.703125, "learning_rate": 5.334537488403263e-07, "loss": 0.2216593474149704, "step": 4637, "token_acc": 0.9324906636024131 }, { "epoch": 0.9784810126582278, "grad_norm": 0.8203125, "learning_rate": 5.332850411761844e-07, "loss": 0.26228049397468567, "step": 4638, "token_acc": 0.9286335944299391 }, { "epoch": 0.9786919831223628, "grad_norm": 0.75, "learning_rate": 5.331163297056164e-07, "loss": 0.2424386441707611, "step": 4639, "token_acc": 0.9275239706711788 }, { "epoch": 0.9789029535864979, "grad_norm": 0.85546875, "learning_rate": 5.329476144479158e-07, "loss": 0.3035755753517151, "step": 4640, "token_acc": 0.917849592268197 }, { "epoch": 0.9791139240506329, "grad_norm": 0.86328125, "learning_rate": 5.327788954223766e-07, "loss": 0.27953219413757324, "step": 4641, "token_acc": 0.922 }, { "epoch": 0.9793248945147679, "grad_norm": 0.921875, "learning_rate": 5.326101726482933e-07, "loss": 0.29098471999168396, "step": 4642, "token_acc": 0.916010498687664 }, { "epoch": 0.979535864978903, "grad_norm": 0.71875, "learning_rate": 5.324414461449607e-07, "loss": 0.2325529158115387, "step": 4643, "token_acc": 0.9318313113807047 }, { "epoch": 0.979746835443038, "grad_norm": 0.95703125, "learning_rate": 5.322727159316741e-07, "loss": 0.27699586749076843, "step": 4644, "token_acc": 0.9175507729615034 }, { "epoch": 0.979957805907173, "grad_norm": 0.68359375, "learning_rate": 5.321039820277293e-07, "loss": 0.23185023665428162, "step": 4645, "token_acc": 0.9342027267338471 }, { "epoch": 0.9801687763713081, "grad_norm": 0.83984375, "learning_rate": 5.319352444524225e-07, "loss": 0.2563121020793915, "step": 4646, "token_acc": 0.9277957147050191 }, { "epoch": 0.980379746835443, "grad_norm": 0.66796875, "learning_rate": 5.317665032250503e-07, "loss": 0.25980260968208313, "step": 4647, "token_acc": 0.9292134831460674 }, { "epoch": 0.980590717299578, "grad_norm": 0.76953125, "learning_rate": 5.315977583649092e-07, "loss": 0.23811075091362, "step": 4648, "token_acc": 0.9303909952606635 }, { "epoch": 0.9808016877637131, "grad_norm": 0.73828125, "learning_rate": 5.314290098912975e-07, "loss": 0.3020535111427307, "step": 4649, "token_acc": 0.9248055315471045 }, { "epoch": 0.9810126582278481, "grad_norm": 1.0234375, "learning_rate": 5.312602578235122e-07, "loss": 0.25391751527786255, "step": 4650, "token_acc": 0.9314253329555114 }, { "epoch": 0.9812236286919831, "grad_norm": 0.671875, "learning_rate": 5.310915021808519e-07, "loss": 0.2546118199825287, "step": 4651, "token_acc": 0.9259259259259259 }, { "epoch": 0.9814345991561182, "grad_norm": 0.9609375, "learning_rate": 5.309227429826151e-07, "loss": 0.26613450050354004, "step": 4652, "token_acc": 0.9265099672716454 }, { "epoch": 0.9816455696202532, "grad_norm": 0.8359375, "learning_rate": 5.307539802481009e-07, "loss": 0.296033650636673, "step": 4653, "token_acc": 0.9174437739989029 }, { "epoch": 0.9818565400843882, "grad_norm": 0.9765625, "learning_rate": 5.305852139966089e-07, "loss": 0.22390195727348328, "step": 4654, "token_acc": 0.9363295880149812 }, { "epoch": 0.9820675105485233, "grad_norm": 0.6953125, "learning_rate": 5.304164442474388e-07, "loss": 0.21997621655464172, "step": 4655, "token_acc": 0.9397815081268319 }, { "epoch": 0.9822784810126582, "grad_norm": 0.75390625, "learning_rate": 5.302476710198906e-07, "loss": 0.30757322907447815, "step": 4656, "token_acc": 0.9196160767846431 }, { "epoch": 0.9824894514767932, "grad_norm": 0.73046875, "learning_rate": 5.300788943332654e-07, "loss": 0.25044792890548706, "step": 4657, "token_acc": 0.9268224036025894 }, { "epoch": 0.9827004219409282, "grad_norm": 0.91796875, "learning_rate": 5.299101142068641e-07, "loss": 0.24816876649856567, "step": 4658, "token_acc": 0.9311216429699842 }, { "epoch": 0.9829113924050633, "grad_norm": 0.6796875, "learning_rate": 5.297413306599882e-07, "loss": 0.23954495787620544, "step": 4659, "token_acc": 0.9354933726067747 }, { "epoch": 0.9831223628691983, "grad_norm": 0.61328125, "learning_rate": 5.29572543711939e-07, "loss": 0.259615421295166, "step": 4660, "token_acc": 0.9280177187153932 }, { "epoch": 0.9833333333333333, "grad_norm": 0.84375, "learning_rate": 5.294037533820195e-07, "loss": 0.30355608463287354, "step": 4661, "token_acc": 0.9164179104477612 }, { "epoch": 0.9835443037974684, "grad_norm": 0.9375, "learning_rate": 5.292349596895319e-07, "loss": 0.30018556118011475, "step": 4662, "token_acc": 0.9181728557344726 }, { "epoch": 0.9837552742616034, "grad_norm": 0.7734375, "learning_rate": 5.29066162653779e-07, "loss": 0.2780727446079254, "step": 4663, "token_acc": 0.9282153539381854 }, { "epoch": 0.9839662447257383, "grad_norm": 0.6875, "learning_rate": 5.288973622940644e-07, "loss": 0.27369242906570435, "step": 4664, "token_acc": 0.9240905184760814 }, { "epoch": 0.9841772151898734, "grad_norm": 0.93359375, "learning_rate": 5.28728558629692e-07, "loss": 0.23746763169765472, "step": 4665, "token_acc": 0.9390715667311412 }, { "epoch": 0.9843881856540084, "grad_norm": 0.78515625, "learning_rate": 5.285597516799658e-07, "loss": 0.2882470488548279, "step": 4666, "token_acc": 0.9234642497482377 }, { "epoch": 0.9845991561181434, "grad_norm": 0.80078125, "learning_rate": 5.283909414641901e-07, "loss": 0.265074759721756, "step": 4667, "token_acc": 0.9274541531823085 }, { "epoch": 0.9848101265822785, "grad_norm": 0.65625, "learning_rate": 5.282221280016699e-07, "loss": 0.2709978520870209, "step": 4668, "token_acc": 0.924572775486152 }, { "epoch": 0.9850210970464135, "grad_norm": 0.69921875, "learning_rate": 5.280533113117107e-07, "loss": 0.24648457765579224, "step": 4669, "token_acc": 0.9317875438123483 }, { "epoch": 0.9852320675105485, "grad_norm": 0.57421875, "learning_rate": 5.278844914136178e-07, "loss": 0.21280433237552643, "step": 4670, "token_acc": 0.9481525625744934 }, { "epoch": 0.9854430379746836, "grad_norm": 0.9453125, "learning_rate": 5.277156683266974e-07, "loss": 0.22829166054725647, "step": 4671, "token_acc": 0.9385290889132821 }, { "epoch": 0.9856540084388186, "grad_norm": 0.83203125, "learning_rate": 5.275468420702555e-07, "loss": 0.24300214648246765, "step": 4672, "token_acc": 0.93343653250774 }, { "epoch": 0.9858649789029535, "grad_norm": 0.703125, "learning_rate": 5.273780126635992e-07, "loss": 0.2903904318809509, "step": 4673, "token_acc": 0.9175104228707563 }, { "epoch": 0.9860759493670886, "grad_norm": 0.7890625, "learning_rate": 5.272091801260354e-07, "loss": 0.25252822041511536, "step": 4674, "token_acc": 0.9286831404441664 }, { "epoch": 0.9862869198312236, "grad_norm": 0.76953125, "learning_rate": 5.270403444768716e-07, "loss": 0.31226471066474915, "step": 4675, "token_acc": 0.9199277543648404 }, { "epoch": 0.9864978902953586, "grad_norm": 0.625, "learning_rate": 5.268715057354156e-07, "loss": 0.24067997932434082, "step": 4676, "token_acc": 0.9318181818181818 }, { "epoch": 0.9867088607594937, "grad_norm": 0.6640625, "learning_rate": 5.267026639209754e-07, "loss": 0.2319367229938507, "step": 4677, "token_acc": 0.9303155006858711 }, { "epoch": 0.9869198312236287, "grad_norm": 0.85546875, "learning_rate": 5.265338190528596e-07, "loss": 0.2492537945508957, "step": 4678, "token_acc": 0.9305126621371217 }, { "epoch": 0.9871308016877637, "grad_norm": 0.62109375, "learning_rate": 5.263649711503771e-07, "loss": 0.24360042810440063, "step": 4679, "token_acc": 0.9300451321727917 }, { "epoch": 0.9873417721518988, "grad_norm": 0.87109375, "learning_rate": 5.261961202328367e-07, "loss": 0.22903317213058472, "step": 4680, "token_acc": 0.9347948285553682 }, { "epoch": 0.9875527426160338, "grad_norm": 0.69140625, "learning_rate": 5.260272663195484e-07, "loss": 0.2523333728313446, "step": 4681, "token_acc": 0.9282643739924772 }, { "epoch": 0.9877637130801687, "grad_norm": 0.65234375, "learning_rate": 5.258584094298219e-07, "loss": 0.19284164905548096, "step": 4682, "token_acc": 0.9417282546901649 }, { "epoch": 0.9879746835443038, "grad_norm": 0.984375, "learning_rate": 5.256895495829676e-07, "loss": 0.32180845737457275, "step": 4683, "token_acc": 0.9157700584076237 }, { "epoch": 0.9881856540084388, "grad_norm": 0.87890625, "learning_rate": 5.255206867982956e-07, "loss": 0.23641401529312134, "step": 4684, "token_acc": 0.9427959620679107 }, { "epoch": 0.9883966244725738, "grad_norm": 0.7109375, "learning_rate": 5.253518210951172e-07, "loss": 0.2743535339832306, "step": 4685, "token_acc": 0.9265890778871978 }, { "epoch": 0.9886075949367089, "grad_norm": 0.9609375, "learning_rate": 5.251829524927435e-07, "loss": 0.276460736989975, "step": 4686, "token_acc": 0.9261028378758078 }, { "epoch": 0.9888185654008439, "grad_norm": 0.640625, "learning_rate": 5.250140810104859e-07, "loss": 0.2528797388076782, "step": 4687, "token_acc": 0.9278996865203761 }, { "epoch": 0.9890295358649789, "grad_norm": 0.69140625, "learning_rate": 5.248452066676565e-07, "loss": 0.25390708446502686, "step": 4688, "token_acc": 0.9322219185569828 }, { "epoch": 0.989240506329114, "grad_norm": 1.3125, "learning_rate": 5.246763294835674e-07, "loss": 0.24529936909675598, "step": 4689, "token_acc": 0.9306029579067122 }, { "epoch": 0.989451476793249, "grad_norm": 0.6328125, "learning_rate": 5.245074494775313e-07, "loss": 0.23570950329303741, "step": 4690, "token_acc": 0.9337152209492635 }, { "epoch": 0.989662447257384, "grad_norm": 0.8359375, "learning_rate": 5.243385666688607e-07, "loss": 0.25767192244529724, "step": 4691, "token_acc": 0.9274568847522584 }, { "epoch": 0.9898734177215189, "grad_norm": 0.69921875, "learning_rate": 5.241696810768691e-07, "loss": 0.2618895471096039, "step": 4692, "token_acc": 0.9273643640675956 }, { "epoch": 0.990084388185654, "grad_norm": 0.625, "learning_rate": 5.2400079272087e-07, "loss": 0.2239118218421936, "step": 4693, "token_acc": 0.9366151866151866 }, { "epoch": 0.990295358649789, "grad_norm": 0.83984375, "learning_rate": 5.23831901620177e-07, "loss": 0.22031012177467346, "step": 4694, "token_acc": 0.9342389626427909 }, { "epoch": 0.990506329113924, "grad_norm": 0.90234375, "learning_rate": 5.236630077941044e-07, "loss": 0.3069133758544922, "step": 4695, "token_acc": 0.9154353976736875 }, { "epoch": 0.9907172995780591, "grad_norm": 0.69921875, "learning_rate": 5.234941112619665e-07, "loss": 0.25205758213996887, "step": 4696, "token_acc": 0.929007388371346 }, { "epoch": 0.9909282700421941, "grad_norm": 0.734375, "learning_rate": 5.233252120430782e-07, "loss": 0.2305661290884018, "step": 4697, "token_acc": 0.9344216155361666 }, { "epoch": 0.9911392405063291, "grad_norm": 0.65234375, "learning_rate": 5.231563101567545e-07, "loss": 0.23033729195594788, "step": 4698, "token_acc": 0.9333703498056635 }, { "epoch": 0.9913502109704642, "grad_norm": 0.76953125, "learning_rate": 5.229874056223107e-07, "loss": 0.26873764395713806, "step": 4699, "token_acc": 0.9290598290598291 }, { "epoch": 0.9915611814345991, "grad_norm": 1.2109375, "learning_rate": 5.228184984590625e-07, "loss": 0.2356519103050232, "step": 4700, "token_acc": 0.9286737324548839 }, { "epoch": 0.9917721518987341, "grad_norm": 0.6796875, "learning_rate": 5.226495886863258e-07, "loss": 0.24657103419303894, "step": 4701, "token_acc": 0.9283738087918845 }, { "epoch": 0.9919831223628692, "grad_norm": 0.61328125, "learning_rate": 5.224806763234169e-07, "loss": 0.2596723437309265, "step": 4702, "token_acc": 0.9290193842645382 }, { "epoch": 0.9921940928270042, "grad_norm": 0.78515625, "learning_rate": 5.223117613896525e-07, "loss": 0.2537384331226349, "step": 4703, "token_acc": 0.925214899713467 }, { "epoch": 0.9924050632911392, "grad_norm": 0.73828125, "learning_rate": 5.221428439043494e-07, "loss": 0.231331467628479, "step": 4704, "token_acc": 0.9369186046511628 }, { "epoch": 0.9926160337552743, "grad_norm": 0.703125, "learning_rate": 5.219739238868246e-07, "loss": 0.2424582540988922, "step": 4705, "token_acc": 0.9350845410628019 }, { "epoch": 0.9928270042194093, "grad_norm": 0.8515625, "learning_rate": 5.218050013563956e-07, "loss": 0.2771230638027191, "step": 4706, "token_acc": 0.9207424867413082 }, { "epoch": 0.9930379746835443, "grad_norm": 0.65625, "learning_rate": 5.216360763323802e-07, "loss": 0.27592095732688904, "step": 4707, "token_acc": 0.9233750745378653 }, { "epoch": 0.9932489451476794, "grad_norm": 0.59765625, "learning_rate": 5.214671488340963e-07, "loss": 0.21883830428123474, "step": 4708, "token_acc": 0.9429811866859624 }, { "epoch": 0.9934599156118143, "grad_norm": 0.73046875, "learning_rate": 5.212982188808623e-07, "loss": 0.23002049326896667, "step": 4709, "token_acc": 0.9321648815319702 }, { "epoch": 0.9936708860759493, "grad_norm": 0.67578125, "learning_rate": 5.211292864919966e-07, "loss": 0.25469112396240234, "step": 4710, "token_acc": 0.9321614940950288 }, { "epoch": 0.9938818565400844, "grad_norm": 0.76953125, "learning_rate": 5.209603516868181e-07, "loss": 0.2746848464012146, "step": 4711, "token_acc": 0.9276582475834113 }, { "epoch": 0.9940928270042194, "grad_norm": 0.734375, "learning_rate": 5.20791414484646e-07, "loss": 0.25624868273735046, "step": 4712, "token_acc": 0.9221228728006923 }, { "epoch": 0.9943037974683544, "grad_norm": 0.9296875, "learning_rate": 5.206224749047999e-07, "loss": 0.26016664505004883, "step": 4713, "token_acc": 0.926605504587156 }, { "epoch": 0.9945147679324895, "grad_norm": 0.5703125, "learning_rate": 5.204535329665991e-07, "loss": 0.24939924478530884, "step": 4714, "token_acc": 0.9301005932421976 }, { "epoch": 0.9947257383966245, "grad_norm": 0.69140625, "learning_rate": 5.202845886893636e-07, "loss": 0.22621600329875946, "step": 4715, "token_acc": 0.9335302806499262 }, { "epoch": 0.9949367088607595, "grad_norm": 0.71484375, "learning_rate": 5.201156420924137e-07, "loss": 0.2911354899406433, "step": 4716, "token_acc": 0.9231426131511529 }, { "epoch": 0.9951476793248946, "grad_norm": 0.7109375, "learning_rate": 5.199466931950702e-07, "loss": 0.22638025879859924, "step": 4717, "token_acc": 0.9332572732458643 }, { "epoch": 0.9953586497890295, "grad_norm": 0.71875, "learning_rate": 5.197777420166531e-07, "loss": 0.241557776927948, "step": 4718, "token_acc": 0.9250203748981255 }, { "epoch": 0.9955696202531645, "grad_norm": 1.0703125, "learning_rate": 5.196087885764839e-07, "loss": 0.2560037672519684, "step": 4719, "token_acc": 0.9277743335399876 }, { "epoch": 0.9957805907172996, "grad_norm": 0.6328125, "learning_rate": 5.19439832893884e-07, "loss": 0.25417983531951904, "step": 4720, "token_acc": 0.9296969696969697 }, { "epoch": 0.9959915611814346, "grad_norm": 0.6875, "learning_rate": 5.192708749881743e-07, "loss": 0.25039994716644287, "step": 4721, "token_acc": 0.9343451006268558 }, { "epoch": 0.9962025316455696, "grad_norm": 0.70703125, "learning_rate": 5.191019148786772e-07, "loss": 0.2661602795124054, "step": 4722, "token_acc": 0.9295193158360365 }, { "epoch": 0.9964135021097047, "grad_norm": 0.78125, "learning_rate": 5.189329525847144e-07, "loss": 0.26626619696617126, "step": 4723, "token_acc": 0.9226227470478559 }, { "epoch": 0.9966244725738397, "grad_norm": 0.7734375, "learning_rate": 5.187639881256081e-07, "loss": 0.24744093418121338, "step": 4724, "token_acc": 0.9270553064275038 }, { "epoch": 0.9968354430379747, "grad_norm": 0.59375, "learning_rate": 5.185950215206811e-07, "loss": 0.22012066841125488, "step": 4725, "token_acc": 0.9336946126872808 }, { "epoch": 0.9970464135021097, "grad_norm": 0.62890625, "learning_rate": 5.18426052789256e-07, "loss": 0.22872236371040344, "step": 4726, "token_acc": 0.9367899796096708 }, { "epoch": 0.9972573839662447, "grad_norm": 0.78125, "learning_rate": 5.182570819506557e-07, "loss": 0.26475057005882263, "step": 4727, "token_acc": 0.9250895070228587 }, { "epoch": 0.9974683544303797, "grad_norm": 0.71875, "learning_rate": 5.180881090242037e-07, "loss": 0.22150731086730957, "step": 4728, "token_acc": 0.9406528189910979 }, { "epoch": 0.9976793248945147, "grad_norm": 0.73828125, "learning_rate": 5.179191340292232e-07, "loss": 0.2806920111179352, "step": 4729, "token_acc": 0.9264264264264265 }, { "epoch": 0.9978902953586498, "grad_norm": 0.6796875, "learning_rate": 5.177501569850382e-07, "loss": 0.2379036694765091, "step": 4730, "token_acc": 0.928955223880597 }, { "epoch": 0.9981012658227848, "grad_norm": 0.69140625, "learning_rate": 5.175811779109722e-07, "loss": 0.23797214031219482, "step": 4731, "token_acc": 0.9326801517067004 }, { "epoch": 0.9983122362869198, "grad_norm": 0.5546875, "learning_rate": 5.174121968263501e-07, "loss": 0.2289476841688156, "step": 4732, "token_acc": 0.9340966210555711 }, { "epoch": 0.9985232067510549, "grad_norm": 0.5703125, "learning_rate": 5.172432137504956e-07, "loss": 0.219358429312706, "step": 4733, "token_acc": 0.9354317998385795 }, { "epoch": 0.9987341772151899, "grad_norm": 0.734375, "learning_rate": 5.17074228702734e-07, "loss": 0.22490793466567993, "step": 4734, "token_acc": 0.9360957105339948 }, { "epoch": 0.9989451476793249, "grad_norm": 0.66015625, "learning_rate": 5.169052417023895e-07, "loss": 0.2422950565814972, "step": 4735, "token_acc": 0.9312080536912751 }, { "epoch": 0.99915611814346, "grad_norm": 1.8203125, "learning_rate": 5.167362527687876e-07, "loss": 0.2492435723543167, "step": 4736, "token_acc": 0.9221009147241074 }, { "epoch": 0.9993670886075949, "grad_norm": 0.82421875, "learning_rate": 5.165672619212537e-07, "loss": 0.28103193640708923, "step": 4737, "token_acc": 0.9232181095136127 }, { "epoch": 0.9995780590717299, "grad_norm": 0.6875, "learning_rate": 5.16398269179113e-07, "loss": 0.2497856169939041, "step": 4738, "token_acc": 0.9288942452545947 }, { "epoch": 0.999789029535865, "grad_norm": 0.73046875, "learning_rate": 5.162292745616915e-07, "loss": 0.27417346835136414, "step": 4739, "token_acc": 0.9252818035426731 }, { "epoch": 1.0, "grad_norm": 0.8359375, "learning_rate": 5.160602780883151e-07, "loss": 0.2338496446609497, "step": 4740, "token_acc": 0.9312567132116004 }, { "epoch": 1.000210970464135, "grad_norm": 0.7265625, "learning_rate": 5.158912797783099e-07, "loss": 0.2863706946372986, "step": 4741, "token_acc": 0.9129445234708392 }, { "epoch": 1.00042194092827, "grad_norm": 0.94140625, "learning_rate": 5.157222796510025e-07, "loss": 0.2738494277000427, "step": 4742, "token_acc": 0.9290012033694344 }, { "epoch": 1.000632911392405, "grad_norm": 0.7265625, "learning_rate": 5.155532777257191e-07, "loss": 0.2663953900337219, "step": 4743, "token_acc": 0.9315365551425031 }, { "epoch": 1.0008438818565402, "grad_norm": 0.7578125, "learning_rate": 5.153842740217871e-07, "loss": 0.26782479882240295, "step": 4744, "token_acc": 0.9249448123620309 }, { "epoch": 1.001054852320675, "grad_norm": 0.5703125, "learning_rate": 5.15215268558533e-07, "loss": 0.2473321557044983, "step": 4745, "token_acc": 0.9348909657320872 }, { "epoch": 1.0012658227848101, "grad_norm": 0.765625, "learning_rate": 5.150462613552841e-07, "loss": 0.2705077528953552, "step": 4746, "token_acc": 0.9292929292929293 }, { "epoch": 1.0014767932489452, "grad_norm": 0.7109375, "learning_rate": 5.148772524313678e-07, "loss": 0.28794240951538086, "step": 4747, "token_acc": 0.9213622291021671 }, { "epoch": 1.00168776371308, "grad_norm": 0.66015625, "learning_rate": 5.147082418061118e-07, "loss": 0.2849356532096863, "step": 4748, "token_acc": 0.9212248102590945 }, { "epoch": 1.0018987341772152, "grad_norm": 0.73046875, "learning_rate": 5.14539229498844e-07, "loss": 0.2767043113708496, "step": 4749, "token_acc": 0.9245785270629991 }, { "epoch": 1.0021097046413503, "grad_norm": 0.71484375, "learning_rate": 5.143702155288922e-07, "loss": 0.24759012460708618, "step": 4750, "token_acc": 0.9343552149283573 }, { "epoch": 1.0023206751054852, "grad_norm": 0.7265625, "learning_rate": 5.142011999155844e-07, "loss": 0.25576671957969666, "step": 4751, "token_acc": 0.9350989522700814 }, { "epoch": 1.0025316455696203, "grad_norm": 0.70703125, "learning_rate": 5.140321826782495e-07, "loss": 0.270576536655426, "step": 4752, "token_acc": 0.9254971801721579 }, { "epoch": 1.0027426160337554, "grad_norm": 0.66796875, "learning_rate": 5.138631638362156e-07, "loss": 0.25256842374801636, "step": 4753, "token_acc": 0.932258064516129 }, { "epoch": 1.0029535864978902, "grad_norm": 0.6171875, "learning_rate": 5.136941434088115e-07, "loss": 0.2430078089237213, "step": 4754, "token_acc": 0.9310538116591929 }, { "epoch": 1.0031645569620253, "grad_norm": 0.65625, "learning_rate": 5.135251214153663e-07, "loss": 0.2620999813079834, "step": 4755, "token_acc": 0.9287790697674418 }, { "epoch": 1.0033755274261604, "grad_norm": 1.4609375, "learning_rate": 5.133560978752091e-07, "loss": 0.2626093626022339, "step": 4756, "token_acc": 0.9231661351116266 }, { "epoch": 1.0035864978902953, "grad_norm": 0.63671875, "learning_rate": 5.131870728076689e-07, "loss": 0.24292053282260895, "step": 4757, "token_acc": 0.9319769207409657 }, { "epoch": 1.0037974683544304, "grad_norm": 0.75, "learning_rate": 5.130180462320753e-07, "loss": 0.2710002064704895, "step": 4758, "token_acc": 0.9299401197604791 }, { "epoch": 1.0040084388185655, "grad_norm": 0.6015625, "learning_rate": 5.128490181677581e-07, "loss": 0.20281577110290527, "step": 4759, "token_acc": 0.9375 }, { "epoch": 1.0042194092827004, "grad_norm": 1.84375, "learning_rate": 5.126799886340467e-07, "loss": 0.2934504747390747, "step": 4760, "token_acc": 0.9197941888619855 }, { "epoch": 1.0044303797468355, "grad_norm": 0.8359375, "learning_rate": 5.125109576502715e-07, "loss": 0.2933291792869568, "step": 4761, "token_acc": 0.9189016602809706 }, { "epoch": 1.0046413502109706, "grad_norm": 0.6171875, "learning_rate": 5.123419252357623e-07, "loss": 0.27335286140441895, "step": 4762, "token_acc": 0.9271153309131528 }, { "epoch": 1.0048523206751054, "grad_norm": 0.74609375, "learning_rate": 5.121728914098494e-07, "loss": 0.31626754999160767, "step": 4763, "token_acc": 0.9106174115215706 }, { "epoch": 1.0050632911392405, "grad_norm": 0.671875, "learning_rate": 5.120038561918634e-07, "loss": 0.2398698776960373, "step": 4764, "token_acc": 0.9351483634138881 }, { "epoch": 1.0052742616033756, "grad_norm": 1.03125, "learning_rate": 5.118348196011348e-07, "loss": 0.2646433711051941, "step": 4765, "token_acc": 0.9287387085981934 }, { "epoch": 1.0054852320675105, "grad_norm": 0.82421875, "learning_rate": 5.116657816569945e-07, "loss": 0.2771931290626526, "step": 4766, "token_acc": 0.9263392857142857 }, { "epoch": 1.0056962025316456, "grad_norm": 0.76171875, "learning_rate": 5.114967423787733e-07, "loss": 0.25466179847717285, "step": 4767, "token_acc": 0.9310030395136778 }, { "epoch": 1.0059071729957807, "grad_norm": 0.63671875, "learning_rate": 5.113277017858022e-07, "loss": 0.23403432965278625, "step": 4768, "token_acc": 0.931454005934718 }, { "epoch": 1.0061181434599156, "grad_norm": 0.61328125, "learning_rate": 5.111586598974127e-07, "loss": 0.28173840045928955, "step": 4769, "token_acc": 0.9254237288135593 }, { "epoch": 1.0063291139240507, "grad_norm": 1.203125, "learning_rate": 5.109896167329359e-07, "loss": 0.2407315969467163, "step": 4770, "token_acc": 0.9319242382651661 }, { "epoch": 1.0065400843881858, "grad_norm": 0.7421875, "learning_rate": 5.108205723117034e-07, "loss": 0.26796621084213257, "step": 4771, "token_acc": 0.9234378858977526 }, { "epoch": 1.0067510548523206, "grad_norm": 1.0234375, "learning_rate": 5.106515266530469e-07, "loss": 0.23578926920890808, "step": 4772, "token_acc": 0.9314121037463977 }, { "epoch": 1.0069620253164557, "grad_norm": 0.62109375, "learning_rate": 5.104824797762982e-07, "loss": 0.2561977803707123, "step": 4773, "token_acc": 0.9265745007680491 }, { "epoch": 1.0071729957805906, "grad_norm": 0.75, "learning_rate": 5.103134317007891e-07, "loss": 0.2647457718849182, "step": 4774, "token_acc": 0.9287573194534808 }, { "epoch": 1.0073839662447257, "grad_norm": 0.66015625, "learning_rate": 5.101443824458519e-07, "loss": 0.22622770071029663, "step": 4775, "token_acc": 0.9344874405974203 }, { "epoch": 1.0075949367088608, "grad_norm": 0.85546875, "learning_rate": 5.099753320308187e-07, "loss": 0.299474835395813, "step": 4776, "token_acc": 0.9242524916943522 }, { "epoch": 1.0078059071729957, "grad_norm": 0.68359375, "learning_rate": 5.098062804750218e-07, "loss": 0.2501489520072937, "step": 4777, "token_acc": 0.9298300818124606 }, { "epoch": 1.0080168776371308, "grad_norm": 0.65234375, "learning_rate": 5.096372277977939e-07, "loss": 0.2502216696739197, "step": 4778, "token_acc": 0.9341500765696784 }, { "epoch": 1.0082278481012659, "grad_norm": 0.71875, "learning_rate": 5.094681740184672e-07, "loss": 0.26816707849502563, "step": 4779, "token_acc": 0.922943722943723 }, { "epoch": 1.0084388185654007, "grad_norm": 2.34375, "learning_rate": 5.092991191563747e-07, "loss": 0.24676986038684845, "step": 4780, "token_acc": 0.9322338830584708 }, { "epoch": 1.0086497890295358, "grad_norm": 0.5703125, "learning_rate": 5.091300632308494e-07, "loss": 0.19584371149539948, "step": 4781, "token_acc": 0.9408117249154453 }, { "epoch": 1.008860759493671, "grad_norm": 0.91796875, "learning_rate": 5.089610062612238e-07, "loss": 0.2671702802181244, "step": 4782, "token_acc": 0.9283651068746389 }, { "epoch": 1.0090717299578058, "grad_norm": 0.84765625, "learning_rate": 5.087919482668316e-07, "loss": 0.30225634574890137, "step": 4783, "token_acc": 0.9183168316831684 }, { "epoch": 1.009282700421941, "grad_norm": 0.61328125, "learning_rate": 5.086228892670054e-07, "loss": 0.19541838765144348, "step": 4784, "token_acc": 0.9399445642131198 }, { "epoch": 1.009493670886076, "grad_norm": 0.69140625, "learning_rate": 5.084538292810791e-07, "loss": 0.2763522267341614, "step": 4785, "token_acc": 0.927277953434895 }, { "epoch": 1.0097046413502109, "grad_norm": 0.7109375, "learning_rate": 5.082847683283857e-07, "loss": 0.279130756855011, "step": 4786, "token_acc": 0.9224232456140351 }, { "epoch": 1.009915611814346, "grad_norm": 0.70703125, "learning_rate": 5.08115706428259e-07, "loss": 0.2520114779472351, "step": 4787, "token_acc": 0.929607250755287 }, { "epoch": 1.010126582278481, "grad_norm": 0.828125, "learning_rate": 5.079466436000326e-07, "loss": 0.28318116068840027, "step": 4788, "token_acc": 0.9218653489808524 }, { "epoch": 1.010337552742616, "grad_norm": 0.70703125, "learning_rate": 5.077775798630402e-07, "loss": 0.25128889083862305, "step": 4789, "token_acc": 0.9336594313665546 }, { "epoch": 1.010548523206751, "grad_norm": 2.515625, "learning_rate": 5.076085152366157e-07, "loss": 0.21418628096580505, "step": 4790, "token_acc": 0.9374298540965208 }, { "epoch": 1.0107594936708861, "grad_norm": 0.72265625, "learning_rate": 5.07439449740093e-07, "loss": 0.23462989926338196, "step": 4791, "token_acc": 0.9294478527607362 }, { "epoch": 1.010970464135021, "grad_norm": 0.6484375, "learning_rate": 5.072703833928064e-07, "loss": 0.24937224388122559, "step": 4792, "token_acc": 0.929481733220051 }, { "epoch": 1.011181434599156, "grad_norm": 0.62109375, "learning_rate": 5.071013162140899e-07, "loss": 0.23954711854457855, "step": 4793, "token_acc": 0.929471032745592 }, { "epoch": 1.0113924050632912, "grad_norm": 0.59375, "learning_rate": 5.069322482232776e-07, "loss": 0.22992610931396484, "step": 4794, "token_acc": 0.9261430246189918 }, { "epoch": 1.011603375527426, "grad_norm": 0.703125, "learning_rate": 5.067631794397043e-07, "loss": 0.23618942499160767, "step": 4795, "token_acc": 0.9359954687057491 }, { "epoch": 1.0118143459915612, "grad_norm": 0.70703125, "learning_rate": 5.065941098827041e-07, "loss": 0.2706661820411682, "step": 4796, "token_acc": 0.9288598943771358 }, { "epoch": 1.0120253164556963, "grad_norm": 0.69921875, "learning_rate": 5.064250395716116e-07, "loss": 0.30924320220947266, "step": 4797, "token_acc": 0.9193373166757197 }, { "epoch": 1.0122362869198311, "grad_norm": 0.6640625, "learning_rate": 5.062559685257615e-07, "loss": 0.28242161870002747, "step": 4798, "token_acc": 0.9254057428214731 }, { "epoch": 1.0124472573839662, "grad_norm": 0.75, "learning_rate": 5.060868967644883e-07, "loss": 0.25653260946273804, "step": 4799, "token_acc": 0.9295813695591905 }, { "epoch": 1.0126582278481013, "grad_norm": 0.734375, "learning_rate": 5.059178243071271e-07, "loss": 0.2962134778499603, "step": 4800, "token_acc": 0.9206744057490326 }, { "epoch": 1.0126582278481013, "eval_loss": 0.43365511298179626, "eval_runtime": 245.7632, "eval_samples_per_second": 137.144, "eval_steps_per_second": 2.144, "eval_token_acc": 0.899104819168985, "step": 4800 }, { "epoch": 1.0128691983122362, "grad_norm": 0.65234375, "learning_rate": 5.057487511730126e-07, "loss": 0.20759758353233337, "step": 4801, "token_acc": 0.9414561664190193 }, { "epoch": 1.0130801687763713, "grad_norm": 0.59765625, "learning_rate": 5.055796773814795e-07, "loss": 0.2252160906791687, "step": 4802, "token_acc": 0.9291146116241173 }, { "epoch": 1.0132911392405064, "grad_norm": 0.859375, "learning_rate": 5.054106029518634e-07, "loss": 0.28102508187294006, "step": 4803, "token_acc": 0.9206489675516224 }, { "epoch": 1.0135021097046413, "grad_norm": 0.66015625, "learning_rate": 5.052415279034989e-07, "loss": 0.290669322013855, "step": 4804, "token_acc": 0.9238142049643647 }, { "epoch": 1.0137130801687764, "grad_norm": 0.796875, "learning_rate": 5.050724522557213e-07, "loss": 0.2530815601348877, "step": 4805, "token_acc": 0.9296407185628742 }, { "epoch": 1.0139240506329115, "grad_norm": 1.09375, "learning_rate": 5.049033760278659e-07, "loss": 0.2708277106285095, "step": 4806, "token_acc": 0.9236749116607774 }, { "epoch": 1.0141350210970463, "grad_norm": 0.8125, "learning_rate": 5.047342992392679e-07, "loss": 0.26884305477142334, "step": 4807, "token_acc": 0.9256472004816376 }, { "epoch": 1.0143459915611814, "grad_norm": 0.6796875, "learning_rate": 5.045652219092629e-07, "loss": 0.26602625846862793, "step": 4808, "token_acc": 0.9235880398671097 }, { "epoch": 1.0145569620253165, "grad_norm": 0.921875, "learning_rate": 5.043961440571859e-07, "loss": 0.2878214716911316, "step": 4809, "token_acc": 0.9197589597209007 }, { "epoch": 1.0147679324894514, "grad_norm": 0.85546875, "learning_rate": 5.042270657023727e-07, "loss": 0.31388962268829346, "step": 4810, "token_acc": 0.9220860823122841 }, { "epoch": 1.0149789029535865, "grad_norm": 0.72265625, "learning_rate": 5.040579868641587e-07, "loss": 0.25973695516586304, "step": 4811, "token_acc": 0.9335413416536662 }, { "epoch": 1.0151898734177216, "grad_norm": 2.03125, "learning_rate": 5.038889075618798e-07, "loss": 0.2525535225868225, "step": 4812, "token_acc": 0.9268922018348624 }, { "epoch": 1.0154008438818565, "grad_norm": 0.65625, "learning_rate": 5.037198278148711e-07, "loss": 0.24042895436286926, "step": 4813, "token_acc": 0.9336099585062241 }, { "epoch": 1.0156118143459916, "grad_norm": 0.6015625, "learning_rate": 5.035507476424687e-07, "loss": 0.20906437933444977, "step": 4814, "token_acc": 0.9405316525075363 }, { "epoch": 1.0158227848101267, "grad_norm": 0.83203125, "learning_rate": 5.033816670640083e-07, "loss": 0.2556505501270294, "step": 4815, "token_acc": 0.9302256532066508 }, { "epoch": 1.0160337552742615, "grad_norm": 0.64453125, "learning_rate": 5.032125860988256e-07, "loss": 0.26424485445022583, "step": 4816, "token_acc": 0.9297591743119266 }, { "epoch": 1.0162447257383966, "grad_norm": 0.7421875, "learning_rate": 5.030435047662564e-07, "loss": 0.29551416635513306, "step": 4817, "token_acc": 0.9244309559939302 }, { "epoch": 1.0164556962025317, "grad_norm": 0.796875, "learning_rate": 5.028744230856369e-07, "loss": 0.2839418947696686, "step": 4818, "token_acc": 0.9171012901454845 }, { "epoch": 1.0166666666666666, "grad_norm": 0.59375, "learning_rate": 5.027053410763026e-07, "loss": 0.23133526742458344, "step": 4819, "token_acc": 0.9350382128159906 }, { "epoch": 1.0168776371308017, "grad_norm": 0.70703125, "learning_rate": 5.025362587575898e-07, "loss": 0.28290921449661255, "step": 4820, "token_acc": 0.9201555023923444 }, { "epoch": 1.0170886075949368, "grad_norm": 0.75, "learning_rate": 5.023671761488343e-07, "loss": 0.29210013151168823, "step": 4821, "token_acc": 0.9185953711093376 }, { "epoch": 1.0172995780590717, "grad_norm": 0.66796875, "learning_rate": 5.021980932693721e-07, "loss": 0.2572844326496124, "step": 4822, "token_acc": 0.927170868347339 }, { "epoch": 1.0175105485232068, "grad_norm": 0.81640625, "learning_rate": 5.020290101385393e-07, "loss": 0.24976743757724762, "step": 4823, "token_acc": 0.9300989466964571 }, { "epoch": 1.0177215189873419, "grad_norm": 0.8125, "learning_rate": 5.018599267756721e-07, "loss": 0.2903585433959961, "step": 4824, "token_acc": 0.923697270471464 }, { "epoch": 1.0179324894514767, "grad_norm": 0.73046875, "learning_rate": 5.016908432001067e-07, "loss": 0.2611672282218933, "step": 4825, "token_acc": 0.9217787302437718 }, { "epoch": 1.0181434599156118, "grad_norm": 0.7265625, "learning_rate": 5.015217594311787e-07, "loss": 0.22997401654720306, "step": 4826, "token_acc": 0.9316440777843252 }, { "epoch": 1.018354430379747, "grad_norm": 0.73828125, "learning_rate": 5.013526754882249e-07, "loss": 0.29360315203666687, "step": 4827, "token_acc": 0.9201038361695991 }, { "epoch": 1.0185654008438818, "grad_norm": 0.63671875, "learning_rate": 5.011835913905811e-07, "loss": 0.23319895565509796, "step": 4828, "token_acc": 0.9394631639063392 }, { "epoch": 1.018776371308017, "grad_norm": 0.69921875, "learning_rate": 5.010145071575837e-07, "loss": 0.26078665256500244, "step": 4829, "token_acc": 0.925756186984418 }, { "epoch": 1.018987341772152, "grad_norm": 0.71875, "learning_rate": 5.008454228085687e-07, "loss": 0.2757515013217926, "step": 4830, "token_acc": 0.9190503432494279 }, { "epoch": 1.0191983122362869, "grad_norm": 0.78125, "learning_rate": 5.006763383628725e-07, "loss": 0.23120394349098206, "step": 4831, "token_acc": 0.9337264813254704 }, { "epoch": 1.019409282700422, "grad_norm": 0.69921875, "learning_rate": 5.005072538398313e-07, "loss": 0.25059765577316284, "step": 4832, "token_acc": 0.9355227478156071 }, { "epoch": 1.019620253164557, "grad_norm": 1.015625, "learning_rate": 5.003381692587813e-07, "loss": 0.2526248097419739, "step": 4833, "token_acc": 0.9237079573420837 }, { "epoch": 1.019831223628692, "grad_norm": 0.82421875, "learning_rate": 5.001690846390588e-07, "loss": 0.3037266135215759, "step": 4834, "token_acc": 0.9143468950749465 }, { "epoch": 1.020042194092827, "grad_norm": 0.84765625, "learning_rate": 5e-07, "loss": 0.2642693519592285, "step": 4835, "token_acc": 0.9287163141118516 }, { "epoch": 1.0202531645569621, "grad_norm": 0.80859375, "learning_rate": 4.998309153609413e-07, "loss": 0.26739194989204407, "step": 4836, "token_acc": 0.9268221574344023 }, { "epoch": 1.020464135021097, "grad_norm": 0.99609375, "learning_rate": 4.996618307412187e-07, "loss": 0.3068203628063202, "step": 4837, "token_acc": 0.9157372986369269 }, { "epoch": 1.020675105485232, "grad_norm": 0.7265625, "learning_rate": 4.994927461601688e-07, "loss": 0.23695316910743713, "step": 4838, "token_acc": 0.9346425283829396 }, { "epoch": 1.0208860759493672, "grad_norm": 0.9765625, "learning_rate": 4.993236616371276e-07, "loss": 0.24250730872154236, "step": 4839, "token_acc": 0.9265417170495768 }, { "epoch": 1.021097046413502, "grad_norm": 0.64453125, "learning_rate": 4.991545771914314e-07, "loss": 0.2234223484992981, "step": 4840, "token_acc": 0.9352096159484022 }, { "epoch": 1.0213080168776372, "grad_norm": 0.83203125, "learning_rate": 4.989854928424164e-07, "loss": 0.2552555203437805, "step": 4841, "token_acc": 0.9301977231875375 }, { "epoch": 1.021518987341772, "grad_norm": 0.88671875, "learning_rate": 4.988164086094188e-07, "loss": 0.22352609038352966, "step": 4842, "token_acc": 0.9374410562716127 }, { "epoch": 1.0217299578059071, "grad_norm": 0.7734375, "learning_rate": 4.986473245117752e-07, "loss": 0.25879034399986267, "step": 4843, "token_acc": 0.9307138572285543 }, { "epoch": 1.0219409282700422, "grad_norm": 0.7109375, "learning_rate": 4.984782405688213e-07, "loss": 0.3241720199584961, "step": 4844, "token_acc": 0.9069970845481049 }, { "epoch": 1.0221518987341771, "grad_norm": 1.125, "learning_rate": 4.983091567998934e-07, "loss": 0.27527812123298645, "step": 4845, "token_acc": 0.9258480936655659 }, { "epoch": 1.0223628691983122, "grad_norm": 0.67578125, "learning_rate": 4.981400732243278e-07, "loss": 0.2654706835746765, "step": 4846, "token_acc": 0.9247311827956989 }, { "epoch": 1.0225738396624473, "grad_norm": 0.796875, "learning_rate": 4.979709898614606e-07, "loss": 0.29850804805755615, "step": 4847, "token_acc": 0.9207035755478662 }, { "epoch": 1.0227848101265822, "grad_norm": 0.78125, "learning_rate": 4.97801906730628e-07, "loss": 0.32967105507850647, "step": 4848, "token_acc": 0.9190974133186571 }, { "epoch": 1.0229957805907173, "grad_norm": 0.67578125, "learning_rate": 4.976328238511657e-07, "loss": 0.2810131907463074, "step": 4849, "token_acc": 0.9230061349693252 }, { "epoch": 1.0232067510548524, "grad_norm": 0.95703125, "learning_rate": 4.974637412424102e-07, "loss": 0.23897457122802734, "step": 4850, "token_acc": 0.9324894514767933 }, { "epoch": 1.0234177215189872, "grad_norm": 0.73828125, "learning_rate": 4.972946589236974e-07, "loss": 0.29492950439453125, "step": 4851, "token_acc": 0.9221411192214112 }, { "epoch": 1.0236286919831223, "grad_norm": 0.765625, "learning_rate": 4.971255769143631e-07, "loss": 0.27283984422683716, "step": 4852, "token_acc": 0.923429495820976 }, { "epoch": 1.0238396624472574, "grad_norm": 0.9921875, "learning_rate": 4.969564952337435e-07, "loss": 0.2584148943424225, "step": 4853, "token_acc": 0.9336691855583543 }, { "epoch": 1.0240506329113923, "grad_norm": 0.734375, "learning_rate": 4.967874139011745e-07, "loss": 0.279785692691803, "step": 4854, "token_acc": 0.9247985675917636 }, { "epoch": 1.0242616033755274, "grad_norm": 0.80078125, "learning_rate": 4.966183329359918e-07, "loss": 0.26148852705955505, "step": 4855, "token_acc": 0.9263125575683144 }, { "epoch": 1.0244725738396625, "grad_norm": 0.65234375, "learning_rate": 4.964492523575313e-07, "loss": 0.22005316615104675, "step": 4856, "token_acc": 0.9392451672292114 }, { "epoch": 1.0246835443037974, "grad_norm": 0.64453125, "learning_rate": 4.96280172185129e-07, "loss": 0.24725309014320374, "step": 4857, "token_acc": 0.9320056899004268 }, { "epoch": 1.0248945147679325, "grad_norm": 0.75390625, "learning_rate": 4.961110924381205e-07, "loss": 0.312958300113678, "step": 4858, "token_acc": 0.9175704989154013 }, { "epoch": 1.0251054852320676, "grad_norm": 0.73828125, "learning_rate": 4.959420131358413e-07, "loss": 0.2291390299797058, "step": 4859, "token_acc": 0.9296092993219245 }, { "epoch": 1.0253164556962024, "grad_norm": 0.7421875, "learning_rate": 4.957729342976273e-07, "loss": 0.27491700649261475, "step": 4860, "token_acc": 0.9267015706806283 }, { "epoch": 1.0255274261603375, "grad_norm": 0.796875, "learning_rate": 4.956038559428141e-07, "loss": 0.26580125093460083, "step": 4861, "token_acc": 0.9314503332275468 }, { "epoch": 1.0257383966244726, "grad_norm": 0.63671875, "learning_rate": 4.954347780907374e-07, "loss": 0.24776138365268707, "step": 4862, "token_acc": 0.935513169845595 }, { "epoch": 1.0259493670886075, "grad_norm": 0.8515625, "learning_rate": 4.952657007607321e-07, "loss": 0.2973008155822754, "step": 4863, "token_acc": 0.9198339188757585 }, { "epoch": 1.0261603375527426, "grad_norm": 0.71484375, "learning_rate": 4.950966239721342e-07, "loss": 0.24766841530799866, "step": 4864, "token_acc": 0.931974921630094 }, { "epoch": 1.0263713080168777, "grad_norm": 0.9453125, "learning_rate": 4.949275477442786e-07, "loss": 0.2533305585384369, "step": 4865, "token_acc": 0.9282674772036474 }, { "epoch": 1.0265822784810126, "grad_norm": 0.90234375, "learning_rate": 4.94758472096501e-07, "loss": 0.2897741198539734, "step": 4866, "token_acc": 0.9204545454545454 }, { "epoch": 1.0267932489451477, "grad_norm": 0.69921875, "learning_rate": 4.945893970481367e-07, "loss": 0.28134655952453613, "step": 4867, "token_acc": 0.9213384208981509 }, { "epoch": 1.0270042194092828, "grad_norm": 0.6328125, "learning_rate": 4.944203226185204e-07, "loss": 0.23564282059669495, "step": 4868, "token_acc": 0.935006045949214 }, { "epoch": 1.0272151898734176, "grad_norm": 0.625, "learning_rate": 4.942512488269874e-07, "loss": 0.23936673998832703, "step": 4869, "token_acc": 0.9318066157760814 }, { "epoch": 1.0274261603375527, "grad_norm": 0.59375, "learning_rate": 4.940821756928728e-07, "loss": 0.22363325953483582, "step": 4870, "token_acc": 0.9366059817945384 }, { "epoch": 1.0276371308016878, "grad_norm": 0.68359375, "learning_rate": 4.939131032355115e-07, "loss": 0.24384337663650513, "step": 4871, "token_acc": 0.9315659679408138 }, { "epoch": 1.0278481012658227, "grad_norm": 0.69921875, "learning_rate": 4.937440314742386e-07, "loss": 0.24074260890483856, "step": 4872, "token_acc": 0.9309941520467836 }, { "epoch": 1.0280590717299578, "grad_norm": 0.703125, "learning_rate": 4.935749604283884e-07, "loss": 0.26260125637054443, "step": 4873, "token_acc": 0.9220135628586332 }, { "epoch": 1.028270042194093, "grad_norm": 0.7109375, "learning_rate": 4.934058901172958e-07, "loss": 0.2695898711681366, "step": 4874, "token_acc": 0.9264705882352942 }, { "epoch": 1.0284810126582278, "grad_norm": 0.875, "learning_rate": 4.932368205602956e-07, "loss": 0.2596041262149811, "step": 4875, "token_acc": 0.9220574313663616 }, { "epoch": 1.0286919831223629, "grad_norm": 0.70703125, "learning_rate": 4.930677517767222e-07, "loss": 0.23531211912631989, "step": 4876, "token_acc": 0.926409684612934 }, { "epoch": 1.028902953586498, "grad_norm": 0.7734375, "learning_rate": 4.928986837859102e-07, "loss": 0.2747429609298706, "step": 4877, "token_acc": 0.9274797722505245 }, { "epoch": 1.0291139240506328, "grad_norm": 0.765625, "learning_rate": 4.927296166071937e-07, "loss": 0.26830005645751953, "step": 4878, "token_acc": 0.9251812788398154 }, { "epoch": 1.029324894514768, "grad_norm": 0.6953125, "learning_rate": 4.92560550259907e-07, "loss": 0.24451366066932678, "step": 4879, "token_acc": 0.9309712889431888 }, { "epoch": 1.029535864978903, "grad_norm": 0.84765625, "learning_rate": 4.923914847633844e-07, "loss": 0.2786101698875427, "step": 4880, "token_acc": 0.9245912579245913 }, { "epoch": 1.029746835443038, "grad_norm": 0.75, "learning_rate": 4.922224201369599e-07, "loss": 0.28559309244155884, "step": 4881, "token_acc": 0.9196988707653702 }, { "epoch": 1.029957805907173, "grad_norm": 0.73046875, "learning_rate": 4.920533563999675e-07, "loss": 0.2397073358297348, "step": 4882, "token_acc": 0.9321550741163056 }, { "epoch": 1.030168776371308, "grad_norm": 0.5859375, "learning_rate": 4.918842935717411e-07, "loss": 0.24848303198814392, "step": 4883, "token_acc": 0.9332210998877666 }, { "epoch": 1.030379746835443, "grad_norm": 0.6640625, "learning_rate": 4.917152316716143e-07, "loss": 0.21271595358848572, "step": 4884, "token_acc": 0.9364931846344485 }, { "epoch": 1.030590717299578, "grad_norm": 0.87890625, "learning_rate": 4.915461707189209e-07, "loss": 0.3145957589149475, "step": 4885, "token_acc": 0.9169130691898285 }, { "epoch": 1.0308016877637132, "grad_norm": 1.5078125, "learning_rate": 4.913771107329944e-07, "loss": 0.2543647885322571, "step": 4886, "token_acc": 0.9325463743676222 }, { "epoch": 1.031012658227848, "grad_norm": 0.640625, "learning_rate": 4.912080517331686e-07, "loss": 0.25194627046585083, "step": 4887, "token_acc": 0.9238400933761307 }, { "epoch": 1.0312236286919831, "grad_norm": 0.83203125, "learning_rate": 4.910389937387762e-07, "loss": 0.262692391872406, "step": 4888, "token_acc": 0.9265822784810127 }, { "epoch": 1.0314345991561182, "grad_norm": 0.765625, "learning_rate": 4.908699367691506e-07, "loss": 0.21538680791854858, "step": 4889, "token_acc": 0.936897730805098 }, { "epoch": 1.0316455696202531, "grad_norm": 0.71484375, "learning_rate": 4.907008808436252e-07, "loss": 0.27459123730659485, "step": 4890, "token_acc": 0.9227430555555556 }, { "epoch": 1.0318565400843882, "grad_norm": 0.6796875, "learning_rate": 4.905318259815329e-07, "loss": 0.22107531130313873, "step": 4891, "token_acc": 0.9343672112988092 }, { "epoch": 1.0320675105485233, "grad_norm": 0.765625, "learning_rate": 4.903627722022062e-07, "loss": 0.262470006942749, "step": 4892, "token_acc": 0.925214899713467 }, { "epoch": 1.0322784810126582, "grad_norm": 0.7109375, "learning_rate": 4.901937195249782e-07, "loss": 0.24456307291984558, "step": 4893, "token_acc": 0.930952380952381 }, { "epoch": 1.0324894514767933, "grad_norm": 0.64453125, "learning_rate": 4.900246679691813e-07, "loss": 0.2359718382358551, "step": 4894, "token_acc": 0.9338368580060423 }, { "epoch": 1.0327004219409284, "grad_norm": 0.66796875, "learning_rate": 4.89855617554148e-07, "loss": 0.2556799054145813, "step": 4895, "token_acc": 0.9322617680826636 }, { "epoch": 1.0329113924050632, "grad_norm": 0.66796875, "learning_rate": 4.896865682992109e-07, "loss": 0.2873612344264984, "step": 4896, "token_acc": 0.9141517726340463 }, { "epoch": 1.0331223628691983, "grad_norm": 5.09375, "learning_rate": 4.895175202237019e-07, "loss": 0.26919132471084595, "step": 4897, "token_acc": 0.926303175554224 }, { "epoch": 1.0333333333333334, "grad_norm": 0.7890625, "learning_rate": 4.893484733469532e-07, "loss": 0.2694014608860016, "step": 4898, "token_acc": 0.9228758169934641 }, { "epoch": 1.0335443037974683, "grad_norm": 0.82421875, "learning_rate": 4.891794276882966e-07, "loss": 0.2555182874202728, "step": 4899, "token_acc": 0.9280864197530864 }, { "epoch": 1.0337552742616034, "grad_norm": 0.69140625, "learning_rate": 4.890103832670642e-07, "loss": 0.28969237208366394, "step": 4900, "token_acc": 0.9212735166425471 }, { "epoch": 1.0339662447257385, "grad_norm": 0.57421875, "learning_rate": 4.888413401025875e-07, "loss": 0.2153073400259018, "step": 4901, "token_acc": 0.9404255319148936 }, { "epoch": 1.0341772151898734, "grad_norm": 0.66796875, "learning_rate": 4.886722982141978e-07, "loss": 0.23629635572433472, "step": 4902, "token_acc": 0.9315226813059809 }, { "epoch": 1.0343881856540085, "grad_norm": 0.6875, "learning_rate": 4.885032576212268e-07, "loss": 0.26666951179504395, "step": 4903, "token_acc": 0.9259154929577464 }, { "epoch": 1.0345991561181433, "grad_norm": 0.77734375, "learning_rate": 4.883342183430055e-07, "loss": 0.27540141344070435, "step": 4904, "token_acc": 0.9204847768253029 }, { "epoch": 1.0348101265822784, "grad_norm": 0.765625, "learning_rate": 4.881651803988651e-07, "loss": 0.2758994698524475, "step": 4905, "token_acc": 0.9249296215201752 }, { "epoch": 1.0350210970464135, "grad_norm": 0.7421875, "learning_rate": 4.879961438081368e-07, "loss": 0.27446067333221436, "step": 4906, "token_acc": 0.9219934994582882 }, { "epoch": 1.0352320675105484, "grad_norm": 0.66796875, "learning_rate": 4.878271085901507e-07, "loss": 0.23161737620830536, "step": 4907, "token_acc": 0.9363957597173145 }, { "epoch": 1.0354430379746835, "grad_norm": 0.75, "learning_rate": 4.876580747642378e-07, "loss": 0.27333933115005493, "step": 4908, "token_acc": 0.9260675589547482 }, { "epoch": 1.0356540084388186, "grad_norm": 0.68359375, "learning_rate": 4.874890423497285e-07, "loss": 0.26521652936935425, "step": 4909, "token_acc": 0.9314808967477107 }, { "epoch": 1.0358649789029535, "grad_norm": 0.61328125, "learning_rate": 4.873200113659532e-07, "loss": 0.23638391494750977, "step": 4910, "token_acc": 0.9311794063567114 }, { "epoch": 1.0360759493670886, "grad_norm": 0.7578125, "learning_rate": 4.871509818322421e-07, "loss": 0.2673051059246063, "step": 4911, "token_acc": 0.9248668348752453 }, { "epoch": 1.0362869198312237, "grad_norm": 0.68359375, "learning_rate": 4.869819537679247e-07, "loss": 0.2752510905265808, "step": 4912, "token_acc": 0.9255447941888619 }, { "epoch": 1.0364978902953585, "grad_norm": 0.8359375, "learning_rate": 4.86812927192331e-07, "loss": 0.30111950635910034, "step": 4913, "token_acc": 0.919091967403958 }, { "epoch": 1.0367088607594936, "grad_norm": 0.6953125, "learning_rate": 4.866439021247909e-07, "loss": 0.28249579668045044, "step": 4914, "token_acc": 0.9252424147638411 }, { "epoch": 1.0369198312236287, "grad_norm": 0.6953125, "learning_rate": 4.864748785846336e-07, "loss": 0.2882341146469116, "step": 4915, "token_acc": 0.919063733784546 }, { "epoch": 1.0371308016877636, "grad_norm": 0.63671875, "learning_rate": 4.863058565911884e-07, "loss": 0.22230026125907898, "step": 4916, "token_acc": 0.9345403899721448 }, { "epoch": 1.0373417721518987, "grad_norm": 0.5859375, "learning_rate": 4.861368361637844e-07, "loss": 0.24030190706253052, "step": 4917, "token_acc": 0.931811697574893 }, { "epoch": 1.0375527426160338, "grad_norm": 0.69921875, "learning_rate": 4.859678173217505e-07, "loss": 0.2648245096206665, "step": 4918, "token_acc": 0.9287705171879839 }, { "epoch": 1.0377637130801687, "grad_norm": 0.62890625, "learning_rate": 4.857988000844155e-07, "loss": 0.2725488245487213, "step": 4919, "token_acc": 0.9303047404063205 }, { "epoch": 1.0379746835443038, "grad_norm": 0.6953125, "learning_rate": 4.856297844711079e-07, "loss": 0.23606809973716736, "step": 4920, "token_acc": 0.937460716530484 }, { "epoch": 1.0381856540084389, "grad_norm": 0.640625, "learning_rate": 4.854607705011561e-07, "loss": 0.25005239248275757, "step": 4921, "token_acc": 0.9311512415349887 }, { "epoch": 1.0383966244725737, "grad_norm": 0.63671875, "learning_rate": 4.852917581938883e-07, "loss": 0.2829882502555847, "step": 4922, "token_acc": 0.9223731632935958 }, { "epoch": 1.0386075949367088, "grad_norm": 0.73046875, "learning_rate": 4.851227475686322e-07, "loss": 0.22413331270217896, "step": 4923, "token_acc": 0.931457800511509 }, { "epoch": 1.038818565400844, "grad_norm": 0.7109375, "learning_rate": 4.84953738644716e-07, "loss": 0.2514442801475525, "step": 4924, "token_acc": 0.9264264264264265 }, { "epoch": 1.0390295358649788, "grad_norm": 0.76953125, "learning_rate": 4.84784731441467e-07, "loss": 0.30862343311309814, "step": 4925, "token_acc": 0.9143043336591724 }, { "epoch": 1.039240506329114, "grad_norm": 0.7109375, "learning_rate": 4.84615725978213e-07, "loss": 0.25220081210136414, "step": 4926, "token_acc": 0.9278046162104133 }, { "epoch": 1.039451476793249, "grad_norm": 0.7109375, "learning_rate": 4.844467222742809e-07, "loss": 0.2631048560142517, "step": 4927, "token_acc": 0.9244823386114495 }, { "epoch": 1.0396624472573839, "grad_norm": 0.91796875, "learning_rate": 4.842777203489975e-07, "loss": 0.22704309225082397, "step": 4928, "token_acc": 0.9380993897122929 }, { "epoch": 1.039873417721519, "grad_norm": 0.68359375, "learning_rate": 4.841087202216899e-07, "loss": 0.2648930251598358, "step": 4929, "token_acc": 0.9298981426003595 }, { "epoch": 1.040084388185654, "grad_norm": 0.984375, "learning_rate": 4.83939721911685e-07, "loss": 0.21985237300395966, "step": 4930, "token_acc": 0.9345444059976932 }, { "epoch": 1.040295358649789, "grad_norm": 0.5703125, "learning_rate": 4.837707254383086e-07, "loss": 0.24172426760196686, "step": 4931, "token_acc": 0.9281899109792285 }, { "epoch": 1.040506329113924, "grad_norm": 0.76171875, "learning_rate": 4.83601730820887e-07, "loss": 0.2459920048713684, "step": 4932, "token_acc": 0.9335924846128928 }, { "epoch": 1.0407172995780591, "grad_norm": 0.7421875, "learning_rate": 4.834327380787464e-07, "loss": 0.2489829659461975, "step": 4933, "token_acc": 0.9312906220984215 }, { "epoch": 1.040928270042194, "grad_norm": 0.671875, "learning_rate": 4.832637472312123e-07, "loss": 0.25843390822410583, "step": 4934, "token_acc": 0.9341947115384616 }, { "epoch": 1.0411392405063291, "grad_norm": 0.7578125, "learning_rate": 4.830947582976106e-07, "loss": 0.2358623743057251, "step": 4935, "token_acc": 0.9285910464158198 }, { "epoch": 1.0413502109704642, "grad_norm": 0.8359375, "learning_rate": 4.829257712972662e-07, "loss": 0.3018052577972412, "step": 4936, "token_acc": 0.9216095380029806 }, { "epoch": 1.041561181434599, "grad_norm": 0.62890625, "learning_rate": 4.827567862495044e-07, "loss": 0.2479781210422516, "step": 4937, "token_acc": 0.9337710533828147 }, { "epoch": 1.0417721518987342, "grad_norm": 0.609375, "learning_rate": 4.8258780317365e-07, "loss": 0.21854622662067413, "step": 4938, "token_acc": 0.9356664951106536 }, { "epoch": 1.0419831223628693, "grad_norm": 0.78515625, "learning_rate": 4.824188220890276e-07, "loss": 0.3025684356689453, "step": 4939, "token_acc": 0.9253640776699029 }, { "epoch": 1.0421940928270041, "grad_norm": 0.78125, "learning_rate": 4.82249843014962e-07, "loss": 0.2899351716041565, "step": 4940, "token_acc": 0.921644685802948 }, { "epoch": 1.0424050632911392, "grad_norm": 0.87890625, "learning_rate": 4.820808659707769e-07, "loss": 0.26304739713668823, "step": 4941, "token_acc": 0.9285714285714286 }, { "epoch": 1.0426160337552743, "grad_norm": 0.71484375, "learning_rate": 4.819118909757964e-07, "loss": 0.23074010014533997, "step": 4942, "token_acc": 0.9336969001148105 }, { "epoch": 1.0428270042194092, "grad_norm": 0.609375, "learning_rate": 4.817429180493442e-07, "loss": 0.21496199071407318, "step": 4943, "token_acc": 0.943645435867332 }, { "epoch": 1.0430379746835443, "grad_norm": 0.73046875, "learning_rate": 4.81573947210744e-07, "loss": 0.2599087059497833, "step": 4944, "token_acc": 0.9301768055139347 }, { "epoch": 1.0432489451476794, "grad_norm": 0.67578125, "learning_rate": 4.814049784793189e-07, "loss": 0.2283693552017212, "step": 4945, "token_acc": 0.9324015972618368 }, { "epoch": 1.0434599156118143, "grad_norm": 0.65625, "learning_rate": 4.812360118743919e-07, "loss": 0.2290116846561432, "step": 4946, "token_acc": 0.935992855016374 }, { "epoch": 1.0436708860759494, "grad_norm": 1.390625, "learning_rate": 4.810670474152857e-07, "loss": 0.2461305409669876, "step": 4947, "token_acc": 0.9334453781512605 }, { "epoch": 1.0438818565400845, "grad_norm": 0.72265625, "learning_rate": 4.808980851213227e-07, "loss": 0.2561722993850708, "step": 4948, "token_acc": 0.93184 }, { "epoch": 1.0440928270042193, "grad_norm": 0.73046875, "learning_rate": 4.807291250118255e-07, "loss": 0.23679956793785095, "step": 4949, "token_acc": 0.9358974358974359 }, { "epoch": 1.0443037974683544, "grad_norm": 0.80859375, "learning_rate": 4.805601671061162e-07, "loss": 0.2802262306213379, "step": 4950, "token_acc": 0.9242614707730987 }, { "epoch": 1.0445147679324895, "grad_norm": 0.671875, "learning_rate": 4.803912114235161e-07, "loss": 0.2763640582561493, "step": 4951, "token_acc": 0.9260431654676259 }, { "epoch": 1.0447257383966244, "grad_norm": 0.7890625, "learning_rate": 4.802222579833468e-07, "loss": 0.2987332344055176, "step": 4952, "token_acc": 0.9154443485763589 }, { "epoch": 1.0449367088607595, "grad_norm": 0.71875, "learning_rate": 4.800533068049299e-07, "loss": 0.2665545344352722, "step": 4953, "token_acc": 0.9205278592375367 }, { "epoch": 1.0451476793248946, "grad_norm": 0.64453125, "learning_rate": 4.798843579075861e-07, "loss": 0.2341473251581192, "step": 4954, "token_acc": 0.9352300242130751 }, { "epoch": 1.0453586497890295, "grad_norm": 0.84375, "learning_rate": 4.797154113106363e-07, "loss": 0.3125021159648895, "step": 4955, "token_acc": 0.9147157190635451 }, { "epoch": 1.0455696202531646, "grad_norm": 0.70703125, "learning_rate": 4.79546467033401e-07, "loss": 0.2778211236000061, "step": 4956, "token_acc": 0.9261846901579587 }, { "epoch": 1.0457805907172997, "grad_norm": 0.765625, "learning_rate": 4.793775250952002e-07, "loss": 0.2767367362976074, "step": 4957, "token_acc": 0.9183564567769477 }, { "epoch": 1.0459915611814345, "grad_norm": 0.69921875, "learning_rate": 4.792085855153539e-07, "loss": 0.2687263488769531, "step": 4958, "token_acc": 0.924636803874092 }, { "epoch": 1.0462025316455696, "grad_norm": 1.5, "learning_rate": 4.790396483131819e-07, "loss": 0.23430433869361877, "step": 4959, "token_acc": 0.9383435582822086 }, { "epoch": 1.0464135021097047, "grad_norm": 0.70703125, "learning_rate": 4.788707135080035e-07, "loss": 0.2520412504673004, "step": 4960, "token_acc": 0.922564529558701 }, { "epoch": 1.0466244725738396, "grad_norm": 0.875, "learning_rate": 4.787017811191378e-07, "loss": 0.26135319471359253, "step": 4961, "token_acc": 0.9286100594916171 }, { "epoch": 1.0468354430379747, "grad_norm": 0.8359375, "learning_rate": 4.785328511659038e-07, "loss": 0.2318907082080841, "step": 4962, "token_acc": 0.9338865096359743 }, { "epoch": 1.0470464135021098, "grad_norm": 0.59375, "learning_rate": 4.783639236676199e-07, "loss": 0.22290349006652832, "step": 4963, "token_acc": 0.9342105263157895 }, { "epoch": 1.0472573839662447, "grad_norm": 0.66015625, "learning_rate": 4.781949986436042e-07, "loss": 0.21891456842422485, "step": 4964, "token_acc": 0.9380200860832137 }, { "epoch": 1.0474683544303798, "grad_norm": 0.6328125, "learning_rate": 4.780260761131755e-07, "loss": 0.2573544681072235, "step": 4965, "token_acc": 0.9234006734006734 }, { "epoch": 1.0476793248945149, "grad_norm": 0.63671875, "learning_rate": 4.778571560956507e-07, "loss": 0.22865524888038635, "step": 4966, "token_acc": 0.9291666666666667 }, { "epoch": 1.0478902953586497, "grad_norm": 0.98828125, "learning_rate": 4.776882386103475e-07, "loss": 0.23516467213630676, "step": 4967, "token_acc": 0.9336814621409921 }, { "epoch": 1.0481012658227848, "grad_norm": 0.71484375, "learning_rate": 4.775193236765829e-07, "loss": 0.2223290503025055, "step": 4968, "token_acc": 0.9320897697464297 }, { "epoch": 1.04831223628692, "grad_norm": 0.69921875, "learning_rate": 4.773504113136743e-07, "loss": 0.2786903977394104, "step": 4969, "token_acc": 0.9309815950920245 }, { "epoch": 1.0485232067510548, "grad_norm": 0.8203125, "learning_rate": 4.771815015409377e-07, "loss": 0.25915753841400146, "step": 4970, "token_acc": 0.9329211746522411 }, { "epoch": 1.04873417721519, "grad_norm": 0.8359375, "learning_rate": 4.770125943776893e-07, "loss": 0.25156092643737793, "step": 4971, "token_acc": 0.9309417040358744 }, { "epoch": 1.048945147679325, "grad_norm": 0.73828125, "learning_rate": 4.768436898432456e-07, "loss": 0.2511846125125885, "step": 4972, "token_acc": 0.9227417949462677 }, { "epoch": 1.0491561181434599, "grad_norm": 0.6484375, "learning_rate": 4.7667478795692175e-07, "loss": 0.23302404582500458, "step": 4973, "token_acc": 0.9332439678284182 }, { "epoch": 1.049367088607595, "grad_norm": 0.64453125, "learning_rate": 4.765058887380336e-07, "loss": 0.24972784519195557, "step": 4974, "token_acc": 0.9278832920451417 }, { "epoch": 1.0495780590717299, "grad_norm": 0.75, "learning_rate": 4.7633699220589564e-07, "loss": 0.25779157876968384, "step": 4975, "token_acc": 0.9306265984654731 }, { "epoch": 1.049789029535865, "grad_norm": 0.765625, "learning_rate": 4.7616809837982296e-07, "loss": 0.2540343999862671, "step": 4976, "token_acc": 0.9297195083517176 }, { "epoch": 1.05, "grad_norm": 0.70703125, "learning_rate": 4.7599920727913003e-07, "loss": 0.21267357468605042, "step": 4977, "token_acc": 0.9363603861279943 }, { "epoch": 1.050210970464135, "grad_norm": 0.71875, "learning_rate": 4.758303189231308e-07, "loss": 0.24803665280342102, "step": 4978, "token_acc": 0.9312481557981706 }, { "epoch": 1.05042194092827, "grad_norm": 0.80859375, "learning_rate": 4.756614333311393e-07, "loss": 0.28462210297584534, "step": 4979, "token_acc": 0.9211029211029211 }, { "epoch": 1.0506329113924051, "grad_norm": 0.8203125, "learning_rate": 4.754925505224688e-07, "loss": 0.28511273860931396, "step": 4980, "token_acc": 0.9205158968206358 }, { "epoch": 1.05084388185654, "grad_norm": 1.140625, "learning_rate": 4.7532367051643257e-07, "loss": 0.23726974427700043, "step": 4981, "token_acc": 0.9354460093896714 }, { "epoch": 1.051054852320675, "grad_norm": 0.75, "learning_rate": 4.751547933323435e-07, "loss": 0.25046414136886597, "step": 4982, "token_acc": 0.9317632850241546 }, { "epoch": 1.0512658227848102, "grad_norm": 0.8046875, "learning_rate": 4.7498591898951406e-07, "loss": 0.3188059329986572, "step": 4983, "token_acc": 0.9090909090909091 }, { "epoch": 1.051476793248945, "grad_norm": 0.67578125, "learning_rate": 4.7481704750725663e-07, "loss": 0.2454972267150879, "step": 4984, "token_acc": 0.9313479623824451 }, { "epoch": 1.0516877637130801, "grad_norm": 0.60546875, "learning_rate": 4.746481789048829e-07, "loss": 0.2197190225124359, "step": 4985, "token_acc": 0.9318927789934355 }, { "epoch": 1.0518987341772152, "grad_norm": 0.6875, "learning_rate": 4.744793132017044e-07, "loss": 0.26309916377067566, "step": 4986, "token_acc": 0.9299799942840812 }, { "epoch": 1.0521097046413501, "grad_norm": 0.73828125, "learning_rate": 4.7431045041703255e-07, "loss": 0.2629668712615967, "step": 4987, "token_acc": 0.9313779745434422 }, { "epoch": 1.0523206751054852, "grad_norm": 0.66796875, "learning_rate": 4.7414159057017793e-07, "loss": 0.24095523357391357, "step": 4988, "token_acc": 0.932057158263683 }, { "epoch": 1.0525316455696203, "grad_norm": 0.6796875, "learning_rate": 4.7397273368045165e-07, "loss": 0.1990797519683838, "step": 4989, "token_acc": 0.9395861148197597 }, { "epoch": 1.0527426160337552, "grad_norm": 0.65234375, "learning_rate": 4.7380387976716336e-07, "loss": 0.2625981867313385, "step": 4990, "token_acc": 0.9267769607843137 }, { "epoch": 1.0529535864978903, "grad_norm": 0.71484375, "learning_rate": 4.7363502884962303e-07, "loss": 0.231420636177063, "step": 4991, "token_acc": 0.935042237110399 }, { "epoch": 1.0531645569620254, "grad_norm": 1.0234375, "learning_rate": 4.734661809471404e-07, "loss": 0.30788636207580566, "step": 4992, "token_acc": 0.916015625 }, { "epoch": 1.0533755274261603, "grad_norm": 0.734375, "learning_rate": 4.732973360790246e-07, "loss": 0.25459054112434387, "step": 4993, "token_acc": 0.9242706393544382 }, { "epoch": 1.0535864978902953, "grad_norm": 0.74609375, "learning_rate": 4.7312849426458455e-07, "loss": 0.26502537727355957, "step": 4994, "token_acc": 0.9272336505987104 }, { "epoch": 1.0537974683544304, "grad_norm": 0.6015625, "learning_rate": 4.729596555231284e-07, "loss": 0.24779823422431946, "step": 4995, "token_acc": 0.9319402985074627 }, { "epoch": 1.0540084388185653, "grad_norm": 0.83984375, "learning_rate": 4.727908198739645e-07, "loss": 0.20425017178058624, "step": 4996, "token_acc": 0.939877300613497 }, { "epoch": 1.0542194092827004, "grad_norm": 0.71484375, "learning_rate": 4.7262198733640074e-07, "loss": 0.2655612826347351, "step": 4997, "token_acc": 0.9286123853211009 }, { "epoch": 1.0544303797468355, "grad_norm": 0.70703125, "learning_rate": 4.7245315792974443e-07, "loss": 0.2729964256286621, "step": 4998, "token_acc": 0.9231236706168338 }, { "epoch": 1.0546413502109704, "grad_norm": 0.6953125, "learning_rate": 4.7228433167330276e-07, "loss": 0.2585394084453583, "step": 4999, "token_acc": 0.929316338354577 }, { "epoch": 1.0548523206751055, "grad_norm": 0.69140625, "learning_rate": 4.7211550858638225e-07, "loss": 0.2796923518180847, "step": 5000, "token_acc": 0.9286132241076653 }, { "epoch": 1.0548523206751055, "eval_loss": 0.433645635843277, "eval_runtime": 245.7539, "eval_samples_per_second": 137.149, "eval_steps_per_second": 2.144, "eval_token_acc": 0.899111549851925, "step": 5000 } ], "logging_steps": 1, "max_steps": 9480, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.579073107848069e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }