| { |
| "best_metric": 0.43363777, |
| "best_model_checkpoint": "/data/liuzihang/haobin/pangkaiyu/output/output_step_audio2_mini-encoder+align+llm-whole0130_signal_new1_dpdc-lora-1gpu-bs16_4_gckF_2e6_all/v2-20260215-150801/checkpoint-1000", |
| "epoch": 0.2109704641350211, |
| "eval_steps": 200, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0002109704641350211, |
| "grad_norm": 0.76171875, |
| "learning_rate": 5.263157894736842e-09, |
| "loss": 0.2671268880367279, |
| "step": 1, |
| "token_acc": 0.9268792044457443 |
| }, |
| { |
| "epoch": 0.0004219409282700422, |
| "grad_norm": 0.6640625, |
| "learning_rate": 1.0526315789473684e-08, |
| "loss": 0.25691983103752136, |
| "step": 2, |
| "token_acc": 0.9315326633165829 |
| }, |
| { |
| "epoch": 0.0006329113924050633, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.5789473684210525e-08, |
| "loss": 0.20553478598594666, |
| "step": 3, |
| "token_acc": 0.9399263247378861 |
| }, |
| { |
| "epoch": 0.0008438818565400844, |
| "grad_norm": 0.7578125, |
| "learning_rate": 2.1052631578947368e-08, |
| "loss": 0.2874465584754944, |
| "step": 4, |
| "token_acc": 0.9190901238122661 |
| }, |
| { |
| "epoch": 0.0010548523206751054, |
| "grad_norm": 0.99609375, |
| "learning_rate": 2.6315789473684208e-08, |
| "loss": 0.23955097794532776, |
| "step": 5, |
| "token_acc": 0.9297171186934966 |
| }, |
| { |
| "epoch": 0.0012658227848101266, |
| "grad_norm": 0.6484375, |
| "learning_rate": 3.157894736842105e-08, |
| "loss": 0.22314852476119995, |
| "step": 6, |
| "token_acc": 0.9365693865396069 |
| }, |
| { |
| "epoch": 0.0014767932489451476, |
| "grad_norm": 0.74609375, |
| "learning_rate": 3.684210526315789e-08, |
| "loss": 0.23751771450042725, |
| "step": 7, |
| "token_acc": 0.9336440910337264 |
| }, |
| { |
| "epoch": 0.0016877637130801688, |
| "grad_norm": 0.86328125, |
| "learning_rate": 4.2105263157894737e-08, |
| "loss": 0.22823776304721832, |
| "step": 8, |
| "token_acc": 0.9342301943198804 |
| }, |
| { |
| "epoch": 0.0018987341772151898, |
| "grad_norm": 0.6015625, |
| "learning_rate": 4.736842105263158e-08, |
| "loss": 0.22278322279453278, |
| "step": 9, |
| "token_acc": 0.9367531331973186 |
| }, |
| { |
| "epoch": 0.002109704641350211, |
| "grad_norm": 0.72265625, |
| "learning_rate": 5.2631578947368416e-08, |
| "loss": 0.25690096616744995, |
| "step": 10, |
| "token_acc": 0.9291217257318952 |
| }, |
| { |
| "epoch": 0.002320675105485232, |
| "grad_norm": 1.046875, |
| "learning_rate": 5.789473684210526e-08, |
| "loss": 0.3066456913948059, |
| "step": 11, |
| "token_acc": 0.9201006605850897 |
| }, |
| { |
| "epoch": 0.002531645569620253, |
| "grad_norm": 0.7890625, |
| "learning_rate": 6.31578947368421e-08, |
| "loss": 0.2525354027748108, |
| "step": 12, |
| "token_acc": 0.9276785714285715 |
| }, |
| { |
| "epoch": 0.0027426160337552744, |
| "grad_norm": 0.76953125, |
| "learning_rate": 6.842105263157895e-08, |
| "loss": 0.30059731006622314, |
| "step": 13, |
| "token_acc": 0.9238062986793092 |
| }, |
| { |
| "epoch": 0.002953586497890295, |
| "grad_norm": 0.9921875, |
| "learning_rate": 7.368421052631577e-08, |
| "loss": 0.250629723072052, |
| "step": 14, |
| "token_acc": 0.9291187739463601 |
| }, |
| { |
| "epoch": 0.0031645569620253164, |
| "grad_norm": 0.6796875, |
| "learning_rate": 7.894736842105262e-08, |
| "loss": 0.25745123624801636, |
| "step": 15, |
| "token_acc": 0.9317230273752013 |
| }, |
| { |
| "epoch": 0.0033755274261603376, |
| "grad_norm": 0.85546875, |
| "learning_rate": 8.421052631578947e-08, |
| "loss": 0.3376100957393646, |
| "step": 16, |
| "token_acc": 0.9077240566037735 |
| }, |
| { |
| "epoch": 0.003586497890295359, |
| "grad_norm": 0.703125, |
| "learning_rate": 8.947368421052631e-08, |
| "loss": 0.24760955572128296, |
| "step": 17, |
| "token_acc": 0.9222253760999148 |
| }, |
| { |
| "epoch": 0.0037974683544303796, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.473684210526316e-08, |
| "loss": 0.2046602964401245, |
| "step": 18, |
| "token_acc": 0.9403993855606759 |
| }, |
| { |
| "epoch": 0.004008438818565401, |
| "grad_norm": 0.83203125, |
| "learning_rate": 1e-07, |
| "loss": 0.26379868388175964, |
| "step": 19, |
| "token_acc": 0.9294478527607362 |
| }, |
| { |
| "epoch": 0.004219409282700422, |
| "grad_norm": 0.72265625, |
| "learning_rate": 1.0526315789473683e-07, |
| "loss": 0.2657994031906128, |
| "step": 20, |
| "token_acc": 0.9254714157437893 |
| }, |
| { |
| "epoch": 0.004430379746835443, |
| "grad_norm": 0.6640625, |
| "learning_rate": 1.1052631578947368e-07, |
| "loss": 0.28728997707366943, |
| "step": 21, |
| "token_acc": 0.9262518968133535 |
| }, |
| { |
| "epoch": 0.004641350210970464, |
| "grad_norm": 0.77734375, |
| "learning_rate": 1.1578947368421052e-07, |
| "loss": 0.27618926763534546, |
| "step": 22, |
| "token_acc": 0.9308072487644151 |
| }, |
| { |
| "epoch": 0.004852320675105486, |
| "grad_norm": 0.7109375, |
| "learning_rate": 1.2105263157894737e-07, |
| "loss": 0.2314767688512802, |
| "step": 23, |
| "token_acc": 0.936124911284599 |
| }, |
| { |
| "epoch": 0.005063291139240506, |
| "grad_norm": 0.73828125, |
| "learning_rate": 1.263157894736842e-07, |
| "loss": 0.24274200201034546, |
| "step": 24, |
| "token_acc": 0.9345039018952063 |
| }, |
| { |
| "epoch": 0.005274261603375527, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.3157894736842104e-07, |
| "loss": 0.2632070481777191, |
| "step": 25, |
| "token_acc": 0.9223796033994335 |
| }, |
| { |
| "epoch": 0.005485232067510549, |
| "grad_norm": 0.7421875, |
| "learning_rate": 1.368421052631579e-07, |
| "loss": 0.2736364006996155, |
| "step": 26, |
| "token_acc": 0.9269776876267748 |
| }, |
| { |
| "epoch": 0.00569620253164557, |
| "grad_norm": 0.96875, |
| "learning_rate": 1.4210526315789474e-07, |
| "loss": 0.29377132654190063, |
| "step": 27, |
| "token_acc": 0.9181763285024155 |
| }, |
| { |
| "epoch": 0.00590717299578059, |
| "grad_norm": 0.7734375, |
| "learning_rate": 1.4736842105263155e-07, |
| "loss": 0.25689125061035156, |
| "step": 28, |
| "token_acc": 0.9299856527977044 |
| }, |
| { |
| "epoch": 0.006118143459915612, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.526315789473684e-07, |
| "loss": 0.24775874614715576, |
| "step": 29, |
| "token_acc": 0.9330016583747927 |
| }, |
| { |
| "epoch": 0.006329113924050633, |
| "grad_norm": 0.703125, |
| "learning_rate": 1.5789473684210525e-07, |
| "loss": 0.25338542461395264, |
| "step": 30, |
| "token_acc": 0.925512104283054 |
| }, |
| { |
| "epoch": 0.006540084388185654, |
| "grad_norm": 0.6328125, |
| "learning_rate": 1.631578947368421e-07, |
| "loss": 0.25087809562683105, |
| "step": 31, |
| "token_acc": 0.9313120472229676 |
| }, |
| { |
| "epoch": 0.006751054852320675, |
| "grad_norm": 0.67578125, |
| "learning_rate": 1.6842105263157895e-07, |
| "loss": 0.2502059042453766, |
| "step": 32, |
| "token_acc": 0.9279077218840115 |
| }, |
| { |
| "epoch": 0.006962025316455696, |
| "grad_norm": 0.578125, |
| "learning_rate": 1.7368421052631578e-07, |
| "loss": 0.18295930325984955, |
| "step": 33, |
| "token_acc": 0.9424480628860191 |
| }, |
| { |
| "epoch": 0.007172995780590718, |
| "grad_norm": 0.7265625, |
| "learning_rate": 1.7894736842105262e-07, |
| "loss": 0.2690507471561432, |
| "step": 34, |
| "token_acc": 0.9241155819605725 |
| }, |
| { |
| "epoch": 0.007383966244725738, |
| "grad_norm": 0.90234375, |
| "learning_rate": 1.8421052631578946e-07, |
| "loss": 0.2535433769226074, |
| "step": 35, |
| "token_acc": 0.9346446700507615 |
| }, |
| { |
| "epoch": 0.007594936708860759, |
| "grad_norm": 0.73046875, |
| "learning_rate": 1.8947368421052632e-07, |
| "loss": 0.26006314158439636, |
| "step": 36, |
| "token_acc": 0.9291425420457678 |
| }, |
| { |
| "epoch": 0.007805907172995781, |
| "grad_norm": 0.86328125, |
| "learning_rate": 1.9473684210526315e-07, |
| "loss": 0.2664929926395416, |
| "step": 37, |
| "token_acc": 0.9286128845037724 |
| }, |
| { |
| "epoch": 0.008016877637130802, |
| "grad_norm": 0.65234375, |
| "learning_rate": 2e-07, |
| "loss": 0.2170935869216919, |
| "step": 38, |
| "token_acc": 0.9359218028780885 |
| }, |
| { |
| "epoch": 0.008227848101265823, |
| "grad_norm": 0.75390625, |
| "learning_rate": 2.0526315789473683e-07, |
| "loss": 0.31706634163856506, |
| "step": 39, |
| "token_acc": 0.9133278822567457 |
| }, |
| { |
| "epoch": 0.008438818565400843, |
| "grad_norm": 0.6953125, |
| "learning_rate": 2.1052631578947366e-07, |
| "loss": 0.23433184623718262, |
| "step": 40, |
| "token_acc": 0.9313227829202747 |
| }, |
| { |
| "epoch": 0.008649789029535865, |
| "grad_norm": 0.65234375, |
| "learning_rate": 2.1578947368421053e-07, |
| "loss": 0.19157642126083374, |
| "step": 41, |
| "token_acc": 0.9416859122401847 |
| }, |
| { |
| "epoch": 0.008860759493670886, |
| "grad_norm": 0.75390625, |
| "learning_rate": 2.2105263157894736e-07, |
| "loss": 0.26239246129989624, |
| "step": 42, |
| "token_acc": 0.9240352476450927 |
| }, |
| { |
| "epoch": 0.009071729957805906, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.263157894736842e-07, |
| "loss": 0.27333155274391174, |
| "step": 43, |
| "token_acc": 0.9244935543278084 |
| }, |
| { |
| "epoch": 0.009282700421940928, |
| "grad_norm": 0.671875, |
| "learning_rate": 2.3157894736842104e-07, |
| "loss": 0.22739389538764954, |
| "step": 44, |
| "token_acc": 0.9345622119815669 |
| }, |
| { |
| "epoch": 0.00949367088607595, |
| "grad_norm": 1.359375, |
| "learning_rate": 2.3684210526315787e-07, |
| "loss": 0.2970912754535675, |
| "step": 45, |
| "token_acc": 0.918966119455117 |
| }, |
| { |
| "epoch": 0.009704641350210971, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.4210526315789473e-07, |
| "loss": 0.24367359280586243, |
| "step": 46, |
| "token_acc": 0.9307627357162961 |
| }, |
| { |
| "epoch": 0.009915611814345991, |
| "grad_norm": 0.69140625, |
| "learning_rate": 2.4736842105263157e-07, |
| "loss": 0.24166589975357056, |
| "step": 47, |
| "token_acc": 0.925096985974336 |
| }, |
| { |
| "epoch": 0.010126582278481013, |
| "grad_norm": 0.8203125, |
| "learning_rate": 2.526315789473684e-07, |
| "loss": 0.2498053014278412, |
| "step": 48, |
| "token_acc": 0.9309855154785572 |
| }, |
| { |
| "epoch": 0.010337552742616034, |
| "grad_norm": 0.81640625, |
| "learning_rate": 2.578947368421053e-07, |
| "loss": 0.27882808446884155, |
| "step": 49, |
| "token_acc": 0.9232902033271719 |
| }, |
| { |
| "epoch": 0.010548523206751054, |
| "grad_norm": 0.7890625, |
| "learning_rate": 2.631578947368421e-07, |
| "loss": 0.2516263723373413, |
| "step": 50, |
| "token_acc": 0.9283572142619126 |
| }, |
| { |
| "epoch": 0.010759493670886076, |
| "grad_norm": 0.65625, |
| "learning_rate": 2.684210526315789e-07, |
| "loss": 0.22138270735740662, |
| "step": 51, |
| "token_acc": 0.9392942583732058 |
| }, |
| { |
| "epoch": 0.010970464135021098, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.736842105263158e-07, |
| "loss": 0.25101763010025024, |
| "step": 52, |
| "token_acc": 0.9323397913561848 |
| }, |
| { |
| "epoch": 0.011181434599156118, |
| "grad_norm": 0.77734375, |
| "learning_rate": 2.789473684210526e-07, |
| "loss": 0.2590043842792511, |
| "step": 53, |
| "token_acc": 0.9248989023685731 |
| }, |
| { |
| "epoch": 0.01139240506329114, |
| "grad_norm": 1.03125, |
| "learning_rate": 2.842105263157895e-07, |
| "loss": 0.22629833221435547, |
| "step": 54, |
| "token_acc": 0.9388133498145859 |
| }, |
| { |
| "epoch": 0.011603375527426161, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.894736842105263e-07, |
| "loss": 0.26315873861312866, |
| "step": 55, |
| "token_acc": 0.9215632686526374 |
| }, |
| { |
| "epoch": 0.01181434599156118, |
| "grad_norm": 1.390625, |
| "learning_rate": 2.947368421052631e-07, |
| "loss": 0.3269142806529999, |
| "step": 56, |
| "token_acc": 0.9163541967118546 |
| }, |
| { |
| "epoch": 0.012025316455696202, |
| "grad_norm": 0.8125, |
| "learning_rate": 3e-07, |
| "loss": 0.2740277945995331, |
| "step": 57, |
| "token_acc": 0.9223241590214067 |
| }, |
| { |
| "epoch": 0.012236286919831224, |
| "grad_norm": 0.7109375, |
| "learning_rate": 3.052631578947368e-07, |
| "loss": 0.2807028889656067, |
| "step": 58, |
| "token_acc": 0.9231628946633138 |
| }, |
| { |
| "epoch": 0.012447257383966244, |
| "grad_norm": 0.83984375, |
| "learning_rate": 3.105263157894737e-07, |
| "loss": 0.3021116256713867, |
| "step": 59, |
| "token_acc": 0.9209953343701399 |
| }, |
| { |
| "epoch": 0.012658227848101266, |
| "grad_norm": 0.73828125, |
| "learning_rate": 3.157894736842105e-07, |
| "loss": 0.2364785373210907, |
| "step": 60, |
| "token_acc": 0.9329750237116662 |
| }, |
| { |
| "epoch": 0.012869198312236287, |
| "grad_norm": 0.7109375, |
| "learning_rate": 3.2105263157894733e-07, |
| "loss": 0.2884541153907776, |
| "step": 61, |
| "token_acc": 0.9274515831540117 |
| }, |
| { |
| "epoch": 0.013080168776371307, |
| "grad_norm": 0.76953125, |
| "learning_rate": 3.263157894736842e-07, |
| "loss": 0.25490057468414307, |
| "step": 62, |
| "token_acc": 0.9336579427875837 |
| }, |
| { |
| "epoch": 0.013291139240506329, |
| "grad_norm": 0.859375, |
| "learning_rate": 3.31578947368421e-07, |
| "loss": 0.27591922879219055, |
| "step": 63, |
| "token_acc": 0.9201367308887508 |
| }, |
| { |
| "epoch": 0.01350210970464135, |
| "grad_norm": 0.83203125, |
| "learning_rate": 3.368421052631579e-07, |
| "loss": 0.2646903693675995, |
| "step": 64, |
| "token_acc": 0.9261189454322502 |
| }, |
| { |
| "epoch": 0.013713080168776372, |
| "grad_norm": 0.7265625, |
| "learning_rate": 3.4210526315789473e-07, |
| "loss": 0.2481774091720581, |
| "step": 65, |
| "token_acc": 0.9300921512551636 |
| }, |
| { |
| "epoch": 0.013924050632911392, |
| "grad_norm": 0.671875, |
| "learning_rate": 3.4736842105263157e-07, |
| "loss": 0.2667776644229889, |
| "step": 66, |
| "token_acc": 0.9202678027997565 |
| }, |
| { |
| "epoch": 0.014135021097046414, |
| "grad_norm": 0.77734375, |
| "learning_rate": 3.526315789473684e-07, |
| "loss": 0.2720962464809418, |
| "step": 67, |
| "token_acc": 0.9237356168049238 |
| }, |
| { |
| "epoch": 0.014345991561181435, |
| "grad_norm": 0.6171875, |
| "learning_rate": 3.5789473684210524e-07, |
| "loss": 0.25555452704429626, |
| "step": 68, |
| "token_acc": 0.9306469298245614 |
| }, |
| { |
| "epoch": 0.014556962025316455, |
| "grad_norm": 0.640625, |
| "learning_rate": 3.6315789473684213e-07, |
| "loss": 0.22453869879245758, |
| "step": 69, |
| "token_acc": 0.9388444990780578 |
| }, |
| { |
| "epoch": 0.014767932489451477, |
| "grad_norm": 0.75, |
| "learning_rate": 3.684210526315789e-07, |
| "loss": 0.28728553652763367, |
| "step": 70, |
| "token_acc": 0.9182989690721649 |
| }, |
| { |
| "epoch": 0.014978902953586498, |
| "grad_norm": 0.95703125, |
| "learning_rate": 3.7368421052631575e-07, |
| "loss": 0.2622889578342438, |
| "step": 71, |
| "token_acc": 0.9239098624524437 |
| }, |
| { |
| "epoch": 0.015189873417721518, |
| "grad_norm": 0.78125, |
| "learning_rate": 3.7894736842105264e-07, |
| "loss": 0.2780531346797943, |
| "step": 72, |
| "token_acc": 0.9266362252663622 |
| }, |
| { |
| "epoch": 0.01540084388185654, |
| "grad_norm": 0.703125, |
| "learning_rate": 3.842105263157894e-07, |
| "loss": 0.2625043988227844, |
| "step": 73, |
| "token_acc": 0.9331468531468532 |
| }, |
| { |
| "epoch": 0.015611814345991562, |
| "grad_norm": 0.703125, |
| "learning_rate": 3.894736842105263e-07, |
| "loss": 0.25795984268188477, |
| "step": 74, |
| "token_acc": 0.9242160278745645 |
| }, |
| { |
| "epoch": 0.015822784810126583, |
| "grad_norm": 0.71484375, |
| "learning_rate": 3.9473684210526315e-07, |
| "loss": 0.2481173276901245, |
| "step": 75, |
| "token_acc": 0.9310846176214016 |
| }, |
| { |
| "epoch": 0.016033755274261603, |
| "grad_norm": 0.640625, |
| "learning_rate": 4e-07, |
| "loss": 0.23631326854228973, |
| "step": 76, |
| "token_acc": 0.9369342184671092 |
| }, |
| { |
| "epoch": 0.016244725738396623, |
| "grad_norm": 0.69140625, |
| "learning_rate": 4.052631578947368e-07, |
| "loss": 0.24659401178359985, |
| "step": 77, |
| "token_acc": 0.930279458369346 |
| }, |
| { |
| "epoch": 0.016455696202531647, |
| "grad_norm": 0.77734375, |
| "learning_rate": 4.1052631578947365e-07, |
| "loss": 0.28330034017562866, |
| "step": 78, |
| "token_acc": 0.9203966005665722 |
| }, |
| { |
| "epoch": 0.016666666666666666, |
| "grad_norm": 0.96484375, |
| "learning_rate": 4.1578947368421054e-07, |
| "loss": 0.2582593560218811, |
| "step": 79, |
| "token_acc": 0.9296250768285187 |
| }, |
| { |
| "epoch": 0.016877637130801686, |
| "grad_norm": 0.734375, |
| "learning_rate": 4.2105263157894733e-07, |
| "loss": 0.2518593370914459, |
| "step": 80, |
| "token_acc": 0.9288208434058555 |
| }, |
| { |
| "epoch": 0.01708860759493671, |
| "grad_norm": 0.859375, |
| "learning_rate": 4.2631578947368416e-07, |
| "loss": 0.30441492795944214, |
| "step": 81, |
| "token_acc": 0.9242614707730987 |
| }, |
| { |
| "epoch": 0.01729957805907173, |
| "grad_norm": 0.7578125, |
| "learning_rate": 4.3157894736842105e-07, |
| "loss": 0.30916911363601685, |
| "step": 82, |
| "token_acc": 0.9124253625248792 |
| }, |
| { |
| "epoch": 0.01751054852320675, |
| "grad_norm": 0.609375, |
| "learning_rate": 4.368421052631579e-07, |
| "loss": 0.28638702630996704, |
| "step": 83, |
| "token_acc": 0.9245689655172413 |
| }, |
| { |
| "epoch": 0.017721518987341773, |
| "grad_norm": 0.80859375, |
| "learning_rate": 4.421052631578947e-07, |
| "loss": 0.2646373510360718, |
| "step": 84, |
| "token_acc": 0.9279176201372997 |
| }, |
| { |
| "epoch": 0.017932489451476793, |
| "grad_norm": 0.97265625, |
| "learning_rate": 4.4736842105263156e-07, |
| "loss": 0.27530571818351746, |
| "step": 85, |
| "token_acc": 0.9217687074829932 |
| }, |
| { |
| "epoch": 0.018143459915611813, |
| "grad_norm": 0.73828125, |
| "learning_rate": 4.526315789473684e-07, |
| "loss": 0.30989915132522583, |
| "step": 86, |
| "token_acc": 0.9133137062479555 |
| }, |
| { |
| "epoch": 0.018354430379746836, |
| "grad_norm": 0.91015625, |
| "learning_rate": 4.5789473684210523e-07, |
| "loss": 0.2850973308086395, |
| "step": 87, |
| "token_acc": 0.9211218229623137 |
| }, |
| { |
| "epoch": 0.018565400843881856, |
| "grad_norm": 0.72265625, |
| "learning_rate": 4.6315789473684207e-07, |
| "loss": 0.2523067593574524, |
| "step": 88, |
| "token_acc": 0.9337899543378996 |
| }, |
| { |
| "epoch": 0.018776371308016876, |
| "grad_norm": 0.734375, |
| "learning_rate": 4.6842105263157896e-07, |
| "loss": 0.2510542869567871, |
| "step": 89, |
| "token_acc": 0.9345845983991168 |
| }, |
| { |
| "epoch": 0.0189873417721519, |
| "grad_norm": 0.65625, |
| "learning_rate": 4.7368421052631574e-07, |
| "loss": 0.27831587195396423, |
| "step": 90, |
| "token_acc": 0.9237072619384007 |
| }, |
| { |
| "epoch": 0.01919831223628692, |
| "grad_norm": 0.67578125, |
| "learning_rate": 4.789473684210526e-07, |
| "loss": 0.2806670665740967, |
| "step": 91, |
| "token_acc": 0.9245553643144004 |
| }, |
| { |
| "epoch": 0.019409282700421943, |
| "grad_norm": 0.8046875, |
| "learning_rate": 4.842105263157895e-07, |
| "loss": 0.2630135416984558, |
| "step": 92, |
| "token_acc": 0.9274447949526814 |
| }, |
| { |
| "epoch": 0.019620253164556962, |
| "grad_norm": 0.71875, |
| "learning_rate": 4.894736842105263e-07, |
| "loss": 0.2662945091724396, |
| "step": 93, |
| "token_acc": 0.9303838646714379 |
| }, |
| { |
| "epoch": 0.019831223628691982, |
| "grad_norm": 0.703125, |
| "learning_rate": 4.947368421052631e-07, |
| "loss": 0.2430751472711563, |
| "step": 94, |
| "token_acc": 0.9333737129012719 |
| }, |
| { |
| "epoch": 0.020042194092827006, |
| "grad_norm": 0.80078125, |
| "learning_rate": 5e-07, |
| "loss": 0.2676389515399933, |
| "step": 95, |
| "token_acc": 0.9231901118304885 |
| }, |
| { |
| "epoch": 0.020253164556962026, |
| "grad_norm": 0.765625, |
| "learning_rate": 5.052631578947368e-07, |
| "loss": 0.24123789370059967, |
| "step": 96, |
| "token_acc": 0.9292089873807325 |
| }, |
| { |
| "epoch": 0.020464135021097046, |
| "grad_norm": 0.80078125, |
| "learning_rate": 5.105263157894736e-07, |
| "loss": 0.24845993518829346, |
| "step": 97, |
| "token_acc": 0.9311657879320445 |
| }, |
| { |
| "epoch": 0.02067510548523207, |
| "grad_norm": 0.6953125, |
| "learning_rate": 5.157894736842106e-07, |
| "loss": 0.2502240836620331, |
| "step": 98, |
| "token_acc": 0.9316136772645471 |
| }, |
| { |
| "epoch": 0.02088607594936709, |
| "grad_norm": 0.64453125, |
| "learning_rate": 5.210526315789473e-07, |
| "loss": 0.2715577483177185, |
| "step": 99, |
| "token_acc": 0.9284542172628816 |
| }, |
| { |
| "epoch": 0.02109704641350211, |
| "grad_norm": 0.640625, |
| "learning_rate": 5.263157894736842e-07, |
| "loss": 0.25573915243148804, |
| "step": 100, |
| "token_acc": 0.9303937007874016 |
| }, |
| { |
| "epoch": 0.021308016877637132, |
| "grad_norm": 0.5859375, |
| "learning_rate": 5.31578947368421e-07, |
| "loss": 0.23634707927703857, |
| "step": 101, |
| "token_acc": 0.9278404618210443 |
| }, |
| { |
| "epoch": 0.021518987341772152, |
| "grad_norm": 0.79296875, |
| "learning_rate": 5.368421052631578e-07, |
| "loss": 0.24541448056697845, |
| "step": 102, |
| "token_acc": 0.9269794721407625 |
| }, |
| { |
| "epoch": 0.021729957805907172, |
| "grad_norm": 0.8046875, |
| "learning_rate": 5.421052631578948e-07, |
| "loss": 0.29984721541404724, |
| "step": 103, |
| "token_acc": 0.9175257731958762 |
| }, |
| { |
| "epoch": 0.021940928270042195, |
| "grad_norm": 0.703125, |
| "learning_rate": 5.473684210526316e-07, |
| "loss": 0.23752300441265106, |
| "step": 104, |
| "token_acc": 0.9307146753955264 |
| }, |
| { |
| "epoch": 0.022151898734177215, |
| "grad_norm": 0.7578125, |
| "learning_rate": 5.526315789473684e-07, |
| "loss": 0.26209786534309387, |
| "step": 105, |
| "token_acc": 0.9331191588785047 |
| }, |
| { |
| "epoch": 0.022362869198312235, |
| "grad_norm": 0.6015625, |
| "learning_rate": 5.578947368421052e-07, |
| "loss": 0.26135504245758057, |
| "step": 106, |
| "token_acc": 0.9239098624524437 |
| }, |
| { |
| "epoch": 0.02257383966244726, |
| "grad_norm": 0.76171875, |
| "learning_rate": 5.63157894736842e-07, |
| "loss": 0.2286645919084549, |
| "step": 107, |
| "token_acc": 0.9346016646848989 |
| }, |
| { |
| "epoch": 0.02278481012658228, |
| "grad_norm": 0.94921875, |
| "learning_rate": 5.68421052631579e-07, |
| "loss": 0.2228844314813614, |
| "step": 108, |
| "token_acc": 0.9335610058987892 |
| }, |
| { |
| "epoch": 0.0229957805907173, |
| "grad_norm": 0.7265625, |
| "learning_rate": 5.736842105263158e-07, |
| "loss": 0.23979443311691284, |
| "step": 109, |
| "token_acc": 0.9332460732984293 |
| }, |
| { |
| "epoch": 0.023206751054852322, |
| "grad_norm": 0.7734375, |
| "learning_rate": 5.789473684210526e-07, |
| "loss": 0.2925586402416229, |
| "step": 110, |
| "token_acc": 0.9247163973874184 |
| }, |
| { |
| "epoch": 0.02341772151898734, |
| "grad_norm": 0.89453125, |
| "learning_rate": 5.842105263157895e-07, |
| "loss": 0.26546645164489746, |
| "step": 111, |
| "token_acc": 0.9265167364016736 |
| }, |
| { |
| "epoch": 0.02362869198312236, |
| "grad_norm": 0.71875, |
| "learning_rate": 5.894736842105262e-07, |
| "loss": 0.23430243134498596, |
| "step": 112, |
| "token_acc": 0.9325294286534597 |
| }, |
| { |
| "epoch": 0.023839662447257385, |
| "grad_norm": 0.6875, |
| "learning_rate": 5.947368421052631e-07, |
| "loss": 0.2555156946182251, |
| "step": 113, |
| "token_acc": 0.9277472527472528 |
| }, |
| { |
| "epoch": 0.024050632911392405, |
| "grad_norm": 0.625, |
| "learning_rate": 6e-07, |
| "loss": 0.27733132243156433, |
| "step": 114, |
| "token_acc": 0.9281454979129398 |
| }, |
| { |
| "epoch": 0.024261603375527425, |
| "grad_norm": 0.6328125, |
| "learning_rate": 6.052631578947368e-07, |
| "loss": 0.2438090741634369, |
| "step": 115, |
| "token_acc": 0.9304691916336914 |
| }, |
| { |
| "epoch": 0.024472573839662448, |
| "grad_norm": 0.70703125, |
| "learning_rate": 6.105263157894736e-07, |
| "loss": 0.2810766100883484, |
| "step": 116, |
| "token_acc": 0.9260921603830042 |
| }, |
| { |
| "epoch": 0.024683544303797468, |
| "grad_norm": 0.6171875, |
| "learning_rate": 6.157894736842105e-07, |
| "loss": 0.21388083696365356, |
| "step": 117, |
| "token_acc": 0.9422394320748628 |
| }, |
| { |
| "epoch": 0.024894514767932488, |
| "grad_norm": 0.8828125, |
| "learning_rate": 6.210526315789474e-07, |
| "loss": 0.304085373878479, |
| "step": 118, |
| "token_acc": 0.9148753224419605 |
| }, |
| { |
| "epoch": 0.02510548523206751, |
| "grad_norm": 0.8125, |
| "learning_rate": 6.263157894736842e-07, |
| "loss": 0.24785375595092773, |
| "step": 119, |
| "token_acc": 0.9345686160972785 |
| }, |
| { |
| "epoch": 0.02531645569620253, |
| "grad_norm": 0.6015625, |
| "learning_rate": 6.31578947368421e-07, |
| "loss": 0.23662757873535156, |
| "step": 120, |
| "token_acc": 0.9339464882943144 |
| }, |
| { |
| "epoch": 0.02552742616033755, |
| "grad_norm": 0.7890625, |
| "learning_rate": 6.368421052631578e-07, |
| "loss": 0.30741703510284424, |
| "step": 121, |
| "token_acc": 0.9190517490604221 |
| }, |
| { |
| "epoch": 0.025738396624472575, |
| "grad_norm": 0.58984375, |
| "learning_rate": 6.421052631578947e-07, |
| "loss": 0.2324860692024231, |
| "step": 122, |
| "token_acc": 0.9350512753089666 |
| }, |
| { |
| "epoch": 0.025949367088607594, |
| "grad_norm": 0.71875, |
| "learning_rate": 6.473684210526316e-07, |
| "loss": 0.22093364596366882, |
| "step": 123, |
| "token_acc": 0.9382284382284383 |
| }, |
| { |
| "epoch": 0.026160337552742614, |
| "grad_norm": 0.58203125, |
| "learning_rate": 6.526315789473684e-07, |
| "loss": 0.26719486713409424, |
| "step": 124, |
| "token_acc": 0.9285504263451926 |
| }, |
| { |
| "epoch": 0.026371308016877638, |
| "grad_norm": 0.69921875, |
| "learning_rate": 6.578947368421053e-07, |
| "loss": 0.26403629779815674, |
| "step": 125, |
| "token_acc": 0.9270715096481271 |
| }, |
| { |
| "epoch": 0.026582278481012658, |
| "grad_norm": 1.046875, |
| "learning_rate": 6.63157894736842e-07, |
| "loss": 0.24764738976955414, |
| "step": 126, |
| "token_acc": 0.9328358208955224 |
| }, |
| { |
| "epoch": 0.02679324894514768, |
| "grad_norm": 0.80078125, |
| "learning_rate": 6.684210526315788e-07, |
| "loss": 0.2644532322883606, |
| "step": 127, |
| "token_acc": 0.9267714201008005 |
| }, |
| { |
| "epoch": 0.0270042194092827, |
| "grad_norm": 0.80859375, |
| "learning_rate": 6.736842105263158e-07, |
| "loss": 0.2985777258872986, |
| "step": 128, |
| "token_acc": 0.9214629997164729 |
| }, |
| { |
| "epoch": 0.02721518987341772, |
| "grad_norm": 0.62109375, |
| "learning_rate": 6.789473684210526e-07, |
| "loss": 0.23605869710445404, |
| "step": 129, |
| "token_acc": 0.9335576114381834 |
| }, |
| { |
| "epoch": 0.027426160337552744, |
| "grad_norm": 0.72265625, |
| "learning_rate": 6.842105263157895e-07, |
| "loss": 0.254613995552063, |
| "step": 130, |
| "token_acc": 0.9289265867212635 |
| }, |
| { |
| "epoch": 0.027637130801687764, |
| "grad_norm": 0.91015625, |
| "learning_rate": 6.894736842105263e-07, |
| "loss": 0.32649004459381104, |
| "step": 131, |
| "token_acc": 0.9107457428068115 |
| }, |
| { |
| "epoch": 0.027848101265822784, |
| "grad_norm": 0.66796875, |
| "learning_rate": 6.947368421052631e-07, |
| "loss": 0.2345716655254364, |
| "step": 132, |
| "token_acc": 0.9308671922377199 |
| }, |
| { |
| "epoch": 0.028059071729957807, |
| "grad_norm": 0.796875, |
| "learning_rate": 7e-07, |
| "loss": 0.2658767104148865, |
| "step": 133, |
| "token_acc": 0.9226377390807879 |
| }, |
| { |
| "epoch": 0.028270042194092827, |
| "grad_norm": 0.70703125, |
| "learning_rate": 7.052631578947368e-07, |
| "loss": 0.2791082561016083, |
| "step": 134, |
| "token_acc": 0.9298836497244336 |
| }, |
| { |
| "epoch": 0.028481012658227847, |
| "grad_norm": 0.75390625, |
| "learning_rate": 7.105263157894736e-07, |
| "loss": 0.28342780470848083, |
| "step": 135, |
| "token_acc": 0.9173340961098398 |
| }, |
| { |
| "epoch": 0.02869198312236287, |
| "grad_norm": 0.53125, |
| "learning_rate": 7.157894736842105e-07, |
| "loss": 0.19060048460960388, |
| "step": 136, |
| "token_acc": 0.9426000620539869 |
| }, |
| { |
| "epoch": 0.02890295358649789, |
| "grad_norm": 0.76171875, |
| "learning_rate": 7.210526315789473e-07, |
| "loss": 0.24044275283813477, |
| "step": 137, |
| "token_acc": 0.9315665883931566 |
| }, |
| { |
| "epoch": 0.02911392405063291, |
| "grad_norm": 0.69921875, |
| "learning_rate": 7.263157894736843e-07, |
| "loss": 0.22750994563102722, |
| "step": 138, |
| "token_acc": 0.9367798193709125 |
| }, |
| { |
| "epoch": 0.029324894514767934, |
| "grad_norm": 0.65234375, |
| "learning_rate": 7.315789473684211e-07, |
| "loss": 0.2409280240535736, |
| "step": 139, |
| "token_acc": 0.9357879234167894 |
| }, |
| { |
| "epoch": 0.029535864978902954, |
| "grad_norm": 0.74609375, |
| "learning_rate": 7.368421052631578e-07, |
| "loss": 0.24030432105064392, |
| "step": 140, |
| "token_acc": 0.928141912206855 |
| }, |
| { |
| "epoch": 0.029746835443037974, |
| "grad_norm": 0.703125, |
| "learning_rate": 7.421052631578947e-07, |
| "loss": 0.255402147769928, |
| "step": 141, |
| "token_acc": 0.9247853124074622 |
| }, |
| { |
| "epoch": 0.029957805907172997, |
| "grad_norm": 0.74609375, |
| "learning_rate": 7.473684210526315e-07, |
| "loss": 0.21290147304534912, |
| "step": 142, |
| "token_acc": 0.9362466327446872 |
| }, |
| { |
| "epoch": 0.030168776371308017, |
| "grad_norm": 0.88671875, |
| "learning_rate": 7.526315789473684e-07, |
| "loss": 0.2536450922489166, |
| "step": 143, |
| "token_acc": 0.9314112291350531 |
| }, |
| { |
| "epoch": 0.030379746835443037, |
| "grad_norm": 0.6015625, |
| "learning_rate": 7.578947368421053e-07, |
| "loss": 0.20192307233810425, |
| "step": 144, |
| "token_acc": 0.9473519272955186 |
| }, |
| { |
| "epoch": 0.03059071729957806, |
| "grad_norm": 0.7734375, |
| "learning_rate": 7.631578947368421e-07, |
| "loss": 0.27158498764038086, |
| "step": 145, |
| "token_acc": 0.9229805886036319 |
| }, |
| { |
| "epoch": 0.03080168776371308, |
| "grad_norm": 0.93359375, |
| "learning_rate": 7.684210526315788e-07, |
| "loss": 0.2483355551958084, |
| "step": 146, |
| "token_acc": 0.9249113760876571 |
| }, |
| { |
| "epoch": 0.0310126582278481, |
| "grad_norm": 0.546875, |
| "learning_rate": 7.736842105263157e-07, |
| "loss": 0.2523292303085327, |
| "step": 147, |
| "token_acc": 0.9309432853364679 |
| }, |
| { |
| "epoch": 0.031223628691983123, |
| "grad_norm": 0.9375, |
| "learning_rate": 7.789473684210526e-07, |
| "loss": 0.28833281993865967, |
| "step": 148, |
| "token_acc": 0.9170937594211637 |
| }, |
| { |
| "epoch": 0.03143459915611815, |
| "grad_norm": 0.7421875, |
| "learning_rate": 7.842105263157895e-07, |
| "loss": 0.250460147857666, |
| "step": 149, |
| "token_acc": 0.9285714285714286 |
| }, |
| { |
| "epoch": 0.03164556962025317, |
| "grad_norm": 0.9453125, |
| "learning_rate": 7.894736842105263e-07, |
| "loss": 0.2840534746646881, |
| "step": 150, |
| "token_acc": 0.9216349108789182 |
| }, |
| { |
| "epoch": 0.03185654008438819, |
| "grad_norm": 0.74609375, |
| "learning_rate": 7.947368421052631e-07, |
| "loss": 0.2967279851436615, |
| "step": 151, |
| "token_acc": 0.9164603960396039 |
| }, |
| { |
| "epoch": 0.032067510548523206, |
| "grad_norm": 0.83203125, |
| "learning_rate": 8e-07, |
| "loss": 0.25097131729125977, |
| "step": 152, |
| "token_acc": 0.929299572509043 |
| }, |
| { |
| "epoch": 0.032278481012658226, |
| "grad_norm": 0.6953125, |
| "learning_rate": 8.052631578947368e-07, |
| "loss": 0.23201878368854523, |
| "step": 153, |
| "token_acc": 0.9383025367992484 |
| }, |
| { |
| "epoch": 0.032489451476793246, |
| "grad_norm": 0.65625, |
| "learning_rate": 8.105263157894736e-07, |
| "loss": 0.2245524525642395, |
| "step": 154, |
| "token_acc": 0.9406554472984943 |
| }, |
| { |
| "epoch": 0.03270042194092827, |
| "grad_norm": 0.75, |
| "learning_rate": 8.157894736842105e-07, |
| "loss": 0.22958879172801971, |
| "step": 155, |
| "token_acc": 0.9288455860643637 |
| }, |
| { |
| "epoch": 0.03291139240506329, |
| "grad_norm": 0.70703125, |
| "learning_rate": 8.210526315789473e-07, |
| "loss": 0.2539287805557251, |
| "step": 156, |
| "token_acc": 0.9255386565272496 |
| }, |
| { |
| "epoch": 0.03312236286919831, |
| "grad_norm": 0.6875, |
| "learning_rate": 8.263157894736841e-07, |
| "loss": 0.245978444814682, |
| "step": 157, |
| "token_acc": 0.9341576506955178 |
| }, |
| { |
| "epoch": 0.03333333333333333, |
| "grad_norm": 0.66796875, |
| "learning_rate": 8.315789473684211e-07, |
| "loss": 0.26281115412712097, |
| "step": 158, |
| "token_acc": 0.927784222737819 |
| }, |
| { |
| "epoch": 0.03354430379746835, |
| "grad_norm": 1.0, |
| "learning_rate": 8.368421052631579e-07, |
| "loss": 0.229181706905365, |
| "step": 159, |
| "token_acc": 0.9329073482428115 |
| }, |
| { |
| "epoch": 0.03375527426160337, |
| "grad_norm": 0.73046875, |
| "learning_rate": 8.421052631578947e-07, |
| "loss": 0.2598130702972412, |
| "step": 160, |
| "token_acc": 0.9302388707926167 |
| }, |
| { |
| "epoch": 0.0339662447257384, |
| "grad_norm": 0.75, |
| "learning_rate": 8.473684210526315e-07, |
| "loss": 0.25113117694854736, |
| "step": 161, |
| "token_acc": 0.9281177829099307 |
| }, |
| { |
| "epoch": 0.03417721518987342, |
| "grad_norm": 0.6015625, |
| "learning_rate": 8.526315789473683e-07, |
| "loss": 0.20510195195674896, |
| "step": 162, |
| "token_acc": 0.936978417266187 |
| }, |
| { |
| "epoch": 0.03438818565400844, |
| "grad_norm": 0.75, |
| "learning_rate": 8.578947368421053e-07, |
| "loss": 0.25259485840797424, |
| "step": 163, |
| "token_acc": 0.9289311695579183 |
| }, |
| { |
| "epoch": 0.03459915611814346, |
| "grad_norm": 0.6484375, |
| "learning_rate": 8.631578947368421e-07, |
| "loss": 0.2823118567466736, |
| "step": 164, |
| "token_acc": 0.9225122349102773 |
| }, |
| { |
| "epoch": 0.03481012658227848, |
| "grad_norm": 0.84375, |
| "learning_rate": 8.684210526315789e-07, |
| "loss": 0.2576562762260437, |
| "step": 165, |
| "token_acc": 0.931045050566963 |
| }, |
| { |
| "epoch": 0.0350210970464135, |
| "grad_norm": 0.6796875, |
| "learning_rate": 8.736842105263158e-07, |
| "loss": 0.2349683940410614, |
| "step": 166, |
| "token_acc": 0.9357770372614359 |
| }, |
| { |
| "epoch": 0.035232067510548526, |
| "grad_norm": 0.89453125, |
| "learning_rate": 8.789473684210525e-07, |
| "loss": 0.19951120018959045, |
| "step": 167, |
| "token_acc": 0.9381918819188192 |
| }, |
| { |
| "epoch": 0.035443037974683546, |
| "grad_norm": 0.58984375, |
| "learning_rate": 8.842105263157895e-07, |
| "loss": 0.24796079099178314, |
| "step": 168, |
| "token_acc": 0.9311111111111111 |
| }, |
| { |
| "epoch": 0.035654008438818566, |
| "grad_norm": 0.73828125, |
| "learning_rate": 8.894736842105263e-07, |
| "loss": 0.2532733082771301, |
| "step": 169, |
| "token_acc": 0.9297912713472486 |
| }, |
| { |
| "epoch": 0.035864978902953586, |
| "grad_norm": 0.98046875, |
| "learning_rate": 8.947368421052631e-07, |
| "loss": 0.25062763690948486, |
| "step": 170, |
| "token_acc": 0.9289099526066351 |
| }, |
| { |
| "epoch": 0.036075949367088606, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9e-07, |
| "loss": 0.2228512465953827, |
| "step": 171, |
| "token_acc": 0.9302030456852792 |
| }, |
| { |
| "epoch": 0.036286919831223625, |
| "grad_norm": 0.55859375, |
| "learning_rate": 9.052631578947368e-07, |
| "loss": 0.20684444904327393, |
| "step": 172, |
| "token_acc": 0.9388583019414662 |
| }, |
| { |
| "epoch": 0.03649789029535865, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.105263157894737e-07, |
| "loss": 0.2119678407907486, |
| "step": 173, |
| "token_acc": 0.938135593220339 |
| }, |
| { |
| "epoch": 0.03670886075949367, |
| "grad_norm": 0.828125, |
| "learning_rate": 9.157894736842105e-07, |
| "loss": 0.25168293714523315, |
| "step": 174, |
| "token_acc": 0.9275818639798489 |
| }, |
| { |
| "epoch": 0.03691983122362869, |
| "grad_norm": 2.046875, |
| "learning_rate": 9.210526315789473e-07, |
| "loss": 0.2518053650856018, |
| "step": 175, |
| "token_acc": 0.9275232105420784 |
| }, |
| { |
| "epoch": 0.03713080168776371, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.263157894736841e-07, |
| "loss": 0.25257354974746704, |
| "step": 176, |
| "token_acc": 0.9272777932571747 |
| }, |
| { |
| "epoch": 0.03734177215189873, |
| "grad_norm": 1.0390625, |
| "learning_rate": 9.31578947368421e-07, |
| "loss": 0.24908028542995453, |
| "step": 177, |
| "token_acc": 0.9316982303632412 |
| }, |
| { |
| "epoch": 0.03755274261603375, |
| "grad_norm": 0.796875, |
| "learning_rate": 9.368421052631579e-07, |
| "loss": 0.2594815194606781, |
| "step": 178, |
| "token_acc": 0.9222846441947565 |
| }, |
| { |
| "epoch": 0.03776371308016878, |
| "grad_norm": 1.734375, |
| "learning_rate": 9.421052631578948e-07, |
| "loss": 0.301219642162323, |
| "step": 179, |
| "token_acc": 0.9261637239165329 |
| }, |
| { |
| "epoch": 0.0379746835443038, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.473684210526315e-07, |
| "loss": 0.2224687933921814, |
| "step": 180, |
| "token_acc": 0.9298196166854565 |
| }, |
| { |
| "epoch": 0.03818565400843882, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.526315789473683e-07, |
| "loss": 0.2755109667778015, |
| "step": 181, |
| "token_acc": 0.9245460237946149 |
| }, |
| { |
| "epoch": 0.03839662447257384, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.578947368421053e-07, |
| "loss": 0.24350810050964355, |
| "step": 182, |
| "token_acc": 0.9347500748278958 |
| }, |
| { |
| "epoch": 0.03860759493670886, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.63157894736842e-07, |
| "loss": 0.26835542917251587, |
| "step": 183, |
| "token_acc": 0.9246247205365697 |
| }, |
| { |
| "epoch": 0.038818565400843885, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.68421052631579e-07, |
| "loss": 0.25252771377563477, |
| "step": 184, |
| "token_acc": 0.9330130016958734 |
| }, |
| { |
| "epoch": 0.039029535864978905, |
| "grad_norm": 1.21875, |
| "learning_rate": 9.736842105263158e-07, |
| "loss": 0.27294090390205383, |
| "step": 185, |
| "token_acc": 0.9220917822838848 |
| }, |
| { |
| "epoch": 0.039240506329113925, |
| "grad_norm": 0.9140625, |
| "learning_rate": 9.789473684210526e-07, |
| "loss": 0.257973313331604, |
| "step": 186, |
| "token_acc": 0.9298401420959147 |
| }, |
| { |
| "epoch": 0.039451476793248945, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.842105263157894e-07, |
| "loss": 0.20286661386489868, |
| "step": 187, |
| "token_acc": 0.9379893517068587 |
| }, |
| { |
| "epoch": 0.039662447257383965, |
| "grad_norm": 1.0859375, |
| "learning_rate": 9.894736842105263e-07, |
| "loss": 0.30547526478767395, |
| "step": 188, |
| "token_acc": 0.9224360815857512 |
| }, |
| { |
| "epoch": 0.039873417721518985, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.947368421052631e-07, |
| "loss": 0.2581551671028137, |
| "step": 189, |
| "token_acc": 0.9263346257083209 |
| }, |
| { |
| "epoch": 0.04008438818565401, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1e-06, |
| "loss": 0.267391562461853, |
| "step": 190, |
| "token_acc": 0.9272947591638897 |
| }, |
| { |
| "epoch": 0.04029535864978903, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.99999971410384e-07, |
| "loss": 0.22360718250274658, |
| "step": 191, |
| "token_acc": 0.9367160775370581 |
| }, |
| { |
| "epoch": 0.04050632911392405, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.999998856415392e-07, |
| "loss": 0.2589290738105774, |
| "step": 192, |
| "token_acc": 0.9256432004523607 |
| }, |
| { |
| "epoch": 0.04071729957805907, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.999997426934757e-07, |
| "loss": 0.2469128668308258, |
| "step": 193, |
| "token_acc": 0.9290098745663197 |
| }, |
| { |
| "epoch": 0.04092827004219409, |
| "grad_norm": 1.203125, |
| "learning_rate": 9.999995425662095e-07, |
| "loss": 0.2602100968360901, |
| "step": 194, |
| "token_acc": 0.9284731774415406 |
| }, |
| { |
| "epoch": 0.04113924050632911, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.999992852597638e-07, |
| "loss": 0.2579442858695984, |
| "step": 195, |
| "token_acc": 0.9280116110304789 |
| }, |
| { |
| "epoch": 0.04135021097046414, |
| "grad_norm": 0.75, |
| "learning_rate": 9.999989707741678e-07, |
| "loss": 0.266757071018219, |
| "step": 196, |
| "token_acc": 0.923786841321822 |
| }, |
| { |
| "epoch": 0.04156118143459916, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.999985991094577e-07, |
| "loss": 0.22912907600402832, |
| "step": 197, |
| "token_acc": 0.9336933693369337 |
| }, |
| { |
| "epoch": 0.04177215189873418, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.999981702656756e-07, |
| "loss": 0.23296543955802917, |
| "step": 198, |
| "token_acc": 0.931110498759989 |
| }, |
| { |
| "epoch": 0.0419831223628692, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.999976842428708e-07, |
| "loss": 0.27944594621658325, |
| "step": 199, |
| "token_acc": 0.9281785829828535 |
| }, |
| { |
| "epoch": 0.04219409282700422, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.99997141041099e-07, |
| "loss": 0.2449186146259308, |
| "step": 200, |
| "token_acc": 0.9309120699071546 |
| }, |
| { |
| "epoch": 0.04219409282700422, |
| "eval_loss": 0.43372446298599243, |
| "eval_runtime": 245.8313, |
| "eval_samples_per_second": 137.106, |
| "eval_steps_per_second": 2.144, |
| "eval_token_acc": 0.8990801399982051, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.04240506329113924, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.99996540660422e-07, |
| "loss": 0.2534567713737488, |
| "step": 201, |
| "token_acc": 0.9282550930026572 |
| }, |
| { |
| "epoch": 0.042616033755274264, |
| "grad_norm": 1.0, |
| "learning_rate": 9.999958831009087e-07, |
| "loss": 0.2861325144767761, |
| "step": 202, |
| "token_acc": 0.9250295159386068 |
| }, |
| { |
| "epoch": 0.042827004219409284, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.999951683626345e-07, |
| "loss": 0.24760206043720245, |
| "step": 203, |
| "token_acc": 0.9320360151031077 |
| }, |
| { |
| "epoch": 0.043037974683544304, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.999943964456805e-07, |
| "loss": 0.2488883137702942, |
| "step": 204, |
| "token_acc": 0.9300783604581073 |
| }, |
| { |
| "epoch": 0.043248945147679324, |
| "grad_norm": 0.80078125, |
| "learning_rate": 9.999935673501355e-07, |
| "loss": 0.257844477891922, |
| "step": 205, |
| "token_acc": 0.9278959810874704 |
| }, |
| { |
| "epoch": 0.043459915611814344, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.99992681076094e-07, |
| "loss": 0.21238219738006592, |
| "step": 206, |
| "token_acc": 0.937776467118844 |
| }, |
| { |
| "epoch": 0.043670886075949364, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.999917376236578e-07, |
| "loss": 0.21476256847381592, |
| "step": 207, |
| "token_acc": 0.9357326478149101 |
| }, |
| { |
| "epoch": 0.04388185654008439, |
| "grad_norm": 0.97265625, |
| "learning_rate": 9.999907369929344e-07, |
| "loss": 0.24194155633449554, |
| "step": 208, |
| "token_acc": 0.9311714096624751 |
| }, |
| { |
| "epoch": 0.04409282700421941, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.999896791840383e-07, |
| "loss": 0.2757856249809265, |
| "step": 209, |
| "token_acc": 0.9243792325056434 |
| }, |
| { |
| "epoch": 0.04430379746835443, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.999885641970906e-07, |
| "loss": 0.2318935990333557, |
| "step": 210, |
| "token_acc": 0.9357142857142857 |
| }, |
| { |
| "epoch": 0.04451476793248945, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.999873920322186e-07, |
| "loss": 0.2802179157733917, |
| "step": 211, |
| "token_acc": 0.9227716727716728 |
| }, |
| { |
| "epoch": 0.04472573839662447, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.999861626895565e-07, |
| "loss": 0.2558714747428894, |
| "step": 212, |
| "token_acc": 0.9255022321428571 |
| }, |
| { |
| "epoch": 0.04493670886075949, |
| "grad_norm": 0.890625, |
| "learning_rate": 9.99984876169245e-07, |
| "loss": 0.29882046580314636, |
| "step": 213, |
| "token_acc": 0.9213449414590213 |
| }, |
| { |
| "epoch": 0.04514767932489452, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.999835324714307e-07, |
| "loss": 0.24097202718257904, |
| "step": 214, |
| "token_acc": 0.9359388774610637 |
| }, |
| { |
| "epoch": 0.04535864978902954, |
| "grad_norm": 1.703125, |
| "learning_rate": 9.99982131596268e-07, |
| "loss": 0.28899186849594116, |
| "step": 215, |
| "token_acc": 0.9218701937865272 |
| }, |
| { |
| "epoch": 0.04556962025316456, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.999806735439165e-07, |
| "loss": 0.2710872292518616, |
| "step": 216, |
| "token_acc": 0.9248780487804878 |
| }, |
| { |
| "epoch": 0.04578059071729958, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.999791583145433e-07, |
| "loss": 0.2295331209897995, |
| "step": 217, |
| "token_acc": 0.9357304643261608 |
| }, |
| { |
| "epoch": 0.0459915611814346, |
| "grad_norm": 0.9921875, |
| "learning_rate": 9.999775859083216e-07, |
| "loss": 0.2171935886144638, |
| "step": 218, |
| "token_acc": 0.9356940509915014 |
| }, |
| { |
| "epoch": 0.046202531645569624, |
| "grad_norm": 0.94921875, |
| "learning_rate": 9.99975956325431e-07, |
| "loss": 0.2726757526397705, |
| "step": 219, |
| "token_acc": 0.9209691375829248 |
| }, |
| { |
| "epoch": 0.046413502109704644, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.99974269566058e-07, |
| "loss": 0.27028149366378784, |
| "step": 220, |
| "token_acc": 0.9284016636957814 |
| }, |
| { |
| "epoch": 0.04662447257383966, |
| "grad_norm": 0.56640625, |
| "learning_rate": 9.999725256303957e-07, |
| "loss": 0.20975014567375183, |
| "step": 221, |
| "token_acc": 0.9344503233392122 |
| }, |
| { |
| "epoch": 0.04683544303797468, |
| "grad_norm": 0.80078125, |
| "learning_rate": 9.999707245186434e-07, |
| "loss": 0.3065168261528015, |
| "step": 222, |
| "token_acc": 0.9186879823594267 |
| }, |
| { |
| "epoch": 0.0470464135021097, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.999688662310072e-07, |
| "loss": 0.20764990150928497, |
| "step": 223, |
| "token_acc": 0.9452255418863503 |
| }, |
| { |
| "epoch": 0.04725738396624472, |
| "grad_norm": 0.89453125, |
| "learning_rate": 9.99966950767699e-07, |
| "loss": 0.2654411196708679, |
| "step": 224, |
| "token_acc": 0.9302244039270687 |
| }, |
| { |
| "epoch": 0.04746835443037975, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.999649781289385e-07, |
| "loss": 0.2514041066169739, |
| "step": 225, |
| "token_acc": 0.933082271147161 |
| }, |
| { |
| "epoch": 0.04767932489451477, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.99962948314951e-07, |
| "loss": 0.21037127077579498, |
| "step": 226, |
| "token_acc": 0.9351134846461949 |
| }, |
| { |
| "epoch": 0.04789029535864979, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.99960861325969e-07, |
| "loss": 0.21236909925937653, |
| "step": 227, |
| "token_acc": 0.940097449125824 |
| }, |
| { |
| "epoch": 0.04810126582278481, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.999587171622305e-07, |
| "loss": 0.21992863714694977, |
| "step": 228, |
| "token_acc": 0.9344711978055471 |
| }, |
| { |
| "epoch": 0.04831223628691983, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.999565158239812e-07, |
| "loss": 0.26401764154434204, |
| "step": 229, |
| "token_acc": 0.9244654262704805 |
| }, |
| { |
| "epoch": 0.04852320675105485, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.999542573114728e-07, |
| "loss": 0.24087585508823395, |
| "step": 230, |
| "token_acc": 0.926786751888437 |
| }, |
| { |
| "epoch": 0.048734177215189876, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.999519416249634e-07, |
| "loss": 0.2533552646636963, |
| "step": 231, |
| "token_acc": 0.9275784028451342 |
| }, |
| { |
| "epoch": 0.048945147679324896, |
| "grad_norm": 1.4296875, |
| "learning_rate": 9.999495687647178e-07, |
| "loss": 0.2529897689819336, |
| "step": 232, |
| "token_acc": 0.9269195189639223 |
| }, |
| { |
| "epoch": 0.049156118143459916, |
| "grad_norm": 0.81640625, |
| "learning_rate": 9.999471387310077e-07, |
| "loss": 0.2788076400756836, |
| "step": 233, |
| "token_acc": 0.9202168861347793 |
| }, |
| { |
| "epoch": 0.049367088607594936, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.999446515241108e-07, |
| "loss": 0.2300492525100708, |
| "step": 234, |
| "token_acc": 0.9325668116842759 |
| }, |
| { |
| "epoch": 0.049578059071729956, |
| "grad_norm": 0.85546875, |
| "learning_rate": 9.999421071443115e-07, |
| "loss": 0.2711006700992584, |
| "step": 235, |
| "token_acc": 0.9220738900962434 |
| }, |
| { |
| "epoch": 0.049789029535864976, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.999395055919007e-07, |
| "loss": 0.24382656812667847, |
| "step": 236, |
| "token_acc": 0.9297777777777778 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.999368468671758e-07, |
| "loss": 0.2818126380443573, |
| "step": 237, |
| "token_acc": 0.9211531781868705 |
| }, |
| { |
| "epoch": 0.05021097046413502, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.999341309704413e-07, |
| "loss": 0.29420921206474304, |
| "step": 238, |
| "token_acc": 0.9187301587301587 |
| }, |
| { |
| "epoch": 0.05042194092827004, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.999313579020074e-07, |
| "loss": 0.24233081936836243, |
| "step": 239, |
| "token_acc": 0.9322516367776829 |
| }, |
| { |
| "epoch": 0.05063291139240506, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.999285276621913e-07, |
| "loss": 0.22199922800064087, |
| "step": 240, |
| "token_acc": 0.9347892956013534 |
| }, |
| { |
| "epoch": 0.05084388185654008, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.999256402513168e-07, |
| "loss": 0.2756049931049347, |
| "step": 241, |
| "token_acc": 0.9229754682141915 |
| }, |
| { |
| "epoch": 0.0510548523206751, |
| "grad_norm": 1.7421875, |
| "learning_rate": 9.999226956697138e-07, |
| "loss": 0.2459474354982376, |
| "step": 242, |
| "token_acc": 0.9287122207621551 |
| }, |
| { |
| "epoch": 0.05126582278481013, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.999196939177195e-07, |
| "loss": 0.26543667912483215, |
| "step": 243, |
| "token_acc": 0.9251445086705202 |
| }, |
| { |
| "epoch": 0.05147679324894515, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.999166349956768e-07, |
| "loss": 0.29306668043136597, |
| "step": 244, |
| "token_acc": 0.922089552238806 |
| }, |
| { |
| "epoch": 0.05168776371308017, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.999135189039356e-07, |
| "loss": 0.232993021607399, |
| "step": 245, |
| "token_acc": 0.933374460209747 |
| }, |
| { |
| "epoch": 0.05189873417721519, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.999103456428522e-07, |
| "loss": 0.29452502727508545, |
| "step": 246, |
| "token_acc": 0.9255251432208784 |
| }, |
| { |
| "epoch": 0.05210970464135021, |
| "grad_norm": 0.75, |
| "learning_rate": 9.999071152127897e-07, |
| "loss": 0.2289431095123291, |
| "step": 247, |
| "token_acc": 0.9372047791053071 |
| }, |
| { |
| "epoch": 0.05232067510548523, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.999038276141175e-07, |
| "loss": 0.3194141983985901, |
| "step": 248, |
| "token_acc": 0.914375 |
| }, |
| { |
| "epoch": 0.052531645569620256, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.999004828472112e-07, |
| "loss": 0.24136003851890564, |
| "step": 249, |
| "token_acc": 0.9315025252525253 |
| }, |
| { |
| "epoch": 0.052742616033755275, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.998970809124537e-07, |
| "loss": 0.31663718819618225, |
| "step": 250, |
| "token_acc": 0.9186367823150138 |
| }, |
| { |
| "epoch": 0.052953586497890295, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.998936218102338e-07, |
| "loss": 0.2638603448867798, |
| "step": 251, |
| "token_acc": 0.9242610837438424 |
| }, |
| { |
| "epoch": 0.053164556962025315, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.998901055409474e-07, |
| "loss": 0.26234734058380127, |
| "step": 252, |
| "token_acc": 0.9283480238839921 |
| }, |
| { |
| "epoch": 0.053375527426160335, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.99886532104996e-07, |
| "loss": 0.27683377265930176, |
| "step": 253, |
| "token_acc": 0.9228208232445521 |
| }, |
| { |
| "epoch": 0.05358649789029536, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.99882901502789e-07, |
| "loss": 0.20958667993545532, |
| "step": 254, |
| "token_acc": 0.9367622259696459 |
| }, |
| { |
| "epoch": 0.05379746835443038, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.998792137347412e-07, |
| "loss": 0.2769642174243927, |
| "step": 255, |
| "token_acc": 0.9259877573734001 |
| }, |
| { |
| "epoch": 0.0540084388185654, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.998754688012744e-07, |
| "loss": 0.291420578956604, |
| "step": 256, |
| "token_acc": 0.9195469067673541 |
| }, |
| { |
| "epoch": 0.05421940928270042, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.998716667028166e-07, |
| "loss": 0.2671175003051758, |
| "step": 257, |
| "token_acc": 0.9248520710059172 |
| }, |
| { |
| "epoch": 0.05443037974683544, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.99867807439803e-07, |
| "loss": 0.2104148268699646, |
| "step": 258, |
| "token_acc": 0.9412735070933685 |
| }, |
| { |
| "epoch": 0.05464135021097046, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.99863891012675e-07, |
| "loss": 0.25562331080436707, |
| "step": 259, |
| "token_acc": 0.9300422386483632 |
| }, |
| { |
| "epoch": 0.05485232067510549, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.998599174218797e-07, |
| "loss": 0.25945645570755005, |
| "step": 260, |
| "token_acc": 0.9278557114228457 |
| }, |
| { |
| "epoch": 0.05506329113924051, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.998558866678726e-07, |
| "loss": 0.2575325667858124, |
| "step": 261, |
| "token_acc": 0.9237336368810473 |
| }, |
| { |
| "epoch": 0.05527426160337553, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.998517987511139e-07, |
| "loss": 0.21312668919563293, |
| "step": 262, |
| "token_acc": 0.9391352244560727 |
| }, |
| { |
| "epoch": 0.05548523206751055, |
| "grad_norm": 1.5390625, |
| "learning_rate": 9.998476536720712e-07, |
| "loss": 0.27397406101226807, |
| "step": 263, |
| "token_acc": 0.9302109181141439 |
| }, |
| { |
| "epoch": 0.05569620253164557, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.998434514312187e-07, |
| "loss": 0.27095240354537964, |
| "step": 264, |
| "token_acc": 0.9266853059956508 |
| }, |
| { |
| "epoch": 0.05590717299578059, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.99839192029037e-07, |
| "loss": 0.237601175904274, |
| "step": 265, |
| "token_acc": 0.9364719228587635 |
| }, |
| { |
| "epoch": 0.056118143459915615, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.998348754660129e-07, |
| "loss": 0.2851409316062927, |
| "step": 266, |
| "token_acc": 0.9176470588235294 |
| }, |
| { |
| "epoch": 0.056329113924050635, |
| "grad_norm": 0.796875, |
| "learning_rate": 9.998305017426403e-07, |
| "loss": 0.26605701446533203, |
| "step": 267, |
| "token_acc": 0.9261460101867572 |
| }, |
| { |
| "epoch": 0.056540084388185655, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.998260708594192e-07, |
| "loss": 0.26237568259239197, |
| "step": 268, |
| "token_acc": 0.9257142857142857 |
| }, |
| { |
| "epoch": 0.056751054852320675, |
| "grad_norm": 0.859375, |
| "learning_rate": 9.998215828168566e-07, |
| "loss": 0.2315206527709961, |
| "step": 269, |
| "token_acc": 0.9332755632582322 |
| }, |
| { |
| "epoch": 0.056962025316455694, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.998170376154654e-07, |
| "loss": 0.26748204231262207, |
| "step": 270, |
| "token_acc": 0.9308067757680161 |
| }, |
| { |
| "epoch": 0.057172995780590714, |
| "grad_norm": 0.9609375, |
| "learning_rate": 9.998124352557655e-07, |
| "loss": 0.33397209644317627, |
| "step": 271, |
| "token_acc": 0.9161147902869757 |
| }, |
| { |
| "epoch": 0.05738396624472574, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.998077757382835e-07, |
| "loss": 0.2637864351272583, |
| "step": 272, |
| "token_acc": 0.9291949563530553 |
| }, |
| { |
| "epoch": 0.05759493670886076, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.998030590635517e-07, |
| "loss": 0.2878430485725403, |
| "step": 273, |
| "token_acc": 0.919882100750268 |
| }, |
| { |
| "epoch": 0.05780590717299578, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.997982852321099e-07, |
| "loss": 0.2438146024942398, |
| "step": 274, |
| "token_acc": 0.9312955692652832 |
| }, |
| { |
| "epoch": 0.0580168776371308, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.99793454244504e-07, |
| "loss": 0.2523839771747589, |
| "step": 275, |
| "token_acc": 0.9254349627174814 |
| }, |
| { |
| "epoch": 0.05822784810126582, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.997885661012865e-07, |
| "loss": 0.23295487463474274, |
| "step": 276, |
| "token_acc": 0.937351934719663 |
| }, |
| { |
| "epoch": 0.05843881856540084, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.99783620803016e-07, |
| "loss": 0.287087619304657, |
| "step": 277, |
| "token_acc": 0.9255798969072165 |
| }, |
| { |
| "epoch": 0.05864978902953587, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.997786183502584e-07, |
| "loss": 0.23424138128757477, |
| "step": 278, |
| "token_acc": 0.924812030075188 |
| }, |
| { |
| "epoch": 0.05886075949367089, |
| "grad_norm": 0.9296875, |
| "learning_rate": 9.997735587435858e-07, |
| "loss": 0.25225332379341125, |
| "step": 279, |
| "token_acc": 0.9257325210327821 |
| }, |
| { |
| "epoch": 0.05907172995780591, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.997684419835767e-07, |
| "loss": 0.24867427349090576, |
| "step": 280, |
| "token_acc": 0.9290465631929047 |
| }, |
| { |
| "epoch": 0.05928270042194093, |
| "grad_norm": 1.265625, |
| "learning_rate": 9.997632680708163e-07, |
| "loss": 0.2555754482746124, |
| "step": 281, |
| "token_acc": 0.9300212056952438 |
| }, |
| { |
| "epoch": 0.05949367088607595, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.99758037005896e-07, |
| "loss": 0.25050830841064453, |
| "step": 282, |
| "token_acc": 0.9328039095907147 |
| }, |
| { |
| "epoch": 0.05970464135021097, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.997527487894144e-07, |
| "loss": 0.264704167842865, |
| "step": 283, |
| "token_acc": 0.9269878805793674 |
| }, |
| { |
| "epoch": 0.059915611814345994, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.997474034219762e-07, |
| "loss": 0.29550492763519287, |
| "step": 284, |
| "token_acc": 0.9211438474870017 |
| }, |
| { |
| "epoch": 0.060126582278481014, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.997420009041927e-07, |
| "loss": 0.264403373003006, |
| "step": 285, |
| "token_acc": 0.9260048721071864 |
| }, |
| { |
| "epoch": 0.060337552742616034, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.997365412366812e-07, |
| "loss": 0.2595897316932678, |
| "step": 286, |
| "token_acc": 0.9286173633440514 |
| }, |
| { |
| "epoch": 0.060548523206751054, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.997310244200667e-07, |
| "loss": 0.23976776003837585, |
| "step": 287, |
| "token_acc": 0.9318894271872328 |
| }, |
| { |
| "epoch": 0.060759493670886074, |
| "grad_norm": 0.9140625, |
| "learning_rate": 9.997254504549799e-07, |
| "loss": 0.26183438301086426, |
| "step": 288, |
| "token_acc": 0.9322949777495232 |
| }, |
| { |
| "epoch": 0.0609704641350211, |
| "grad_norm": 1.0078125, |
| "learning_rate": 9.99719819342058e-07, |
| "loss": 0.24600914120674133, |
| "step": 289, |
| "token_acc": 0.9311287236949987 |
| }, |
| { |
| "epoch": 0.06118143459915612, |
| "grad_norm": 0.83984375, |
| "learning_rate": 9.997141310819454e-07, |
| "loss": 0.3296029567718506, |
| "step": 290, |
| "token_acc": 0.9126184834123223 |
| }, |
| { |
| "epoch": 0.06139240506329114, |
| "grad_norm": 0.85546875, |
| "learning_rate": 9.997083856752923e-07, |
| "loss": 0.2794192433357239, |
| "step": 291, |
| "token_acc": 0.9190751445086706 |
| }, |
| { |
| "epoch": 0.06160337552742616, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.997025831227557e-07, |
| "loss": 0.23178298771381378, |
| "step": 292, |
| "token_acc": 0.9380645161290323 |
| }, |
| { |
| "epoch": 0.06181434599156118, |
| "grad_norm": 0.87109375, |
| "learning_rate": 9.996967234249994e-07, |
| "loss": 0.2989250123500824, |
| "step": 293, |
| "token_acc": 0.9201101928374655 |
| }, |
| { |
| "epoch": 0.0620253164556962, |
| "grad_norm": 0.625, |
| "learning_rate": 9.996908065826935e-07, |
| "loss": 0.20801636576652527, |
| "step": 294, |
| "token_acc": 0.9374828626268166 |
| }, |
| { |
| "epoch": 0.06223628691983123, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.996848325965142e-07, |
| "loss": 0.2513968050479889, |
| "step": 295, |
| "token_acc": 0.9286151960784313 |
| }, |
| { |
| "epoch": 0.06244725738396625, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.99678801467145e-07, |
| "loss": 0.23670442402362823, |
| "step": 296, |
| "token_acc": 0.9297945205479452 |
| }, |
| { |
| "epoch": 0.06265822784810127, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.99672713195276e-07, |
| "loss": 0.3005760908126831, |
| "step": 297, |
| "token_acc": 0.9181008902077151 |
| }, |
| { |
| "epoch": 0.0628691983122363, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.996665677816027e-07, |
| "loss": 0.2198331356048584, |
| "step": 298, |
| "token_acc": 0.934411226357535 |
| }, |
| { |
| "epoch": 0.0630801687763713, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.996603652268283e-07, |
| "loss": 0.22930385172367096, |
| "step": 299, |
| "token_acc": 0.9343891402714932 |
| }, |
| { |
| "epoch": 0.06329113924050633, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.99654105531662e-07, |
| "loss": 0.26769182085990906, |
| "step": 300, |
| "token_acc": 0.9295731707317073 |
| }, |
| { |
| "epoch": 0.06350210970464135, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.9964778869682e-07, |
| "loss": 0.2113886922597885, |
| "step": 301, |
| "token_acc": 0.9383966244725739 |
| }, |
| { |
| "epoch": 0.06371308016877637, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.996414147230242e-07, |
| "loss": 0.2549387812614441, |
| "step": 302, |
| "token_acc": 0.9245056920311564 |
| }, |
| { |
| "epoch": 0.06392405063291139, |
| "grad_norm": 0.83203125, |
| "learning_rate": 9.996349836110035e-07, |
| "loss": 0.24877741932868958, |
| "step": 303, |
| "token_acc": 0.9278890600924499 |
| }, |
| { |
| "epoch": 0.06413502109704641, |
| "grad_norm": 0.85546875, |
| "learning_rate": 9.996284953614938e-07, |
| "loss": 0.2965357303619385, |
| "step": 304, |
| "token_acc": 0.9167351410572446 |
| }, |
| { |
| "epoch": 0.06434599156118144, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.996219499752365e-07, |
| "loss": 0.21444806456565857, |
| "step": 305, |
| "token_acc": 0.938101788170564 |
| }, |
| { |
| "epoch": 0.06455696202531645, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.996153474529807e-07, |
| "loss": 0.24650560319423676, |
| "step": 306, |
| "token_acc": 0.928284854563691 |
| }, |
| { |
| "epoch": 0.06476793248945148, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.996086877954812e-07, |
| "loss": 0.26447594165802, |
| "step": 307, |
| "token_acc": 0.9272459499263623 |
| }, |
| { |
| "epoch": 0.06497890295358649, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.996019710034997e-07, |
| "loss": 0.22312676906585693, |
| "step": 308, |
| "token_acc": 0.9304399524375743 |
| }, |
| { |
| "epoch": 0.06518987341772152, |
| "grad_norm": 0.75, |
| "learning_rate": 9.99595197077804e-07, |
| "loss": 0.3124806582927704, |
| "step": 309, |
| "token_acc": 0.9134506242905789 |
| }, |
| { |
| "epoch": 0.06540084388185655, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.99588366019169e-07, |
| "loss": 0.21681739389896393, |
| "step": 310, |
| "token_acc": 0.9393859879296772 |
| }, |
| { |
| "epoch": 0.06561181434599156, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.99581477828376e-07, |
| "loss": 0.262542188167572, |
| "step": 311, |
| "token_acc": 0.9339063426200356 |
| }, |
| { |
| "epoch": 0.06582278481012659, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.995745325062126e-07, |
| "loss": 0.24062800407409668, |
| "step": 312, |
| "token_acc": 0.9331594391913922 |
| }, |
| { |
| "epoch": 0.0660337552742616, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.995675300534729e-07, |
| "loss": 0.26486438512802124, |
| "step": 313, |
| "token_acc": 0.9250824093497153 |
| }, |
| { |
| "epoch": 0.06624472573839663, |
| "grad_norm": 0.54296875, |
| "learning_rate": 9.995604704709578e-07, |
| "loss": 0.18465927243232727, |
| "step": 314, |
| "token_acc": 0.9464985994397759 |
| }, |
| { |
| "epoch": 0.06645569620253164, |
| "grad_norm": 0.91015625, |
| "learning_rate": 9.99553353759475e-07, |
| "loss": 0.2520803213119507, |
| "step": 315, |
| "token_acc": 0.9257213014119091 |
| }, |
| { |
| "epoch": 0.06666666666666667, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.995461799198378e-07, |
| "loss": 0.29753151535987854, |
| "step": 316, |
| "token_acc": 0.9256516587677726 |
| }, |
| { |
| "epoch": 0.06687763713080169, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.995389489528667e-07, |
| "loss": 0.2546613812446594, |
| "step": 317, |
| "token_acc": 0.9310240048617442 |
| }, |
| { |
| "epoch": 0.0670886075949367, |
| "grad_norm": 1.4140625, |
| "learning_rate": 9.995316608593886e-07, |
| "loss": 0.24808946251869202, |
| "step": 318, |
| "token_acc": 0.9316290130796671 |
| }, |
| { |
| "epoch": 0.06729957805907173, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.995243156402374e-07, |
| "loss": 0.2512444853782654, |
| "step": 319, |
| "token_acc": 0.9294703723125328 |
| }, |
| { |
| "epoch": 0.06751054852320675, |
| "grad_norm": 0.921875, |
| "learning_rate": 9.995169132962527e-07, |
| "loss": 0.2597760260105133, |
| "step": 320, |
| "token_acc": 0.9277310924369748 |
| }, |
| { |
| "epoch": 0.06772151898734177, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.99509453828281e-07, |
| "loss": 0.2552398443222046, |
| "step": 321, |
| "token_acc": 0.9278263321116437 |
| }, |
| { |
| "epoch": 0.0679324894514768, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.995019372371754e-07, |
| "loss": 0.29060834646224976, |
| "step": 322, |
| "token_acc": 0.9247988807275271 |
| }, |
| { |
| "epoch": 0.06814345991561181, |
| "grad_norm": 0.57421875, |
| "learning_rate": 9.994943635237955e-07, |
| "loss": 0.21358612179756165, |
| "step": 323, |
| "token_acc": 0.9360210341805434 |
| }, |
| { |
| "epoch": 0.06835443037974684, |
| "grad_norm": 0.8671875, |
| "learning_rate": 9.994867326890078e-07, |
| "loss": 0.2634425759315491, |
| "step": 324, |
| "token_acc": 0.9219944937289691 |
| }, |
| { |
| "epoch": 0.06856540084388185, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.994790447336842e-07, |
| "loss": 0.3185754120349884, |
| "step": 325, |
| "token_acc": 0.9133514986376022 |
| }, |
| { |
| "epoch": 0.06877637130801688, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.994712996587044e-07, |
| "loss": 0.29746031761169434, |
| "step": 326, |
| "token_acc": 0.9256002705444707 |
| }, |
| { |
| "epoch": 0.0689873417721519, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.994634974649541e-07, |
| "loss": 0.29588472843170166, |
| "step": 327, |
| "token_acc": 0.9272846380609236 |
| }, |
| { |
| "epoch": 0.06919831223628692, |
| "grad_norm": 0.875, |
| "learning_rate": 9.994556381533252e-07, |
| "loss": 0.277068167924881, |
| "step": 328, |
| "token_acc": 0.922756981580511 |
| }, |
| { |
| "epoch": 0.06940928270042195, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.994477217247168e-07, |
| "loss": 0.27129507064819336, |
| "step": 329, |
| "token_acc": 0.9270741068792442 |
| }, |
| { |
| "epoch": 0.06962025316455696, |
| "grad_norm": 0.8671875, |
| "learning_rate": 9.994397481800342e-07, |
| "loss": 0.24473360180854797, |
| "step": 330, |
| "token_acc": 0.9313361611876988 |
| }, |
| { |
| "epoch": 0.06983122362869199, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.994317175201893e-07, |
| "loss": 0.22818127274513245, |
| "step": 331, |
| "token_acc": 0.9380833851897946 |
| }, |
| { |
| "epoch": 0.070042194092827, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.994236297461003e-07, |
| "loss": 0.262783944606781, |
| "step": 332, |
| "token_acc": 0.9235968263297091 |
| }, |
| { |
| "epoch": 0.07025316455696203, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.994154848586919e-07, |
| "loss": 0.24861930310726166, |
| "step": 333, |
| "token_acc": 0.930621342992477 |
| }, |
| { |
| "epoch": 0.07046413502109705, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.99407282858896e-07, |
| "loss": 0.26165515184402466, |
| "step": 334, |
| "token_acc": 0.9247956403269755 |
| }, |
| { |
| "epoch": 0.07067510548523206, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.993990237476504e-07, |
| "loss": 0.23681169748306274, |
| "step": 335, |
| "token_acc": 0.9285078611687927 |
| }, |
| { |
| "epoch": 0.07088607594936709, |
| "grad_norm": 1.65625, |
| "learning_rate": 9.993907075258994e-07, |
| "loss": 0.2824210524559021, |
| "step": 336, |
| "token_acc": 0.925273390036452 |
| }, |
| { |
| "epoch": 0.0710970464135021, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.993823341945942e-07, |
| "loss": 0.2578677535057068, |
| "step": 337, |
| "token_acc": 0.9232230059685296 |
| }, |
| { |
| "epoch": 0.07130801687763713, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.993739037546924e-07, |
| "loss": 0.25358960032463074, |
| "step": 338, |
| "token_acc": 0.9312054539820267 |
| }, |
| { |
| "epoch": 0.07151898734177216, |
| "grad_norm": 0.83984375, |
| "learning_rate": 9.99365416207158e-07, |
| "loss": 0.2980523705482483, |
| "step": 339, |
| "token_acc": 0.9155807365439094 |
| }, |
| { |
| "epoch": 0.07172995780590717, |
| "grad_norm": 0.9296875, |
| "learning_rate": 9.993568715529616e-07, |
| "loss": 0.29448622465133667, |
| "step": 340, |
| "token_acc": 0.9224402207234825 |
| }, |
| { |
| "epoch": 0.0719409282700422, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.993482697930805e-07, |
| "loss": 0.2686302065849304, |
| "step": 341, |
| "token_acc": 0.9284134881149807 |
| }, |
| { |
| "epoch": 0.07215189873417721, |
| "grad_norm": 0.75, |
| "learning_rate": 9.993396109284985e-07, |
| "loss": 0.2800794839859009, |
| "step": 342, |
| "token_acc": 0.9271758436944938 |
| }, |
| { |
| "epoch": 0.07236286919831224, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.993308949602054e-07, |
| "loss": 0.2576884329319, |
| "step": 343, |
| "token_acc": 0.9227618490345231 |
| }, |
| { |
| "epoch": 0.07257383966244725, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.993221218891982e-07, |
| "loss": 0.24451857805252075, |
| "step": 344, |
| "token_acc": 0.933461117196057 |
| }, |
| { |
| "epoch": 0.07278481012658228, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.993132917164801e-07, |
| "loss": 0.2957763075828552, |
| "step": 345, |
| "token_acc": 0.9123943661971831 |
| }, |
| { |
| "epoch": 0.0729957805907173, |
| "grad_norm": 1.6875, |
| "learning_rate": 9.99304404443061e-07, |
| "loss": 0.253467321395874, |
| "step": 346, |
| "token_acc": 0.9318112633181126 |
| }, |
| { |
| "epoch": 0.07320675105485232, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.99295460069957e-07, |
| "loss": 0.2847754955291748, |
| "step": 347, |
| "token_acc": 0.9224688355123137 |
| }, |
| { |
| "epoch": 0.07341772151898734, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.992864585981913e-07, |
| "loss": 0.25176408886909485, |
| "step": 348, |
| "token_acc": 0.9317745035233824 |
| }, |
| { |
| "epoch": 0.07362869198312236, |
| "grad_norm": 0.96484375, |
| "learning_rate": 9.99277400028793e-07, |
| "loss": 0.282976359128952, |
| "step": 349, |
| "token_acc": 0.9220568335588634 |
| }, |
| { |
| "epoch": 0.07383966244725738, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.992682843627984e-07, |
| "loss": 0.2807369530200958, |
| "step": 350, |
| "token_acc": 0.9215148188803512 |
| }, |
| { |
| "epoch": 0.07405063291139241, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.992591116012495e-07, |
| "loss": 0.2882058322429657, |
| "step": 351, |
| "token_acc": 0.9216602528862012 |
| }, |
| { |
| "epoch": 0.07426160337552742, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.992498817451955e-07, |
| "loss": 0.27112358808517456, |
| "step": 352, |
| "token_acc": 0.9312214611872146 |
| }, |
| { |
| "epoch": 0.07447257383966245, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.99240594795692e-07, |
| "loss": 0.25564056634902954, |
| "step": 353, |
| "token_acc": 0.9308086560364465 |
| }, |
| { |
| "epoch": 0.07468354430379746, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.99231250753801e-07, |
| "loss": 0.21060852706432343, |
| "step": 354, |
| "token_acc": 0.9366489046773239 |
| }, |
| { |
| "epoch": 0.07489451476793249, |
| "grad_norm": 0.5546875, |
| "learning_rate": 9.992218496205908e-07, |
| "loss": 0.23291520774364471, |
| "step": 355, |
| "token_acc": 0.9379619852164731 |
| }, |
| { |
| "epoch": 0.0751054852320675, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.99212391397137e-07, |
| "loss": 0.23014740645885468, |
| "step": 356, |
| "token_acc": 0.930849478390462 |
| }, |
| { |
| "epoch": 0.07531645569620253, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.992028760845207e-07, |
| "loss": 0.2653324604034424, |
| "step": 357, |
| "token_acc": 0.9264833574529667 |
| }, |
| { |
| "epoch": 0.07552742616033756, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.991933036838303e-07, |
| "loss": 0.23712849617004395, |
| "step": 358, |
| "token_acc": 0.9348139601961349 |
| }, |
| { |
| "epoch": 0.07573839662447257, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.991836741961605e-07, |
| "loss": 0.24832651019096375, |
| "step": 359, |
| "token_acc": 0.9297736506094022 |
| }, |
| { |
| "epoch": 0.0759493670886076, |
| "grad_norm": 2.21875, |
| "learning_rate": 9.991739876226127e-07, |
| "loss": 0.30170413851737976, |
| "step": 360, |
| "token_acc": 0.9175753688261706 |
| }, |
| { |
| "epoch": 0.07616033755274261, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.991642439642944e-07, |
| "loss": 0.2096886932849884, |
| "step": 361, |
| "token_acc": 0.9416713404374649 |
| }, |
| { |
| "epoch": 0.07637130801687764, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.991544432223198e-07, |
| "loss": 0.24230161309242249, |
| "step": 362, |
| "token_acc": 0.9317358595709111 |
| }, |
| { |
| "epoch": 0.07658227848101266, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.991445853978098e-07, |
| "loss": 0.2464846670627594, |
| "step": 363, |
| "token_acc": 0.9277708592777086 |
| }, |
| { |
| "epoch": 0.07679324894514768, |
| "grad_norm": 0.99609375, |
| "learning_rate": 9.991346704918918e-07, |
| "loss": 0.25032496452331543, |
| "step": 364, |
| "token_acc": 0.931261207411835 |
| }, |
| { |
| "epoch": 0.0770042194092827, |
| "grad_norm": 1.0, |
| "learning_rate": 9.991246985056995e-07, |
| "loss": 0.3197912871837616, |
| "step": 365, |
| "token_acc": 0.9187062937062938 |
| }, |
| { |
| "epoch": 0.07721518987341772, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.991146694403733e-07, |
| "loss": 0.2740510404109955, |
| "step": 366, |
| "token_acc": 0.9192671056398511 |
| }, |
| { |
| "epoch": 0.07742616033755274, |
| "grad_norm": 0.875, |
| "learning_rate": 9.991045832970603e-07, |
| "loss": 0.29503384232521057, |
| "step": 367, |
| "token_acc": 0.919302394324564 |
| }, |
| { |
| "epoch": 0.07763713080168777, |
| "grad_norm": 0.796875, |
| "learning_rate": 9.990944400769138e-07, |
| "loss": 0.27579015493392944, |
| "step": 368, |
| "token_acc": 0.9176502882239912 |
| }, |
| { |
| "epoch": 0.07784810126582278, |
| "grad_norm": 0.8828125, |
| "learning_rate": 9.99084239781094e-07, |
| "loss": 0.2694048583507538, |
| "step": 369, |
| "token_acc": 0.925148762918885 |
| }, |
| { |
| "epoch": 0.07805907172995781, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.990739824107669e-07, |
| "loss": 0.2885046601295471, |
| "step": 370, |
| "token_acc": 0.9219858156028369 |
| }, |
| { |
| "epoch": 0.07827004219409282, |
| "grad_norm": 1.109375, |
| "learning_rate": 9.99063667967106e-07, |
| "loss": 0.2373015433549881, |
| "step": 371, |
| "token_acc": 0.9304477611940298 |
| }, |
| { |
| "epoch": 0.07848101265822785, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.990532964512901e-07, |
| "loss": 0.29645416140556335, |
| "step": 372, |
| "token_acc": 0.918646080760095 |
| }, |
| { |
| "epoch": 0.07869198312236286, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.990428678645062e-07, |
| "loss": 0.24266409873962402, |
| "step": 373, |
| "token_acc": 0.9363528715216104 |
| }, |
| { |
| "epoch": 0.07890295358649789, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.990323822079464e-07, |
| "loss": 0.2219400256872177, |
| "step": 374, |
| "token_acc": 0.9366262814538676 |
| }, |
| { |
| "epoch": 0.07911392405063292, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.9902183948281e-07, |
| "loss": 0.2171144187450409, |
| "step": 375, |
| "token_acc": 0.937206572769953 |
| }, |
| { |
| "epoch": 0.07932489451476793, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.990112396903027e-07, |
| "loss": 0.23284628987312317, |
| "step": 376, |
| "token_acc": 0.9356833642547928 |
| }, |
| { |
| "epoch": 0.07953586497890296, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.990005828316363e-07, |
| "loss": 0.26610440015792847, |
| "step": 377, |
| "token_acc": 0.9276832460732984 |
| }, |
| { |
| "epoch": 0.07974683544303797, |
| "grad_norm": 0.55859375, |
| "learning_rate": 9.989898689080299e-07, |
| "loss": 0.19865182042121887, |
| "step": 378, |
| "token_acc": 0.946978672985782 |
| }, |
| { |
| "epoch": 0.079957805907173, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.989790979207085e-07, |
| "loss": 0.2547116279602051, |
| "step": 379, |
| "token_acc": 0.9301753306674869 |
| }, |
| { |
| "epoch": 0.08016877637130802, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.98968269870904e-07, |
| "loss": 0.2702917456626892, |
| "step": 380, |
| "token_acc": 0.9255110613273593 |
| }, |
| { |
| "epoch": 0.08037974683544304, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.989573847598545e-07, |
| "loss": 0.24545586109161377, |
| "step": 381, |
| "token_acc": 0.9353233830845771 |
| }, |
| { |
| "epoch": 0.08059071729957806, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.98946442588805e-07, |
| "loss": 0.25997835397720337, |
| "step": 382, |
| "token_acc": 0.9318181818181818 |
| }, |
| { |
| "epoch": 0.08080168776371308, |
| "grad_norm": 0.921875, |
| "learning_rate": 9.989354433590067e-07, |
| "loss": 0.2865683436393738, |
| "step": 383, |
| "token_acc": 0.9229352164568622 |
| }, |
| { |
| "epoch": 0.0810126582278481, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.989243870717174e-07, |
| "loss": 0.25773969292640686, |
| "step": 384, |
| "token_acc": 0.9284097340124505 |
| }, |
| { |
| "epoch": 0.08122362869198312, |
| "grad_norm": 0.87890625, |
| "learning_rate": 9.989132737282015e-07, |
| "loss": 0.2665586471557617, |
| "step": 385, |
| "token_acc": 0.92599672310213 |
| }, |
| { |
| "epoch": 0.08143459915611814, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.989021033297302e-07, |
| "loss": 0.29251331090927124, |
| "step": 386, |
| "token_acc": 0.9255349500713267 |
| }, |
| { |
| "epoch": 0.08164556962025317, |
| "grad_norm": 0.90625, |
| "learning_rate": 9.988908758775807e-07, |
| "loss": 0.31350889801979065, |
| "step": 387, |
| "token_acc": 0.9161179501860864 |
| }, |
| { |
| "epoch": 0.08185654008438818, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.98879591373037e-07, |
| "loss": 0.2779198884963989, |
| "step": 388, |
| "token_acc": 0.9238483234095894 |
| }, |
| { |
| "epoch": 0.08206751054852321, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.988682498173895e-07, |
| "loss": 0.22718225419521332, |
| "step": 389, |
| "token_acc": 0.9420247204237787 |
| }, |
| { |
| "epoch": 0.08227848101265822, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.98856851211935e-07, |
| "loss": 0.22414159774780273, |
| "step": 390, |
| "token_acc": 0.9357366771159875 |
| }, |
| { |
| "epoch": 0.08248945147679325, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.988453955579776e-07, |
| "loss": 0.2700081467628479, |
| "step": 391, |
| "token_acc": 0.9235555555555556 |
| }, |
| { |
| "epoch": 0.08270042194092828, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.98833882856827e-07, |
| "loss": 0.24140852689743042, |
| "step": 392, |
| "token_acc": 0.9277988101676582 |
| }, |
| { |
| "epoch": 0.08291139240506329, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.988223131097996e-07, |
| "loss": 0.28851526975631714, |
| "step": 393, |
| "token_acc": 0.9173528514791095 |
| }, |
| { |
| "epoch": 0.08312236286919832, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.98810686318219e-07, |
| "loss": 0.2673947215080261, |
| "step": 394, |
| "token_acc": 0.9220706930141943 |
| }, |
| { |
| "epoch": 0.08333333333333333, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.98799002483414e-07, |
| "loss": 0.27388495206832886, |
| "step": 395, |
| "token_acc": 0.9260257562144355 |
| }, |
| { |
| "epoch": 0.08354430379746836, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.987872616067216e-07, |
| "loss": 0.2556672692298889, |
| "step": 396, |
| "token_acc": 0.9255903349807798 |
| }, |
| { |
| "epoch": 0.08375527426160338, |
| "grad_norm": 0.96875, |
| "learning_rate": 9.987754636894843e-07, |
| "loss": 0.32614314556121826, |
| "step": 397, |
| "token_acc": 0.9155054847316929 |
| }, |
| { |
| "epoch": 0.0839662447257384, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.987636087330509e-07, |
| "loss": 0.23008616268634796, |
| "step": 398, |
| "token_acc": 0.9367875647668393 |
| }, |
| { |
| "epoch": 0.08417721518987342, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.987516967387775e-07, |
| "loss": 0.2754250764846802, |
| "step": 399, |
| "token_acc": 0.9202546998180715 |
| }, |
| { |
| "epoch": 0.08438818565400844, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.98739727708026e-07, |
| "loss": 0.23332414031028748, |
| "step": 400, |
| "token_acc": 0.9356028368794326 |
| }, |
| { |
| "epoch": 0.08438818565400844, |
| "eval_loss": 0.43364420533180237, |
| "eval_runtime": 245.8014, |
| "eval_samples_per_second": 137.123, |
| "eval_steps_per_second": 2.144, |
| "eval_token_acc": 0.8990711657542853, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.08459915611814346, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.987277016421654e-07, |
| "loss": 0.2699899673461914, |
| "step": 401, |
| "token_acc": 0.9288135593220339 |
| }, |
| { |
| "epoch": 0.08481012658227848, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.98715618542571e-07, |
| "loss": 0.25560492277145386, |
| "step": 402, |
| "token_acc": 0.9252772913018097 |
| }, |
| { |
| "epoch": 0.0850210970464135, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.987034784106244e-07, |
| "loss": 0.3024590015411377, |
| "step": 403, |
| "token_acc": 0.9186206896551724 |
| }, |
| { |
| "epoch": 0.08523206751054853, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.98691281247714e-07, |
| "loss": 0.2880774438381195, |
| "step": 404, |
| "token_acc": 0.9188865609099072 |
| }, |
| { |
| "epoch": 0.08544303797468354, |
| "grad_norm": 0.859375, |
| "learning_rate": 9.986790270552347e-07, |
| "loss": 0.2641194760799408, |
| "step": 405, |
| "token_acc": 0.9306480920654149 |
| }, |
| { |
| "epoch": 0.08565400843881857, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.98666715834588e-07, |
| "loss": 0.25727787613868713, |
| "step": 406, |
| "token_acc": 0.9288548752834467 |
| }, |
| { |
| "epoch": 0.08586497890295358, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.986543475871818e-07, |
| "loss": 0.2398534119129181, |
| "step": 407, |
| "token_acc": 0.9377962085308057 |
| }, |
| { |
| "epoch": 0.08607594936708861, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.986419223144302e-07, |
| "loss": 0.25430333614349365, |
| "step": 408, |
| "token_acc": 0.9305912596401028 |
| }, |
| { |
| "epoch": 0.08628691983122364, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.986294400177544e-07, |
| "loss": 0.20513233542442322, |
| "step": 409, |
| "token_acc": 0.9459538416593631 |
| }, |
| { |
| "epoch": 0.08649789029535865, |
| "grad_norm": 0.5625, |
| "learning_rate": 9.986169006985817e-07, |
| "loss": 0.20912200212478638, |
| "step": 410, |
| "token_acc": 0.9390818128310771 |
| }, |
| { |
| "epoch": 0.08670886075949367, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.986043043583462e-07, |
| "loss": 0.2466573864221573, |
| "step": 411, |
| "token_acc": 0.9298196948682386 |
| }, |
| { |
| "epoch": 0.08691983122362869, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.98591650998488e-07, |
| "loss": 0.23664775490760803, |
| "step": 412, |
| "token_acc": 0.9329004329004329 |
| }, |
| { |
| "epoch": 0.08713080168776371, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.985789406204547e-07, |
| "loss": 0.23415768146514893, |
| "step": 413, |
| "token_acc": 0.9317912218268091 |
| }, |
| { |
| "epoch": 0.08734177215189873, |
| "grad_norm": 0.75, |
| "learning_rate": 9.985661732256998e-07, |
| "loss": 0.2954852283000946, |
| "step": 414, |
| "token_acc": 0.9218203033838973 |
| }, |
| { |
| "epoch": 0.08755274261603375, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.98553348815683e-07, |
| "loss": 0.2565650939941406, |
| "step": 415, |
| "token_acc": 0.9298298906439855 |
| }, |
| { |
| "epoch": 0.08776371308016878, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.98540467391871e-07, |
| "loss": 0.22425369918346405, |
| "step": 416, |
| "token_acc": 0.9359098228663446 |
| }, |
| { |
| "epoch": 0.0879746835443038, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.98527528955737e-07, |
| "loss": 0.2637927532196045, |
| "step": 417, |
| "token_acc": 0.9261443414771132 |
| }, |
| { |
| "epoch": 0.08818565400843882, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.985145335087605e-07, |
| "loss": 0.27013063430786133, |
| "step": 418, |
| "token_acc": 0.9248719408081958 |
| }, |
| { |
| "epoch": 0.08839662447257383, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.985014810524278e-07, |
| "loss": 0.25381606817245483, |
| "step": 419, |
| "token_acc": 0.9342265529841657 |
| }, |
| { |
| "epoch": 0.08860759493670886, |
| "grad_norm": 1.59375, |
| "learning_rate": 9.984883715882315e-07, |
| "loss": 0.2093265801668167, |
| "step": 420, |
| "token_acc": 0.936447410231967 |
| }, |
| { |
| "epoch": 0.08881856540084389, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.984752051176707e-07, |
| "loss": 0.2633010447025299, |
| "step": 421, |
| "token_acc": 0.9257308401369502 |
| }, |
| { |
| "epoch": 0.0890295358649789, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.98461981642251e-07, |
| "loss": 0.28037169575691223, |
| "step": 422, |
| "token_acc": 0.9264617239300783 |
| }, |
| { |
| "epoch": 0.08924050632911393, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.984487011634848e-07, |
| "loss": 0.23874756693840027, |
| "step": 423, |
| "token_acc": 0.9345043167609407 |
| }, |
| { |
| "epoch": 0.08945147679324894, |
| "grad_norm": 4.9375, |
| "learning_rate": 9.984353636828908e-07, |
| "loss": 0.2935020923614502, |
| "step": 424, |
| "token_acc": 0.9228658536585366 |
| }, |
| { |
| "epoch": 0.08966244725738397, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.984219692019943e-07, |
| "loss": 0.2578403949737549, |
| "step": 425, |
| "token_acc": 0.9282193468884782 |
| }, |
| { |
| "epoch": 0.08987341772151898, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.98408517722327e-07, |
| "loss": 0.24426788091659546, |
| "step": 426, |
| "token_acc": 0.9371653987038603 |
| }, |
| { |
| "epoch": 0.09008438818565401, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.983950092454272e-07, |
| "loss": 0.2677040994167328, |
| "step": 427, |
| "token_acc": 0.9215219976218787 |
| }, |
| { |
| "epoch": 0.09029535864978903, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.983814437728396e-07, |
| "loss": 0.2604065239429474, |
| "step": 428, |
| "token_acc": 0.9289383561643836 |
| }, |
| { |
| "epoch": 0.09050632911392405, |
| "grad_norm": 1.3125, |
| "learning_rate": 9.983678213061157e-07, |
| "loss": 0.24889585375785828, |
| "step": 429, |
| "token_acc": 0.9251644736842105 |
| }, |
| { |
| "epoch": 0.09071729957805907, |
| "grad_norm": 0.796875, |
| "learning_rate": 9.983541418468134e-07, |
| "loss": 0.2805905342102051, |
| "step": 430, |
| "token_acc": 0.9248591108328115 |
| }, |
| { |
| "epoch": 0.09092827004219409, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.983404053964967e-07, |
| "loss": 0.2725668251514435, |
| "step": 431, |
| "token_acc": 0.9280293116985082 |
| }, |
| { |
| "epoch": 0.09113924050632911, |
| "grad_norm": 0.84375, |
| "learning_rate": 9.98326611956737e-07, |
| "loss": 0.3275222182273865, |
| "step": 432, |
| "token_acc": 0.9095477386934674 |
| }, |
| { |
| "epoch": 0.09135021097046414, |
| "grad_norm": 0.91015625, |
| "learning_rate": 9.98312761529111e-07, |
| "loss": 0.27813225984573364, |
| "step": 433, |
| "token_acc": 0.9216018048505358 |
| }, |
| { |
| "epoch": 0.09156118143459915, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.982988541152036e-07, |
| "loss": 0.2637915015220642, |
| "step": 434, |
| "token_acc": 0.9244929797191888 |
| }, |
| { |
| "epoch": 0.09177215189873418, |
| "grad_norm": 1.0703125, |
| "learning_rate": 9.982848897166042e-07, |
| "loss": 0.2686794102191925, |
| "step": 435, |
| "token_acc": 0.9231661351116266 |
| }, |
| { |
| "epoch": 0.0919831223628692, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.982708683349105e-07, |
| "loss": 0.24819687008857727, |
| "step": 436, |
| "token_acc": 0.9376739009460211 |
| }, |
| { |
| "epoch": 0.09219409282700422, |
| "grad_norm": 0.75, |
| "learning_rate": 9.982567899717256e-07, |
| "loss": 0.25789859890937805, |
| "step": 437, |
| "token_acc": 0.9243027888446215 |
| }, |
| { |
| "epoch": 0.09240506329113925, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.982426546286596e-07, |
| "loss": 0.2573246359825134, |
| "step": 438, |
| "token_acc": 0.9290590679726922 |
| }, |
| { |
| "epoch": 0.09261603375527426, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.98228462307329e-07, |
| "loss": 0.2979215383529663, |
| "step": 439, |
| "token_acc": 0.9225908372827805 |
| }, |
| { |
| "epoch": 0.09282700421940929, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.982142130093566e-07, |
| "loss": 0.2403128445148468, |
| "step": 440, |
| "token_acc": 0.9290377519159807 |
| }, |
| { |
| "epoch": 0.0930379746835443, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.98199906736372e-07, |
| "loss": 0.2767883241176605, |
| "step": 441, |
| "token_acc": 0.9238754325259516 |
| }, |
| { |
| "epoch": 0.09324894514767933, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.981855434900115e-07, |
| "loss": 0.25662270188331604, |
| "step": 442, |
| "token_acc": 0.9294367050272562 |
| }, |
| { |
| "epoch": 0.09345991561181434, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.981711232719175e-07, |
| "loss": 0.24665901064872742, |
| "step": 443, |
| "token_acc": 0.9237830319888735 |
| }, |
| { |
| "epoch": 0.09367088607594937, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.98156646083739e-07, |
| "loss": 0.23571115732192993, |
| "step": 444, |
| "token_acc": 0.9281145293938471 |
| }, |
| { |
| "epoch": 0.0938818565400844, |
| "grad_norm": 1.21875, |
| "learning_rate": 9.981421119271316e-07, |
| "loss": 0.2607622742652893, |
| "step": 445, |
| "token_acc": 0.9253941441441441 |
| }, |
| { |
| "epoch": 0.0940928270042194, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.981275208037575e-07, |
| "loss": 0.2898206114768982, |
| "step": 446, |
| "token_acc": 0.918967587034814 |
| }, |
| { |
| "epoch": 0.09430379746835443, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.981128727152854e-07, |
| "loss": 0.2372782677412033, |
| "step": 447, |
| "token_acc": 0.9295408605255558 |
| }, |
| { |
| "epoch": 0.09451476793248945, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.980981676633903e-07, |
| "loss": 0.22987963259220123, |
| "step": 448, |
| "token_acc": 0.9354383986467437 |
| }, |
| { |
| "epoch": 0.09472573839662447, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.980834056497538e-07, |
| "loss": 0.26702481508255005, |
| "step": 449, |
| "token_acc": 0.9252548131370328 |
| }, |
| { |
| "epoch": 0.0949367088607595, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.98068586676064e-07, |
| "loss": 0.27268826961517334, |
| "step": 450, |
| "token_acc": 0.9244391971664699 |
| }, |
| { |
| "epoch": 0.09514767932489451, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.98053710744016e-07, |
| "loss": 0.22254578769207, |
| "step": 451, |
| "token_acc": 0.9397944199706314 |
| }, |
| { |
| "epoch": 0.09535864978902954, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.980387778553103e-07, |
| "loss": 0.2529526948928833, |
| "step": 452, |
| "token_acc": 0.9301044083526682 |
| }, |
| { |
| "epoch": 0.09556962025316455, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.980237880116553e-07, |
| "loss": 0.2600526809692383, |
| "step": 453, |
| "token_acc": 0.9255893212155638 |
| }, |
| { |
| "epoch": 0.09578059071729958, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.980087412147648e-07, |
| "loss": 0.2552299499511719, |
| "step": 454, |
| "token_acc": 0.9276672694394213 |
| }, |
| { |
| "epoch": 0.09599156118143459, |
| "grad_norm": 0.984375, |
| "learning_rate": 9.979936374663595e-07, |
| "loss": 0.28409841656684875, |
| "step": 455, |
| "token_acc": 0.9230544177881802 |
| }, |
| { |
| "epoch": 0.09620253164556962, |
| "grad_norm": 0.83984375, |
| "learning_rate": 9.979784767681668e-07, |
| "loss": 0.256397545337677, |
| "step": 456, |
| "token_acc": 0.9331357048748353 |
| }, |
| { |
| "epoch": 0.09641350210970465, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.979632591219207e-07, |
| "loss": 0.2313995659351349, |
| "step": 457, |
| "token_acc": 0.9336188436830836 |
| }, |
| { |
| "epoch": 0.09662447257383966, |
| "grad_norm": 0.87890625, |
| "learning_rate": 9.97947984529361e-07, |
| "loss": 0.2967644929885864, |
| "step": 458, |
| "token_acc": 0.9195630585898709 |
| }, |
| { |
| "epoch": 0.09683544303797469, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.979326529922348e-07, |
| "loss": 0.30269140005111694, |
| "step": 459, |
| "token_acc": 0.9189985272459499 |
| }, |
| { |
| "epoch": 0.0970464135021097, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.97917264512295e-07, |
| "loss": 0.2591363787651062, |
| "step": 460, |
| "token_acc": 0.9290578887627696 |
| }, |
| { |
| "epoch": 0.09725738396624473, |
| "grad_norm": 0.84375, |
| "learning_rate": 9.979018190913018e-07, |
| "loss": 0.32560282945632935, |
| "step": 461, |
| "token_acc": 0.9178757980266976 |
| }, |
| { |
| "epoch": 0.09746835443037975, |
| "grad_norm": 0.83984375, |
| "learning_rate": 9.978863167310213e-07, |
| "loss": 0.2893942892551422, |
| "step": 462, |
| "token_acc": 0.924191063174114 |
| }, |
| { |
| "epoch": 0.09767932489451477, |
| "grad_norm": 0.625, |
| "learning_rate": 9.978707574332266e-07, |
| "loss": 0.2492993026971817, |
| "step": 463, |
| "token_acc": 0.9310970081595649 |
| }, |
| { |
| "epoch": 0.09789029535864979, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.978551411996967e-07, |
| "loss": 0.27646076679229736, |
| "step": 464, |
| "token_acc": 0.9283480238839921 |
| }, |
| { |
| "epoch": 0.0981012658227848, |
| "grad_norm": 0.87890625, |
| "learning_rate": 9.978394680322176e-07, |
| "loss": 0.22209137678146362, |
| "step": 465, |
| "token_acc": 0.9360902255639098 |
| }, |
| { |
| "epoch": 0.09831223628691983, |
| "grad_norm": 0.84375, |
| "learning_rate": 9.978237379325818e-07, |
| "loss": 0.2588399648666382, |
| "step": 466, |
| "token_acc": 0.9257776408992916 |
| }, |
| { |
| "epoch": 0.09852320675105486, |
| "grad_norm": 0.88671875, |
| "learning_rate": 9.978079509025878e-07, |
| "loss": 0.3038383722305298, |
| "step": 467, |
| "token_acc": 0.9133605600933489 |
| }, |
| { |
| "epoch": 0.09873417721518987, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.977921069440415e-07, |
| "loss": 0.24923110008239746, |
| "step": 468, |
| "token_acc": 0.9317915690866511 |
| }, |
| { |
| "epoch": 0.0989451476793249, |
| "grad_norm": 0.8515625, |
| "learning_rate": 9.97776206058754e-07, |
| "loss": 0.23833706974983215, |
| "step": 469, |
| "token_acc": 0.9329593267882188 |
| }, |
| { |
| "epoch": 0.09915611814345991, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.977602482485445e-07, |
| "loss": 0.25747478008270264, |
| "step": 470, |
| "token_acc": 0.9295946357817738 |
| }, |
| { |
| "epoch": 0.09936708860759494, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.977442335152377e-07, |
| "loss": 0.2688140571117401, |
| "step": 471, |
| "token_acc": 0.9248041775456919 |
| }, |
| { |
| "epoch": 0.09957805907172995, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.977281618606649e-07, |
| "loss": 0.19290462136268616, |
| "step": 472, |
| "token_acc": 0.9412288512911843 |
| }, |
| { |
| "epoch": 0.09978902953586498, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.977120332866638e-07, |
| "loss": 0.24847334623336792, |
| "step": 473, |
| "token_acc": 0.9335578689528475 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.976958477950794e-07, |
| "loss": 0.24599069356918335, |
| "step": 474, |
| "token_acc": 0.9284507042253521 |
| }, |
| { |
| "epoch": 0.10021097046413502, |
| "grad_norm": 0.83203125, |
| "learning_rate": 9.976796053877622e-07, |
| "loss": 0.2468043714761734, |
| "step": 475, |
| "token_acc": 0.9286099137931034 |
| }, |
| { |
| "epoch": 0.10042194092827005, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.976633060665697e-07, |
| "loss": 0.2741178572177887, |
| "step": 476, |
| "token_acc": 0.9224250325945241 |
| }, |
| { |
| "epoch": 0.10063291139240506, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.97646949833366e-07, |
| "loss": 0.23784510791301727, |
| "step": 477, |
| "token_acc": 0.9328483491885842 |
| }, |
| { |
| "epoch": 0.10084388185654009, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.976305366900216e-07, |
| "loss": 0.23838309943675995, |
| "step": 478, |
| "token_acc": 0.9320939839917377 |
| }, |
| { |
| "epoch": 0.10105485232067511, |
| "grad_norm": 0.86328125, |
| "learning_rate": 9.976140666384134e-07, |
| "loss": 0.2787632346153259, |
| "step": 479, |
| "token_acc": 0.9210836277974087 |
| }, |
| { |
| "epoch": 0.10126582278481013, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.97597539680425e-07, |
| "loss": 0.2557927370071411, |
| "step": 480, |
| "token_acc": 0.9304747320061256 |
| }, |
| { |
| "epoch": 0.10147679324894515, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.975809558179463e-07, |
| "loss": 0.2617788314819336, |
| "step": 481, |
| "token_acc": 0.9297163995067818 |
| }, |
| { |
| "epoch": 0.10168776371308016, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.975643150528737e-07, |
| "loss": 0.24790287017822266, |
| "step": 482, |
| "token_acc": 0.9321890827236916 |
| }, |
| { |
| "epoch": 0.10189873417721519, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.975476173871102e-07, |
| "loss": 0.22530625760555267, |
| "step": 483, |
| "token_acc": 0.9375520399666945 |
| }, |
| { |
| "epoch": 0.1021097046413502, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.975308628225657e-07, |
| "loss": 0.23794007301330566, |
| "step": 484, |
| "token_acc": 0.9279202279202279 |
| }, |
| { |
| "epoch": 0.10232067510548523, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.975140513611558e-07, |
| "loss": 0.2270554155111313, |
| "step": 485, |
| "token_acc": 0.9365750528541226 |
| }, |
| { |
| "epoch": 0.10253164556962026, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.974971830048033e-07, |
| "loss": 0.23995614051818848, |
| "step": 486, |
| "token_acc": 0.9316101238556812 |
| }, |
| { |
| "epoch": 0.10274261603375527, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.974802577554372e-07, |
| "loss": 0.2599806487560272, |
| "step": 487, |
| "token_acc": 0.9297071129707113 |
| }, |
| { |
| "epoch": 0.1029535864978903, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.974632756149928e-07, |
| "loss": 0.2610231935977936, |
| "step": 488, |
| "token_acc": 0.9277639922801213 |
| }, |
| { |
| "epoch": 0.10316455696202531, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.974462365854124e-07, |
| "loss": 0.2433297038078308, |
| "step": 489, |
| "token_acc": 0.9279082468596396 |
| }, |
| { |
| "epoch": 0.10337552742616034, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.974291406686446e-07, |
| "loss": 0.21656793355941772, |
| "step": 490, |
| "token_acc": 0.9369074861065708 |
| }, |
| { |
| "epoch": 0.10358649789029536, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.974119878666442e-07, |
| "loss": 0.2721899151802063, |
| "step": 491, |
| "token_acc": 0.9287462605384824 |
| }, |
| { |
| "epoch": 0.10379746835443038, |
| "grad_norm": 0.87890625, |
| "learning_rate": 9.973947781813731e-07, |
| "loss": 0.25939926505088806, |
| "step": 492, |
| "token_acc": 0.9284844796104686 |
| }, |
| { |
| "epoch": 0.1040084388185654, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.973775116147992e-07, |
| "loss": 0.2712750732898712, |
| "step": 493, |
| "token_acc": 0.9242995689655172 |
| }, |
| { |
| "epoch": 0.10421940928270042, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.97360188168897e-07, |
| "loss": 0.2513953447341919, |
| "step": 494, |
| "token_acc": 0.929803328290469 |
| }, |
| { |
| "epoch": 0.10443037974683544, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.973428078456475e-07, |
| "loss": 0.2344273030757904, |
| "step": 495, |
| "token_acc": 0.9309220278683664 |
| }, |
| { |
| "epoch": 0.10464135021097046, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.973253706470388e-07, |
| "loss": 0.24709591269493103, |
| "step": 496, |
| "token_acc": 0.9282845668387837 |
| }, |
| { |
| "epoch": 0.10485232067510548, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.973078765750644e-07, |
| "loss": 0.26154980063438416, |
| "step": 497, |
| "token_acc": 0.9249655172413793 |
| }, |
| { |
| "epoch": 0.10506329113924051, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.972903256317251e-07, |
| "loss": 0.2260134369134903, |
| "step": 498, |
| "token_acc": 0.9395458566794456 |
| }, |
| { |
| "epoch": 0.10527426160337552, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.972727178190281e-07, |
| "loss": 0.33081650733947754, |
| "step": 499, |
| "token_acc": 0.9097568121886903 |
| }, |
| { |
| "epoch": 0.10548523206751055, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.97255053138987e-07, |
| "loss": 0.23815643787384033, |
| "step": 500, |
| "token_acc": 0.929639889196676 |
| }, |
| { |
| "epoch": 0.10569620253164556, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.972373315936218e-07, |
| "loss": 0.2648988962173462, |
| "step": 501, |
| "token_acc": 0.9245283018867925 |
| }, |
| { |
| "epoch": 0.10590717299578059, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.972195531849592e-07, |
| "loss": 0.2421625256538391, |
| "step": 502, |
| "token_acc": 0.9339531901250401 |
| }, |
| { |
| "epoch": 0.10611814345991562, |
| "grad_norm": 0.53125, |
| "learning_rate": 9.97201717915032e-07, |
| "loss": 0.2174941599369049, |
| "step": 503, |
| "token_acc": 0.9379822806516147 |
| }, |
| { |
| "epoch": 0.10632911392405063, |
| "grad_norm": 0.9375, |
| "learning_rate": 9.971838257858804e-07, |
| "loss": 0.24187928438186646, |
| "step": 504, |
| "token_acc": 0.924503742271396 |
| }, |
| { |
| "epoch": 0.10654008438818566, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.9716587679955e-07, |
| "loss": 0.2514258921146393, |
| "step": 505, |
| "token_acc": 0.9230990783410138 |
| }, |
| { |
| "epoch": 0.10675105485232067, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.971478709580937e-07, |
| "loss": 0.282311350107193, |
| "step": 506, |
| "token_acc": 0.9201725997842503 |
| }, |
| { |
| "epoch": 0.1069620253164557, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.971298082635705e-07, |
| "loss": 0.2298332005739212, |
| "step": 507, |
| "token_acc": 0.936689779921616 |
| }, |
| { |
| "epoch": 0.10717299578059072, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.971116887180461e-07, |
| "loss": 0.26396387815475464, |
| "step": 508, |
| "token_acc": 0.9267986176562991 |
| }, |
| { |
| "epoch": 0.10738396624472574, |
| "grad_norm": 1.3046875, |
| "learning_rate": 9.970935123235926e-07, |
| "loss": 0.2639835476875305, |
| "step": 509, |
| "token_acc": 0.9236704326260677 |
| }, |
| { |
| "epoch": 0.10759493670886076, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.970752790822886e-07, |
| "loss": 0.27394697070121765, |
| "step": 510, |
| "token_acc": 0.9261565836298933 |
| }, |
| { |
| "epoch": 0.10780590717299578, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.97056988996219e-07, |
| "loss": 0.2229761779308319, |
| "step": 511, |
| "token_acc": 0.9351882160392798 |
| }, |
| { |
| "epoch": 0.1080168776371308, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.970386420674758e-07, |
| "loss": 0.26045358180999756, |
| "step": 512, |
| "token_acc": 0.9279547484370348 |
| }, |
| { |
| "epoch": 0.10822784810126582, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.97020238298157e-07, |
| "loss": 0.23026743531227112, |
| "step": 513, |
| "token_acc": 0.9363579080025205 |
| }, |
| { |
| "epoch": 0.10843881856540084, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.970017776903671e-07, |
| "loss": 0.2587951421737671, |
| "step": 514, |
| "token_acc": 0.9307073030477285 |
| }, |
| { |
| "epoch": 0.10864978902953587, |
| "grad_norm": 0.87109375, |
| "learning_rate": 9.969832602462174e-07, |
| "loss": 0.22050908207893372, |
| "step": 515, |
| "token_acc": 0.9343434343434344 |
| }, |
| { |
| "epoch": 0.10886075949367088, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.969646859678256e-07, |
| "loss": 0.25485992431640625, |
| "step": 516, |
| "token_acc": 0.9255828808687321 |
| }, |
| { |
| "epoch": 0.10907172995780591, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.969460548573156e-07, |
| "loss": 0.24492983520030975, |
| "step": 517, |
| "token_acc": 0.9348314606741573 |
| }, |
| { |
| "epoch": 0.10928270042194092, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.96927366916818e-07, |
| "loss": 0.28373780846595764, |
| "step": 518, |
| "token_acc": 0.9258015267175572 |
| }, |
| { |
| "epoch": 0.10949367088607595, |
| "grad_norm": 0.75, |
| "learning_rate": 9.969086221484701e-07, |
| "loss": 0.2899026870727539, |
| "step": 519, |
| "token_acc": 0.9206824304100568 |
| }, |
| { |
| "epoch": 0.10970464135021098, |
| "grad_norm": 0.80078125, |
| "learning_rate": 9.968898205544153e-07, |
| "loss": 0.2812725305557251, |
| "step": 520, |
| "token_acc": 0.9226502311248074 |
| }, |
| { |
| "epoch": 0.10991561181434599, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.968709621368041e-07, |
| "loss": 0.24981510639190674, |
| "step": 521, |
| "token_acc": 0.9326704545454545 |
| }, |
| { |
| "epoch": 0.11012658227848102, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.96852046897793e-07, |
| "loss": 0.3039143681526184, |
| "step": 522, |
| "token_acc": 0.9203347799132052 |
| }, |
| { |
| "epoch": 0.11033755274261603, |
| "grad_norm": 0.92578125, |
| "learning_rate": 9.968330748395448e-07, |
| "loss": 0.2633418142795563, |
| "step": 523, |
| "token_acc": 0.9283835135925168 |
| }, |
| { |
| "epoch": 0.11054852320675106, |
| "grad_norm": 1.4609375, |
| "learning_rate": 9.968140459642294e-07, |
| "loss": 0.24586576223373413, |
| "step": 524, |
| "token_acc": 0.9281601316150261 |
| }, |
| { |
| "epoch": 0.11075949367088607, |
| "grad_norm": 0.8515625, |
| "learning_rate": 9.967949602740228e-07, |
| "loss": 0.2739730477333069, |
| "step": 525, |
| "token_acc": 0.9166417687481326 |
| }, |
| { |
| "epoch": 0.1109704641350211, |
| "grad_norm": 0.796875, |
| "learning_rate": 9.967758177711076e-07, |
| "loss": 0.2627703845500946, |
| "step": 526, |
| "token_acc": 0.9227409227409228 |
| }, |
| { |
| "epoch": 0.11118143459915612, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.967566184576732e-07, |
| "loss": 0.26023009419441223, |
| "step": 527, |
| "token_acc": 0.927381745502998 |
| }, |
| { |
| "epoch": 0.11139240506329114, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.967373623359148e-07, |
| "loss": 0.24462240934371948, |
| "step": 528, |
| "token_acc": 0.9283416203568294 |
| }, |
| { |
| "epoch": 0.11160337552742616, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.967180494080347e-07, |
| "loss": 0.24981698393821716, |
| "step": 529, |
| "token_acc": 0.9291265153870065 |
| }, |
| { |
| "epoch": 0.11181434599156118, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.966986796762414e-07, |
| "loss": 0.2446298450231552, |
| "step": 530, |
| "token_acc": 0.9370728929384966 |
| }, |
| { |
| "epoch": 0.1120253164556962, |
| "grad_norm": 2.421875, |
| "learning_rate": 9.9667925314275e-07, |
| "loss": 0.24656617641448975, |
| "step": 531, |
| "token_acc": 0.9356594110115237 |
| }, |
| { |
| "epoch": 0.11223628691983123, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.966597698097823e-07, |
| "loss": 0.2559359073638916, |
| "step": 532, |
| "token_acc": 0.9327158812312721 |
| }, |
| { |
| "epoch": 0.11244725738396624, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.966402296795661e-07, |
| "loss": 0.2284064143896103, |
| "step": 533, |
| "token_acc": 0.9354838709677419 |
| }, |
| { |
| "epoch": 0.11265822784810127, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.966206327543362e-07, |
| "loss": 0.2628895938396454, |
| "step": 534, |
| "token_acc": 0.923578751164958 |
| }, |
| { |
| "epoch": 0.11286919831223628, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.966009790363337e-07, |
| "loss": 0.2363075464963913, |
| "step": 535, |
| "token_acc": 0.9275167785234899 |
| }, |
| { |
| "epoch": 0.11308016877637131, |
| "grad_norm": 0.90625, |
| "learning_rate": 9.965812685278059e-07, |
| "loss": 0.2766547203063965, |
| "step": 536, |
| "token_acc": 0.9212160836874795 |
| }, |
| { |
| "epoch": 0.11329113924050632, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.96561501231007e-07, |
| "loss": 0.24981704354286194, |
| "step": 537, |
| "token_acc": 0.9287794545935928 |
| }, |
| { |
| "epoch": 0.11350210970464135, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.965416771481975e-07, |
| "loss": 0.2477213591337204, |
| "step": 538, |
| "token_acc": 0.9247878255779924 |
| }, |
| { |
| "epoch": 0.11371308016877638, |
| "grad_norm": 0.97265625, |
| "learning_rate": 9.965217962816446e-07, |
| "loss": 0.2585391104221344, |
| "step": 539, |
| "token_acc": 0.9276463963963963 |
| }, |
| { |
| "epoch": 0.11392405063291139, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.965018586336218e-07, |
| "loss": 0.24559935927391052, |
| "step": 540, |
| "token_acc": 0.9349939246658566 |
| }, |
| { |
| "epoch": 0.11413502109704642, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.96481864206409e-07, |
| "loss": 0.22781570255756378, |
| "step": 541, |
| "token_acc": 0.9306022623051055 |
| }, |
| { |
| "epoch": 0.11434599156118143, |
| "grad_norm": 1.0390625, |
| "learning_rate": 9.964618130022931e-07, |
| "loss": 0.2166275829076767, |
| "step": 542, |
| "token_acc": 0.9374454466104161 |
| }, |
| { |
| "epoch": 0.11455696202531646, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.964417050235665e-07, |
| "loss": 0.267704039812088, |
| "step": 543, |
| "token_acc": 0.9261410788381743 |
| }, |
| { |
| "epoch": 0.11476793248945148, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.964215402725294e-07, |
| "loss": 0.23303918540477753, |
| "step": 544, |
| "token_acc": 0.9341683658607631 |
| }, |
| { |
| "epoch": 0.1149789029535865, |
| "grad_norm": 0.921875, |
| "learning_rate": 9.964013187514872e-07, |
| "loss": 0.33097875118255615, |
| "step": 545, |
| "token_acc": 0.9122987324426174 |
| }, |
| { |
| "epoch": 0.11518987341772152, |
| "grad_norm": 1.9453125, |
| "learning_rate": 9.963810404627529e-07, |
| "loss": 0.2524172067642212, |
| "step": 546, |
| "token_acc": 0.9373441396508728 |
| }, |
| { |
| "epoch": 0.11540084388185654, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.963607054086453e-07, |
| "loss": 0.25729498267173767, |
| "step": 547, |
| "token_acc": 0.9215870040612308 |
| }, |
| { |
| "epoch": 0.11561181434599156, |
| "grad_norm": 0.75, |
| "learning_rate": 9.963403135914898e-07, |
| "loss": 0.2928774356842041, |
| "step": 548, |
| "token_acc": 0.9192731605600238 |
| }, |
| { |
| "epoch": 0.11582278481012659, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.963198650136184e-07, |
| "loss": 0.25337544083595276, |
| "step": 549, |
| "token_acc": 0.9240650870682272 |
| }, |
| { |
| "epoch": 0.1160337552742616, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.962993596773697e-07, |
| "loss": 0.27310362458229065, |
| "step": 550, |
| "token_acc": 0.9247853124074622 |
| }, |
| { |
| "epoch": 0.11624472573839663, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.962787975850886e-07, |
| "loss": 0.22571566700935364, |
| "step": 551, |
| "token_acc": 0.9384902143522833 |
| }, |
| { |
| "epoch": 0.11645569620253164, |
| "grad_norm": 0.8671875, |
| "learning_rate": 9.962581787391265e-07, |
| "loss": 0.25049251317977905, |
| "step": 552, |
| "token_acc": 0.9287239722370528 |
| }, |
| { |
| "epoch": 0.11666666666666667, |
| "grad_norm": 1.2734375, |
| "learning_rate": 9.962375031418413e-07, |
| "loss": 0.24676430225372314, |
| "step": 553, |
| "token_acc": 0.9325946445060018 |
| }, |
| { |
| "epoch": 0.11687763713080168, |
| "grad_norm": 0.75, |
| "learning_rate": 9.962167707955977e-07, |
| "loss": 0.22018642723560333, |
| "step": 554, |
| "token_acc": 0.9440233236151604 |
| }, |
| { |
| "epoch": 0.11708860759493671, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.96195981702766e-07, |
| "loss": 0.2333768904209137, |
| "step": 555, |
| "token_acc": 0.9352249928346231 |
| }, |
| { |
| "epoch": 0.11729957805907174, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.961751358657244e-07, |
| "loss": 0.2830660939216614, |
| "step": 556, |
| "token_acc": 0.9188869153345175 |
| }, |
| { |
| "epoch": 0.11751054852320675, |
| "grad_norm": 0.81640625, |
| "learning_rate": 9.961542332868564e-07, |
| "loss": 0.26290833950042725, |
| "step": 557, |
| "token_acc": 0.9261704681872749 |
| }, |
| { |
| "epoch": 0.11772151898734177, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.961332739685523e-07, |
| "loss": 0.2768633961677551, |
| "step": 558, |
| "token_acc": 0.9245508982035928 |
| }, |
| { |
| "epoch": 0.11793248945147679, |
| "grad_norm": 0.80078125, |
| "learning_rate": 9.96112257913209e-07, |
| "loss": 0.2084287852048874, |
| "step": 559, |
| "token_acc": 0.9381261048909841 |
| }, |
| { |
| "epoch": 0.11814345991561181, |
| "grad_norm": 0.796875, |
| "learning_rate": 9.960911851232301e-07, |
| "loss": 0.2791953682899475, |
| "step": 560, |
| "token_acc": 0.924936025021325 |
| }, |
| { |
| "epoch": 0.11835443037974684, |
| "grad_norm": 1.34375, |
| "learning_rate": 9.960700556010253e-07, |
| "loss": 0.319602370262146, |
| "step": 561, |
| "token_acc": 0.9191286183228887 |
| }, |
| { |
| "epoch": 0.11856540084388185, |
| "grad_norm": 0.8515625, |
| "learning_rate": 9.960488693490108e-07, |
| "loss": 0.21053284406661987, |
| "step": 562, |
| "token_acc": 0.9407194244604317 |
| }, |
| { |
| "epoch": 0.11877637130801688, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.960276263696097e-07, |
| "loss": 0.27438345551490784, |
| "step": 563, |
| "token_acc": 0.9290875033449291 |
| }, |
| { |
| "epoch": 0.1189873417721519, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.960063266652512e-07, |
| "loss": 0.2969055771827698, |
| "step": 564, |
| "token_acc": 0.918200408997955 |
| }, |
| { |
| "epoch": 0.11919831223628692, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.95984970238371e-07, |
| "loss": 0.3027820587158203, |
| "step": 565, |
| "token_acc": 0.924516531503431 |
| }, |
| { |
| "epoch": 0.11940928270042193, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.959635570914115e-07, |
| "loss": 0.26206478476524353, |
| "step": 566, |
| "token_acc": 0.9267332727823191 |
| }, |
| { |
| "epoch": 0.11962025316455696, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.959420872268214e-07, |
| "loss": 0.22268003225326538, |
| "step": 567, |
| "token_acc": 0.9377475947934352 |
| }, |
| { |
| "epoch": 0.11983122362869199, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.95920560647056e-07, |
| "loss": 0.2821509838104248, |
| "step": 568, |
| "token_acc": 0.9257203277821835 |
| }, |
| { |
| "epoch": 0.120042194092827, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.958989773545772e-07, |
| "loss": 0.2774399518966675, |
| "step": 569, |
| "token_acc": 0.9208851167020309 |
| }, |
| { |
| "epoch": 0.12025316455696203, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.95877337351853e-07, |
| "loss": 0.20950725674629211, |
| "step": 570, |
| "token_acc": 0.9383966244725739 |
| }, |
| { |
| "epoch": 0.12046413502109704, |
| "grad_norm": 2.375, |
| "learning_rate": 9.95855640641358e-07, |
| "loss": 0.24889153242111206, |
| "step": 571, |
| "token_acc": 0.9338555265448216 |
| }, |
| { |
| "epoch": 0.12067510548523207, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.958338872255738e-07, |
| "loss": 0.27347537875175476, |
| "step": 572, |
| "token_acc": 0.9261669024045261 |
| }, |
| { |
| "epoch": 0.1208860759493671, |
| "grad_norm": 0.625, |
| "learning_rate": 9.958120771069878e-07, |
| "loss": 0.2640995383262634, |
| "step": 573, |
| "token_acc": 0.9278178789561354 |
| }, |
| { |
| "epoch": 0.12109704641350211, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.957902102880945e-07, |
| "loss": 0.23652713000774384, |
| "step": 574, |
| "token_acc": 0.9364988558352403 |
| }, |
| { |
| "epoch": 0.12130801687763713, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.957682867713942e-07, |
| "loss": 0.291990727186203, |
| "step": 575, |
| "token_acc": 0.9223826714801444 |
| }, |
| { |
| "epoch": 0.12151898734177215, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.95746306559394e-07, |
| "loss": 0.21727707982063293, |
| "step": 576, |
| "token_acc": 0.9308996088657105 |
| }, |
| { |
| "epoch": 0.12172995780590717, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.957242696546077e-07, |
| "loss": 0.2906607985496521, |
| "step": 577, |
| "token_acc": 0.9158725837190308 |
| }, |
| { |
| "epoch": 0.1219409282700422, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.957021760595556e-07, |
| "loss": 0.226593479514122, |
| "step": 578, |
| "token_acc": 0.9271303824149353 |
| }, |
| { |
| "epoch": 0.12215189873417721, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.956800257767639e-07, |
| "loss": 0.26656001806259155, |
| "step": 579, |
| "token_acc": 0.930952380952381 |
| }, |
| { |
| "epoch": 0.12236286919831224, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.956578188087658e-07, |
| "loss": 0.2880259156227112, |
| "step": 580, |
| "token_acc": 0.9256572982774252 |
| }, |
| { |
| "epoch": 0.12257383966244725, |
| "grad_norm": 0.83984375, |
| "learning_rate": 9.95635555158101e-07, |
| "loss": 0.24349641799926758, |
| "step": 581, |
| "token_acc": 0.9316569954867827 |
| }, |
| { |
| "epoch": 0.12278481012658228, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.956132348273157e-07, |
| "loss": 0.24496349692344666, |
| "step": 582, |
| "token_acc": 0.9309711286089238 |
| }, |
| { |
| "epoch": 0.1229957805907173, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.955908578189619e-07, |
| "loss": 0.2652456760406494, |
| "step": 583, |
| "token_acc": 0.9269628727936701 |
| }, |
| { |
| "epoch": 0.12320675105485232, |
| "grad_norm": 0.87109375, |
| "learning_rate": 9.955684241355988e-07, |
| "loss": 0.2777090072631836, |
| "step": 584, |
| "token_acc": 0.9167887489012599 |
| }, |
| { |
| "epoch": 0.12341772151898735, |
| "grad_norm": 1.2109375, |
| "learning_rate": 9.95545933779792e-07, |
| "loss": 0.3164791464805603, |
| "step": 585, |
| "token_acc": 0.919442072302875 |
| }, |
| { |
| "epoch": 0.12362869198312236, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.955233867541134e-07, |
| "loss": 0.26809951663017273, |
| "step": 586, |
| "token_acc": 0.9227027027027027 |
| }, |
| { |
| "epoch": 0.12383966244725739, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.955007830611414e-07, |
| "loss": 0.25988298654556274, |
| "step": 587, |
| "token_acc": 0.9277679100059206 |
| }, |
| { |
| "epoch": 0.1240506329113924, |
| "grad_norm": 1.453125, |
| "learning_rate": 9.954781227034612e-07, |
| "loss": 0.22518092393875122, |
| "step": 588, |
| "token_acc": 0.935474701534963 |
| }, |
| { |
| "epoch": 0.12426160337552743, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.954554056836637e-07, |
| "loss": 0.23757173120975494, |
| "step": 589, |
| "token_acc": 0.929593589009731 |
| }, |
| { |
| "epoch": 0.12447257383966245, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.954326320043472e-07, |
| "loss": 0.2483949363231659, |
| "step": 590, |
| "token_acc": 0.9315320847405588 |
| }, |
| { |
| "epoch": 0.12468354430379747, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.95409801668116e-07, |
| "loss": 0.26530349254608154, |
| "step": 591, |
| "token_acc": 0.9214407260351674 |
| }, |
| { |
| "epoch": 0.1248945147679325, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.953869146775806e-07, |
| "loss": 0.2826001048088074, |
| "step": 592, |
| "token_acc": 0.9162200282087447 |
| }, |
| { |
| "epoch": 0.12510548523206752, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.953639710353589e-07, |
| "loss": 0.1961941421031952, |
| "step": 593, |
| "token_acc": 0.9426091825307951 |
| }, |
| { |
| "epoch": 0.12531645569620253, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.953409707440742e-07, |
| "loss": 0.26104363799095154, |
| "step": 594, |
| "token_acc": 0.9215813350615684 |
| }, |
| { |
| "epoch": 0.12552742616033755, |
| "grad_norm": 0.828125, |
| "learning_rate": 9.95317913806357e-07, |
| "loss": 0.30334994196891785, |
| "step": 595, |
| "token_acc": 0.9229497354497355 |
| }, |
| { |
| "epoch": 0.1257383966244726, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.95294800224844e-07, |
| "loss": 0.27462461590766907, |
| "step": 596, |
| "token_acc": 0.9222963177732676 |
| }, |
| { |
| "epoch": 0.1259493670886076, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.952716300021784e-07, |
| "loss": 0.25919121503829956, |
| "step": 597, |
| "token_acc": 0.9274289099526066 |
| }, |
| { |
| "epoch": 0.1261603375527426, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.952484031410102e-07, |
| "loss": 0.24202126264572144, |
| "step": 598, |
| "token_acc": 0.9379543094496365 |
| }, |
| { |
| "epoch": 0.12637130801687763, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.95225119643995e-07, |
| "loss": 0.22879423201084137, |
| "step": 599, |
| "token_acc": 0.9303405572755418 |
| }, |
| { |
| "epoch": 0.12658227848101267, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.952017795137962e-07, |
| "loss": 0.2557697892189026, |
| "step": 600, |
| "token_acc": 0.9307598039215687 |
| }, |
| { |
| "epoch": 0.12658227848101267, |
| "eval_loss": 0.4336538016796112, |
| "eval_runtime": 246.0329, |
| "eval_samples_per_second": 136.994, |
| "eval_steps_per_second": 2.142, |
| "eval_token_acc": 0.899098088486045, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.12679324894514768, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.951783827530821e-07, |
| "loss": 0.24460609257221222, |
| "step": 601, |
| "token_acc": 0.935226264418811 |
| }, |
| { |
| "epoch": 0.1270042194092827, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.951549293645292e-07, |
| "loss": 0.24832656979560852, |
| "step": 602, |
| "token_acc": 0.9309408926417371 |
| }, |
| { |
| "epoch": 0.12721518987341773, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.95131419350819e-07, |
| "loss": 0.23030316829681396, |
| "step": 603, |
| "token_acc": 0.9332206255283179 |
| }, |
| { |
| "epoch": 0.12742616033755275, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.951078527146403e-07, |
| "loss": 0.2880566418170929, |
| "step": 604, |
| "token_acc": 0.9192907367777438 |
| }, |
| { |
| "epoch": 0.12763713080168776, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.95084229458688e-07, |
| "loss": 0.24888081848621368, |
| "step": 605, |
| "token_acc": 0.929957805907173 |
| }, |
| { |
| "epoch": 0.12784810126582277, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.950605495856637e-07, |
| "loss": 0.2833850681781769, |
| "step": 606, |
| "token_acc": 0.9213352685050799 |
| }, |
| { |
| "epoch": 0.1280590717299578, |
| "grad_norm": 0.875, |
| "learning_rate": 9.950368130982755e-07, |
| "loss": 0.26693737506866455, |
| "step": 607, |
| "token_acc": 0.9250070482097548 |
| }, |
| { |
| "epoch": 0.12827004219409283, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.950130199992377e-07, |
| "loss": 0.23543164134025574, |
| "step": 608, |
| "token_acc": 0.9348515422311905 |
| }, |
| { |
| "epoch": 0.12848101265822784, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.949891702912712e-07, |
| "loss": 0.22989103198051453, |
| "step": 609, |
| "token_acc": 0.9318626082099972 |
| }, |
| { |
| "epoch": 0.12869198312236288, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.949652639771036e-07, |
| "loss": 0.24984115362167358, |
| "step": 610, |
| "token_acc": 0.9261559696342305 |
| }, |
| { |
| "epoch": 0.1289029535864979, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.94941301059469e-07, |
| "loss": 0.2549693286418915, |
| "step": 611, |
| "token_acc": 0.9259363559560687 |
| }, |
| { |
| "epoch": 0.1291139240506329, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.94917281541107e-07, |
| "loss": 0.2592216432094574, |
| "step": 612, |
| "token_acc": 0.9255747126436782 |
| }, |
| { |
| "epoch": 0.12932489451476795, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.948932054247652e-07, |
| "loss": 0.2784273624420166, |
| "step": 613, |
| "token_acc": 0.9198324022346369 |
| }, |
| { |
| "epoch": 0.12953586497890296, |
| "grad_norm": 1.359375, |
| "learning_rate": 9.948690727131965e-07, |
| "loss": 0.2754824161529541, |
| "step": 614, |
| "token_acc": 0.9211413748378728 |
| }, |
| { |
| "epoch": 0.12974683544303797, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.948448834091608e-07, |
| "loss": 0.22421778738498688, |
| "step": 615, |
| "token_acc": 0.9337539432176656 |
| }, |
| { |
| "epoch": 0.12995780590717299, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.948206375154244e-07, |
| "loss": 0.22916918992996216, |
| "step": 616, |
| "token_acc": 0.933944374209861 |
| }, |
| { |
| "epoch": 0.13016877637130803, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.947963350347598e-07, |
| "loss": 0.23158694803714752, |
| "step": 617, |
| "token_acc": 0.9291338582677166 |
| }, |
| { |
| "epoch": 0.13037974683544304, |
| "grad_norm": 0.85546875, |
| "learning_rate": 9.947719759699466e-07, |
| "loss": 0.2788570523262024, |
| "step": 618, |
| "token_acc": 0.9231003039513678 |
| }, |
| { |
| "epoch": 0.13059071729957805, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.947475603237702e-07, |
| "loss": 0.28004133701324463, |
| "step": 619, |
| "token_acc": 0.9180544541369283 |
| }, |
| { |
| "epoch": 0.1308016877637131, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.947230880990227e-07, |
| "loss": 0.2773160934448242, |
| "step": 620, |
| "token_acc": 0.9248013620885358 |
| }, |
| { |
| "epoch": 0.1310126582278481, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.946985592985028e-07, |
| "loss": 0.2508021593093872, |
| "step": 621, |
| "token_acc": 0.9334818586887333 |
| }, |
| { |
| "epoch": 0.13122362869198312, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.946739739250156e-07, |
| "loss": 0.23769596219062805, |
| "step": 622, |
| "token_acc": 0.9302249755461363 |
| }, |
| { |
| "epoch": 0.13143459915611813, |
| "grad_norm": 1.2109375, |
| "learning_rate": 9.946493319813725e-07, |
| "loss": 0.21937592327594757, |
| "step": 623, |
| "token_acc": 0.9373860182370821 |
| }, |
| { |
| "epoch": 0.13164556962025317, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.946246334703916e-07, |
| "loss": 0.27754154801368713, |
| "step": 624, |
| "token_acc": 0.9261245159368484 |
| }, |
| { |
| "epoch": 0.13185654008438819, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.945998783948975e-07, |
| "loss": 0.2924942672252655, |
| "step": 625, |
| "token_acc": 0.9139688249400479 |
| }, |
| { |
| "epoch": 0.1320675105485232, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.945750667577209e-07, |
| "loss": 0.2303755283355713, |
| "step": 626, |
| "token_acc": 0.9341597796143251 |
| }, |
| { |
| "epoch": 0.13227848101265824, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.945501985616995e-07, |
| "loss": 0.20424559712409973, |
| "step": 627, |
| "token_acc": 0.9430594900849858 |
| }, |
| { |
| "epoch": 0.13248945147679325, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.94525273809677e-07, |
| "loss": 0.2620590031147003, |
| "step": 628, |
| "token_acc": 0.9274099883855982 |
| }, |
| { |
| "epoch": 0.13270042194092826, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.945002925045038e-07, |
| "loss": 0.2684752643108368, |
| "step": 629, |
| "token_acc": 0.9270194986072423 |
| }, |
| { |
| "epoch": 0.13291139240506328, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.944752546490367e-07, |
| "loss": 0.23374760150909424, |
| "step": 630, |
| "token_acc": 0.9325842696629213 |
| }, |
| { |
| "epoch": 0.13312236286919832, |
| "grad_norm": 1.1953125, |
| "learning_rate": 9.94450160246139e-07, |
| "loss": 0.22228139638900757, |
| "step": 631, |
| "token_acc": 0.9354534005037783 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.944250092986807e-07, |
| "loss": 0.2018851488828659, |
| "step": 632, |
| "token_acc": 0.9405840886203424 |
| }, |
| { |
| "epoch": 0.13354430379746834, |
| "grad_norm": 1.1796875, |
| "learning_rate": 9.943998018095377e-07, |
| "loss": 0.26342812180519104, |
| "step": 633, |
| "token_acc": 0.9279495646952867 |
| }, |
| { |
| "epoch": 0.13375527426160339, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.943745377815927e-07, |
| "loss": 0.24731256067752838, |
| "step": 634, |
| "token_acc": 0.9329608938547486 |
| }, |
| { |
| "epoch": 0.1339662447257384, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.94349217217735e-07, |
| "loss": 0.22763285040855408, |
| "step": 635, |
| "token_acc": 0.9298531810766721 |
| }, |
| { |
| "epoch": 0.1341772151898734, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.943238401208602e-07, |
| "loss": 0.25396978855133057, |
| "step": 636, |
| "token_acc": 0.9292196007259528 |
| }, |
| { |
| "epoch": 0.13438818565400845, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.942984064938705e-07, |
| "loss": 0.30096304416656494, |
| "step": 637, |
| "token_acc": 0.9151027703306523 |
| }, |
| { |
| "epoch": 0.13459915611814346, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.942729163396741e-07, |
| "loss": 0.29584643244743347, |
| "step": 638, |
| "token_acc": 0.919442072302875 |
| }, |
| { |
| "epoch": 0.13481012658227848, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.942473696611862e-07, |
| "loss": 0.2260875701904297, |
| "step": 639, |
| "token_acc": 0.934560327198364 |
| }, |
| { |
| "epoch": 0.1350210970464135, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.942217664613284e-07, |
| "loss": 0.25592464208602905, |
| "step": 640, |
| "token_acc": 0.926865671641791 |
| }, |
| { |
| "epoch": 0.13523206751054853, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.941961067430285e-07, |
| "loss": 0.24965469539165497, |
| "step": 641, |
| "token_acc": 0.92850705917693 |
| }, |
| { |
| "epoch": 0.13544303797468354, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.94170390509221e-07, |
| "loss": 0.2692350149154663, |
| "step": 642, |
| "token_acc": 0.9312614259597807 |
| }, |
| { |
| "epoch": 0.13565400843881856, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.941446177628467e-07, |
| "loss": 0.2497376799583435, |
| "step": 643, |
| "token_acc": 0.925979262672811 |
| }, |
| { |
| "epoch": 0.1358649789029536, |
| "grad_norm": 1.046875, |
| "learning_rate": 9.94118788506853e-07, |
| "loss": 0.27541112899780273, |
| "step": 644, |
| "token_acc": 0.9211567732115677 |
| }, |
| { |
| "epoch": 0.1360759493670886, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.940929027441936e-07, |
| "loss": 0.2414344996213913, |
| "step": 645, |
| "token_acc": 0.9313432835820895 |
| }, |
| { |
| "epoch": 0.13628691983122362, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.940669604778288e-07, |
| "loss": 0.2977514863014221, |
| "step": 646, |
| "token_acc": 0.922911547911548 |
| }, |
| { |
| "epoch": 0.13649789029535864, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.940409617107252e-07, |
| "loss": 0.2718917727470398, |
| "step": 647, |
| "token_acc": 0.9186241610738255 |
| }, |
| { |
| "epoch": 0.13670886075949368, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.940149064458563e-07, |
| "loss": 0.2234637290239334, |
| "step": 648, |
| "token_acc": 0.9408866995073891 |
| }, |
| { |
| "epoch": 0.1369198312236287, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.939887946862017e-07, |
| "loss": 0.2774735689163208, |
| "step": 649, |
| "token_acc": 0.9290869155946031 |
| }, |
| { |
| "epoch": 0.1371308016877637, |
| "grad_norm": 0.90625, |
| "learning_rate": 9.93962626434747e-07, |
| "loss": 0.2615464925765991, |
| "step": 650, |
| "token_acc": 0.9272910881090634 |
| }, |
| { |
| "epoch": 0.13734177215189874, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.939364016944852e-07, |
| "loss": 0.23573726415634155, |
| "step": 651, |
| "token_acc": 0.9329399141630901 |
| }, |
| { |
| "epoch": 0.13755274261603376, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.939101204684151e-07, |
| "loss": 0.2487039864063263, |
| "step": 652, |
| "token_acc": 0.932972972972973 |
| }, |
| { |
| "epoch": 0.13776371308016877, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.938837827595424e-07, |
| "loss": 0.26214438676834106, |
| "step": 653, |
| "token_acc": 0.9278832116788321 |
| }, |
| { |
| "epoch": 0.1379746835443038, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.938573885708792e-07, |
| "loss": 0.24997225403785706, |
| "step": 654, |
| "token_acc": 0.9322553666016169 |
| }, |
| { |
| "epoch": 0.13818565400843882, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.938309379054433e-07, |
| "loss": 0.28863316774368286, |
| "step": 655, |
| "token_acc": 0.9217944831767202 |
| }, |
| { |
| "epoch": 0.13839662447257384, |
| "grad_norm": 0.625, |
| "learning_rate": 9.9380443076626e-07, |
| "loss": 0.23938237130641937, |
| "step": 656, |
| "token_acc": 0.9307647740440325 |
| }, |
| { |
| "epoch": 0.13860759493670885, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.937778671563606e-07, |
| "loss": 0.26946017146110535, |
| "step": 657, |
| "token_acc": 0.9250278706800446 |
| }, |
| { |
| "epoch": 0.1388185654008439, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.937512470787827e-07, |
| "loss": 0.25879329442977905, |
| "step": 658, |
| "token_acc": 0.9263622974963182 |
| }, |
| { |
| "epoch": 0.1390295358649789, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.937245705365707e-07, |
| "loss": 0.26367712020874023, |
| "step": 659, |
| "token_acc": 0.9273255813953488 |
| }, |
| { |
| "epoch": 0.13924050632911392, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.93697837532775e-07, |
| "loss": 0.27003130316734314, |
| "step": 660, |
| "token_acc": 0.9255381035485748 |
| }, |
| { |
| "epoch": 0.13945147679324896, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.936710480704531e-07, |
| "loss": 0.3241864740848541, |
| "step": 661, |
| "token_acc": 0.9117466174661747 |
| }, |
| { |
| "epoch": 0.13966244725738397, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.936442021526685e-07, |
| "loss": 0.254525363445282, |
| "step": 662, |
| "token_acc": 0.9274787535410765 |
| }, |
| { |
| "epoch": 0.13987341772151898, |
| "grad_norm": 0.85546875, |
| "learning_rate": 9.936172997824912e-07, |
| "loss": 0.22039127349853516, |
| "step": 663, |
| "token_acc": 0.9337885985748219 |
| }, |
| { |
| "epoch": 0.140084388185654, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.935903409629977e-07, |
| "loss": 0.26330018043518066, |
| "step": 664, |
| "token_acc": 0.9245566576495341 |
| }, |
| { |
| "epoch": 0.14029535864978904, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.93563325697271e-07, |
| "loss": 0.25053250789642334, |
| "step": 665, |
| "token_acc": 0.9321644150617994 |
| }, |
| { |
| "epoch": 0.14050632911392405, |
| "grad_norm": 1.015625, |
| "learning_rate": 9.935362539884004e-07, |
| "loss": 0.23359492421150208, |
| "step": 666, |
| "token_acc": 0.9295649600473513 |
| }, |
| { |
| "epoch": 0.14071729957805906, |
| "grad_norm": 0.80078125, |
| "learning_rate": 9.935091258394821e-07, |
| "loss": 0.3050011098384857, |
| "step": 667, |
| "token_acc": 0.9222160044767768 |
| }, |
| { |
| "epoch": 0.1409282700421941, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.93481941253618e-07, |
| "loss": 0.24552345275878906, |
| "step": 668, |
| "token_acc": 0.9327153110047847 |
| }, |
| { |
| "epoch": 0.14113924050632912, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.934547002339174e-07, |
| "loss": 0.2593832015991211, |
| "step": 669, |
| "token_acc": 0.9236835410836938 |
| }, |
| { |
| "epoch": 0.14135021097046413, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.93427402783495e-07, |
| "loss": 0.2546125054359436, |
| "step": 670, |
| "token_acc": 0.9320175438596491 |
| }, |
| { |
| "epoch": 0.14156118143459914, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.93400048905473e-07, |
| "loss": 0.27444595098495483, |
| "step": 671, |
| "token_acc": 0.9244940321743643 |
| }, |
| { |
| "epoch": 0.14177215189873418, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.93372638602979e-07, |
| "loss": 0.2675279378890991, |
| "step": 672, |
| "token_acc": 0.9254424136930665 |
| }, |
| { |
| "epoch": 0.1419831223628692, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.933451718791481e-07, |
| "loss": 0.22922030091285706, |
| "step": 673, |
| "token_acc": 0.9329147389292796 |
| }, |
| { |
| "epoch": 0.1421940928270042, |
| "grad_norm": 1.015625, |
| "learning_rate": 9.933176487371213e-07, |
| "loss": 0.3030126094818115, |
| "step": 674, |
| "token_acc": 0.9166399487015069 |
| }, |
| { |
| "epoch": 0.14240506329113925, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.932900691800457e-07, |
| "loss": 0.2756281793117523, |
| "step": 675, |
| "token_acc": 0.921146953405018 |
| }, |
| { |
| "epoch": 0.14261603375527426, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.932624332110758e-07, |
| "loss": 0.23098278045654297, |
| "step": 676, |
| "token_acc": 0.937677859988617 |
| }, |
| { |
| "epoch": 0.14282700421940928, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.932347408333715e-07, |
| "loss": 0.22887524962425232, |
| "step": 677, |
| "token_acc": 0.9341611319665031 |
| }, |
| { |
| "epoch": 0.14303797468354432, |
| "grad_norm": 0.98046875, |
| "learning_rate": 9.932069920501e-07, |
| "loss": 0.2759955823421478, |
| "step": 678, |
| "token_acc": 0.9240579710144927 |
| }, |
| { |
| "epoch": 0.14324894514767933, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.931791868644341e-07, |
| "loss": 0.2028590440750122, |
| "step": 679, |
| "token_acc": 0.9378352792679079 |
| }, |
| { |
| "epoch": 0.14345991561181434, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.931513252795543e-07, |
| "loss": 0.30346542596817017, |
| "step": 680, |
| "token_acc": 0.9129104062326099 |
| }, |
| { |
| "epoch": 0.14367088607594936, |
| "grad_norm": 0.98828125, |
| "learning_rate": 9.931234072986466e-07, |
| "loss": 0.27435851097106934, |
| "step": 681, |
| "token_acc": 0.9276353276353276 |
| }, |
| { |
| "epoch": 0.1438818565400844, |
| "grad_norm": 0.83203125, |
| "learning_rate": 9.930954329249032e-07, |
| "loss": 0.2799455523490906, |
| "step": 682, |
| "token_acc": 0.9241399588356366 |
| }, |
| { |
| "epoch": 0.1440928270042194, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.930674021615237e-07, |
| "loss": 0.28436923027038574, |
| "step": 683, |
| "token_acc": 0.9250146455770357 |
| }, |
| { |
| "epoch": 0.14430379746835442, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.930393150117133e-07, |
| "loss": 0.29506832361221313, |
| "step": 684, |
| "token_acc": 0.9232 |
| }, |
| { |
| "epoch": 0.14451476793248946, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.930111714786844e-07, |
| "loss": 0.27069365978240967, |
| "step": 685, |
| "token_acc": 0.926836079307456 |
| }, |
| { |
| "epoch": 0.14472573839662448, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.92982971565655e-07, |
| "loss": 0.21447613835334778, |
| "step": 686, |
| "token_acc": 0.9363425925925926 |
| }, |
| { |
| "epoch": 0.1449367088607595, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.929547152758505e-07, |
| "loss": 0.2686905264854431, |
| "step": 687, |
| "token_acc": 0.9303255282695603 |
| }, |
| { |
| "epoch": 0.1451476793248945, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.929264026125017e-07, |
| "loss": 0.27938973903656006, |
| "step": 688, |
| "token_acc": 0.92536881689326 |
| }, |
| { |
| "epoch": 0.14535864978902954, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.928980335788469e-07, |
| "loss": 0.2390938103199005, |
| "step": 689, |
| "token_acc": 0.9279638490164805 |
| }, |
| { |
| "epoch": 0.14556962025316456, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.928696081781299e-07, |
| "loss": 0.2756063640117645, |
| "step": 690, |
| "token_acc": 0.920317553660688 |
| }, |
| { |
| "epoch": 0.14578059071729957, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.928411264136017e-07, |
| "loss": 0.23743261396884918, |
| "step": 691, |
| "token_acc": 0.9318757921419518 |
| }, |
| { |
| "epoch": 0.1459915611814346, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.928125882885193e-07, |
| "loss": 0.2753446102142334, |
| "step": 692, |
| "token_acc": 0.9266131265577402 |
| }, |
| { |
| "epoch": 0.14620253164556962, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.927839938061461e-07, |
| "loss": 0.24559064209461212, |
| "step": 693, |
| "token_acc": 0.9276517473942366 |
| }, |
| { |
| "epoch": 0.14641350210970464, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.927553429697526e-07, |
| "loss": 0.24906222522258759, |
| "step": 694, |
| "token_acc": 0.9354838709677419 |
| }, |
| { |
| "epoch": 0.14662447257383968, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.92726635782615e-07, |
| "loss": 0.26196521520614624, |
| "step": 695, |
| "token_acc": 0.9294971487817522 |
| }, |
| { |
| "epoch": 0.1468354430379747, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.92697872248016e-07, |
| "loss": 0.28846031427383423, |
| "step": 696, |
| "token_acc": 0.9216428779493154 |
| }, |
| { |
| "epoch": 0.1470464135021097, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.926690523692454e-07, |
| "loss": 0.2781599164009094, |
| "step": 697, |
| "token_acc": 0.9191949534394713 |
| }, |
| { |
| "epoch": 0.14725738396624471, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.926401761495986e-07, |
| "loss": 0.24464154243469238, |
| "step": 698, |
| "token_acc": 0.9295774647887324 |
| }, |
| { |
| "epoch": 0.14746835443037976, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.926112435923778e-07, |
| "loss": 0.24627582728862762, |
| "step": 699, |
| "token_acc": 0.9308590242442383 |
| }, |
| { |
| "epoch": 0.14767932489451477, |
| "grad_norm": 0.80078125, |
| "learning_rate": 9.92582254700892e-07, |
| "loss": 0.27795839309692383, |
| "step": 700, |
| "token_acc": 0.9238838084991932 |
| }, |
| { |
| "epoch": 0.14789029535864978, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.925532094784563e-07, |
| "loss": 0.24588271975517273, |
| "step": 701, |
| "token_acc": 0.9284253578732107 |
| }, |
| { |
| "epoch": 0.14810126582278482, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.92524107928392e-07, |
| "loss": 0.24630197882652283, |
| "step": 702, |
| "token_acc": 0.9327267714364489 |
| }, |
| { |
| "epoch": 0.14831223628691984, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.924949500540275e-07, |
| "loss": 0.2578659653663635, |
| "step": 703, |
| "token_acc": 0.9267192784667418 |
| }, |
| { |
| "epoch": 0.14852320675105485, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.924657358586967e-07, |
| "loss": 0.25091686844825745, |
| "step": 704, |
| "token_acc": 0.9329545454545455 |
| }, |
| { |
| "epoch": 0.14873417721518986, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.924364653457411e-07, |
| "loss": 0.2511135935783386, |
| "step": 705, |
| "token_acc": 0.9301768055139347 |
| }, |
| { |
| "epoch": 0.1489451476793249, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.924071385185075e-07, |
| "loss": 0.2616545259952545, |
| "step": 706, |
| "token_acc": 0.927741935483871 |
| }, |
| { |
| "epoch": 0.14915611814345991, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.9237775538035e-07, |
| "loss": 0.2902517318725586, |
| "step": 707, |
| "token_acc": 0.9226774379688402 |
| }, |
| { |
| "epoch": 0.14936708860759493, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.92348315934629e-07, |
| "loss": 0.27046293020248413, |
| "step": 708, |
| "token_acc": 0.9296824368114064 |
| }, |
| { |
| "epoch": 0.14957805907172997, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.923188201847107e-07, |
| "loss": 0.20588457584381104, |
| "step": 709, |
| "token_acc": 0.9350493864112541 |
| }, |
| { |
| "epoch": 0.14978902953586498, |
| "grad_norm": 0.83984375, |
| "learning_rate": 9.92289268133968e-07, |
| "loss": 0.25359445810317993, |
| "step": 710, |
| "token_acc": 0.9285503395335105 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.922596597857811e-07, |
| "loss": 0.267612099647522, |
| "step": 711, |
| "token_acc": 0.9265569917743831 |
| }, |
| { |
| "epoch": 0.150210970464135, |
| "grad_norm": 0.9140625, |
| "learning_rate": 9.922299951435357e-07, |
| "loss": 0.2501794993877411, |
| "step": 712, |
| "token_acc": 0.9300184162062615 |
| }, |
| { |
| "epoch": 0.15042194092827005, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.922002742106242e-07, |
| "loss": 0.2614431381225586, |
| "step": 713, |
| "token_acc": 0.9250471825289835 |
| }, |
| { |
| "epoch": 0.15063291139240506, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.921704969904453e-07, |
| "loss": 0.2227068841457367, |
| "step": 714, |
| "token_acc": 0.934162192709805 |
| }, |
| { |
| "epoch": 0.15084388185654007, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.92140663486404e-07, |
| "loss": 0.2870650887489319, |
| "step": 715, |
| "token_acc": 0.9198347107438016 |
| }, |
| { |
| "epoch": 0.15105485232067511, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.92110773701913e-07, |
| "loss": 0.24414213001728058, |
| "step": 716, |
| "token_acc": 0.9290909090909091 |
| }, |
| { |
| "epoch": 0.15126582278481013, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.920808276403893e-07, |
| "loss": 0.27001482248306274, |
| "step": 717, |
| "token_acc": 0.9276070094800345 |
| }, |
| { |
| "epoch": 0.15147679324894514, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.920508253052584e-07, |
| "loss": 0.24048057198524475, |
| "step": 718, |
| "token_acc": 0.9305245535714286 |
| }, |
| { |
| "epoch": 0.15168776371308018, |
| "grad_norm": 0.9921875, |
| "learning_rate": 9.92020766699951e-07, |
| "loss": 0.267702579498291, |
| "step": 719, |
| "token_acc": 0.9254603916983338 |
| }, |
| { |
| "epoch": 0.1518987341772152, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.919906518279043e-07, |
| "loss": 0.23744544386863708, |
| "step": 720, |
| "token_acc": 0.9312857886517438 |
| }, |
| { |
| "epoch": 0.1521097046413502, |
| "grad_norm": 0.85546875, |
| "learning_rate": 9.919604806925623e-07, |
| "loss": 0.2514658570289612, |
| "step": 721, |
| "token_acc": 0.9293015332197615 |
| }, |
| { |
| "epoch": 0.15232067510548522, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.919302532973754e-07, |
| "loss": 0.2536316215991974, |
| "step": 722, |
| "token_acc": 0.9287620064034151 |
| }, |
| { |
| "epoch": 0.15253164556962026, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.918999696458006e-07, |
| "loss": 0.23538361489772797, |
| "step": 723, |
| "token_acc": 0.9311200744647844 |
| }, |
| { |
| "epoch": 0.15274261603375527, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.918696297413008e-07, |
| "loss": 0.2112676054239273, |
| "step": 724, |
| "token_acc": 0.9377406931964056 |
| }, |
| { |
| "epoch": 0.1529535864978903, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.918392335873457e-07, |
| "loss": 0.22141136229038239, |
| "step": 725, |
| "token_acc": 0.9383989145183175 |
| }, |
| { |
| "epoch": 0.15316455696202533, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.91808781187411e-07, |
| "loss": 0.27773499488830566, |
| "step": 726, |
| "token_acc": 0.9278820375335121 |
| }, |
| { |
| "epoch": 0.15337552742616034, |
| "grad_norm": 0.94921875, |
| "learning_rate": 9.917782725449799e-07, |
| "loss": 0.32096052169799805, |
| "step": 727, |
| "token_acc": 0.9175288205734555 |
| }, |
| { |
| "epoch": 0.15358649789029535, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.91747707663541e-07, |
| "loss": 0.2451501190662384, |
| "step": 728, |
| "token_acc": 0.9271503803393798 |
| }, |
| { |
| "epoch": 0.15379746835443037, |
| "grad_norm": 1.546875, |
| "learning_rate": 9.917170865465894e-07, |
| "loss": 0.29911404848098755, |
| "step": 729, |
| "token_acc": 0.9180237372343362 |
| }, |
| { |
| "epoch": 0.1540084388185654, |
| "grad_norm": 0.953125, |
| "learning_rate": 9.91686409197627e-07, |
| "loss": 0.3019820749759674, |
| "step": 730, |
| "token_acc": 0.9172510518934082 |
| }, |
| { |
| "epoch": 0.15421940928270042, |
| "grad_norm": 1.1484375, |
| "learning_rate": 9.916556756201624e-07, |
| "loss": 0.281706839799881, |
| "step": 731, |
| "token_acc": 0.9272495213784301 |
| }, |
| { |
| "epoch": 0.15443037974683543, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.916248858177099e-07, |
| "loss": 0.27722233533859253, |
| "step": 732, |
| "token_acc": 0.9146039603960396 |
| }, |
| { |
| "epoch": 0.15464135021097047, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.915940397937906e-07, |
| "loss": 0.29295605421066284, |
| "step": 733, |
| "token_acc": 0.9225071225071225 |
| }, |
| { |
| "epoch": 0.1548523206751055, |
| "grad_norm": 0.80078125, |
| "learning_rate": 9.91563137551932e-07, |
| "loss": 0.24334131181240082, |
| "step": 734, |
| "token_acc": 0.9294417682062908 |
| }, |
| { |
| "epoch": 0.1550632911392405, |
| "grad_norm": 0.81640625, |
| "learning_rate": 9.91532179095668e-07, |
| "loss": 0.2572481334209442, |
| "step": 735, |
| "token_acc": 0.9311695579182988 |
| }, |
| { |
| "epoch": 0.15527426160337554, |
| "grad_norm": 0.75, |
| "learning_rate": 9.915011644285391e-07, |
| "loss": 0.26280131936073303, |
| "step": 736, |
| "token_acc": 0.9256795835743205 |
| }, |
| { |
| "epoch": 0.15548523206751055, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.91470093554092e-07, |
| "loss": 0.23156148195266724, |
| "step": 737, |
| "token_acc": 0.9377058999700509 |
| }, |
| { |
| "epoch": 0.15569620253164557, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.914389664758799e-07, |
| "loss": 0.24967870116233826, |
| "step": 738, |
| "token_acc": 0.9284906726964387 |
| }, |
| { |
| "epoch": 0.15590717299578058, |
| "grad_norm": 0.6484375, |
| "learning_rate": 9.914077831974626e-07, |
| "loss": 0.24829944968223572, |
| "step": 739, |
| "token_acc": 0.931044267877412 |
| }, |
| { |
| "epoch": 0.15611814345991562, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.91376543722406e-07, |
| "loss": 0.24180346727371216, |
| "step": 740, |
| "token_acc": 0.9371293001186239 |
| }, |
| { |
| "epoch": 0.15632911392405063, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.913452480542825e-07, |
| "loss": 0.26637858152389526, |
| "step": 741, |
| "token_acc": 0.9162153552086651 |
| }, |
| { |
| "epoch": 0.15654008438818565, |
| "grad_norm": 0.5625, |
| "learning_rate": 9.913138961966715e-07, |
| "loss": 0.22019389271736145, |
| "step": 742, |
| "token_acc": 0.9336415556159913 |
| }, |
| { |
| "epoch": 0.1567510548523207, |
| "grad_norm": 0.83203125, |
| "learning_rate": 9.912824881531577e-07, |
| "loss": 0.2972027361392975, |
| "step": 743, |
| "token_acc": 0.9233983286908078 |
| }, |
| { |
| "epoch": 0.1569620253164557, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.912510239273332e-07, |
| "loss": 0.26124250888824463, |
| "step": 744, |
| "token_acc": 0.9267277268942548 |
| }, |
| { |
| "epoch": 0.1571729957805907, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.912195035227964e-07, |
| "loss": 0.32723483443260193, |
| "step": 745, |
| "token_acc": 0.9195718654434251 |
| }, |
| { |
| "epoch": 0.15738396624472573, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.911879269431517e-07, |
| "loss": 0.23630690574645996, |
| "step": 746, |
| "token_acc": 0.9347326049453709 |
| }, |
| { |
| "epoch": 0.15759493670886077, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.911562941920099e-07, |
| "loss": 0.21784129738807678, |
| "step": 747, |
| "token_acc": 0.9337892446378614 |
| }, |
| { |
| "epoch": 0.15780590717299578, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.911246052729891e-07, |
| "loss": 0.26233282685279846, |
| "step": 748, |
| "token_acc": 0.9323260937991816 |
| }, |
| { |
| "epoch": 0.1580168776371308, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.910928601897126e-07, |
| "loss": 0.2327466756105423, |
| "step": 749, |
| "token_acc": 0.9362054681027341 |
| }, |
| { |
| "epoch": 0.15822784810126583, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.91061058945811e-07, |
| "loss": 0.27062827348709106, |
| "step": 750, |
| "token_acc": 0.918719909374115 |
| }, |
| { |
| "epoch": 0.15843881856540085, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.910292015449211e-07, |
| "loss": 0.20303724706172943, |
| "step": 751, |
| "token_acc": 0.9412310547479121 |
| }, |
| { |
| "epoch": 0.15864978902953586, |
| "grad_norm": 0.65234375, |
| "learning_rate": 9.909972879906858e-07, |
| "loss": 0.24677664041519165, |
| "step": 752, |
| "token_acc": 0.925 |
| }, |
| { |
| "epoch": 0.15886075949367087, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.90965318286755e-07, |
| "loss": 0.2709593176841736, |
| "step": 753, |
| "token_acc": 0.9230769230769231 |
| }, |
| { |
| "epoch": 0.1590717299578059, |
| "grad_norm": 0.99609375, |
| "learning_rate": 9.909332924367846e-07, |
| "loss": 0.265384703874588, |
| "step": 754, |
| "token_acc": 0.9230769230769231 |
| }, |
| { |
| "epoch": 0.15928270042194093, |
| "grad_norm": 0.80078125, |
| "learning_rate": 9.909012104444368e-07, |
| "loss": 0.2868095636367798, |
| "step": 755, |
| "token_acc": 0.920038228735266 |
| }, |
| { |
| "epoch": 0.15949367088607594, |
| "grad_norm": 0.81640625, |
| "learning_rate": 9.908690723133807e-07, |
| "loss": 0.24986404180526733, |
| "step": 756, |
| "token_acc": 0.9256695756846224 |
| }, |
| { |
| "epoch": 0.15970464135021098, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.908368780472916e-07, |
| "loss": 0.20347082614898682, |
| "step": 757, |
| "token_acc": 0.9389263902282224 |
| }, |
| { |
| "epoch": 0.159915611814346, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.908046276498511e-07, |
| "loss": 0.2612215578556061, |
| "step": 758, |
| "token_acc": 0.9279416235937975 |
| }, |
| { |
| "epoch": 0.160126582278481, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.907723211247472e-07, |
| "loss": 0.23647598922252655, |
| "step": 759, |
| "token_acc": 0.9354395604395604 |
| }, |
| { |
| "epoch": 0.16033755274261605, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.907399584756744e-07, |
| "loss": 0.28146815299987793, |
| "step": 760, |
| "token_acc": 0.92171219374824 |
| }, |
| { |
| "epoch": 0.16054852320675106, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.90707539706334e-07, |
| "loss": 0.2895510494709015, |
| "step": 761, |
| "token_acc": 0.9175170068027211 |
| }, |
| { |
| "epoch": 0.16075949367088607, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.90675064820433e-07, |
| "loss": 0.25281795859336853, |
| "step": 762, |
| "token_acc": 0.9276477832512315 |
| }, |
| { |
| "epoch": 0.16097046413502109, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.906425338216852e-07, |
| "loss": 0.2702397108078003, |
| "step": 763, |
| "token_acc": 0.9337220006136852 |
| }, |
| { |
| "epoch": 0.16118143459915613, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.906099467138111e-07, |
| "loss": 0.3201596736907959, |
| "step": 764, |
| "token_acc": 0.9149093599704032 |
| }, |
| { |
| "epoch": 0.16139240506329114, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.90577303500537e-07, |
| "loss": 0.26031017303466797, |
| "step": 765, |
| "token_acc": 0.922656699252444 |
| }, |
| { |
| "epoch": 0.16160337552742615, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.90544604185596e-07, |
| "loss": 0.2320261150598526, |
| "step": 766, |
| "token_acc": 0.933118216485773 |
| }, |
| { |
| "epoch": 0.1618143459915612, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.905118487727277e-07, |
| "loss": 0.2794190049171448, |
| "step": 767, |
| "token_acc": 0.9201467268623025 |
| }, |
| { |
| "epoch": 0.1620253164556962, |
| "grad_norm": 0.84375, |
| "learning_rate": 9.904790372656778e-07, |
| "loss": 0.2765384018421173, |
| "step": 768, |
| "token_acc": 0.9225014961101137 |
| }, |
| { |
| "epoch": 0.16223628691983122, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.904461696681984e-07, |
| "loss": 0.3068510890007019, |
| "step": 769, |
| "token_acc": 0.9177502267916541 |
| }, |
| { |
| "epoch": 0.16244725738396623, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.904132459840485e-07, |
| "loss": 0.28465330600738525, |
| "step": 770, |
| "token_acc": 0.9240544629349471 |
| }, |
| { |
| "epoch": 0.16265822784810127, |
| "grad_norm": 0.75, |
| "learning_rate": 9.903802662169932e-07, |
| "loss": 0.2329617142677307, |
| "step": 771, |
| "token_acc": 0.9319875776397516 |
| }, |
| { |
| "epoch": 0.16286919831223629, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.903472303708038e-07, |
| "loss": 0.2284744679927826, |
| "step": 772, |
| "token_acc": 0.931237721021611 |
| }, |
| { |
| "epoch": 0.1630801687763713, |
| "grad_norm": 1.140625, |
| "learning_rate": 9.903141384492583e-07, |
| "loss": 0.23831237852573395, |
| "step": 773, |
| "token_acc": 0.9291455790413814 |
| }, |
| { |
| "epoch": 0.16329113924050634, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.902809904561414e-07, |
| "loss": 0.23870491981506348, |
| "step": 774, |
| "token_acc": 0.9348308374930671 |
| }, |
| { |
| "epoch": 0.16350210970464135, |
| "grad_norm": 0.75, |
| "learning_rate": 9.902477863952431e-07, |
| "loss": 0.27838945388793945, |
| "step": 775, |
| "token_acc": 0.9261146496815287 |
| }, |
| { |
| "epoch": 0.16371308016877636, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.902145262703613e-07, |
| "loss": 0.2492181956768036, |
| "step": 776, |
| "token_acc": 0.9293759512937595 |
| }, |
| { |
| "epoch": 0.1639240506329114, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.901812100852993e-07, |
| "loss": 0.2085292637348175, |
| "step": 777, |
| "token_acc": 0.9389517569982132 |
| }, |
| { |
| "epoch": 0.16413502109704642, |
| "grad_norm": 0.953125, |
| "learning_rate": 9.90147837843867e-07, |
| "loss": 0.2694481909275055, |
| "step": 778, |
| "token_acc": 0.9280432309442548 |
| }, |
| { |
| "epoch": 0.16434599156118143, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.901144095498808e-07, |
| "loss": 0.25209715962409973, |
| "step": 779, |
| "token_acc": 0.9302030456852792 |
| }, |
| { |
| "epoch": 0.16455696202531644, |
| "grad_norm": 0.88671875, |
| "learning_rate": 9.900809252071635e-07, |
| "loss": 0.31358861923217773, |
| "step": 780, |
| "token_acc": 0.9151069518716578 |
| }, |
| { |
| "epoch": 0.16476793248945149, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.900473848195446e-07, |
| "loss": 0.23959940671920776, |
| "step": 781, |
| "token_acc": 0.9326456310679612 |
| }, |
| { |
| "epoch": 0.1649789029535865, |
| "grad_norm": 0.80078125, |
| "learning_rate": 9.900137883908592e-07, |
| "loss": 0.29789382219314575, |
| "step": 782, |
| "token_acc": 0.9166666666666666 |
| }, |
| { |
| "epoch": 0.1651898734177215, |
| "grad_norm": 0.796875, |
| "learning_rate": 9.8998013592495e-07, |
| "loss": 0.22469905018806458, |
| "step": 783, |
| "token_acc": 0.933932193567082 |
| }, |
| { |
| "epoch": 0.16540084388185655, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.89946427425665e-07, |
| "loss": 0.28561171889305115, |
| "step": 784, |
| "token_acc": 0.9251565167899829 |
| }, |
| { |
| "epoch": 0.16561181434599156, |
| "grad_norm": 0.90625, |
| "learning_rate": 9.89912662896859e-07, |
| "loss": 0.28935301303863525, |
| "step": 785, |
| "token_acc": 0.9215164615896242 |
| }, |
| { |
| "epoch": 0.16582278481012658, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.898788423423935e-07, |
| "loss": 0.2708919048309326, |
| "step": 786, |
| "token_acc": 0.927591706539075 |
| }, |
| { |
| "epoch": 0.1660337552742616, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.898449657661362e-07, |
| "loss": 0.2672666311264038, |
| "step": 787, |
| "token_acc": 0.9263598326359833 |
| }, |
| { |
| "epoch": 0.16624472573839663, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.89811033171961e-07, |
| "loss": 0.2862321734428406, |
| "step": 788, |
| "token_acc": 0.9180470793374019 |
| }, |
| { |
| "epoch": 0.16645569620253164, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.897770445637483e-07, |
| "loss": 0.2871711850166321, |
| "step": 789, |
| "token_acc": 0.9249183895538629 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.897429999453852e-07, |
| "loss": 0.2397966980934143, |
| "step": 790, |
| "token_acc": 0.9377081945369754 |
| }, |
| { |
| "epoch": 0.1668776371308017, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.89708899320765e-07, |
| "loss": 0.2998412847518921, |
| "step": 791, |
| "token_acc": 0.9202363367799113 |
| }, |
| { |
| "epoch": 0.1670886075949367, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.89674742693787e-07, |
| "loss": 0.27035748958587646, |
| "step": 792, |
| "token_acc": 0.9229891614375356 |
| }, |
| { |
| "epoch": 0.16729957805907172, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.89640530068358e-07, |
| "loss": 0.17933598160743713, |
| "step": 793, |
| "token_acc": 0.9483188044831881 |
| }, |
| { |
| "epoch": 0.16751054852320676, |
| "grad_norm": 0.91796875, |
| "learning_rate": 9.896062614483898e-07, |
| "loss": 0.2540227472782135, |
| "step": 794, |
| "token_acc": 0.929927414852038 |
| }, |
| { |
| "epoch": 0.16772151898734178, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.895719368378016e-07, |
| "loss": 0.24861711263656616, |
| "step": 795, |
| "token_acc": 0.9333129397369226 |
| }, |
| { |
| "epoch": 0.1679324894514768, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.89537556240519e-07, |
| "loss": 0.2633477449417114, |
| "step": 796, |
| "token_acc": 0.9196560924992588 |
| }, |
| { |
| "epoch": 0.1681434599156118, |
| "grad_norm": 0.8671875, |
| "learning_rate": 9.89503119660473e-07, |
| "loss": 0.2524856925010681, |
| "step": 797, |
| "token_acc": 0.9313725490196079 |
| }, |
| { |
| "epoch": 0.16835443037974684, |
| "grad_norm": 0.91796875, |
| "learning_rate": 9.894686271016027e-07, |
| "loss": 0.30388563871383667, |
| "step": 798, |
| "token_acc": 0.9198352344740177 |
| }, |
| { |
| "epoch": 0.16856540084388186, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.894340785678517e-07, |
| "loss": 0.2910333573818207, |
| "step": 799, |
| "token_acc": 0.9234449760765551 |
| }, |
| { |
| "epoch": 0.16877637130801687, |
| "grad_norm": 0.859375, |
| "learning_rate": 9.893994740631713e-07, |
| "loss": 0.25983309745788574, |
| "step": 800, |
| "token_acc": 0.9273416982783775 |
| }, |
| { |
| "epoch": 0.16877637130801687, |
| "eval_loss": 0.43369975686073303, |
| "eval_runtime": 245.7245, |
| "eval_samples_per_second": 137.166, |
| "eval_steps_per_second": 2.145, |
| "eval_token_acc": 0.8991631517544647, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.1689873417721519, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.893648135915188e-07, |
| "loss": 0.26705414056777954, |
| "step": 801, |
| "token_acc": 0.9297355062783863 |
| }, |
| { |
| "epoch": 0.16919831223628692, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.893300971568578e-07, |
| "loss": 0.2386769950389862, |
| "step": 802, |
| "token_acc": 0.935367545076283 |
| }, |
| { |
| "epoch": 0.16940928270042194, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.892953247631589e-07, |
| "loss": 0.2654857337474823, |
| "step": 803, |
| "token_acc": 0.9260304912478825 |
| }, |
| { |
| "epoch": 0.16962025316455695, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.89260496414398e-07, |
| "loss": 0.2585882842540741, |
| "step": 804, |
| "token_acc": 0.9277403551745255 |
| }, |
| { |
| "epoch": 0.169831223628692, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.892256121145584e-07, |
| "loss": 0.25679513812065125, |
| "step": 805, |
| "token_acc": 0.928305133352452 |
| }, |
| { |
| "epoch": 0.170042194092827, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.891906718676291e-07, |
| "loss": 0.269248902797699, |
| "step": 806, |
| "token_acc": 0.9328318108543794 |
| }, |
| { |
| "epoch": 0.17025316455696202, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.89155675677606e-07, |
| "loss": 0.2557290196418762, |
| "step": 807, |
| "token_acc": 0.9275456919060052 |
| }, |
| { |
| "epoch": 0.17046413502109706, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.891206235484913e-07, |
| "loss": 0.23980513215065002, |
| "step": 808, |
| "token_acc": 0.9353355807539074 |
| }, |
| { |
| "epoch": 0.17067510548523207, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.890855154842935e-07, |
| "loss": 0.2392064481973648, |
| "step": 809, |
| "token_acc": 0.9303870595031773 |
| }, |
| { |
| "epoch": 0.17088607594936708, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.890503514890275e-07, |
| "loss": 0.23739401996135712, |
| "step": 810, |
| "token_acc": 0.9328014728444308 |
| }, |
| { |
| "epoch": 0.1710970464135021, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.89015131566714e-07, |
| "loss": 0.2900955080986023, |
| "step": 811, |
| "token_acc": 0.9233797698364627 |
| }, |
| { |
| "epoch": 0.17130801687763714, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.889798557213818e-07, |
| "loss": 0.27924585342407227, |
| "step": 812, |
| "token_acc": 0.9214012363850457 |
| }, |
| { |
| "epoch": 0.17151898734177215, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.88944523957064e-07, |
| "loss": 0.2851244807243347, |
| "step": 813, |
| "token_acc": 0.9264660254421346 |
| }, |
| { |
| "epoch": 0.17172995780590716, |
| "grad_norm": 1.1875, |
| "learning_rate": 9.889091362778017e-07, |
| "loss": 0.23967793583869934, |
| "step": 814, |
| "token_acc": 0.9278113316077078 |
| }, |
| { |
| "epoch": 0.1719409282700422, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.888736926876415e-07, |
| "loss": 0.23629070818424225, |
| "step": 815, |
| "token_acc": 0.9284259528658714 |
| }, |
| { |
| "epoch": 0.17215189873417722, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.88838193190637e-07, |
| "loss": 0.3324328064918518, |
| "step": 816, |
| "token_acc": 0.9166411277965063 |
| }, |
| { |
| "epoch": 0.17236286919831223, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.888026377908472e-07, |
| "loss": 0.2603840231895447, |
| "step": 817, |
| "token_acc": 0.9286516853932584 |
| }, |
| { |
| "epoch": 0.17257383966244727, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.887670264923387e-07, |
| "loss": 0.25500792264938354, |
| "step": 818, |
| "token_acc": 0.9334559950935296 |
| }, |
| { |
| "epoch": 0.17278481012658228, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.88731359299184e-07, |
| "loss": 0.25971531867980957, |
| "step": 819, |
| "token_acc": 0.928698752228164 |
| }, |
| { |
| "epoch": 0.1729957805907173, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.886956362154617e-07, |
| "loss": 0.2521659731864929, |
| "step": 820, |
| "token_acc": 0.9252336448598131 |
| }, |
| { |
| "epoch": 0.1732067510548523, |
| "grad_norm": 1.0078125, |
| "learning_rate": 9.88659857245257e-07, |
| "loss": 0.23598764836788177, |
| "step": 821, |
| "token_acc": 0.9364791288566243 |
| }, |
| { |
| "epoch": 0.17341772151898735, |
| "grad_norm": 2.28125, |
| "learning_rate": 9.886240223926617e-07, |
| "loss": 0.2466164529323578, |
| "step": 822, |
| "token_acc": 0.9289433384379786 |
| }, |
| { |
| "epoch": 0.17362869198312236, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.88588131661774e-07, |
| "loss": 0.2287391722202301, |
| "step": 823, |
| "token_acc": 0.9339788732394366 |
| }, |
| { |
| "epoch": 0.17383966244725738, |
| "grad_norm": 0.9921875, |
| "learning_rate": 9.885521850566977e-07, |
| "loss": 0.3011782765388489, |
| "step": 824, |
| "token_acc": 0.9158669225847729 |
| }, |
| { |
| "epoch": 0.17405063291139242, |
| "grad_norm": 0.88671875, |
| "learning_rate": 9.88516182581544e-07, |
| "loss": 0.24732713401317596, |
| "step": 825, |
| "token_acc": 0.9297990096125838 |
| }, |
| { |
| "epoch": 0.17426160337552743, |
| "grad_norm": 0.8046875, |
| "learning_rate": 9.884801242404303e-07, |
| "loss": 0.2557525038719177, |
| "step": 826, |
| "token_acc": 0.9272430668841762 |
| }, |
| { |
| "epoch": 0.17447257383966244, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.884440100374798e-07, |
| "loss": 0.2030971348285675, |
| "step": 827, |
| "token_acc": 0.9397905759162304 |
| }, |
| { |
| "epoch": 0.17468354430379746, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.884078399768226e-07, |
| "loss": 0.23457200825214386, |
| "step": 828, |
| "token_acc": 0.9320594479830149 |
| }, |
| { |
| "epoch": 0.1748945147679325, |
| "grad_norm": 0.75, |
| "learning_rate": 9.88371614062595e-07, |
| "loss": 0.27027198672294617, |
| "step": 829, |
| "token_acc": 0.9281524926686217 |
| }, |
| { |
| "epoch": 0.1751054852320675, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.8833533229894e-07, |
| "loss": 0.2117438018321991, |
| "step": 830, |
| "token_acc": 0.9373803664205633 |
| }, |
| { |
| "epoch": 0.17531645569620252, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.882989946900063e-07, |
| "loss": 0.21879255771636963, |
| "step": 831, |
| "token_acc": 0.9381270903010034 |
| }, |
| { |
| "epoch": 0.17552742616033756, |
| "grad_norm": 0.75, |
| "learning_rate": 9.882626012399495e-07, |
| "loss": 0.27527743577957153, |
| "step": 832, |
| "token_acc": 0.9288235294117647 |
| }, |
| { |
| "epoch": 0.17573839662447258, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.882261519529318e-07, |
| "loss": 0.2788648307323456, |
| "step": 833, |
| "token_acc": 0.9233587786259542 |
| }, |
| { |
| "epoch": 0.1759493670886076, |
| "grad_norm": 0.75, |
| "learning_rate": 9.881896468331215e-07, |
| "loss": 0.26945775747299194, |
| "step": 834, |
| "token_acc": 0.9267376330619912 |
| }, |
| { |
| "epoch": 0.17616033755274263, |
| "grad_norm": 0.89453125, |
| "learning_rate": 9.881530858846928e-07, |
| "loss": 0.30408790707588196, |
| "step": 835, |
| "token_acc": 0.9146719234018587 |
| }, |
| { |
| "epoch": 0.17637130801687764, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.88116469111827e-07, |
| "loss": 0.2584350109100342, |
| "step": 836, |
| "token_acc": 0.9246458923512748 |
| }, |
| { |
| "epoch": 0.17658227848101266, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.880797965187119e-07, |
| "loss": 0.3014131784439087, |
| "step": 837, |
| "token_acc": 0.9151281344723065 |
| }, |
| { |
| "epoch": 0.17679324894514767, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.880430681095407e-07, |
| "loss": 0.21773123741149902, |
| "step": 838, |
| "token_acc": 0.9367166004280036 |
| }, |
| { |
| "epoch": 0.1770042194092827, |
| "grad_norm": 1.9609375, |
| "learning_rate": 9.88006283888514e-07, |
| "loss": 0.26315808296203613, |
| "step": 839, |
| "token_acc": 0.9259569712210115 |
| }, |
| { |
| "epoch": 0.17721518987341772, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.879694438598383e-07, |
| "loss": 0.2646620571613312, |
| "step": 840, |
| "token_acc": 0.9318522966076254 |
| }, |
| { |
| "epoch": 0.17742616033755274, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.879325480277266e-07, |
| "loss": 0.2713755965232849, |
| "step": 841, |
| "token_acc": 0.9206214689265537 |
| }, |
| { |
| "epoch": 0.17763713080168778, |
| "grad_norm": 0.859375, |
| "learning_rate": 9.878955963963979e-07, |
| "loss": 0.26667535305023193, |
| "step": 842, |
| "token_acc": 0.9262686567164179 |
| }, |
| { |
| "epoch": 0.1778481012658228, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.878585889700785e-07, |
| "loss": 0.24986431002616882, |
| "step": 843, |
| "token_acc": 0.9370695053224797 |
| }, |
| { |
| "epoch": 0.1780590717299578, |
| "grad_norm": 1.4140625, |
| "learning_rate": 9.878215257530004e-07, |
| "loss": 0.2651556730270386, |
| "step": 844, |
| "token_acc": 0.9286946520989074 |
| }, |
| { |
| "epoch": 0.17827004219409281, |
| "grad_norm": 0.828125, |
| "learning_rate": 9.877844067494017e-07, |
| "loss": 0.2608075737953186, |
| "step": 845, |
| "token_acc": 0.9263001485884101 |
| }, |
| { |
| "epoch": 0.17848101265822786, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.877472319635275e-07, |
| "loss": 0.28958860039711, |
| "step": 846, |
| "token_acc": 0.9233128834355828 |
| }, |
| { |
| "epoch": 0.17869198312236287, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.877100013996291e-07, |
| "loss": 0.2941049039363861, |
| "step": 847, |
| "token_acc": 0.9213372664700098 |
| }, |
| { |
| "epoch": 0.17890295358649788, |
| "grad_norm": 0.84375, |
| "learning_rate": 9.876727150619642e-07, |
| "loss": 0.2620714604854584, |
| "step": 848, |
| "token_acc": 0.9288433382137629 |
| }, |
| { |
| "epoch": 0.17911392405063292, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.876353729547968e-07, |
| "loss": 0.2020449936389923, |
| "step": 849, |
| "token_acc": 0.9392366412213741 |
| }, |
| { |
| "epoch": 0.17932489451476794, |
| "grad_norm": 0.953125, |
| "learning_rate": 9.875979750823969e-07, |
| "loss": 0.2892880439758301, |
| "step": 850, |
| "token_acc": 0.9246448424953675 |
| }, |
| { |
| "epoch": 0.17953586497890295, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.875605214490417e-07, |
| "loss": 0.2778629660606384, |
| "step": 851, |
| "token_acc": 0.923582580115037 |
| }, |
| { |
| "epoch": 0.17974683544303796, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.875230120590142e-07, |
| "loss": 0.23370903730392456, |
| "step": 852, |
| "token_acc": 0.9249401117913228 |
| }, |
| { |
| "epoch": 0.179957805907173, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.874854469166038e-07, |
| "loss": 0.28334856033325195, |
| "step": 853, |
| "token_acc": 0.9264705882352942 |
| }, |
| { |
| "epoch": 0.18016877637130801, |
| "grad_norm": 0.55078125, |
| "learning_rate": 9.874478260261067e-07, |
| "loss": 0.2282511293888092, |
| "step": 854, |
| "token_acc": 0.933620159803319 |
| }, |
| { |
| "epoch": 0.18037974683544303, |
| "grad_norm": 1.1015625, |
| "learning_rate": 9.874101493918249e-07, |
| "loss": 0.27366286516189575, |
| "step": 855, |
| "token_acc": 0.9260089686098655 |
| }, |
| { |
| "epoch": 0.18059071729957807, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.87372417018067e-07, |
| "loss": 0.25619056820869446, |
| "step": 856, |
| "token_acc": 0.9255610290093049 |
| }, |
| { |
| "epoch": 0.18080168776371308, |
| "grad_norm": 0.828125, |
| "learning_rate": 9.873346289091483e-07, |
| "loss": 0.270757257938385, |
| "step": 857, |
| "token_acc": 0.923998738568275 |
| }, |
| { |
| "epoch": 0.1810126582278481, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.8729678506939e-07, |
| "loss": 0.28628918528556824, |
| "step": 858, |
| "token_acc": 0.9234957020057306 |
| }, |
| { |
| "epoch": 0.18122362869198314, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.872588855031197e-07, |
| "loss": 0.2525092661380768, |
| "step": 859, |
| "token_acc": 0.9317073170731708 |
| }, |
| { |
| "epoch": 0.18143459915611815, |
| "grad_norm": 0.83984375, |
| "learning_rate": 9.872209302146718e-07, |
| "loss": 0.28244319558143616, |
| "step": 860, |
| "token_acc": 0.9267202859696158 |
| }, |
| { |
| "epoch": 0.18164556962025316, |
| "grad_norm": 0.828125, |
| "learning_rate": 9.871829192083867e-07, |
| "loss": 0.254133403301239, |
| "step": 861, |
| "token_acc": 0.928436911487759 |
| }, |
| { |
| "epoch": 0.18185654008438817, |
| "grad_norm": 0.75, |
| "learning_rate": 9.871448524886113e-07, |
| "loss": 0.2619815468788147, |
| "step": 862, |
| "token_acc": 0.9243888573052871 |
| }, |
| { |
| "epoch": 0.18206751054852321, |
| "grad_norm": 0.91796875, |
| "learning_rate": 9.87106730059699e-07, |
| "loss": 0.2682092487812042, |
| "step": 863, |
| "token_acc": 0.9261158021712907 |
| }, |
| { |
| "epoch": 0.18227848101265823, |
| "grad_norm": 0.6015625, |
| "learning_rate": 9.870685519260092e-07, |
| "loss": 0.245108962059021, |
| "step": 864, |
| "token_acc": 0.9272880404267265 |
| }, |
| { |
| "epoch": 0.18248945147679324, |
| "grad_norm": 0.9609375, |
| "learning_rate": 9.870303180919078e-07, |
| "loss": 0.2907876670360565, |
| "step": 865, |
| "token_acc": 0.9245337159253946 |
| }, |
| { |
| "epoch": 0.18270042194092828, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.869920285617676e-07, |
| "loss": 0.24249601364135742, |
| "step": 866, |
| "token_acc": 0.931304347826087 |
| }, |
| { |
| "epoch": 0.1829113924050633, |
| "grad_norm": 0.83203125, |
| "learning_rate": 9.869536833399669e-07, |
| "loss": 0.2370653748512268, |
| "step": 867, |
| "token_acc": 0.9323520200438459 |
| }, |
| { |
| "epoch": 0.1831223628691983, |
| "grad_norm": 0.859375, |
| "learning_rate": 9.869152824308912e-07, |
| "loss": 0.3008883595466614, |
| "step": 868, |
| "token_acc": 0.9213197969543148 |
| }, |
| { |
| "epoch": 0.18333333333333332, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.868768258389314e-07, |
| "loss": 0.2317754030227661, |
| "step": 869, |
| "token_acc": 0.9351635514018691 |
| }, |
| { |
| "epoch": 0.18354430379746836, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.868383135684857e-07, |
| "loss": 0.2313736081123352, |
| "step": 870, |
| "token_acc": 0.9373202990224266 |
| }, |
| { |
| "epoch": 0.18375527426160337, |
| "grad_norm": 0.9296875, |
| "learning_rate": 9.867997456239586e-07, |
| "loss": 0.28026607632637024, |
| "step": 871, |
| "token_acc": 0.9176308539944904 |
| }, |
| { |
| "epoch": 0.1839662447257384, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.8676112200976e-07, |
| "loss": 0.254774272441864, |
| "step": 872, |
| "token_acc": 0.9306763962952568 |
| }, |
| { |
| "epoch": 0.18417721518987343, |
| "grad_norm": 0.75, |
| "learning_rate": 9.867224427303073e-07, |
| "loss": 0.24183842539787292, |
| "step": 873, |
| "token_acc": 0.9341463414634147 |
| }, |
| { |
| "epoch": 0.18438818565400844, |
| "grad_norm": 0.86328125, |
| "learning_rate": 9.86683707790024e-07, |
| "loss": 0.23453059792518616, |
| "step": 874, |
| "token_acc": 0.9323812299621101 |
| }, |
| { |
| "epoch": 0.18459915611814345, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.86644917193339e-07, |
| "loss": 0.24839141964912415, |
| "step": 875, |
| "token_acc": 0.9287037037037037 |
| }, |
| { |
| "epoch": 0.1848101265822785, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.86606070944689e-07, |
| "loss": 0.2521136403083801, |
| "step": 876, |
| "token_acc": 0.9300189993666877 |
| }, |
| { |
| "epoch": 0.1850210970464135, |
| "grad_norm": 1.046875, |
| "learning_rate": 9.865671690485162e-07, |
| "loss": 0.3050832748413086, |
| "step": 877, |
| "token_acc": 0.9174647887323943 |
| }, |
| { |
| "epoch": 0.18523206751054852, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.865282115092692e-07, |
| "loss": 0.2835577726364136, |
| "step": 878, |
| "token_acc": 0.9225286643941741 |
| }, |
| { |
| "epoch": 0.18544303797468353, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.864891983314033e-07, |
| "loss": 0.29184651374816895, |
| "step": 879, |
| "token_acc": 0.9218231210383339 |
| }, |
| { |
| "epoch": 0.18565400843881857, |
| "grad_norm": 2.859375, |
| "learning_rate": 9.8645012951938e-07, |
| "loss": 0.2807004451751709, |
| "step": 880, |
| "token_acc": 0.9238008500303583 |
| }, |
| { |
| "epoch": 0.1858649789029536, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.864110050776672e-07, |
| "loss": 0.25495046377182007, |
| "step": 881, |
| "token_acc": 0.9281183932346723 |
| }, |
| { |
| "epoch": 0.1860759493670886, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.86371825010739e-07, |
| "loss": 0.26357853412628174, |
| "step": 882, |
| "token_acc": 0.9274457329765091 |
| }, |
| { |
| "epoch": 0.18628691983122364, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.86332589323076e-07, |
| "loss": 0.2856602966785431, |
| "step": 883, |
| "token_acc": 0.9247496423462088 |
| }, |
| { |
| "epoch": 0.18649789029535865, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.862932980191652e-07, |
| "loss": 0.26217591762542725, |
| "step": 884, |
| "token_acc": 0.9308156140907649 |
| }, |
| { |
| "epoch": 0.18670886075949367, |
| "grad_norm": 0.859375, |
| "learning_rate": 9.862539511034997e-07, |
| "loss": 0.2957126498222351, |
| "step": 885, |
| "token_acc": 0.9175007582650895 |
| }, |
| { |
| "epoch": 0.18691983122362868, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.862145485805793e-07, |
| "loss": 0.2381889373064041, |
| "step": 886, |
| "token_acc": 0.9338040600176523 |
| }, |
| { |
| "epoch": 0.18713080168776372, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.861750904549099e-07, |
| "loss": 0.23038305342197418, |
| "step": 887, |
| "token_acc": 0.933588010578901 |
| }, |
| { |
| "epoch": 0.18734177215189873, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.86135576731004e-07, |
| "loss": 0.2670343518257141, |
| "step": 888, |
| "token_acc": 0.9209346991037132 |
| }, |
| { |
| "epoch": 0.18755274261603375, |
| "grad_norm": 0.875, |
| "learning_rate": 9.860960074133802e-07, |
| "loss": 0.3037135899066925, |
| "step": 889, |
| "token_acc": 0.9173450219160927 |
| }, |
| { |
| "epoch": 0.1877637130801688, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.860563825065637e-07, |
| "loss": 0.23587052524089813, |
| "step": 890, |
| "token_acc": 0.9326950971859588 |
| }, |
| { |
| "epoch": 0.1879746835443038, |
| "grad_norm": 1.046875, |
| "learning_rate": 9.86016702015086e-07, |
| "loss": 0.2837037444114685, |
| "step": 891, |
| "token_acc": 0.9186681222707423 |
| }, |
| { |
| "epoch": 0.1881856540084388, |
| "grad_norm": 0.80078125, |
| "learning_rate": 9.85976965943485e-07, |
| "loss": 0.273685485124588, |
| "step": 892, |
| "token_acc": 0.9198871650211565 |
| }, |
| { |
| "epoch": 0.18839662447257383, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.859371742963043e-07, |
| "loss": 0.24621078372001648, |
| "step": 893, |
| "token_acc": 0.9370564640543042 |
| }, |
| { |
| "epoch": 0.18860759493670887, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.85897327078095e-07, |
| "loss": 0.2355155646800995, |
| "step": 894, |
| "token_acc": 0.933295647258338 |
| }, |
| { |
| "epoch": 0.18881856540084388, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.858574242934136e-07, |
| "loss": 0.29725679755210876, |
| "step": 895, |
| "token_acc": 0.9205632306057385 |
| }, |
| { |
| "epoch": 0.1890295358649789, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.858174659468237e-07, |
| "loss": 0.23919257521629333, |
| "step": 896, |
| "token_acc": 0.9367167919799498 |
| }, |
| { |
| "epoch": 0.18924050632911393, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.857774520428945e-07, |
| "loss": 0.2421645075082779, |
| "step": 897, |
| "token_acc": 0.928311057108141 |
| }, |
| { |
| "epoch": 0.18945147679324895, |
| "grad_norm": 0.90625, |
| "learning_rate": 9.85737382586202e-07, |
| "loss": 0.22805655002593994, |
| "step": 898, |
| "token_acc": 0.9353342428376534 |
| }, |
| { |
| "epoch": 0.18966244725738396, |
| "grad_norm": 1.125, |
| "learning_rate": 9.856972575813285e-07, |
| "loss": 0.2736568748950958, |
| "step": 899, |
| "token_acc": 0.9199507389162561 |
| }, |
| { |
| "epoch": 0.189873417721519, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.85657077032863e-07, |
| "loss": 0.251034677028656, |
| "step": 900, |
| "token_acc": 0.933903806432576 |
| }, |
| { |
| "epoch": 0.190084388185654, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.856168409454e-07, |
| "loss": 0.2377174347639084, |
| "step": 901, |
| "token_acc": 0.9313658201784488 |
| }, |
| { |
| "epoch": 0.19029535864978903, |
| "grad_norm": 0.625, |
| "learning_rate": 9.855765493235408e-07, |
| "loss": 0.27164188027381897, |
| "step": 902, |
| "token_acc": 0.9263128176171654 |
| }, |
| { |
| "epoch": 0.19050632911392404, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.855362021718936e-07, |
| "loss": 0.250331312417984, |
| "step": 903, |
| "token_acc": 0.9330877839165131 |
| }, |
| { |
| "epoch": 0.19071729957805908, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.85495799495072e-07, |
| "loss": 0.20965948700904846, |
| "step": 904, |
| "token_acc": 0.936726272352132 |
| }, |
| { |
| "epoch": 0.1909282700421941, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.854553412976965e-07, |
| "loss": 0.24447084963321686, |
| "step": 905, |
| "token_acc": 0.9254159495123351 |
| }, |
| { |
| "epoch": 0.1911392405063291, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.854148275843939e-07, |
| "loss": 0.2490314543247223, |
| "step": 906, |
| "token_acc": 0.9285930408472012 |
| }, |
| { |
| "epoch": 0.19135021097046415, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.853742583597973e-07, |
| "loss": 0.21816563606262207, |
| "step": 907, |
| "token_acc": 0.9387067116150781 |
| }, |
| { |
| "epoch": 0.19156118143459916, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.853336336285461e-07, |
| "loss": 0.24077807366847992, |
| "step": 908, |
| "token_acc": 0.9278679026651216 |
| }, |
| { |
| "epoch": 0.19177215189873417, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.852929533952858e-07, |
| "loss": 0.2617112696170807, |
| "step": 909, |
| "token_acc": 0.9296264118158123 |
| }, |
| { |
| "epoch": 0.19198312236286919, |
| "grad_norm": 0.75, |
| "learning_rate": 9.852522176646692e-07, |
| "loss": 0.22484534978866577, |
| "step": 910, |
| "token_acc": 0.9368040926873308 |
| }, |
| { |
| "epoch": 0.19219409282700423, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.85211426441354e-07, |
| "loss": 0.2532415986061096, |
| "step": 911, |
| "token_acc": 0.9334488734835356 |
| }, |
| { |
| "epoch": 0.19240506329113924, |
| "grad_norm": 0.87890625, |
| "learning_rate": 9.851705797300056e-07, |
| "loss": 0.31424853205680847, |
| "step": 912, |
| "token_acc": 0.9197608558842039 |
| }, |
| { |
| "epoch": 0.19261603375527425, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.851296775352948e-07, |
| "loss": 0.29285135865211487, |
| "step": 913, |
| "token_acc": 0.9182986536107711 |
| }, |
| { |
| "epoch": 0.1928270042194093, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.850887198618996e-07, |
| "loss": 0.21450576186180115, |
| "step": 914, |
| "token_acc": 0.9373626373626374 |
| }, |
| { |
| "epoch": 0.1930379746835443, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.850477067145031e-07, |
| "loss": 0.2844701111316681, |
| "step": 915, |
| "token_acc": 0.919885094158953 |
| }, |
| { |
| "epoch": 0.19324894514767932, |
| "grad_norm": 0.68359375, |
| "learning_rate": 9.850066380977961e-07, |
| "loss": 0.26549211144447327, |
| "step": 916, |
| "token_acc": 0.9245901639344263 |
| }, |
| { |
| "epoch": 0.19345991561181436, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.849655140164752e-07, |
| "loss": 0.258350133895874, |
| "step": 917, |
| "token_acc": 0.918646080760095 |
| }, |
| { |
| "epoch": 0.19367088607594937, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.849243344752427e-07, |
| "loss": 0.2719504237174988, |
| "step": 918, |
| "token_acc": 0.9267654751525719 |
| }, |
| { |
| "epoch": 0.19388185654008439, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.848830994788083e-07, |
| "loss": 0.27195435762405396, |
| "step": 919, |
| "token_acc": 0.9220445459737293 |
| }, |
| { |
| "epoch": 0.1940928270042194, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.848418090318876e-07, |
| "loss": 0.24952857196331024, |
| "step": 920, |
| "token_acc": 0.9340033500837521 |
| }, |
| { |
| "epoch": 0.19430379746835444, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.848004631392022e-07, |
| "loss": 0.22502082586288452, |
| "step": 921, |
| "token_acc": 0.935454267360049 |
| }, |
| { |
| "epoch": 0.19451476793248945, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.847590618054806e-07, |
| "loss": 0.30236607789993286, |
| "step": 922, |
| "token_acc": 0.9160954208938854 |
| }, |
| { |
| "epoch": 0.19472573839662446, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.847176050354573e-07, |
| "loss": 0.26875466108322144, |
| "step": 923, |
| "token_acc": 0.9231622746185852 |
| }, |
| { |
| "epoch": 0.1949367088607595, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.846760928338734e-07, |
| "loss": 0.21099932491779327, |
| "step": 924, |
| "token_acc": 0.9381474710542352 |
| }, |
| { |
| "epoch": 0.19514767932489452, |
| "grad_norm": 2.921875, |
| "learning_rate": 9.846345252054758e-07, |
| "loss": 0.24902689456939697, |
| "step": 925, |
| "token_acc": 0.9338211899459116 |
| }, |
| { |
| "epoch": 0.19535864978902953, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.845929021550184e-07, |
| "loss": 0.22670647501945496, |
| "step": 926, |
| "token_acc": 0.9382314694408322 |
| }, |
| { |
| "epoch": 0.19556962025316454, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.84551223687261e-07, |
| "loss": 0.24977506697177887, |
| "step": 927, |
| "token_acc": 0.927700089259149 |
| }, |
| { |
| "epoch": 0.19578059071729959, |
| "grad_norm": 0.7109375, |
| "learning_rate": 9.8450948980697e-07, |
| "loss": 0.2701714038848877, |
| "step": 928, |
| "token_acc": 0.9202069716775599 |
| }, |
| { |
| "epoch": 0.1959915611814346, |
| "grad_norm": 0.82421875, |
| "learning_rate": 9.844677005189182e-07, |
| "loss": 0.2738378643989563, |
| "step": 929, |
| "token_acc": 0.9230093676814989 |
| }, |
| { |
| "epoch": 0.1962025316455696, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.844258558278842e-07, |
| "loss": 0.2802038788795471, |
| "step": 930, |
| "token_acc": 0.9218846869187849 |
| }, |
| { |
| "epoch": 0.19641350210970465, |
| "grad_norm": 1.125, |
| "learning_rate": 9.843839557386534e-07, |
| "loss": 0.28460338711738586, |
| "step": 931, |
| "token_acc": 0.9176701204144497 |
| }, |
| { |
| "epoch": 0.19662447257383966, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.843420002560173e-07, |
| "loss": 0.2364983856678009, |
| "step": 932, |
| "token_acc": 0.9339788732394366 |
| }, |
| { |
| "epoch": 0.19683544303797468, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.842999893847744e-07, |
| "loss": 0.24972565472126007, |
| "step": 933, |
| "token_acc": 0.9282414536495226 |
| }, |
| { |
| "epoch": 0.19704641350210972, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.842579231297284e-07, |
| "loss": 0.23772844672203064, |
| "step": 934, |
| "token_acc": 0.9301578024547048 |
| }, |
| { |
| "epoch": 0.19725738396624473, |
| "grad_norm": 0.86328125, |
| "learning_rate": 9.842158014956901e-07, |
| "loss": 0.2724204659461975, |
| "step": 935, |
| "token_acc": 0.920952380952381 |
| }, |
| { |
| "epoch": 0.19746835443037974, |
| "grad_norm": 0.67578125, |
| "learning_rate": 9.841736244874769e-07, |
| "loss": 0.20951035618782043, |
| "step": 936, |
| "token_acc": 0.9367752622860298 |
| }, |
| { |
| "epoch": 0.19767932489451476, |
| "grad_norm": 1.84375, |
| "learning_rate": 9.841313921099112e-07, |
| "loss": 0.2654408812522888, |
| "step": 937, |
| "token_acc": 0.9252709640616087 |
| }, |
| { |
| "epoch": 0.1978902953586498, |
| "grad_norm": 0.609375, |
| "learning_rate": 9.840891043678235e-07, |
| "loss": 0.20615626871585846, |
| "step": 938, |
| "token_acc": 0.9382829208677055 |
| }, |
| { |
| "epoch": 0.1981012658227848, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.840467612660494e-07, |
| "loss": 0.24997380375862122, |
| "step": 939, |
| "token_acc": 0.9305555555555556 |
| }, |
| { |
| "epoch": 0.19831223628691982, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.84004362809431e-07, |
| "loss": 0.2462070733308792, |
| "step": 940, |
| "token_acc": 0.9301426872770512 |
| }, |
| { |
| "epoch": 0.19852320675105486, |
| "grad_norm": 0.81640625, |
| "learning_rate": 9.839619090028173e-07, |
| "loss": 0.28827589750289917, |
| "step": 941, |
| "token_acc": 0.9256148770245951 |
| }, |
| { |
| "epoch": 0.19873417721518988, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.83919399851063e-07, |
| "loss": 0.27797433733940125, |
| "step": 942, |
| "token_acc": 0.9241207421766824 |
| }, |
| { |
| "epoch": 0.1989451476793249, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.838768353590297e-07, |
| "loss": 0.3198699951171875, |
| "step": 943, |
| "token_acc": 0.909556313993174 |
| }, |
| { |
| "epoch": 0.1991561181434599, |
| "grad_norm": 0.625, |
| "learning_rate": 9.838342155315847e-07, |
| "loss": 0.23603345453739166, |
| "step": 944, |
| "token_acc": 0.9322977725674091 |
| }, |
| { |
| "epoch": 0.19936708860759494, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.837915403736017e-07, |
| "loss": 0.1939564049243927, |
| "step": 945, |
| "token_acc": 0.9405116002379535 |
| }, |
| { |
| "epoch": 0.19957805907172996, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.837488098899616e-07, |
| "loss": 0.2682676911354065, |
| "step": 946, |
| "token_acc": 0.9287606711804534 |
| }, |
| { |
| "epoch": 0.19978902953586497, |
| "grad_norm": 0.94921875, |
| "learning_rate": 9.837060240855506e-07, |
| "loss": 0.264107882976532, |
| "step": 947, |
| "token_acc": 0.9270292429625581 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.836631829652617e-07, |
| "loss": 0.31936952471733093, |
| "step": 948, |
| "token_acc": 0.9171994884910486 |
| }, |
| { |
| "epoch": 0.20021097046413502, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.83620286533994e-07, |
| "loss": 0.2610703706741333, |
| "step": 949, |
| "token_acc": 0.9247813411078717 |
| }, |
| { |
| "epoch": 0.20042194092827004, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.835773347966535e-07, |
| "loss": 0.27383172512054443, |
| "step": 950, |
| "token_acc": 0.9266730707652898 |
| }, |
| { |
| "epoch": 0.20063291139240505, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.835343277581513e-07, |
| "loss": 0.253266841173172, |
| "step": 951, |
| "token_acc": 0.9326241134751773 |
| }, |
| { |
| "epoch": 0.2008438818565401, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.834912654234065e-07, |
| "loss": 0.24679061770439148, |
| "step": 952, |
| "token_acc": 0.9246597024374802 |
| }, |
| { |
| "epoch": 0.2010548523206751, |
| "grad_norm": 0.92578125, |
| "learning_rate": 9.834481477973433e-07, |
| "loss": 0.25128480792045593, |
| "step": 953, |
| "token_acc": 0.9299694189602447 |
| }, |
| { |
| "epoch": 0.20126582278481012, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.834049748848924e-07, |
| "loss": 0.26062366366386414, |
| "step": 954, |
| "token_acc": 0.9283121597096189 |
| }, |
| { |
| "epoch": 0.20147679324894516, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.833617466909912e-07, |
| "loss": 0.2557450234889984, |
| "step": 955, |
| "token_acc": 0.92808867261422 |
| }, |
| { |
| "epoch": 0.20168776371308017, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.83318463220583e-07, |
| "loss": 0.28644537925720215, |
| "step": 956, |
| "token_acc": 0.9155513065646909 |
| }, |
| { |
| "epoch": 0.20189873417721518, |
| "grad_norm": 0.9375, |
| "learning_rate": 9.832751244786178e-07, |
| "loss": 0.27308011054992676, |
| "step": 957, |
| "token_acc": 0.9252837326607818 |
| }, |
| { |
| "epoch": 0.20210970464135022, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.832317304700517e-07, |
| "loss": 0.2365753948688507, |
| "step": 958, |
| "token_acc": 0.9307948860478044 |
| }, |
| { |
| "epoch": 0.20232067510548524, |
| "grad_norm": 1.2265625, |
| "learning_rate": 9.831882811998472e-07, |
| "loss": 0.25882843136787415, |
| "step": 959, |
| "token_acc": 0.9280114041339986 |
| }, |
| { |
| "epoch": 0.20253164556962025, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.83144776672973e-07, |
| "loss": 0.24906384944915771, |
| "step": 960, |
| "token_acc": 0.9268585131894485 |
| }, |
| { |
| "epoch": 0.20274261603375526, |
| "grad_norm": 1.421875, |
| "learning_rate": 9.831012168944045e-07, |
| "loss": 0.25317683815956116, |
| "step": 961, |
| "token_acc": 0.9272997032640949 |
| }, |
| { |
| "epoch": 0.2029535864978903, |
| "grad_norm": 1.40625, |
| "learning_rate": 9.830576018691227e-07, |
| "loss": 0.2348695993423462, |
| "step": 962, |
| "token_acc": 0.9317293233082706 |
| }, |
| { |
| "epoch": 0.20316455696202532, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.830139316021155e-07, |
| "loss": 0.22149190306663513, |
| "step": 963, |
| "token_acc": 0.9351633986928105 |
| }, |
| { |
| "epoch": 0.20337552742616033, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.829702060983772e-07, |
| "loss": 0.2570660710334778, |
| "step": 964, |
| "token_acc": 0.9295127183573398 |
| }, |
| { |
| "epoch": 0.20358649789029537, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.829264253629079e-07, |
| "loss": 0.2847985625267029, |
| "step": 965, |
| "token_acc": 0.9199036434808793 |
| }, |
| { |
| "epoch": 0.20379746835443038, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.828825894007146e-07, |
| "loss": 0.267423540353775, |
| "step": 966, |
| "token_acc": 0.9270031365839749 |
| }, |
| { |
| "epoch": 0.2040084388185654, |
| "grad_norm": 6.0625, |
| "learning_rate": 9.8283869821681e-07, |
| "loss": 0.2526509165763855, |
| "step": 967, |
| "token_acc": 0.9267187106522287 |
| }, |
| { |
| "epoch": 0.2042194092827004, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.827947518162135e-07, |
| "loss": 0.22644475102424622, |
| "step": 968, |
| "token_acc": 0.9335453100158982 |
| }, |
| { |
| "epoch": 0.20443037974683545, |
| "grad_norm": 0.859375, |
| "learning_rate": 9.827507502039507e-07, |
| "loss": 0.313146710395813, |
| "step": 969, |
| "token_acc": 0.9218163195629908 |
| }, |
| { |
| "epoch": 0.20464135021097046, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.82706693385054e-07, |
| "loss": 0.2515157163143158, |
| "step": 970, |
| "token_acc": 0.9318849089841457 |
| }, |
| { |
| "epoch": 0.20485232067510548, |
| "grad_norm": 0.7890625, |
| "learning_rate": 9.82662581364561e-07, |
| "loss": 0.33044931292533875, |
| "step": 971, |
| "token_acc": 0.911062906724512 |
| }, |
| { |
| "epoch": 0.20506329113924052, |
| "grad_norm": 1.09375, |
| "learning_rate": 9.826184141475165e-07, |
| "loss": 0.3272978961467743, |
| "step": 972, |
| "token_acc": 0.9140117537890504 |
| }, |
| { |
| "epoch": 0.20527426160337553, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.825741917389717e-07, |
| "loss": 0.21767356991767883, |
| "step": 973, |
| "token_acc": 0.9358974358974359 |
| }, |
| { |
| "epoch": 0.20548523206751054, |
| "grad_norm": 0.734375, |
| "learning_rate": 9.825299141439835e-07, |
| "loss": 0.28333914279937744, |
| "step": 974, |
| "token_acc": 0.9232728430436167 |
| }, |
| { |
| "epoch": 0.20569620253164558, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.824855813676157e-07, |
| "loss": 0.23762467503547668, |
| "step": 975, |
| "token_acc": 0.9316065192083819 |
| }, |
| { |
| "epoch": 0.2059071729957806, |
| "grad_norm": 0.80859375, |
| "learning_rate": 9.824411934149377e-07, |
| "loss": 0.2822648882865906, |
| "step": 976, |
| "token_acc": 0.9237147595356551 |
| }, |
| { |
| "epoch": 0.2061181434599156, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.823967502910259e-07, |
| "loss": 0.2508828043937683, |
| "step": 977, |
| "token_acc": 0.9297841726618705 |
| }, |
| { |
| "epoch": 0.20632911392405062, |
| "grad_norm": 0.625, |
| "learning_rate": 9.82352252000963e-07, |
| "loss": 0.2554951608181, |
| "step": 978, |
| "token_acc": 0.9255730872283418 |
| }, |
| { |
| "epoch": 0.20654008438818566, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.823076985498373e-07, |
| "loss": 0.2603085935115814, |
| "step": 979, |
| "token_acc": 0.927246790299572 |
| }, |
| { |
| "epoch": 0.20675105485232068, |
| "grad_norm": 0.75390625, |
| "learning_rate": 9.82263089942744e-07, |
| "loss": 0.27707576751708984, |
| "step": 980, |
| "token_acc": 0.9242250287026407 |
| }, |
| { |
| "epoch": 0.2069620253164557, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.822184261847847e-07, |
| "loss": 0.23693615198135376, |
| "step": 981, |
| "token_acc": 0.9334923948702655 |
| }, |
| { |
| "epoch": 0.20717299578059073, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.821737072810668e-07, |
| "loss": 0.2479907125234604, |
| "step": 982, |
| "token_acc": 0.9293939393939394 |
| }, |
| { |
| "epoch": 0.20738396624472574, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.821289332367043e-07, |
| "loss": 0.25571757555007935, |
| "step": 983, |
| "token_acc": 0.9304549405969285 |
| }, |
| { |
| "epoch": 0.20759493670886076, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.820841040568177e-07, |
| "loss": 0.2608758807182312, |
| "step": 984, |
| "token_acc": 0.9285266457680251 |
| }, |
| { |
| "epoch": 0.20780590717299577, |
| "grad_norm": 0.7578125, |
| "learning_rate": 9.820392197465335e-07, |
| "loss": 0.28490036725997925, |
| "step": 985, |
| "token_acc": 0.920461445051609 |
| }, |
| { |
| "epoch": 0.2080168776371308, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.819942803109844e-07, |
| "loss": 0.2503746449947357, |
| "step": 986, |
| "token_acc": 0.9277822689302075 |
| }, |
| { |
| "epoch": 0.20822784810126582, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.8194928575531e-07, |
| "loss": 0.2538071870803833, |
| "step": 987, |
| "token_acc": 0.9264251614714968 |
| }, |
| { |
| "epoch": 0.20843881856540084, |
| "grad_norm": 0.6875, |
| "learning_rate": 9.819042360846554e-07, |
| "loss": 0.2641909718513489, |
| "step": 988, |
| "token_acc": 0.9284467713787086 |
| }, |
| { |
| "epoch": 0.20864978902953588, |
| "grad_norm": 0.75, |
| "learning_rate": 9.818591313041727e-07, |
| "loss": 0.2759447395801544, |
| "step": 989, |
| "token_acc": 0.9222654081066074 |
| }, |
| { |
| "epoch": 0.2088607594936709, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.818139714190198e-07, |
| "loss": 0.23161228001117706, |
| "step": 990, |
| "token_acc": 0.9333521604066648 |
| }, |
| { |
| "epoch": 0.2090717299578059, |
| "grad_norm": 0.765625, |
| "learning_rate": 9.817687564343615e-07, |
| "loss": 0.2939218580722809, |
| "step": 991, |
| "token_acc": 0.9156313204276221 |
| }, |
| { |
| "epoch": 0.20928270042194091, |
| "grad_norm": 0.7265625, |
| "learning_rate": 9.817234863553681e-07, |
| "loss": 0.259197473526001, |
| "step": 992, |
| "token_acc": 0.9242243436754176 |
| }, |
| { |
| "epoch": 0.20949367088607596, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.816781611872167e-07, |
| "loss": 0.27298709750175476, |
| "step": 993, |
| "token_acc": 0.9176136363636364 |
| }, |
| { |
| "epoch": 0.20970464135021097, |
| "grad_norm": 0.6328125, |
| "learning_rate": 9.816327809350907e-07, |
| "loss": 0.2868914008140564, |
| "step": 994, |
| "token_acc": 0.9294675216057987 |
| }, |
| { |
| "epoch": 0.20991561181434598, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.815873456041797e-07, |
| "loss": 0.26026803255081177, |
| "step": 995, |
| "token_acc": 0.9263676432460461 |
| }, |
| { |
| "epoch": 0.21012658227848102, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.815418551996795e-07, |
| "loss": 0.2792215049266815, |
| "step": 996, |
| "token_acc": 0.9241547365214743 |
| }, |
| { |
| "epoch": 0.21033755274261604, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.814963097267925e-07, |
| "loss": 0.23070243000984192, |
| "step": 997, |
| "token_acc": 0.9354066985645934 |
| }, |
| { |
| "epoch": 0.21054852320675105, |
| "grad_norm": 0.640625, |
| "learning_rate": 9.814507091907271e-07, |
| "loss": 0.2509482502937317, |
| "step": 998, |
| "token_acc": 0.9299403078856425 |
| }, |
| { |
| "epoch": 0.2107594936708861, |
| "grad_norm": 0.73828125, |
| "learning_rate": 9.814050535966981e-07, |
| "loss": 0.24006497859954834, |
| "step": 999, |
| "token_acc": 0.9315068493150684 |
| }, |
| { |
| "epoch": 0.2109704641350211, |
| "grad_norm": 0.83984375, |
| "learning_rate": 9.813593429499268e-07, |
| "loss": 0.28949546813964844, |
| "step": 1000, |
| "token_acc": 0.9210992907801419 |
| }, |
| { |
| "epoch": 0.2109704641350211, |
| "eval_loss": 0.4336377680301666, |
| "eval_runtime": 245.5659, |
| "eval_samples_per_second": 137.254, |
| "eval_steps_per_second": 2.146, |
| "eval_token_acc": 0.8990386341200753, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 9480, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5165183244464292e+18, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|