| { |
| "best_global_step": 3750, |
| "best_metric": 0.994845449924469, |
| "best_model_checkpoint": "/gpfs/scratch/guoh/DNAFM/output/gencode_human_12.8k_12800/Gencode-MxDNA/checkpoint-3750", |
| "epoch": 1.0635415927948373, |
| "eval_steps": 125, |
| "global_step": 3750, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005673356499539039, |
| "grad_norm": 8450.4345703125, |
| "loss": 876.9911, |
| "lr": 2e-06, |
| "step": 2, |
| "tokens_trained": 0.000985992 |
| }, |
| { |
| "epoch": 0.0011346712999078079, |
| "grad_norm": 8980.888671875, |
| "loss": 779.4711, |
| "lr": 6e-06, |
| "step": 4, |
| "tokens_trained": 0.001968088 |
| }, |
| { |
| "epoch": 0.001702006949861712, |
| "grad_norm": 7489.92529296875, |
| "loss": 488.6157, |
| "lr": 1e-05, |
| "step": 6, |
| "tokens_trained": 0.002953808 |
| }, |
| { |
| "epoch": 0.0022693425998156157, |
| "grad_norm": 1952.1917724609375, |
| "loss": 237.0602, |
| "lr": 1.4e-05, |
| "step": 8, |
| "tokens_trained": 0.003935728 |
| }, |
| { |
| "epoch": 0.0028366782497695198, |
| "grad_norm": 1418.443603515625, |
| "loss": 159.0854, |
| "lr": 1.8e-05, |
| "step": 10, |
| "tokens_trained": 0.004916488 |
| }, |
| { |
| "epoch": 0.003404013899723424, |
| "grad_norm": 874.7195434570312, |
| "loss": 91.9563, |
| "lr": 2.2e-05, |
| "step": 12, |
| "tokens_trained": 0.005902792 |
| }, |
| { |
| "epoch": 0.003971349549677328, |
| "grad_norm": 1339.8248291015625, |
| "loss": 40.3366, |
| "lr": 2.6e-05, |
| "step": 14, |
| "tokens_trained": 0.0068856 |
| }, |
| { |
| "epoch": 0.0045386851996312315, |
| "grad_norm": 2936.7607421875, |
| "loss": 22.7436, |
| "lr": 3e-05, |
| "step": 16, |
| "tokens_trained": 0.007868248 |
| }, |
| { |
| "epoch": 0.005106020849585136, |
| "grad_norm": 1531.3807373046875, |
| "loss": 23.4797, |
| "lr": 3.4000000000000007e-05, |
| "step": 18, |
| "tokens_trained": 0.008849296 |
| }, |
| { |
| "epoch": 0.0056733564995390395, |
| "grad_norm": 3027.4189453125, |
| "loss": 38.7379, |
| "lr": 3.8e-05, |
| "step": 20, |
| "tokens_trained": 0.009830984 |
| }, |
| { |
| "epoch": 0.006240692149492944, |
| "grad_norm": 2435.890625, |
| "loss": 26.2427, |
| "lr": 4.2000000000000004e-05, |
| "step": 22, |
| "tokens_trained": 0.01081364 |
| }, |
| { |
| "epoch": 0.006808027799446848, |
| "grad_norm": 3217.990478515625, |
| "loss": 31.0263, |
| "lr": 4.6e-05, |
| "step": 24, |
| "tokens_trained": 0.01179036 |
| }, |
| { |
| "epoch": 0.007375363449400752, |
| "grad_norm": 3854.00634765625, |
| "loss": 33.8781, |
| "lr": 5e-05, |
| "step": 26, |
| "tokens_trained": 0.012774504 |
| }, |
| { |
| "epoch": 0.007942699099354656, |
| "grad_norm": 3197.489990234375, |
| "loss": 27.7927, |
| "lr": 5.4e-05, |
| "step": 28, |
| "tokens_trained": 0.013759992 |
| }, |
| { |
| "epoch": 0.00851003474930856, |
| "grad_norm": 3034.156494140625, |
| "loss": 37.9083, |
| "lr": 5.800000000000001e-05, |
| "step": 30, |
| "tokens_trained": 0.014740536 |
| }, |
| { |
| "epoch": 0.009077370399262463, |
| "grad_norm": 3040.314453125, |
| "loss": 34.0659, |
| "lr": 6.2e-05, |
| "step": 32, |
| "tokens_trained": 0.015725984 |
| }, |
| { |
| "epoch": 0.009644706049216368, |
| "grad_norm": 3065.5791015625, |
| "loss": 27.7768, |
| "lr": 6.6e-05, |
| "step": 34, |
| "tokens_trained": 0.016706864 |
| }, |
| { |
| "epoch": 0.010212041699170272, |
| "grad_norm": 2454.293701171875, |
| "loss": 35.1143, |
| "lr": 7.000000000000001e-05, |
| "step": 36, |
| "tokens_trained": 0.017688816 |
| }, |
| { |
| "epoch": 0.010779377349124175, |
| "grad_norm": 3100.7802734375, |
| "loss": 42.2603, |
| "lr": 7.4e-05, |
| "step": 38, |
| "tokens_trained": 0.018669072 |
| }, |
| { |
| "epoch": 0.011346712999078079, |
| "grad_norm": 2749.84423828125, |
| "loss": 39.3879, |
| "lr": 7.8e-05, |
| "step": 40, |
| "tokens_trained": 0.019652072 |
| }, |
| { |
| "epoch": 0.011914048649031984, |
| "grad_norm": 1519.9908447265625, |
| "loss": 35.0735, |
| "lr": 8.2e-05, |
| "step": 42, |
| "tokens_trained": 0.020633112 |
| }, |
| { |
| "epoch": 0.012481384298985888, |
| "grad_norm": 1474.4244384765625, |
| "loss": 25.8965, |
| "lr": 8.599999999999999e-05, |
| "step": 44, |
| "tokens_trained": 0.021616192 |
| }, |
| { |
| "epoch": 0.013048719948939792, |
| "grad_norm": 2962.500244140625, |
| "loss": 51.0784, |
| "lr": 8.999999999999999e-05, |
| "step": 46, |
| "tokens_trained": 0.022597288 |
| }, |
| { |
| "epoch": 0.013616055598893695, |
| "grad_norm": 2419.41455078125, |
| "loss": 43.0334, |
| "lr": 9.400000000000001e-05, |
| "step": 48, |
| "tokens_trained": 0.02357572 |
| }, |
| { |
| "epoch": 0.014183391248847599, |
| "grad_norm": 1267.87451171875, |
| "loss": 21.8063, |
| "lr": 9.800000000000001e-05, |
| "step": 50, |
| "tokens_trained": 0.024553376 |
| }, |
| { |
| "epoch": 0.014750726898801504, |
| "grad_norm": 1573.944091796875, |
| "loss": 52.9693, |
| "lr": 0.000102, |
| "step": 52, |
| "tokens_trained": 0.025536728 |
| }, |
| { |
| "epoch": 0.015318062548755408, |
| "grad_norm": 1509.650146484375, |
| "loss": 50.0825, |
| "lr": 0.000106, |
| "step": 54, |
| "tokens_trained": 0.026517 |
| }, |
| { |
| "epoch": 0.01588539819870931, |
| "grad_norm": 2334.765380859375, |
| "loss": 42.1982, |
| "lr": 0.00011, |
| "step": 56, |
| "tokens_trained": 0.027504728 |
| }, |
| { |
| "epoch": 0.016452733848663217, |
| "grad_norm": 1594.16259765625, |
| "loss": 39.0562, |
| "lr": 0.000114, |
| "step": 58, |
| "tokens_trained": 0.028485416 |
| }, |
| { |
| "epoch": 0.01702006949861712, |
| "grad_norm": 1628.082275390625, |
| "loss": 35.0488, |
| "lr": 0.000118, |
| "step": 60, |
| "tokens_trained": 0.029468696 |
| }, |
| { |
| "epoch": 0.017587405148571024, |
| "grad_norm": 2496.6455078125, |
| "loss": 49.4241, |
| "lr": 0.000122, |
| "step": 62, |
| "tokens_trained": 0.030453584 |
| }, |
| { |
| "epoch": 0.018154740798524926, |
| "grad_norm": 2521.721435546875, |
| "loss": 69.0275, |
| "lr": 0.000126, |
| "step": 64, |
| "tokens_trained": 0.031432864 |
| }, |
| { |
| "epoch": 0.01872207644847883, |
| "grad_norm": 2179.571533203125, |
| "loss": 63.1409, |
| "lr": 0.00013000000000000002, |
| "step": 66, |
| "tokens_trained": 0.032418416 |
| }, |
| { |
| "epoch": 0.019289412098432736, |
| "grad_norm": 899.7137451171875, |
| "loss": 38.4131, |
| "lr": 0.000134, |
| "step": 68, |
| "tokens_trained": 0.033402136 |
| }, |
| { |
| "epoch": 0.01985674774838664, |
| "grad_norm": 2109.377685546875, |
| "loss": 51.0044, |
| "lr": 0.00013800000000000002, |
| "step": 70, |
| "tokens_trained": 0.03438832 |
| }, |
| { |
| "epoch": 0.020424083398340544, |
| "grad_norm": 1649.1873779296875, |
| "loss": 32.1408, |
| "lr": 0.00014199999999999998, |
| "step": 72, |
| "tokens_trained": 0.035374464 |
| }, |
| { |
| "epoch": 0.020991419048294446, |
| "grad_norm": 1807.994140625, |
| "loss": 28.8357, |
| "lr": 0.000146, |
| "step": 74, |
| "tokens_trained": 0.03635784 |
| }, |
| { |
| "epoch": 0.02155875469824835, |
| "grad_norm": 998.9485473632812, |
| "loss": 23.0343, |
| "lr": 0.00015, |
| "step": 76, |
| "tokens_trained": 0.037340248 |
| }, |
| { |
| "epoch": 0.022126090348202256, |
| "grad_norm": 2240.17578125, |
| "loss": 32.0397, |
| "lr": 0.000154, |
| "step": 78, |
| "tokens_trained": 0.038321968 |
| }, |
| { |
| "epoch": 0.022693425998156158, |
| "grad_norm": 1606.0067138671875, |
| "loss": 32.1776, |
| "lr": 0.000158, |
| "step": 80, |
| "tokens_trained": 0.039304992 |
| }, |
| { |
| "epoch": 0.023260761648110063, |
| "grad_norm": 1685.1015625, |
| "loss": 24.3428, |
| "lr": 0.000162, |
| "step": 82, |
| "tokens_trained": 0.040286808 |
| }, |
| { |
| "epoch": 0.02382809729806397, |
| "grad_norm": 1761.7890625, |
| "loss": 23.9261, |
| "lr": 0.00016600000000000002, |
| "step": 84, |
| "tokens_trained": 0.041271776 |
| }, |
| { |
| "epoch": 0.02439543294801787, |
| "grad_norm": 2036.0982666015625, |
| "loss": 27.7196, |
| "lr": 0.00017, |
| "step": 86, |
| "tokens_trained": 0.042252784 |
| }, |
| { |
| "epoch": 0.024962768597971776, |
| "grad_norm": 1564.3870849609375, |
| "loss": 25.3722, |
| "lr": 0.000174, |
| "step": 88, |
| "tokens_trained": 0.04323596 |
| }, |
| { |
| "epoch": 0.025530104247925678, |
| "grad_norm": 1508.349853515625, |
| "loss": 18.4107, |
| "lr": 0.000178, |
| "step": 90, |
| "tokens_trained": 0.044218984 |
| }, |
| { |
| "epoch": 0.026097439897879583, |
| "grad_norm": 1955.011474609375, |
| "loss": 28.8456, |
| "lr": 0.000182, |
| "step": 92, |
| "tokens_trained": 0.045202144 |
| }, |
| { |
| "epoch": 0.02666477554783349, |
| "grad_norm": 1679.9423828125, |
| "loss": 23.6139, |
| "lr": 0.000186, |
| "step": 94, |
| "tokens_trained": 0.046192336 |
| }, |
| { |
| "epoch": 0.02723211119778739, |
| "grad_norm": 1517.5731201171875, |
| "loss": 42.145, |
| "lr": 0.00019, |
| "step": 96, |
| "tokens_trained": 0.047174312 |
| }, |
| { |
| "epoch": 0.027799446847741296, |
| "grad_norm": 1535.3076171875, |
| "loss": 31.9711, |
| "lr": 0.000194, |
| "step": 98, |
| "tokens_trained": 0.048158944 |
| }, |
| { |
| "epoch": 0.028366782497695198, |
| "grad_norm": 1475.2569580078125, |
| "loss": 37.645, |
| "lr": 0.00019800000000000002, |
| "step": 100, |
| "tokens_trained": 0.04914364 |
| }, |
| { |
| "epoch": 0.028934118147649103, |
| "grad_norm": 1918.4088134765625, |
| "loss": 69.4053, |
| "lr": 0.000202, |
| "step": 102, |
| "tokens_trained": 0.050123488 |
| }, |
| { |
| "epoch": 0.02950145379760301, |
| "grad_norm": 1631.6231689453125, |
| "loss": 50.9725, |
| "lr": 0.000206, |
| "step": 104, |
| "tokens_trained": 0.051105512 |
| }, |
| { |
| "epoch": 0.03006878944755691, |
| "grad_norm": 1291.6376953125, |
| "loss": 22.6527, |
| "lr": 0.00021, |
| "step": 106, |
| "tokens_trained": 0.052091704 |
| }, |
| { |
| "epoch": 0.030636125097510816, |
| "grad_norm": 1224.9625244140625, |
| "loss": 60.2725, |
| "lr": 0.000214, |
| "step": 108, |
| "tokens_trained": 0.053074824 |
| }, |
| { |
| "epoch": 0.031203460747464717, |
| "grad_norm": 1218.2022705078125, |
| "loss": 75.8728, |
| "lr": 0.000218, |
| "step": 110, |
| "tokens_trained": 0.054057104 |
| }, |
| { |
| "epoch": 0.03177079639741862, |
| "grad_norm": 1761.8861083984375, |
| "loss": 61.6427, |
| "lr": 0.000222, |
| "step": 112, |
| "tokens_trained": 0.055039128 |
| }, |
| { |
| "epoch": 0.03233813204737253, |
| "grad_norm": 1482.4256591796875, |
| "loss": 35.3351, |
| "lr": 0.00022600000000000002, |
| "step": 114, |
| "tokens_trained": 0.05602388 |
| }, |
| { |
| "epoch": 0.03290546769732643, |
| "grad_norm": 563.6399536132812, |
| "loss": 40.1461, |
| "lr": 0.00023, |
| "step": 116, |
| "tokens_trained": 0.057005376 |
| }, |
| { |
| "epoch": 0.03347280334728033, |
| "grad_norm": 1266.058837890625, |
| "loss": 24.0657, |
| "lr": 0.00023400000000000002, |
| "step": 118, |
| "tokens_trained": 0.057985136 |
| }, |
| { |
| "epoch": 0.03404013899723424, |
| "grad_norm": 918.206298828125, |
| "loss": 23.9626, |
| "lr": 0.00023799999999999998, |
| "step": 120, |
| "tokens_trained": 0.058968288 |
| }, |
| { |
| "epoch": 0.03460747464718814, |
| "grad_norm": 1495.7191162109375, |
| "loss": 19.798, |
| "lr": 0.000242, |
| "step": 122, |
| "tokens_trained": 0.05995348 |
| }, |
| { |
| "epoch": 0.03517481029714205, |
| "grad_norm": 1264.302734375, |
| "loss": 31.5342, |
| "lr": 0.000246, |
| "step": 124, |
| "tokens_trained": 0.060935832 |
| }, |
| { |
| "epoch": 0.035458478122119, |
| "eval_loss": 5.312118053436279, |
| "eval_runtime": 21.3065, |
| "step": 125, |
| "tokens_trained": 0.061426608 |
| }, |
| { |
| "epoch": 0.03574214594709595, |
| "grad_norm": 907.4861450195312, |
| "loss": 25.1262, |
| "lr": 0.00025, |
| "step": 126, |
| "tokens_trained": 0.061918184 |
| }, |
| { |
| "epoch": 0.03630948159704985, |
| "grad_norm": 1287.6158447265625, |
| "loss": 26.963, |
| "lr": 0.000254, |
| "step": 128, |
| "tokens_trained": 0.062902328 |
| }, |
| { |
| "epoch": 0.03687681724700376, |
| "grad_norm": 1260.570556640625, |
| "loss": 24.9633, |
| "lr": 0.00025800000000000004, |
| "step": 130, |
| "tokens_trained": 0.063883456 |
| }, |
| { |
| "epoch": 0.03744415289695766, |
| "grad_norm": 1436.82373046875, |
| "loss": 23.1028, |
| "lr": 0.000262, |
| "step": 132, |
| "tokens_trained": 0.06486748 |
| }, |
| { |
| "epoch": 0.03801148854691157, |
| "grad_norm": 812.9523315429688, |
| "loss": 20.5496, |
| "lr": 0.000266, |
| "step": 134, |
| "tokens_trained": 0.065847104 |
| }, |
| { |
| "epoch": 0.03857882419686547, |
| "grad_norm": 1336.5322265625, |
| "loss": 23.673, |
| "lr": 0.00027, |
| "step": 136, |
| "tokens_trained": 0.066829928 |
| }, |
| { |
| "epoch": 0.03914615984681937, |
| "grad_norm": 1381.282470703125, |
| "loss": 32.0373, |
| "lr": 0.00027400000000000005, |
| "step": 138, |
| "tokens_trained": 0.067814024 |
| }, |
| { |
| "epoch": 0.03971349549677328, |
| "grad_norm": 972.7861938476562, |
| "loss": 26.9454, |
| "lr": 0.00027800000000000004, |
| "step": 140, |
| "tokens_trained": 0.068797744 |
| }, |
| { |
| "epoch": 0.04028083114672718, |
| "grad_norm": 1347.2249755859375, |
| "loss": 22.3578, |
| "lr": 0.00028199999999999997, |
| "step": 142, |
| "tokens_trained": 0.069780072 |
| }, |
| { |
| "epoch": 0.04084816679668109, |
| "grad_norm": 829.525390625, |
| "loss": 37.9879, |
| "lr": 0.00028599999999999996, |
| "step": 144, |
| "tokens_trained": 0.070759896 |
| }, |
| { |
| "epoch": 0.04141550244663499, |
| "grad_norm": 1094.1033935546875, |
| "loss": 21.1972, |
| "lr": 0.00029, |
| "step": 146, |
| "tokens_trained": 0.0717452 |
| }, |
| { |
| "epoch": 0.04198283809658889, |
| "grad_norm": 717.107421875, |
| "loss": 21.7774, |
| "lr": 0.000294, |
| "step": 148, |
| "tokens_trained": 0.072727432 |
| }, |
| { |
| "epoch": 0.042550173746542796, |
| "grad_norm": 744.4456787109375, |
| "loss": 20.3235, |
| "lr": 0.000298, |
| "step": 150, |
| "tokens_trained": 0.073712128 |
| }, |
| { |
| "epoch": 0.0431175093964967, |
| "grad_norm": 904.1460571289062, |
| "loss": 22.7878, |
| "lr": 0.000302, |
| "step": 152, |
| "tokens_trained": 0.074695296 |
| }, |
| { |
| "epoch": 0.04368484504645061, |
| "grad_norm": 1352.303955078125, |
| "loss": 20.9757, |
| "lr": 0.000306, |
| "step": 154, |
| "tokens_trained": 0.0756798 |
| }, |
| { |
| "epoch": 0.04425218069640451, |
| "grad_norm": 997.0473022460938, |
| "loss": 17.4647, |
| "lr": 0.00031, |
| "step": 156, |
| "tokens_trained": 0.076666504 |
| }, |
| { |
| "epoch": 0.04481951634635841, |
| "grad_norm": 1206.387939453125, |
| "loss": 21.1846, |
| "lr": 0.000314, |
| "step": 158, |
| "tokens_trained": 0.07764868 |
| }, |
| { |
| "epoch": 0.045386851996312316, |
| "grad_norm": 1029.6807861328125, |
| "loss": 17.8853, |
| "lr": 0.00031800000000000003, |
| "step": 160, |
| "tokens_trained": 0.07863548 |
| }, |
| { |
| "epoch": 0.04595418764626622, |
| "grad_norm": 1136.4635009765625, |
| "loss": 30.057, |
| "lr": 0.000322, |
| "step": 162, |
| "tokens_trained": 0.079618928 |
| }, |
| { |
| "epoch": 0.04652152329622013, |
| "grad_norm": 834.3464965820312, |
| "loss": 28.1782, |
| "lr": 0.000326, |
| "step": 164, |
| "tokens_trained": 0.0806032 |
| }, |
| { |
| "epoch": 0.04708885894617403, |
| "grad_norm": 1177.8365478515625, |
| "loss": 16.4267, |
| "lr": 0.00033, |
| "step": 166, |
| "tokens_trained": 0.081583752 |
| }, |
| { |
| "epoch": 0.04765619459612794, |
| "grad_norm": 572.501708984375, |
| "loss": 16.5752, |
| "lr": 0.00033400000000000004, |
| "step": 168, |
| "tokens_trained": 0.082568184 |
| }, |
| { |
| "epoch": 0.048223530246081836, |
| "grad_norm": 437.6822814941406, |
| "loss": 11.5509, |
| "lr": 0.00033800000000000003, |
| "step": 170, |
| "tokens_trained": 0.083553352 |
| }, |
| { |
| "epoch": 0.04879086589603574, |
| "grad_norm": 1119.0416259765625, |
| "loss": 16.2689, |
| "lr": 0.000342, |
| "step": 172, |
| "tokens_trained": 0.084536352 |
| }, |
| { |
| "epoch": 0.04935820154598965, |
| "grad_norm": 895.4021606445312, |
| "loss": 12.6663, |
| "lr": 0.000346, |
| "step": 174, |
| "tokens_trained": 0.085517312 |
| }, |
| { |
| "epoch": 0.04992553719594355, |
| "grad_norm": 995.6289672851562, |
| "loss": 26.0663, |
| "lr": 0.00035, |
| "step": 176, |
| "tokens_trained": 0.086496088 |
| }, |
| { |
| "epoch": 0.05049287284589746, |
| "grad_norm": 839.6610717773438, |
| "loss": 21.5115, |
| "lr": 0.000354, |
| "step": 178, |
| "tokens_trained": 0.087480632 |
| }, |
| { |
| "epoch": 0.051060208495851356, |
| "grad_norm": 734.1155395507812, |
| "loss": 29.3287, |
| "lr": 0.000358, |
| "step": 180, |
| "tokens_trained": 0.088460408 |
| }, |
| { |
| "epoch": 0.05162754414580526, |
| "grad_norm": 721.4505615234375, |
| "loss": 26.0801, |
| "lr": 0.000362, |
| "step": 182, |
| "tokens_trained": 0.08944248 |
| }, |
| { |
| "epoch": 0.052194879795759166, |
| "grad_norm": 845.9672241210938, |
| "loss": 19.0639, |
| "lr": 0.000366, |
| "step": 184, |
| "tokens_trained": 0.090427832 |
| }, |
| { |
| "epoch": 0.05276221544571307, |
| "grad_norm": 1210.9969482421875, |
| "loss": 23.9036, |
| "lr": 0.00037, |
| "step": 186, |
| "tokens_trained": 0.091411504 |
| }, |
| { |
| "epoch": 0.05332955109566698, |
| "grad_norm": 1079.1690673828125, |
| "loss": 23.5588, |
| "lr": 0.000374, |
| "step": 188, |
| "tokens_trained": 0.092392672 |
| }, |
| { |
| "epoch": 0.053896886745620876, |
| "grad_norm": 596.111328125, |
| "loss": 20.8275, |
| "lr": 0.000378, |
| "step": 190, |
| "tokens_trained": 0.093374696 |
| }, |
| { |
| "epoch": 0.05446422239557478, |
| "grad_norm": 761.8096923828125, |
| "loss": 22.512, |
| "lr": 0.000382, |
| "step": 192, |
| "tokens_trained": 0.094361912 |
| }, |
| { |
| "epoch": 0.055031558045528686, |
| "grad_norm": 1081.9832763671875, |
| "loss": 32.335, |
| "lr": 0.000386, |
| "step": 194, |
| "tokens_trained": 0.095342992 |
| }, |
| { |
| "epoch": 0.05559889369548259, |
| "grad_norm": 304.3534240722656, |
| "loss": 11.5275, |
| "lr": 0.00039000000000000005, |
| "step": 196, |
| "tokens_trained": 0.096323512 |
| }, |
| { |
| "epoch": 0.0561662293454365, |
| "grad_norm": 586.6314086914062, |
| "loss": 16.2663, |
| "lr": 0.00039400000000000004, |
| "step": 198, |
| "tokens_trained": 0.097308864 |
| }, |
| { |
| "epoch": 0.056733564995390395, |
| "grad_norm": 624.9953002929688, |
| "loss": 16.627, |
| "lr": 0.000398, |
| "step": 200, |
| "tokens_trained": 0.098289064 |
| }, |
| { |
| "epoch": 0.0573009006453443, |
| "grad_norm": 585.9645385742188, |
| "loss": 15.8359, |
| "lr": 0.000402, |
| "step": 202, |
| "tokens_trained": 0.099269696 |
| }, |
| { |
| "epoch": 0.057868236295298206, |
| "grad_norm": 537.9913330078125, |
| "loss": 20.0779, |
| "lr": 0.00040600000000000006, |
| "step": 204, |
| "tokens_trained": 0.100248448 |
| }, |
| { |
| "epoch": 0.05843557194525211, |
| "grad_norm": 805.04931640625, |
| "loss": 21.4524, |
| "lr": 0.00041, |
| "step": 206, |
| "tokens_trained": 0.101231248 |
| }, |
| { |
| "epoch": 0.05900290759520602, |
| "grad_norm": 439.1418151855469, |
| "loss": 23.9852, |
| "lr": 0.000414, |
| "step": 208, |
| "tokens_trained": 0.102210688 |
| }, |
| { |
| "epoch": 0.059570243245159915, |
| "grad_norm": 502.684814453125, |
| "loss": 17.6273, |
| "lr": 0.00041799999999999997, |
| "step": 210, |
| "tokens_trained": 0.103192176 |
| }, |
| { |
| "epoch": 0.06013757889511382, |
| "grad_norm": 849.9979858398438, |
| "loss": 33.7517, |
| "lr": 0.000422, |
| "step": 212, |
| "tokens_trained": 0.104172824 |
| }, |
| { |
| "epoch": 0.060704914545067726, |
| "grad_norm": 939.583740234375, |
| "loss": 26.2559, |
| "lr": 0.000426, |
| "step": 214, |
| "tokens_trained": 0.105156672 |
| }, |
| { |
| "epoch": 0.06127225019502163, |
| "grad_norm": 525.0505981445312, |
| "loss": 20.0923, |
| "lr": 0.00043, |
| "step": 216, |
| "tokens_trained": 0.106141368 |
| }, |
| { |
| "epoch": 0.061839585844975536, |
| "grad_norm": 420.296630859375, |
| "loss": 17.9608, |
| "lr": 0.00043400000000000003, |
| "step": 218, |
| "tokens_trained": 0.107124088 |
| }, |
| { |
| "epoch": 0.062406921494929435, |
| "grad_norm": 711.3380737304688, |
| "loss": 19.387, |
| "lr": 0.000438, |
| "step": 220, |
| "tokens_trained": 0.108112632 |
| }, |
| { |
| "epoch": 0.06297425714488335, |
| "grad_norm": 759.183349609375, |
| "loss": 17.8061, |
| "lr": 0.000442, |
| "step": 222, |
| "tokens_trained": 0.1090934 |
| }, |
| { |
| "epoch": 0.06354159279483725, |
| "grad_norm": 790.025146484375, |
| "loss": 13.8539, |
| "lr": 0.000446, |
| "step": 224, |
| "tokens_trained": 0.110079512 |
| }, |
| { |
| "epoch": 0.06410892844479114, |
| "grad_norm": 769.8306274414062, |
| "loss": 22.1258, |
| "lr": 0.00045000000000000004, |
| "step": 226, |
| "tokens_trained": 0.111060152 |
| }, |
| { |
| "epoch": 0.06467626409474506, |
| "grad_norm": 656.8352661132812, |
| "loss": 14.8646, |
| "lr": 0.00045400000000000003, |
| "step": 228, |
| "tokens_trained": 0.112044144 |
| }, |
| { |
| "epoch": 0.06524359974469895, |
| "grad_norm": 498.92010498046875, |
| "loss": 23.1558, |
| "lr": 0.000458, |
| "step": 230, |
| "tokens_trained": 0.113022928 |
| }, |
| { |
| "epoch": 0.06581093539465287, |
| "grad_norm": 764.0186157226562, |
| "loss": 16.7089, |
| "lr": 0.000462, |
| "step": 232, |
| "tokens_trained": 0.114003832 |
| }, |
| { |
| "epoch": 0.06637827104460677, |
| "grad_norm": 491.5793762207031, |
| "loss": 12.3979, |
| "lr": 0.00046600000000000005, |
| "step": 234, |
| "tokens_trained": 0.114991008 |
| }, |
| { |
| "epoch": 0.06694560669456066, |
| "grad_norm": 679.9217529296875, |
| "loss": 14.9037, |
| "lr": 0.00047, |
| "step": 236, |
| "tokens_trained": 0.115971888 |
| }, |
| { |
| "epoch": 0.06751294234451458, |
| "grad_norm": 491.0369567871094, |
| "loss": 7.7603, |
| "lr": 0.000474, |
| "step": 238, |
| "tokens_trained": 0.116952616 |
| }, |
| { |
| "epoch": 0.06808027799446847, |
| "grad_norm": 369.2186279296875, |
| "loss": 8.2256, |
| "lr": 0.00047799999999999996, |
| "step": 240, |
| "tokens_trained": 0.117935816 |
| }, |
| { |
| "epoch": 0.06864761364442239, |
| "grad_norm": 312.72137451171875, |
| "loss": 7.5486, |
| "lr": 0.000482, |
| "step": 242, |
| "tokens_trained": 0.118919392 |
| }, |
| { |
| "epoch": 0.06921494929437629, |
| "grad_norm": 596.1439208984375, |
| "loss": 11.7351, |
| "lr": 0.000486, |
| "step": 244, |
| "tokens_trained": 0.119901856 |
| }, |
| { |
| "epoch": 0.06978228494433018, |
| "grad_norm": 467.5667419433594, |
| "loss": 11.8403, |
| "lr": 0.00049, |
| "step": 246, |
| "tokens_trained": 0.120884624 |
| }, |
| { |
| "epoch": 0.0703496205942841, |
| "grad_norm": 430.50048828125, |
| "loss": 13.8081, |
| "lr": 0.000494, |
| "step": 248, |
| "tokens_trained": 0.121869224 |
| }, |
| { |
| "epoch": 0.070916956244238, |
| "grad_norm": 522.242919921875, |
| "loss": 14.1892, |
| "lr": 0.000498, |
| "step": 250, |
| "tokens_trained": 0.122853584 |
| }, |
| { |
| "epoch": 0.070916956244238, |
| "eval_loss": 1.9294606447219849, |
| "eval_runtime": 20.4162, |
| "step": 250, |
| "tokens_trained": 0.122853584 |
| }, |
| { |
| "epoch": 0.0714842918941919, |
| "grad_norm": 835.2765502929688, |
| "loss": 13.2462, |
| "lr": 0.0005020000000000001, |
| "step": 252, |
| "tokens_trained": 0.123835544 |
| }, |
| { |
| "epoch": 0.0720516275441458, |
| "grad_norm": 714.8098754882812, |
| "loss": 20.0498, |
| "lr": 0.000506, |
| "step": 254, |
| "tokens_trained": 0.124821616 |
| }, |
| { |
| "epoch": 0.0726189631940997, |
| "grad_norm": 701.512939453125, |
| "loss": 18.3664, |
| "lr": 0.00051, |
| "step": 256, |
| "tokens_trained": 0.125807608 |
| }, |
| { |
| "epoch": 0.07318629884405362, |
| "grad_norm": 773.987060546875, |
| "loss": 21.3807, |
| "lr": 0.000514, |
| "step": 258, |
| "tokens_trained": 0.126791464 |
| }, |
| { |
| "epoch": 0.07375363449400751, |
| "grad_norm": 826.422119140625, |
| "loss": 22.6403, |
| "lr": 0.000518, |
| "step": 260, |
| "tokens_trained": 0.127771752 |
| }, |
| { |
| "epoch": 0.07432097014396143, |
| "grad_norm": 742.8673095703125, |
| "loss": 20.1504, |
| "lr": 0.000522, |
| "step": 262, |
| "tokens_trained": 0.128755448 |
| }, |
| { |
| "epoch": 0.07488830579391532, |
| "grad_norm": 797.79296875, |
| "loss": 26.7343, |
| "lr": 0.000526, |
| "step": 264, |
| "tokens_trained": 0.129741088 |
| }, |
| { |
| "epoch": 0.07545564144386922, |
| "grad_norm": 673.9141235351562, |
| "loss": 12.505, |
| "lr": 0.0005300000000000001, |
| "step": 266, |
| "tokens_trained": 0.130727504 |
| }, |
| { |
| "epoch": 0.07602297709382314, |
| "grad_norm": 310.6510925292969, |
| "loss": 12.6344, |
| "lr": 0.0005340000000000001, |
| "step": 268, |
| "tokens_trained": 0.131710296 |
| }, |
| { |
| "epoch": 0.07659031274377703, |
| "grad_norm": 312.40966796875, |
| "loss": 14.254, |
| "lr": 0.0005380000000000001, |
| "step": 270, |
| "tokens_trained": 0.132695352 |
| }, |
| { |
| "epoch": 0.07715764839373095, |
| "grad_norm": 492.2834777832031, |
| "loss": 19.0979, |
| "lr": 0.0005420000000000001, |
| "step": 272, |
| "tokens_trained": 0.133677928 |
| }, |
| { |
| "epoch": 0.07772498404368484, |
| "grad_norm": 628.457763671875, |
| "loss": 21.7735, |
| "lr": 0.000546, |
| "step": 274, |
| "tokens_trained": 0.134655504 |
| }, |
| { |
| "epoch": 0.07829231969363874, |
| "grad_norm": 382.8389892578125, |
| "loss": 12.5128, |
| "lr": 0.00055, |
| "step": 276, |
| "tokens_trained": 0.135640208 |
| }, |
| { |
| "epoch": 0.07885965534359266, |
| "grad_norm": 483.12335205078125, |
| "loss": 15.2589, |
| "lr": 0.000554, |
| "step": 278, |
| "tokens_trained": 0.136624232 |
| }, |
| { |
| "epoch": 0.07942699099354655, |
| "grad_norm": 640.658447265625, |
| "loss": 12.1341, |
| "lr": 0.000558, |
| "step": 280, |
| "tokens_trained": 0.13760628 |
| }, |
| { |
| "epoch": 0.07999432664350047, |
| "grad_norm": 410.0824279785156, |
| "loss": 12.5723, |
| "lr": 0.0005620000000000001, |
| "step": 282, |
| "tokens_trained": 0.13858832 |
| }, |
| { |
| "epoch": 0.08056166229345436, |
| "grad_norm": 513.2861328125, |
| "loss": 14.8461, |
| "lr": 0.000566, |
| "step": 284, |
| "tokens_trained": 0.139568424 |
| }, |
| { |
| "epoch": 0.08112899794340826, |
| "grad_norm": 564.547607421875, |
| "loss": 12.5792, |
| "lr": 0.00057, |
| "step": 286, |
| "tokens_trained": 0.140557016 |
| }, |
| { |
| "epoch": 0.08169633359336217, |
| "grad_norm": 451.3592834472656, |
| "loss": 16.5433, |
| "lr": 0.000574, |
| "step": 288, |
| "tokens_trained": 0.141540248 |
| }, |
| { |
| "epoch": 0.08226366924331607, |
| "grad_norm": 404.2495422363281, |
| "loss": 16.4138, |
| "lr": 0.000578, |
| "step": 290, |
| "tokens_trained": 0.142528272 |
| }, |
| { |
| "epoch": 0.08283100489326999, |
| "grad_norm": 566.5219116210938, |
| "loss": 16.4743, |
| "lr": 0.0005819999999999999, |
| "step": 292, |
| "tokens_trained": 0.143513096 |
| }, |
| { |
| "epoch": 0.08339834054322388, |
| "grad_norm": 559.6517333984375, |
| "loss": 16.421, |
| "lr": 0.0005859999999999999, |
| "step": 294, |
| "tokens_trained": 0.144494472 |
| }, |
| { |
| "epoch": 0.08396567619317778, |
| "grad_norm": 260.874755859375, |
| "loss": 11.2214, |
| "lr": 0.00059, |
| "step": 296, |
| "tokens_trained": 0.14547876 |
| }, |
| { |
| "epoch": 0.0845330118431317, |
| "grad_norm": 272.02899169921875, |
| "loss": 10.3491, |
| "lr": 0.000594, |
| "step": 298, |
| "tokens_trained": 0.146465864 |
| }, |
| { |
| "epoch": 0.08510034749308559, |
| "grad_norm": 556.9845581054688, |
| "loss": 10.4348, |
| "lr": 0.000598, |
| "step": 300, |
| "tokens_trained": 0.147446344 |
| }, |
| { |
| "epoch": 0.0856676831430395, |
| "grad_norm": 273.35772705078125, |
| "loss": 8.3292, |
| "lr": 0.000602, |
| "step": 302, |
| "tokens_trained": 0.14843244 |
| }, |
| { |
| "epoch": 0.0862350187929934, |
| "grad_norm": 246.6316680908203, |
| "loss": 9.9362, |
| "lr": 0.000606, |
| "step": 304, |
| "tokens_trained": 0.149415976 |
| }, |
| { |
| "epoch": 0.0868023544429473, |
| "grad_norm": 564.4365844726562, |
| "loss": 9.2621, |
| "lr": 0.00061, |
| "step": 306, |
| "tokens_trained": 0.150398728 |
| }, |
| { |
| "epoch": 0.08736969009290121, |
| "grad_norm": 396.0948791503906, |
| "loss": 11.8526, |
| "lr": 0.000614, |
| "step": 308, |
| "tokens_trained": 0.151385104 |
| }, |
| { |
| "epoch": 0.08793702574285511, |
| "grad_norm": 488.6072692871094, |
| "loss": 11.8473, |
| "lr": 0.0006180000000000001, |
| "step": 310, |
| "tokens_trained": 0.152373672 |
| }, |
| { |
| "epoch": 0.08850436139280903, |
| "grad_norm": 346.70660400390625, |
| "loss": 12.0897, |
| "lr": 0.000622, |
| "step": 312, |
| "tokens_trained": 0.153356256 |
| }, |
| { |
| "epoch": 0.08907169704276292, |
| "grad_norm": 382.40679931640625, |
| "loss": 9.271, |
| "lr": 0.000626, |
| "step": 314, |
| "tokens_trained": 0.154342632 |
| }, |
| { |
| "epoch": 0.08963903269271682, |
| "grad_norm": 288.7908935546875, |
| "loss": 9.185, |
| "lr": 0.00063, |
| "step": 316, |
| "tokens_trained": 0.1553238 |
| }, |
| { |
| "epoch": 0.09020636834267073, |
| "grad_norm": 337.5335388183594, |
| "loss": 12.0555, |
| "lr": 0.000634, |
| "step": 318, |
| "tokens_trained": 0.156313168 |
| }, |
| { |
| "epoch": 0.09077370399262463, |
| "grad_norm": 349.25531005859375, |
| "loss": 8.51, |
| "lr": 0.000638, |
| "step": 320, |
| "tokens_trained": 0.157299448 |
| }, |
| { |
| "epoch": 0.09134103964257854, |
| "grad_norm": 471.7824401855469, |
| "loss": 14.1888, |
| "lr": 0.000642, |
| "step": 322, |
| "tokens_trained": 0.158285264 |
| }, |
| { |
| "epoch": 0.09190837529253244, |
| "grad_norm": 284.94036865234375, |
| "loss": 10.1593, |
| "lr": 0.000646, |
| "step": 324, |
| "tokens_trained": 0.159267512 |
| }, |
| { |
| "epoch": 0.09247571094248634, |
| "grad_norm": 510.90478515625, |
| "loss": 13.5744, |
| "lr": 0.0006500000000000001, |
| "step": 326, |
| "tokens_trained": 0.160250856 |
| }, |
| { |
| "epoch": 0.09304304659244025, |
| "grad_norm": 373.82965087890625, |
| "loss": 8.4999, |
| "lr": 0.0006540000000000001, |
| "step": 328, |
| "tokens_trained": 0.161231832 |
| }, |
| { |
| "epoch": 0.09361038224239415, |
| "grad_norm": 219.3827362060547, |
| "loss": 8.4436, |
| "lr": 0.0006580000000000001, |
| "step": 330, |
| "tokens_trained": 0.162217656 |
| }, |
| { |
| "epoch": 0.09417771789234806, |
| "grad_norm": 433.0914001464844, |
| "loss": 11.2019, |
| "lr": 0.000662, |
| "step": 332, |
| "tokens_trained": 0.163199096 |
| }, |
| { |
| "epoch": 0.09474505354230196, |
| "grad_norm": 242.65907287597656, |
| "loss": 9.0666, |
| "lr": 0.000666, |
| "step": 334, |
| "tokens_trained": 0.164178512 |
| }, |
| { |
| "epoch": 0.09531238919225588, |
| "grad_norm": 446.07916259765625, |
| "loss": 8.6546, |
| "lr": 0.00067, |
| "step": 336, |
| "tokens_trained": 0.165162464 |
| }, |
| { |
| "epoch": 0.09587972484220977, |
| "grad_norm": 231.8892364501953, |
| "loss": 7.5819, |
| "lr": 0.000674, |
| "step": 338, |
| "tokens_trained": 0.166141536 |
| }, |
| { |
| "epoch": 0.09644706049216367, |
| "grad_norm": 100.7306137084961, |
| "loss": 6.7047, |
| "lr": 0.0006780000000000001, |
| "step": 340, |
| "tokens_trained": 0.167123944 |
| }, |
| { |
| "epoch": 0.09701439614211758, |
| "grad_norm": 78.11279296875, |
| "loss": 5.9308, |
| "lr": 0.0006820000000000001, |
| "step": 342, |
| "tokens_trained": 0.168105264 |
| }, |
| { |
| "epoch": 0.09758173179207148, |
| "grad_norm": 271.466064453125, |
| "loss": 6.9141, |
| "lr": 0.0006860000000000001, |
| "step": 344, |
| "tokens_trained": 0.169088912 |
| }, |
| { |
| "epoch": 0.0981490674420254, |
| "grad_norm": 252.54478454589844, |
| "loss": 6.3281, |
| "lr": 0.00069, |
| "step": 346, |
| "tokens_trained": 0.170077368 |
| }, |
| { |
| "epoch": 0.0987164030919793, |
| "grad_norm": 305.8559875488281, |
| "loss": 6.443, |
| "lr": 0.000694, |
| "step": 348, |
| "tokens_trained": 0.171057232 |
| }, |
| { |
| "epoch": 0.09928373874193319, |
| "grad_norm": 227.74374389648438, |
| "loss": 6.552, |
| "lr": 0.0006979999999999999, |
| "step": 350, |
| "tokens_trained": 0.172041376 |
| }, |
| { |
| "epoch": 0.0998510743918871, |
| "grad_norm": 446.7601623535156, |
| "loss": 10.8184, |
| "lr": 0.0007019999999999999, |
| "step": 352, |
| "tokens_trained": 0.173023624 |
| }, |
| { |
| "epoch": 0.100418410041841, |
| "grad_norm": 353.0849609375, |
| "loss": 8.6327, |
| "lr": 0.0007059999999999999, |
| "step": 354, |
| "tokens_trained": 0.174005992 |
| }, |
| { |
| "epoch": 0.10098574569179491, |
| "grad_norm": 367.9427185058594, |
| "loss": 9.3898, |
| "lr": 0.00071, |
| "step": 356, |
| "tokens_trained": 0.174988304 |
| }, |
| { |
| "epoch": 0.10155308134174881, |
| "grad_norm": 224.4961700439453, |
| "loss": 8.284, |
| "lr": 0.000714, |
| "step": 358, |
| "tokens_trained": 0.175969816 |
| }, |
| { |
| "epoch": 0.10212041699170271, |
| "grad_norm": 221.86537170410156, |
| "loss": 7.0578, |
| "lr": 0.000718, |
| "step": 360, |
| "tokens_trained": 0.176952688 |
| }, |
| { |
| "epoch": 0.10268775264165662, |
| "grad_norm": 331.0989685058594, |
| "loss": 6.9561, |
| "lr": 0.000722, |
| "step": 362, |
| "tokens_trained": 0.177935144 |
| }, |
| { |
| "epoch": 0.10325508829161052, |
| "grad_norm": 171.6498260498047, |
| "loss": 7.203, |
| "lr": 0.000726, |
| "step": 364, |
| "tokens_trained": 0.178916776 |
| }, |
| { |
| "epoch": 0.10382242394156443, |
| "grad_norm": 284.2208557128906, |
| "loss": 10.3517, |
| "lr": 0.00073, |
| "step": 366, |
| "tokens_trained": 0.179903432 |
| }, |
| { |
| "epoch": 0.10438975959151833, |
| "grad_norm": 354.8574523925781, |
| "loss": 9.3888, |
| "lr": 0.000734, |
| "step": 368, |
| "tokens_trained": 0.180883224 |
| }, |
| { |
| "epoch": 0.10495709524147223, |
| "grad_norm": 344.82574462890625, |
| "loss": 10.5933, |
| "lr": 0.000738, |
| "step": 370, |
| "tokens_trained": 0.181863808 |
| }, |
| { |
| "epoch": 0.10552443089142614, |
| "grad_norm": 302.6838073730469, |
| "loss": 10.2832, |
| "lr": 0.000742, |
| "step": 372, |
| "tokens_trained": 0.182843712 |
| }, |
| { |
| "epoch": 0.10609176654138004, |
| "grad_norm": 323.0387878417969, |
| "loss": 6.4864, |
| "lr": 0.000746, |
| "step": 374, |
| "tokens_trained": 0.183825832 |
| }, |
| { |
| "epoch": 0.10637543436635699, |
| "eval_loss": 1.4430732727050781, |
| "eval_runtime": 20.5468, |
| "step": 375, |
| "tokens_trained": 0.184317744 |
| }, |
| { |
| "epoch": 0.10665910219133395, |
| "grad_norm": 133.74822998046875, |
| "loss": 5.4176, |
| "lr": 0.00075, |
| "step": 376, |
| "tokens_trained": 0.184811352 |
| }, |
| { |
| "epoch": 0.10722643784128785, |
| "grad_norm": 180.3372344970703, |
| "loss": 5.5641, |
| "lr": 0.000754, |
| "step": 378, |
| "tokens_trained": 0.185792528 |
| }, |
| { |
| "epoch": 0.10779377349124175, |
| "grad_norm": 250.83999633789062, |
| "loss": 5.8612, |
| "lr": 0.000758, |
| "step": 380, |
| "tokens_trained": 0.186777112 |
| }, |
| { |
| "epoch": 0.10836110914119566, |
| "grad_norm": 293.51959228515625, |
| "loss": 6.0418, |
| "lr": 0.000762, |
| "step": 382, |
| "tokens_trained": 0.18775724 |
| }, |
| { |
| "epoch": 0.10892844479114956, |
| "grad_norm": 292.56207275390625, |
| "loss": 6.1812, |
| "lr": 0.0007660000000000001, |
| "step": 384, |
| "tokens_trained": 0.188733568 |
| }, |
| { |
| "epoch": 0.10949578044110347, |
| "grad_norm": 121.82467651367188, |
| "loss": 6.0855, |
| "lr": 0.0007700000000000001, |
| "step": 386, |
| "tokens_trained": 0.189718512 |
| }, |
| { |
| "epoch": 0.11006311609105737, |
| "grad_norm": 124.30497741699219, |
| "loss": 5.7734, |
| "lr": 0.0007740000000000001, |
| "step": 388, |
| "tokens_trained": 0.190703776 |
| }, |
| { |
| "epoch": 0.11063045174101127, |
| "grad_norm": 143.64004516601562, |
| "loss": 5.7641, |
| "lr": 0.000778, |
| "step": 390, |
| "tokens_trained": 0.191689888 |
| }, |
| { |
| "epoch": 0.11119778739096518, |
| "grad_norm": 160.06784057617188, |
| "loss": 5.6025, |
| "lr": 0.000782, |
| "step": 392, |
| "tokens_trained": 0.192673992 |
| }, |
| { |
| "epoch": 0.11176512304091908, |
| "grad_norm": 226.97988891601562, |
| "loss": 6.0049, |
| "lr": 0.000786, |
| "step": 394, |
| "tokens_trained": 0.193656272 |
| }, |
| { |
| "epoch": 0.112332458690873, |
| "grad_norm": 223.26898193359375, |
| "loss": 5.6972, |
| "lr": 0.00079, |
| "step": 396, |
| "tokens_trained": 0.194639144 |
| }, |
| { |
| "epoch": 0.11289979434082689, |
| "grad_norm": 249.34912109375, |
| "loss": 5.7348, |
| "lr": 0.0007940000000000001, |
| "step": 398, |
| "tokens_trained": 0.195621256 |
| }, |
| { |
| "epoch": 0.11346712999078079, |
| "grad_norm": 161.34271240234375, |
| "loss": 5.6689, |
| "lr": 0.0007980000000000001, |
| "step": 400, |
| "tokens_trained": 0.196604136 |
| }, |
| { |
| "epoch": 0.1140344656407347, |
| "grad_norm": 148.53176879882812, |
| "loss": 5.702, |
| "lr": 0.0008020000000000001, |
| "step": 402, |
| "tokens_trained": 0.197586784 |
| }, |
| { |
| "epoch": 0.1146018012906886, |
| "grad_norm": 144.40835571289062, |
| "loss": 6.2402, |
| "lr": 0.0008060000000000001, |
| "step": 404, |
| "tokens_trained": 0.198570824 |
| }, |
| { |
| "epoch": 0.11516913694064251, |
| "grad_norm": 306.57562255859375, |
| "loss": 7.1739, |
| "lr": 0.0008100000000000001, |
| "step": 406, |
| "tokens_trained": 0.199548328 |
| }, |
| { |
| "epoch": 0.11573647259059641, |
| "grad_norm": 308.79180908203125, |
| "loss": 6.0972, |
| "lr": 0.0008139999999999999, |
| "step": 408, |
| "tokens_trained": 0.200532496 |
| }, |
| { |
| "epoch": 0.11630380824055031, |
| "grad_norm": 197.76791381835938, |
| "loss": 6.3533, |
| "lr": 0.0008179999999999999, |
| "step": 410, |
| "tokens_trained": 0.201514648 |
| }, |
| { |
| "epoch": 0.11687114389050422, |
| "grad_norm": 129.5694580078125, |
| "loss": 6.9628, |
| "lr": 0.0008219999999999999, |
| "step": 412, |
| "tokens_trained": 0.2024994 |
| }, |
| { |
| "epoch": 0.11743847954045812, |
| "grad_norm": 446.0195617675781, |
| "loss": 11.7562, |
| "lr": 0.000826, |
| "step": 414, |
| "tokens_trained": 0.20348012 |
| }, |
| { |
| "epoch": 0.11800581519041203, |
| "grad_norm": 355.5342712402344, |
| "loss": 8.8055, |
| "lr": 0.00083, |
| "step": 416, |
| "tokens_trained": 0.20446356 |
| }, |
| { |
| "epoch": 0.11857315084036593, |
| "grad_norm": 456.2491149902344, |
| "loss": 9.606, |
| "lr": 0.000834, |
| "step": 418, |
| "tokens_trained": 0.205445288 |
| }, |
| { |
| "epoch": 0.11914048649031983, |
| "grad_norm": 369.8676452636719, |
| "loss": 8.385, |
| "lr": 0.000838, |
| "step": 420, |
| "tokens_trained": 0.206427832 |
| }, |
| { |
| "epoch": 0.11970782214027374, |
| "grad_norm": 262.19073486328125, |
| "loss": 9.0956, |
| "lr": 0.000842, |
| "step": 422, |
| "tokens_trained": 0.207409848 |
| }, |
| { |
| "epoch": 0.12027515779022764, |
| "grad_norm": 120.3193130493164, |
| "loss": 5.4937, |
| "lr": 0.000846, |
| "step": 424, |
| "tokens_trained": 0.208391752 |
| }, |
| { |
| "epoch": 0.12084249344018155, |
| "grad_norm": 222.1111297607422, |
| "loss": 8.9367, |
| "lr": 0.00085, |
| "step": 426, |
| "tokens_trained": 0.20937384 |
| }, |
| { |
| "epoch": 0.12140982909013545, |
| "grad_norm": 137.16819763183594, |
| "loss": 7.5876, |
| "lr": 0.000854, |
| "step": 428, |
| "tokens_trained": 0.210358576 |
| }, |
| { |
| "epoch": 0.12197716474008935, |
| "grad_norm": 267.61846923828125, |
| "loss": 8.817, |
| "lr": 0.000858, |
| "step": 430, |
| "tokens_trained": 0.211340064 |
| }, |
| { |
| "epoch": 0.12254450039004326, |
| "grad_norm": 472.72906494140625, |
| "loss": 8.203, |
| "lr": 0.000862, |
| "step": 432, |
| "tokens_trained": 0.212321144 |
| }, |
| { |
| "epoch": 0.12311183603999716, |
| "grad_norm": 297.1420593261719, |
| "loss": 10.987, |
| "lr": 0.000866, |
| "step": 434, |
| "tokens_trained": 0.213300312 |
| }, |
| { |
| "epoch": 0.12367917168995107, |
| "grad_norm": 281.7297668457031, |
| "loss": 7.6117, |
| "lr": 0.00087, |
| "step": 436, |
| "tokens_trained": 0.214287624 |
| }, |
| { |
| "epoch": 0.12424650733990497, |
| "grad_norm": 203.09678649902344, |
| "loss": 6.5638, |
| "lr": 0.000874, |
| "step": 438, |
| "tokens_trained": 0.215272136 |
| }, |
| { |
| "epoch": 0.12481384298985887, |
| "grad_norm": 155.7823944091797, |
| "loss": 6.1131, |
| "lr": 0.000878, |
| "step": 440, |
| "tokens_trained": 0.216256392 |
| }, |
| { |
| "epoch": 0.12538117863981277, |
| "grad_norm": 189.86196899414062, |
| "loss": 8.2565, |
| "lr": 0.000882, |
| "step": 442, |
| "tokens_trained": 0.217242504 |
| }, |
| { |
| "epoch": 0.1259485142897667, |
| "grad_norm": 247.4568634033203, |
| "loss": 7.1005, |
| "lr": 0.0008860000000000001, |
| "step": 444, |
| "tokens_trained": 0.218226008 |
| }, |
| { |
| "epoch": 0.1265158499397206, |
| "grad_norm": 179.72825622558594, |
| "loss": 6.3379, |
| "lr": 0.0008900000000000001, |
| "step": 446, |
| "tokens_trained": 0.219210584 |
| }, |
| { |
| "epoch": 0.1270831855896745, |
| "grad_norm": 212.96356201171875, |
| "loss": 7.2514, |
| "lr": 0.000894, |
| "step": 448, |
| "tokens_trained": 0.220193952 |
| }, |
| { |
| "epoch": 0.1276505212396284, |
| "grad_norm": 105.67095947265625, |
| "loss": 5.456, |
| "lr": 0.000898, |
| "step": 450, |
| "tokens_trained": 0.221176936 |
| }, |
| { |
| "epoch": 0.1282178568895823, |
| "grad_norm": 302.9122619628906, |
| "loss": 6.4018, |
| "lr": 0.000902, |
| "step": 452, |
| "tokens_trained": 0.222161952 |
| }, |
| { |
| "epoch": 0.12878519253953621, |
| "grad_norm": 215.66561889648438, |
| "loss": 6.2853, |
| "lr": 0.000906, |
| "step": 454, |
| "tokens_trained": 0.223144912 |
| }, |
| { |
| "epoch": 0.1293525281894901, |
| "grad_norm": 272.9984130859375, |
| "loss": 7.3902, |
| "lr": 0.00091, |
| "step": 456, |
| "tokens_trained": 0.224127392 |
| }, |
| { |
| "epoch": 0.129919863839444, |
| "grad_norm": 200.7503662109375, |
| "loss": 6.1637, |
| "lr": 0.0009140000000000001, |
| "step": 458, |
| "tokens_trained": 0.22511648 |
| }, |
| { |
| "epoch": 0.1304871994893979, |
| "grad_norm": 93.23990631103516, |
| "loss": 6.4867, |
| "lr": 0.0009180000000000001, |
| "step": 460, |
| "tokens_trained": 0.226098144 |
| }, |
| { |
| "epoch": 0.1310545351393518, |
| "grad_norm": 274.37164306640625, |
| "loss": 8.99, |
| "lr": 0.0009220000000000001, |
| "step": 462, |
| "tokens_trained": 0.227081848 |
| }, |
| { |
| "epoch": 0.13162187078930573, |
| "grad_norm": 186.66322326660156, |
| "loss": 8.7122, |
| "lr": 0.0009260000000000001, |
| "step": 464, |
| "tokens_trained": 0.22806636 |
| }, |
| { |
| "epoch": 0.13218920643925963, |
| "grad_norm": 586.1035766601562, |
| "loss": 9.1045, |
| "lr": 0.00093, |
| "step": 466, |
| "tokens_trained": 0.229047872 |
| }, |
| { |
| "epoch": 0.13275654208921353, |
| "grad_norm": 227.55996704101562, |
| "loss": 9.7276, |
| "lr": 0.000934, |
| "step": 468, |
| "tokens_trained": 0.230031144 |
| }, |
| { |
| "epoch": 0.13332387773916743, |
| "grad_norm": 229.26609802246094, |
| "loss": 6.6244, |
| "lr": 0.0009379999999999999, |
| "step": 470, |
| "tokens_trained": 0.2310158 |
| }, |
| { |
| "epoch": 0.13389121338912133, |
| "grad_norm": 145.16331481933594, |
| "loss": 5.759, |
| "lr": 0.000942, |
| "step": 472, |
| "tokens_trained": 0.2319996 |
| }, |
| { |
| "epoch": 0.13445854903907525, |
| "grad_norm": 109.9937744140625, |
| "loss": 5.4838, |
| "lr": 0.000946, |
| "step": 474, |
| "tokens_trained": 0.232983808 |
| }, |
| { |
| "epoch": 0.13502588468902915, |
| "grad_norm": 135.74899291992188, |
| "loss": 6.2738, |
| "lr": 0.00095, |
| "step": 476, |
| "tokens_trained": 0.233963016 |
| }, |
| { |
| "epoch": 0.13559322033898305, |
| "grad_norm": 142.99449157714844, |
| "loss": 5.8459, |
| "lr": 0.000954, |
| "step": 478, |
| "tokens_trained": 0.234948864 |
| }, |
| { |
| "epoch": 0.13616055598893695, |
| "grad_norm": 198.66883850097656, |
| "loss": 6.6626, |
| "lr": 0.000958, |
| "step": 480, |
| "tokens_trained": 0.235932392 |
| }, |
| { |
| "epoch": 0.13672789163889085, |
| "grad_norm": 260.76507568359375, |
| "loss": 6.9299, |
| "lr": 0.000962, |
| "step": 482, |
| "tokens_trained": 0.236915664 |
| }, |
| { |
| "epoch": 0.13729522728884477, |
| "grad_norm": 267.97589111328125, |
| "loss": 6.4343, |
| "lr": 0.000966, |
| "step": 484, |
| "tokens_trained": 0.237896904 |
| }, |
| { |
| "epoch": 0.13786256293879867, |
| "grad_norm": 89.8781967163086, |
| "loss": 6.3203, |
| "lr": 0.0009699999999999999, |
| "step": 486, |
| "tokens_trained": 0.238874528 |
| }, |
| { |
| "epoch": 0.13842989858875257, |
| "grad_norm": 225.62985229492188, |
| "loss": 6.2778, |
| "lr": 0.000974, |
| "step": 488, |
| "tokens_trained": 0.2398588 |
| }, |
| { |
| "epoch": 0.13899723423870647, |
| "grad_norm": 85.84110260009766, |
| "loss": 5.2786, |
| "lr": 0.000978, |
| "step": 490, |
| "tokens_trained": 0.240839968 |
| }, |
| { |
| "epoch": 0.13956456988866037, |
| "grad_norm": 141.4368438720703, |
| "loss": 5.5525, |
| "lr": 0.000982, |
| "step": 492, |
| "tokens_trained": 0.241823544 |
| }, |
| { |
| "epoch": 0.1401319055386143, |
| "grad_norm": 94.9535140991211, |
| "loss": 5.4386, |
| "lr": 0.0009860000000000001, |
| "step": 494, |
| "tokens_trained": 0.242805456 |
| }, |
| { |
| "epoch": 0.1406992411885682, |
| "grad_norm": 157.4557647705078, |
| "loss": 5.9786, |
| "lr": 0.00099, |
| "step": 496, |
| "tokens_trained": 0.243792496 |
| }, |
| { |
| "epoch": 0.1412665768385221, |
| "grad_norm": 319.5025634765625, |
| "loss": 7.04, |
| "lr": 0.000994, |
| "step": 498, |
| "tokens_trained": 0.244772472 |
| }, |
| { |
| "epoch": 0.141833912488476, |
| "grad_norm": 282.26824951171875, |
| "loss": 9.4037, |
| "lr": 0.000998, |
| "step": 500, |
| "tokens_trained": 0.245758968 |
| }, |
| { |
| "epoch": 0.141833912488476, |
| "eval_loss": 2.152184247970581, |
| "eval_runtime": 21.2772, |
| "step": 500, |
| "tokens_trained": 0.245758968 |
| }, |
| { |
| "epoch": 0.1424012481384299, |
| "grad_norm": 306.0666809082031, |
| "loss": 7.8845, |
| "lr": 0.00099986013986014, |
| "step": 502, |
| "tokens_trained": 0.246739024 |
| }, |
| { |
| "epoch": 0.1429685837883838, |
| "grad_norm": 188.89024353027344, |
| "loss": 6.8118, |
| "lr": 0.0009995804195804196, |
| "step": 504, |
| "tokens_trained": 0.247726552 |
| }, |
| { |
| "epoch": 0.1435359194383377, |
| "grad_norm": 228.97474670410156, |
| "loss": 6.8475, |
| "lr": 0.0009993006993006994, |
| "step": 506, |
| "tokens_trained": 0.24870688 |
| }, |
| { |
| "epoch": 0.1441032550882916, |
| "grad_norm": 229.80029296875, |
| "loss": 6.2171, |
| "lr": 0.000999020979020979, |
| "step": 508, |
| "tokens_trained": 0.249689096 |
| }, |
| { |
| "epoch": 0.1446705907382455, |
| "grad_norm": 157.30340576171875, |
| "loss": 6.2281, |
| "lr": 0.0009987412587412587, |
| "step": 510, |
| "tokens_trained": 0.250671768 |
| }, |
| { |
| "epoch": 0.1452379263881994, |
| "grad_norm": 176.64683532714844, |
| "loss": 6.5993, |
| "lr": 0.0009984615384615386, |
| "step": 512, |
| "tokens_trained": 0.25165608 |
| }, |
| { |
| "epoch": 0.14580526203815333, |
| "grad_norm": 197.20526123046875, |
| "loss": 5.7267, |
| "lr": 0.0009981818181818182, |
| "step": 514, |
| "tokens_trained": 0.252639712 |
| }, |
| { |
| "epoch": 0.14637259768810723, |
| "grad_norm": 54.713260650634766, |
| "loss": 5.7911, |
| "lr": 0.000997902097902098, |
| "step": 516, |
| "tokens_trained": 0.253622816 |
| }, |
| { |
| "epoch": 0.14693993333806113, |
| "grad_norm": 185.74923706054688, |
| "loss": 7.0055, |
| "lr": 0.0009976223776223777, |
| "step": 518, |
| "tokens_trained": 0.254602792 |
| }, |
| { |
| "epoch": 0.14750726898801503, |
| "grad_norm": 240.31021118164062, |
| "loss": 6.452, |
| "lr": 0.0009973426573426573, |
| "step": 520, |
| "tokens_trained": 0.255584736 |
| }, |
| { |
| "epoch": 0.14807460463796893, |
| "grad_norm": 160.2477264404297, |
| "loss": 7.6556, |
| "lr": 0.000997062937062937, |
| "step": 522, |
| "tokens_trained": 0.256563792 |
| }, |
| { |
| "epoch": 0.14864194028792285, |
| "grad_norm": 283.0034484863281, |
| "loss": 6.5345, |
| "lr": 0.0009967832167832168, |
| "step": 524, |
| "tokens_trained": 0.257546656 |
| }, |
| { |
| "epoch": 0.14920927593787675, |
| "grad_norm": 245.537109375, |
| "loss": 6.3281, |
| "lr": 0.0009965034965034964, |
| "step": 526, |
| "tokens_trained": 0.258530832 |
| }, |
| { |
| "epoch": 0.14977661158783065, |
| "grad_norm": 162.1538848876953, |
| "loss": 7.4072, |
| "lr": 0.0009962237762237763, |
| "step": 528, |
| "tokens_trained": 0.259514528 |
| }, |
| { |
| "epoch": 0.15034394723778455, |
| "grad_norm": 107.25792694091797, |
| "loss": 5.356, |
| "lr": 0.000995944055944056, |
| "step": 530, |
| "tokens_trained": 0.260500912 |
| }, |
| { |
| "epoch": 0.15091128288773845, |
| "grad_norm": 173.73353576660156, |
| "loss": 6.8625, |
| "lr": 0.0009956643356643356, |
| "step": 532, |
| "tokens_trained": 0.26148632 |
| }, |
| { |
| "epoch": 0.15147861853769237, |
| "grad_norm": 178.33541870117188, |
| "loss": 5.8794, |
| "lr": 0.0009953846153846154, |
| "step": 534, |
| "tokens_trained": 0.262468816 |
| }, |
| { |
| "epoch": 0.15204595418764627, |
| "grad_norm": 181.2533416748047, |
| "loss": 7.0243, |
| "lr": 0.000995104895104895, |
| "step": 536, |
| "tokens_trained": 0.263446696 |
| }, |
| { |
| "epoch": 0.15261328983760017, |
| "grad_norm": 208.79293823242188, |
| "loss": 5.8908, |
| "lr": 0.000994825174825175, |
| "step": 538, |
| "tokens_trained": 0.26443108 |
| }, |
| { |
| "epoch": 0.15318062548755407, |
| "grad_norm": 148.66285705566406, |
| "loss": 6.0831, |
| "lr": 0.0009945454545454546, |
| "step": 540, |
| "tokens_trained": 0.265414496 |
| }, |
| { |
| "epoch": 0.15374796113750797, |
| "grad_norm": 165.044189453125, |
| "loss": 5.5594, |
| "lr": 0.0009942657342657344, |
| "step": 542, |
| "tokens_trained": 0.266394128 |
| }, |
| { |
| "epoch": 0.1543152967874619, |
| "grad_norm": 124.5405502319336, |
| "loss": 5.2442, |
| "lr": 0.000993986013986014, |
| "step": 544, |
| "tokens_trained": 0.267378768 |
| }, |
| { |
| "epoch": 0.1548826324374158, |
| "grad_norm": 68.66510772705078, |
| "loss": 5.1173, |
| "lr": 0.0009937062937062937, |
| "step": 546, |
| "tokens_trained": 0.268360184 |
| }, |
| { |
| "epoch": 0.1554499680873697, |
| "grad_norm": 57.052860260009766, |
| "loss": 5.2348, |
| "lr": 0.0009934265734265735, |
| "step": 548, |
| "tokens_trained": 0.269345672 |
| }, |
| { |
| "epoch": 0.1560173037373236, |
| "grad_norm": 184.9175567626953, |
| "loss": 6.7748, |
| "lr": 0.0009931468531468532, |
| "step": 550, |
| "tokens_trained": 0.2703288 |
| }, |
| { |
| "epoch": 0.15658463938727749, |
| "grad_norm": 72.9861831665039, |
| "loss": 5.7387, |
| "lr": 0.000992867132867133, |
| "step": 552, |
| "tokens_trained": 0.271309176 |
| }, |
| { |
| "epoch": 0.1571519750372314, |
| "grad_norm": 135.864501953125, |
| "loss": 6.3035, |
| "lr": 0.0009925874125874127, |
| "step": 554, |
| "tokens_trained": 0.27229644 |
| }, |
| { |
| "epoch": 0.1577193106871853, |
| "grad_norm": 130.579833984375, |
| "loss": 5.4434, |
| "lr": 0.0009923076923076923, |
| "step": 556, |
| "tokens_trained": 0.273277904 |
| }, |
| { |
| "epoch": 0.1582866463371392, |
| "grad_norm": 206.77345275878906, |
| "loss": 5.8649, |
| "lr": 0.000992027972027972, |
| "step": 558, |
| "tokens_trained": 0.274261712 |
| }, |
| { |
| "epoch": 0.1588539819870931, |
| "grad_norm": 144.0505828857422, |
| "loss": 5.3459, |
| "lr": 0.0009917482517482518, |
| "step": 560, |
| "tokens_trained": 0.2752468 |
| }, |
| { |
| "epoch": 0.159421317637047, |
| "grad_norm": 87.56634521484375, |
| "loss": 5.6321, |
| "lr": 0.0009914685314685314, |
| "step": 562, |
| "tokens_trained": 0.276232384 |
| }, |
| { |
| "epoch": 0.15998865328700093, |
| "grad_norm": 275.2727355957031, |
| "loss": 6.7515, |
| "lr": 0.0009911888111888113, |
| "step": 564, |
| "tokens_trained": 0.277211608 |
| }, |
| { |
| "epoch": 0.16055598893695483, |
| "grad_norm": 97.00019836425781, |
| "loss": 5.4374, |
| "lr": 0.000990909090909091, |
| "step": 566, |
| "tokens_trained": 0.278196336 |
| }, |
| { |
| "epoch": 0.16112332458690873, |
| "grad_norm": 102.91439056396484, |
| "loss": 5.729, |
| "lr": 0.0009906293706293705, |
| "step": 568, |
| "tokens_trained": 0.279175672 |
| }, |
| { |
| "epoch": 0.16169066023686263, |
| "grad_norm": 151.12432861328125, |
| "loss": 5.4189, |
| "lr": 0.0009903496503496504, |
| "step": 570, |
| "tokens_trained": 0.280161088 |
| }, |
| { |
| "epoch": 0.16225799588681653, |
| "grad_norm": 86.6823959350586, |
| "loss": 5.1704, |
| "lr": 0.00099006993006993, |
| "step": 572, |
| "tokens_trained": 0.28114256 |
| }, |
| { |
| "epoch": 0.16282533153677045, |
| "grad_norm": 90.7052230834961, |
| "loss": 5.3673, |
| "lr": 0.0009897902097902099, |
| "step": 574, |
| "tokens_trained": 0.282128904 |
| }, |
| { |
| "epoch": 0.16339266718672435, |
| "grad_norm": 146.92874145507812, |
| "loss": 5.5971, |
| "lr": 0.0009895104895104895, |
| "step": 576, |
| "tokens_trained": 0.28311528 |
| }, |
| { |
| "epoch": 0.16396000283667825, |
| "grad_norm": 189.76296997070312, |
| "loss": 5.3109, |
| "lr": 0.0009892307692307694, |
| "step": 578, |
| "tokens_trained": 0.284098528 |
| }, |
| { |
| "epoch": 0.16452733848663215, |
| "grad_norm": 174.48092651367188, |
| "loss": 5.68, |
| "lr": 0.000988951048951049, |
| "step": 580, |
| "tokens_trained": 0.285081064 |
| }, |
| { |
| "epoch": 0.16509467413658604, |
| "grad_norm": 154.10816955566406, |
| "loss": 5.3307, |
| "lr": 0.0009886713286713286, |
| "step": 582, |
| "tokens_trained": 0.286067952 |
| }, |
| { |
| "epoch": 0.16566200978653997, |
| "grad_norm": 64.28263092041016, |
| "loss": 5.1676, |
| "lr": 0.0009883916083916085, |
| "step": 584, |
| "tokens_trained": 0.287051384 |
| }, |
| { |
| "epoch": 0.16622934543649387, |
| "grad_norm": 103.81795501708984, |
| "loss": 5.3436, |
| "lr": 0.0009881118881118881, |
| "step": 586, |
| "tokens_trained": 0.28803284 |
| }, |
| { |
| "epoch": 0.16679668108644777, |
| "grad_norm": 144.0076904296875, |
| "loss": 5.3033, |
| "lr": 0.000987832167832168, |
| "step": 588, |
| "tokens_trained": 0.289014824 |
| }, |
| { |
| "epoch": 0.16736401673640167, |
| "grad_norm": 88.31237030029297, |
| "loss": 5.0609, |
| "lr": 0.0009875524475524476, |
| "step": 590, |
| "tokens_trained": 0.289999864 |
| }, |
| { |
| "epoch": 0.16793135238635556, |
| "grad_norm": 68.4583740234375, |
| "loss": 5.0702, |
| "lr": 0.0009872727272727273, |
| "step": 592, |
| "tokens_trained": 0.290983888 |
| }, |
| { |
| "epoch": 0.1684986880363095, |
| "grad_norm": 135.28665161132812, |
| "loss": 5.3962, |
| "lr": 0.000986993006993007, |
| "step": 594, |
| "tokens_trained": 0.291965752 |
| }, |
| { |
| "epoch": 0.1690660236862634, |
| "grad_norm": 80.0412368774414, |
| "loss": 5.0246, |
| "lr": 0.0009867132867132867, |
| "step": 596, |
| "tokens_trained": 0.292946952 |
| }, |
| { |
| "epoch": 0.1696333593362173, |
| "grad_norm": 43.29194641113281, |
| "loss": 5.0051, |
| "lr": 0.0009864335664335664, |
| "step": 598, |
| "tokens_trained": 0.293928976 |
| }, |
| { |
| "epoch": 0.17020069498617119, |
| "grad_norm": 220.88687133789062, |
| "loss": 6.0798, |
| "lr": 0.0009861538461538462, |
| "step": 600, |
| "tokens_trained": 0.294912408 |
| }, |
| { |
| "epoch": 0.17076803063612508, |
| "grad_norm": 102.58654022216797, |
| "loss": 5.1271, |
| "lr": 0.0009858741258741259, |
| "step": 602, |
| "tokens_trained": 0.29589416 |
| }, |
| { |
| "epoch": 0.171335366286079, |
| "grad_norm": 119.0067138671875, |
| "loss": 5.7402, |
| "lr": 0.0009855944055944055, |
| "step": 604, |
| "tokens_trained": 0.296878584 |
| }, |
| { |
| "epoch": 0.1719027019360329, |
| "grad_norm": 138.8656005859375, |
| "loss": 5.1951, |
| "lr": 0.0009853146853146854, |
| "step": 606, |
| "tokens_trained": 0.297864552 |
| }, |
| { |
| "epoch": 0.1724700375859868, |
| "grad_norm": 73.5890884399414, |
| "loss": 5.2522, |
| "lr": 0.000985034965034965, |
| "step": 608, |
| "tokens_trained": 0.298854088 |
| }, |
| { |
| "epoch": 0.1730373732359407, |
| "grad_norm": 113.78330993652344, |
| "loss": 5.6683, |
| "lr": 0.0009847552447552449, |
| "step": 610, |
| "tokens_trained": 0.299835024 |
| }, |
| { |
| "epoch": 0.1736047088858946, |
| "grad_norm": 125.20297241210938, |
| "loss": 5.1812, |
| "lr": 0.0009844755244755245, |
| "step": 612, |
| "tokens_trained": 0.30082032 |
| }, |
| { |
| "epoch": 0.17417204453584853, |
| "grad_norm": 67.46041870117188, |
| "loss": 5.0417, |
| "lr": 0.0009841958041958043, |
| "step": 614, |
| "tokens_trained": 0.301808456 |
| }, |
| { |
| "epoch": 0.17473938018580243, |
| "grad_norm": 117.30754852294922, |
| "loss": 5.3064, |
| "lr": 0.000983916083916084, |
| "step": 616, |
| "tokens_trained": 0.302794456 |
| }, |
| { |
| "epoch": 0.17530671583575633, |
| "grad_norm": 124.30754089355469, |
| "loss": 5.1614, |
| "lr": 0.0009836363636363636, |
| "step": 618, |
| "tokens_trained": 0.303777376 |
| }, |
| { |
| "epoch": 0.17587405148571023, |
| "grad_norm": 102.72042083740234, |
| "loss": 5.1265, |
| "lr": 0.0009833566433566435, |
| "step": 620, |
| "tokens_trained": 0.304758864 |
| }, |
| { |
| "epoch": 0.17644138713566412, |
| "grad_norm": 39.332252502441406, |
| "loss": 5.1078, |
| "lr": 0.000983076923076923, |
| "step": 622, |
| "tokens_trained": 0.30574392 |
| }, |
| { |
| "epoch": 0.17700872278561805, |
| "grad_norm": 153.84811401367188, |
| "loss": 5.7696, |
| "lr": 0.000982797202797203, |
| "step": 624, |
| "tokens_trained": 0.306727584 |
| }, |
| { |
| "epoch": 0.17729239061059499, |
| "eval_loss": 1.3463915586471558, |
| "eval_runtime": 20.8357, |
| "step": 625, |
| "tokens_trained": 0.307220496 |
| }, |
| { |
| "epoch": 0.17757605843557195, |
| "grad_norm": 160.2552490234375, |
| "loss": 5.2283, |
| "lr": 0.0009825174825174826, |
| "step": 626, |
| "tokens_trained": 0.307713024 |
| }, |
| { |
| "epoch": 0.17814339408552585, |
| "grad_norm": 186.77407836914062, |
| "loss": 5.2866, |
| "lr": 0.0009822377622377622, |
| "step": 628, |
| "tokens_trained": 0.308700128 |
| }, |
| { |
| "epoch": 0.17871072973547975, |
| "grad_norm": 84.55519104003906, |
| "loss": 5.1106, |
| "lr": 0.0009819580419580419, |
| "step": 630, |
| "tokens_trained": 0.309681208 |
| }, |
| { |
| "epoch": 0.17927806538543364, |
| "grad_norm": 20.617040634155273, |
| "loss": 4.8327, |
| "lr": 0.0009816783216783217, |
| "step": 632, |
| "tokens_trained": 0.310662224 |
| }, |
| { |
| "epoch": 0.17984540103538757, |
| "grad_norm": 168.06039428710938, |
| "loss": 6.0704, |
| "lr": 0.0009813986013986014, |
| "step": 634, |
| "tokens_trained": 0.31164064 |
| }, |
| { |
| "epoch": 0.18041273668534147, |
| "grad_norm": 238.23736572265625, |
| "loss": 5.6188, |
| "lr": 0.0009811188811188812, |
| "step": 636, |
| "tokens_trained": 0.312622568 |
| }, |
| { |
| "epoch": 0.18098007233529537, |
| "grad_norm": 140.0707550048828, |
| "loss": 6.4034, |
| "lr": 0.0009808391608391608, |
| "step": 638, |
| "tokens_trained": 0.313604944 |
| }, |
| { |
| "epoch": 0.18154740798524927, |
| "grad_norm": 161.19302368164062, |
| "loss": 5.4906, |
| "lr": 0.0009805594405594405, |
| "step": 640, |
| "tokens_trained": 0.314592072 |
| }, |
| { |
| "epoch": 0.18211474363520316, |
| "grad_norm": 121.9577407836914, |
| "loss": 5.2097, |
| "lr": 0.0009802797202797203, |
| "step": 642, |
| "tokens_trained": 0.315574392 |
| }, |
| { |
| "epoch": 0.1826820792851571, |
| "grad_norm": 121.25574493408203, |
| "loss": 5.0317, |
| "lr": 0.00098, |
| "step": 644, |
| "tokens_trained": 0.316559008 |
| }, |
| { |
| "epoch": 0.183249414935111, |
| "grad_norm": 28.328269958496094, |
| "loss": 4.932, |
| "lr": 0.0009797202797202798, |
| "step": 646, |
| "tokens_trained": 0.317538776 |
| }, |
| { |
| "epoch": 0.1838167505850649, |
| "grad_norm": 127.77408599853516, |
| "loss": 5.8335, |
| "lr": 0.0009794405594405595, |
| "step": 648, |
| "tokens_trained": 0.31851792 |
| }, |
| { |
| "epoch": 0.18438408623501878, |
| "grad_norm": 94.9522933959961, |
| "loss": 5.1948, |
| "lr": 0.000979160839160839, |
| "step": 650, |
| "tokens_trained": 0.319501576 |
| }, |
| { |
| "epoch": 0.18495142188497268, |
| "grad_norm": 110.33658599853516, |
| "loss": 5.098, |
| "lr": 0.000978881118881119, |
| "step": 652, |
| "tokens_trained": 0.320482392 |
| }, |
| { |
| "epoch": 0.1855187575349266, |
| "grad_norm": 67.23124694824219, |
| "loss": 4.7723, |
| "lr": 0.0009786013986013986, |
| "step": 654, |
| "tokens_trained": 0.32146712 |
| }, |
| { |
| "epoch": 0.1860860931848805, |
| "grad_norm": 61.519866943359375, |
| "loss": 4.7245, |
| "lr": 0.0009783216783216782, |
| "step": 656, |
| "tokens_trained": 0.322449576 |
| }, |
| { |
| "epoch": 0.1866534288348344, |
| "grad_norm": 99.51078033447266, |
| "loss": 4.783, |
| "lr": 0.000978041958041958, |
| "step": 658, |
| "tokens_trained": 0.323432688 |
| }, |
| { |
| "epoch": 0.1872207644847883, |
| "grad_norm": 44.619197845458984, |
| "loss": 4.7495, |
| "lr": 0.000977762237762238, |
| "step": 660, |
| "tokens_trained": 0.324413952 |
| }, |
| { |
| "epoch": 0.18778810013474223, |
| "grad_norm": 114.5891342163086, |
| "loss": 5.1261, |
| "lr": 0.0009774825174825176, |
| "step": 662, |
| "tokens_trained": 0.325394536 |
| }, |
| { |
| "epoch": 0.18835543578469613, |
| "grad_norm": 100.3728256225586, |
| "loss": 4.7883, |
| "lr": 0.0009772027972027972, |
| "step": 664, |
| "tokens_trained": 0.326374672 |
| }, |
| { |
| "epoch": 0.18892277143465003, |
| "grad_norm": 51.883033752441406, |
| "loss": 4.7249, |
| "lr": 0.0009769230769230768, |
| "step": 666, |
| "tokens_trained": 0.327357152 |
| }, |
| { |
| "epoch": 0.18949010708460393, |
| "grad_norm": 82.27507019042969, |
| "loss": 4.8277, |
| "lr": 0.0009766433566433567, |
| "step": 668, |
| "tokens_trained": 0.328342088 |
| }, |
| { |
| "epoch": 0.19005744273455782, |
| "grad_norm": 83.53064727783203, |
| "loss": 4.8338, |
| "lr": 0.0009763636363636363, |
| "step": 670, |
| "tokens_trained": 0.329319248 |
| }, |
| { |
| "epoch": 0.19062477838451175, |
| "grad_norm": 76.18387603759766, |
| "loss": 4.6958, |
| "lr": 0.0009760839160839161, |
| "step": 672, |
| "tokens_trained": 0.330305968 |
| }, |
| { |
| "epoch": 0.19119211403446565, |
| "grad_norm": 27.401426315307617, |
| "loss": 4.6929, |
| "lr": 0.0009758041958041958, |
| "step": 674, |
| "tokens_trained": 0.3312912 |
| }, |
| { |
| "epoch": 0.19175944968441955, |
| "grad_norm": 186.770263671875, |
| "loss": 5.5089, |
| "lr": 0.0009755244755244756, |
| "step": 676, |
| "tokens_trained": 0.332275224 |
| }, |
| { |
| "epoch": 0.19232678533437345, |
| "grad_norm": 105.02385711669922, |
| "loss": 4.8876, |
| "lr": 0.0009752447552447553, |
| "step": 678, |
| "tokens_trained": 0.33325588 |
| }, |
| { |
| "epoch": 0.19289412098432734, |
| "grad_norm": 94.96269989013672, |
| "loss": 5.1235, |
| "lr": 0.0009749650349650349, |
| "step": 680, |
| "tokens_trained": 0.334238408 |
| }, |
| { |
| "epoch": 0.19346145663428127, |
| "grad_norm": 92.29356384277344, |
| "loss": 4.8194, |
| "lr": 0.0009746853146853148, |
| "step": 682, |
| "tokens_trained": 0.335219368 |
| }, |
| { |
| "epoch": 0.19402879228423517, |
| "grad_norm": 59.1584358215332, |
| "loss": 4.7511, |
| "lr": 0.0009744055944055944, |
| "step": 684, |
| "tokens_trained": 0.336207136 |
| }, |
| { |
| "epoch": 0.19459612793418907, |
| "grad_norm": 54.759002685546875, |
| "loss": 4.777, |
| "lr": 0.0009741258741258742, |
| "step": 686, |
| "tokens_trained": 0.337193536 |
| }, |
| { |
| "epoch": 0.19516346358414297, |
| "grad_norm": 92.20452880859375, |
| "loss": 4.8225, |
| "lr": 0.0009738461538461538, |
| "step": 688, |
| "tokens_trained": 0.338179224 |
| }, |
| { |
| "epoch": 0.19573079923409686, |
| "grad_norm": 75.97005462646484, |
| "loss": 4.655, |
| "lr": 0.0009735664335664336, |
| "step": 690, |
| "tokens_trained": 0.339162168 |
| }, |
| { |
| "epoch": 0.1962981348840508, |
| "grad_norm": 58.19076919555664, |
| "loss": 4.6446, |
| "lr": 0.0009732867132867133, |
| "step": 692, |
| "tokens_trained": 0.340138904 |
| }, |
| { |
| "epoch": 0.1968654705340047, |
| "grad_norm": 50.81512451171875, |
| "loss": 4.5866, |
| "lr": 0.000973006993006993, |
| "step": 694, |
| "tokens_trained": 0.34112288 |
| }, |
| { |
| "epoch": 0.1974328061839586, |
| "grad_norm": 61.683372497558594, |
| "loss": 4.6018, |
| "lr": 0.0009727272727272728, |
| "step": 696, |
| "tokens_trained": 0.342111992 |
| }, |
| { |
| "epoch": 0.19800014183391249, |
| "grad_norm": 61.01798629760742, |
| "loss": 4.6007, |
| "lr": 0.0009724475524475524, |
| "step": 698, |
| "tokens_trained": 0.343095912 |
| }, |
| { |
| "epoch": 0.19856747748386638, |
| "grad_norm": 96.49671936035156, |
| "loss": 4.7035, |
| "lr": 0.0009721678321678323, |
| "step": 700, |
| "tokens_trained": 0.344078632 |
| }, |
| { |
| "epoch": 0.1991348131338203, |
| "grad_norm": 64.7771224975586, |
| "loss": 4.8341, |
| "lr": 0.0009718881118881119, |
| "step": 702, |
| "tokens_trained": 0.345060576 |
| }, |
| { |
| "epoch": 0.1997021487837742, |
| "grad_norm": 90.1478042602539, |
| "loss": 4.7739, |
| "lr": 0.0009716083916083917, |
| "step": 704, |
| "tokens_trained": 0.34604112 |
| }, |
| { |
| "epoch": 0.2002694844337281, |
| "grad_norm": 67.6308822631836, |
| "loss": 4.6218, |
| "lr": 0.0009713286713286713, |
| "step": 706, |
| "tokens_trained": 0.347023496 |
| }, |
| { |
| "epoch": 0.200836820083682, |
| "grad_norm": 40.50175094604492, |
| "loss": 4.6008, |
| "lr": 0.000971048951048951, |
| "step": 708, |
| "tokens_trained": 0.348005416 |
| }, |
| { |
| "epoch": 0.2014041557336359, |
| "grad_norm": 33.6448860168457, |
| "loss": 4.5307, |
| "lr": 0.0009707692307692308, |
| "step": 710, |
| "tokens_trained": 0.3489886 |
| }, |
| { |
| "epoch": 0.20197149138358983, |
| "grad_norm": 15.484851837158203, |
| "loss": 4.5065, |
| "lr": 0.0009704895104895105, |
| "step": 712, |
| "tokens_trained": 0.34997024 |
| }, |
| { |
| "epoch": 0.20253882703354373, |
| "grad_norm": 109.26301574707031, |
| "loss": 4.9613, |
| "lr": 0.0009702097902097903, |
| "step": 714, |
| "tokens_trained": 0.350958496 |
| }, |
| { |
| "epoch": 0.20310616268349763, |
| "grad_norm": 150.07492065429688, |
| "loss": 4.8507, |
| "lr": 0.0009699300699300699, |
| "step": 716, |
| "tokens_trained": 0.35193892 |
| }, |
| { |
| "epoch": 0.20367349833345152, |
| "grad_norm": 113.43978881835938, |
| "loss": 5.4494, |
| "lr": 0.0009696503496503498, |
| "step": 718, |
| "tokens_trained": 0.35291908 |
| }, |
| { |
| "epoch": 0.20424083398340542, |
| "grad_norm": 123.0071792602539, |
| "loss": 4.9475, |
| "lr": 0.0009693706293706294, |
| "step": 720, |
| "tokens_trained": 0.353896072 |
| }, |
| { |
| "epoch": 0.20480816963335935, |
| "grad_norm": 65.55500793457031, |
| "loss": 4.7585, |
| "lr": 0.0009690909090909091, |
| "step": 722, |
| "tokens_trained": 0.354878992 |
| }, |
| { |
| "epoch": 0.20537550528331325, |
| "grad_norm": 36.11159896850586, |
| "loss": 4.6323, |
| "lr": 0.0009688111888111888, |
| "step": 724, |
| "tokens_trained": 0.355863728 |
| }, |
| { |
| "epoch": 0.20594284093326715, |
| "grad_norm": 30.566436767578125, |
| "loss": 4.53, |
| "lr": 0.0009685314685314685, |
| "step": 726, |
| "tokens_trained": 0.356845272 |
| }, |
| { |
| "epoch": 0.20651017658322104, |
| "grad_norm": 59.01853561401367, |
| "loss": 4.5283, |
| "lr": 0.0009682517482517483, |
| "step": 728, |
| "tokens_trained": 0.357826656 |
| }, |
| { |
| "epoch": 0.20707751223317494, |
| "grad_norm": 91.78115844726562, |
| "loss": 4.6149, |
| "lr": 0.000967972027972028, |
| "step": 730, |
| "tokens_trained": 0.358809896 |
| }, |
| { |
| "epoch": 0.20764484788312887, |
| "grad_norm": 67.97398376464844, |
| "loss": 4.617, |
| "lr": 0.0009676923076923078, |
| "step": 732, |
| "tokens_trained": 0.359788736 |
| }, |
| { |
| "epoch": 0.20821218353308277, |
| "grad_norm": 42.82001876831055, |
| "loss": 4.6134, |
| "lr": 0.0009674125874125874, |
| "step": 734, |
| "tokens_trained": 0.360771744 |
| }, |
| { |
| "epoch": 0.20877951918303667, |
| "grad_norm": 63.52122116088867, |
| "loss": 4.6995, |
| "lr": 0.0009671328671328672, |
| "step": 736, |
| "tokens_trained": 0.361757656 |
| }, |
| { |
| "epoch": 0.20934685483299056, |
| "grad_norm": 116.39544677734375, |
| "loss": 4.7153, |
| "lr": 0.0009668531468531469, |
| "step": 738, |
| "tokens_trained": 0.362744008 |
| }, |
| { |
| "epoch": 0.20991419048294446, |
| "grad_norm": 40.74269485473633, |
| "loss": 4.7978, |
| "lr": 0.0009665734265734266, |
| "step": 740, |
| "tokens_trained": 0.36372872 |
| }, |
| { |
| "epoch": 0.2104815261328984, |
| "grad_norm": 114.29917907714844, |
| "loss": 5.1683, |
| "lr": 0.0009662937062937063, |
| "step": 742, |
| "tokens_trained": 0.364710536 |
| }, |
| { |
| "epoch": 0.2110488617828523, |
| "grad_norm": 115.83326721191406, |
| "loss": 4.7642, |
| "lr": 0.000966013986013986, |
| "step": 744, |
| "tokens_trained": 0.3656912 |
| }, |
| { |
| "epoch": 0.21161619743280619, |
| "grad_norm": 21.708093643188477, |
| "loss": 4.8244, |
| "lr": 0.0009657342657342657, |
| "step": 746, |
| "tokens_trained": 0.36667388 |
| }, |
| { |
| "epoch": 0.21218353308276008, |
| "grad_norm": 182.01918029785156, |
| "loss": 5.6045, |
| "lr": 0.0009654545454545455, |
| "step": 748, |
| "tokens_trained": 0.3676634 |
| }, |
| { |
| "epoch": 0.21275086873271398, |
| "grad_norm": 47.119319915771484, |
| "loss": 4.7929, |
| "lr": 0.0009651748251748252, |
| "step": 750, |
| "tokens_trained": 0.368647288 |
| }, |
| { |
| "epoch": 0.21275086873271398, |
| "eval_loss": 1.2186306715011597, |
| "eval_runtime": 20.9362, |
| "step": 750, |
| "tokens_trained": 0.368647288 |
| }, |
| { |
| "epoch": 0.2133182043826679, |
| "grad_norm": 51.43566131591797, |
| "loss": 4.7298, |
| "lr": 0.0009648951048951049, |
| "step": 752, |
| "tokens_trained": 0.36962992 |
| }, |
| { |
| "epoch": 0.2138855400326218, |
| "grad_norm": 79.49323272705078, |
| "loss": 5.0749, |
| "lr": 0.0009646153846153846, |
| "step": 754, |
| "tokens_trained": 0.370616064 |
| }, |
| { |
| "epoch": 0.2144528756825757, |
| "grad_norm": 119.80200958251953, |
| "loss": 4.8198, |
| "lr": 0.0009643356643356644, |
| "step": 756, |
| "tokens_trained": 0.371596208 |
| }, |
| { |
| "epoch": 0.2150202113325296, |
| "grad_norm": 95.88092041015625, |
| "loss": 4.7437, |
| "lr": 0.0009640559440559441, |
| "step": 758, |
| "tokens_trained": 0.372579584 |
| }, |
| { |
| "epoch": 0.2155875469824835, |
| "grad_norm": 79.64202117919922, |
| "loss": 4.9181, |
| "lr": 0.0009637762237762237, |
| "step": 760, |
| "tokens_trained": 0.373563056 |
| }, |
| { |
| "epoch": 0.21615488263243743, |
| "grad_norm": 79.93920135498047, |
| "loss": 4.6393, |
| "lr": 0.0009634965034965035, |
| "step": 762, |
| "tokens_trained": 0.374547648 |
| }, |
| { |
| "epoch": 0.21672221828239133, |
| "grad_norm": 78.67620849609375, |
| "loss": 4.6178, |
| "lr": 0.0009632167832167832, |
| "step": 764, |
| "tokens_trained": 0.375531456 |
| }, |
| { |
| "epoch": 0.21728955393234523, |
| "grad_norm": 56.32818603515625, |
| "loss": 4.6498, |
| "lr": 0.000962937062937063, |
| "step": 766, |
| "tokens_trained": 0.376516896 |
| }, |
| { |
| "epoch": 0.21785688958229912, |
| "grad_norm": 45.35737228393555, |
| "loss": 4.5812, |
| "lr": 0.0009626573426573427, |
| "step": 768, |
| "tokens_trained": 0.377499752 |
| }, |
| { |
| "epoch": 0.21842422523225302, |
| "grad_norm": 58.13076400756836, |
| "loss": 4.5793, |
| "lr": 0.0009623776223776224, |
| "step": 770, |
| "tokens_trained": 0.37848276 |
| }, |
| { |
| "epoch": 0.21899156088220695, |
| "grad_norm": 55.620628356933594, |
| "loss": 4.4865, |
| "lr": 0.0009620979020979021, |
| "step": 772, |
| "tokens_trained": 0.379466296 |
| }, |
| { |
| "epoch": 0.21955889653216085, |
| "grad_norm": 77.26813507080078, |
| "loss": 4.5671, |
| "lr": 0.0009618181818181818, |
| "step": 774, |
| "tokens_trained": 0.380449888 |
| }, |
| { |
| "epoch": 0.22012623218211474, |
| "grad_norm": 45.00653839111328, |
| "loss": 4.5923, |
| "lr": 0.0009615384615384616, |
| "step": 776, |
| "tokens_trained": 0.381430352 |
| }, |
| { |
| "epoch": 0.22069356783206864, |
| "grad_norm": 52.77407455444336, |
| "loss": 4.5094, |
| "lr": 0.0009612587412587412, |
| "step": 778, |
| "tokens_trained": 0.382416152 |
| }, |
| { |
| "epoch": 0.22126090348202254, |
| "grad_norm": 36.721073150634766, |
| "loss": 4.4536, |
| "lr": 0.000960979020979021, |
| "step": 780, |
| "tokens_trained": 0.383396672 |
| }, |
| { |
| "epoch": 0.22182823913197647, |
| "grad_norm": 51.21247100830078, |
| "loss": 4.4599, |
| "lr": 0.0009606993006993007, |
| "step": 782, |
| "tokens_trained": 0.384380584 |
| }, |
| { |
| "epoch": 0.22239557478193037, |
| "grad_norm": 65.23794555664062, |
| "loss": 4.5397, |
| "lr": 0.0009604195804195805, |
| "step": 784, |
| "tokens_trained": 0.385361368 |
| }, |
| { |
| "epoch": 0.22296291043188426, |
| "grad_norm": 23.255144119262695, |
| "loss": 4.5007, |
| "lr": 0.0009601398601398602, |
| "step": 786, |
| "tokens_trained": 0.386341416 |
| }, |
| { |
| "epoch": 0.22353024608183816, |
| "grad_norm": 30.812740325927734, |
| "loss": 4.5239, |
| "lr": 0.0009598601398601398, |
| "step": 788, |
| "tokens_trained": 0.387324624 |
| }, |
| { |
| "epoch": 0.22409758173179206, |
| "grad_norm": 50.781219482421875, |
| "loss": 4.5131, |
| "lr": 0.0009595804195804196, |
| "step": 790, |
| "tokens_trained": 0.388312744 |
| }, |
| { |
| "epoch": 0.224664917381746, |
| "grad_norm": 47.88816452026367, |
| "loss": 4.4622, |
| "lr": 0.0009593006993006993, |
| "step": 792, |
| "tokens_trained": 0.38929852 |
| }, |
| { |
| "epoch": 0.22523225303169989, |
| "grad_norm": 49.32049560546875, |
| "loss": 4.5053, |
| "lr": 0.0009590209790209791, |
| "step": 794, |
| "tokens_trained": 0.390279792 |
| }, |
| { |
| "epoch": 0.22579958868165378, |
| "grad_norm": 36.98805618286133, |
| "loss": 4.5144, |
| "lr": 0.0009587412587412587, |
| "step": 796, |
| "tokens_trained": 0.391258904 |
| }, |
| { |
| "epoch": 0.22636692433160768, |
| "grad_norm": 24.88475799560547, |
| "loss": 4.4992, |
| "lr": 0.0009584615384615385, |
| "step": 798, |
| "tokens_trained": 0.392238976 |
| }, |
| { |
| "epoch": 0.22693425998156158, |
| "grad_norm": 38.89309310913086, |
| "loss": 4.4853, |
| "lr": 0.0009581818181818182, |
| "step": 800, |
| "tokens_trained": 0.393226312 |
| }, |
| { |
| "epoch": 0.2275015956315155, |
| "grad_norm": 34.86774444580078, |
| "loss": 4.4519, |
| "lr": 0.000957902097902098, |
| "step": 802, |
| "tokens_trained": 0.394206688 |
| }, |
| { |
| "epoch": 0.2280689312814694, |
| "grad_norm": 24.966291427612305, |
| "loss": 4.456, |
| "lr": 0.0009576223776223777, |
| "step": 804, |
| "tokens_trained": 0.395191608 |
| }, |
| { |
| "epoch": 0.2286362669314233, |
| "grad_norm": 12.218213081359863, |
| "loss": 4.4266, |
| "lr": 0.0009573426573426573, |
| "step": 806, |
| "tokens_trained": 0.396174512 |
| }, |
| { |
| "epoch": 0.2292036025813772, |
| "grad_norm": 50.817054748535156, |
| "loss": 4.586, |
| "lr": 0.0009570629370629371, |
| "step": 808, |
| "tokens_trained": 0.397156912 |
| }, |
| { |
| "epoch": 0.2297709382313311, |
| "grad_norm": 37.60087203979492, |
| "loss": 4.4616, |
| "lr": 0.0009567832167832168, |
| "step": 810, |
| "tokens_trained": 0.398140016 |
| }, |
| { |
| "epoch": 0.23033827388128503, |
| "grad_norm": 37.55678176879883, |
| "loss": 4.4755, |
| "lr": 0.0009565034965034966, |
| "step": 812, |
| "tokens_trained": 0.39912384 |
| }, |
| { |
| "epoch": 0.23090560953123893, |
| "grad_norm": 56.427215576171875, |
| "loss": 4.5078, |
| "lr": 0.0009562237762237762, |
| "step": 814, |
| "tokens_trained": 0.400111224 |
| }, |
| { |
| "epoch": 0.23147294518119282, |
| "grad_norm": 31.869827270507812, |
| "loss": 4.5013, |
| "lr": 0.0009559440559440559, |
| "step": 816, |
| "tokens_trained": 0.401094936 |
| }, |
| { |
| "epoch": 0.23204028083114672, |
| "grad_norm": 77.57958984375, |
| "loss": 4.6977, |
| "lr": 0.0009556643356643357, |
| "step": 818, |
| "tokens_trained": 0.402078888 |
| }, |
| { |
| "epoch": 0.23260761648110062, |
| "grad_norm": 52.50204849243164, |
| "loss": 4.5142, |
| "lr": 0.0009553846153846154, |
| "step": 820, |
| "tokens_trained": 0.403059904 |
| }, |
| { |
| "epoch": 0.23317495213105455, |
| "grad_norm": 32.34305191040039, |
| "loss": 4.4828, |
| "lr": 0.0009551048951048952, |
| "step": 822, |
| "tokens_trained": 0.404049848 |
| }, |
| { |
| "epoch": 0.23374228778100845, |
| "grad_norm": 52.08961486816406, |
| "loss": 4.4869, |
| "lr": 0.0009548251748251748, |
| "step": 824, |
| "tokens_trained": 0.405033872 |
| }, |
| { |
| "epoch": 0.23430962343096234, |
| "grad_norm": 44.32194900512695, |
| "loss": 4.4802, |
| "lr": 0.0009545454545454546, |
| "step": 826, |
| "tokens_trained": 0.406017872 |
| }, |
| { |
| "epoch": 0.23487695908091624, |
| "grad_norm": 30.941524505615234, |
| "loss": 4.4323, |
| "lr": 0.0009542657342657343, |
| "step": 828, |
| "tokens_trained": 0.40700704 |
| }, |
| { |
| "epoch": 0.23544429473087014, |
| "grad_norm": 20.52709197998047, |
| "loss": 4.4919, |
| "lr": 0.000953986013986014, |
| "step": 830, |
| "tokens_trained": 0.407991512 |
| }, |
| { |
| "epoch": 0.23601163038082407, |
| "grad_norm": 86.80307006835938, |
| "loss": 4.8228, |
| "lr": 0.0009537062937062937, |
| "step": 832, |
| "tokens_trained": 0.408979272 |
| }, |
| { |
| "epoch": 0.23657896603077797, |
| "grad_norm": 73.71435546875, |
| "loss": 4.5954, |
| "lr": 0.0009534265734265734, |
| "step": 834, |
| "tokens_trained": 0.409962984 |
| }, |
| { |
| "epoch": 0.23714630168073186, |
| "grad_norm": 66.3813247680664, |
| "loss": 4.5969, |
| "lr": 0.0009531468531468532, |
| "step": 836, |
| "tokens_trained": 0.410945248 |
| }, |
| { |
| "epoch": 0.23771363733068576, |
| "grad_norm": 86.94453430175781, |
| "loss": 4.5894, |
| "lr": 0.0009528671328671329, |
| "step": 838, |
| "tokens_trained": 0.411930872 |
| }, |
| { |
| "epoch": 0.23828097298063966, |
| "grad_norm": 61.28915786743164, |
| "loss": 4.5613, |
| "lr": 0.0009525874125874127, |
| "step": 840, |
| "tokens_trained": 0.412912608 |
| }, |
| { |
| "epoch": 0.2388483086305936, |
| "grad_norm": 65.02153778076172, |
| "loss": 4.5398, |
| "lr": 0.0009523076923076923, |
| "step": 842, |
| "tokens_trained": 0.413897488 |
| }, |
| { |
| "epoch": 0.23941564428054748, |
| "grad_norm": 54.01200485229492, |
| "loss": 4.4922, |
| "lr": 0.000952027972027972, |
| "step": 844, |
| "tokens_trained": 0.414872888 |
| }, |
| { |
| "epoch": 0.23998297993050138, |
| "grad_norm": 66.7095718383789, |
| "loss": 4.5317, |
| "lr": 0.0009517482517482518, |
| "step": 846, |
| "tokens_trained": 0.415856296 |
| }, |
| { |
| "epoch": 0.24055031558045528, |
| "grad_norm": 64.23979949951172, |
| "loss": 4.4686, |
| "lr": 0.0009514685314685315, |
| "step": 848, |
| "tokens_trained": 0.416843344 |
| }, |
| { |
| "epoch": 0.24111765123040918, |
| "grad_norm": 51.012840270996094, |
| "loss": 4.4544, |
| "lr": 0.0009511888111888112, |
| "step": 850, |
| "tokens_trained": 0.41782032 |
| }, |
| { |
| "epoch": 0.2416849868803631, |
| "grad_norm": 40.83076095581055, |
| "loss": 4.4665, |
| "lr": 0.0009509090909090909, |
| "step": 852, |
| "tokens_trained": 0.418805672 |
| }, |
| { |
| "epoch": 0.242252322530317, |
| "grad_norm": 48.31489944458008, |
| "loss": 4.4748, |
| "lr": 0.0009506293706293707, |
| "step": 854, |
| "tokens_trained": 0.419786344 |
| }, |
| { |
| "epoch": 0.2428196581802709, |
| "grad_norm": 50.08705520629883, |
| "loss": 4.4973, |
| "lr": 0.0009503496503496504, |
| "step": 856, |
| "tokens_trained": 0.420768872 |
| }, |
| { |
| "epoch": 0.2433869938302248, |
| "grad_norm": 26.840139389038086, |
| "loss": 4.461, |
| "lr": 0.0009500699300699301, |
| "step": 858, |
| "tokens_trained": 0.421750296 |
| }, |
| { |
| "epoch": 0.2439543294801787, |
| "grad_norm": 24.721454620361328, |
| "loss": 4.4246, |
| "lr": 0.0009497902097902098, |
| "step": 860, |
| "tokens_trained": 0.422730976 |
| }, |
| { |
| "epoch": 0.24452166513013263, |
| "grad_norm": 63.147926330566406, |
| "loss": 4.623, |
| "lr": 0.0009495104895104895, |
| "step": 862, |
| "tokens_trained": 0.423715768 |
| }, |
| { |
| "epoch": 0.24508900078008652, |
| "grad_norm": 50.99778747558594, |
| "loss": 4.4663, |
| "lr": 0.0009492307692307693, |
| "step": 864, |
| "tokens_trained": 0.424697072 |
| }, |
| { |
| "epoch": 0.24565633643004042, |
| "grad_norm": 38.0300407409668, |
| "loss": 4.4649, |
| "lr": 0.000948951048951049, |
| "step": 866, |
| "tokens_trained": 0.425681392 |
| }, |
| { |
| "epoch": 0.24622367207999432, |
| "grad_norm": 19.017776489257812, |
| "loss": 4.4296, |
| "lr": 0.0009486713286713286, |
| "step": 868, |
| "tokens_trained": 0.426665088 |
| }, |
| { |
| "epoch": 0.24679100772994822, |
| "grad_norm": 24.02813148498535, |
| "loss": 4.4958, |
| "lr": 0.0009483916083916084, |
| "step": 870, |
| "tokens_trained": 0.427646016 |
| }, |
| { |
| "epoch": 0.24735834337990215, |
| "grad_norm": 59.40018081665039, |
| "loss": 4.5919, |
| "lr": 0.0009481118881118881, |
| "step": 872, |
| "tokens_trained": 0.428628048 |
| }, |
| { |
| "epoch": 0.24792567902985604, |
| "grad_norm": 61.13710403442383, |
| "loss": 4.4642, |
| "lr": 0.0009478321678321679, |
| "step": 874, |
| "tokens_trained": 0.4296112 |
| }, |
| { |
| "epoch": 0.24820934685483298, |
| "eval_loss": 1.1135390996932983, |
| "eval_runtime": 20.4738, |
| "step": 875, |
| "tokens_trained": 0.430109024 |
| }, |
| { |
| "epoch": 0.24849301467980994, |
| "grad_norm": 47.920021057128906, |
| "loss": 4.4832, |
| "lr": 0.0009475524475524476, |
| "step": 876, |
| "tokens_trained": 0.430599208 |
| }, |
| { |
| "epoch": 0.24906035032976384, |
| "grad_norm": 25.661701202392578, |
| "loss": 4.4176, |
| "lr": 0.0009472727272727273, |
| "step": 878, |
| "tokens_trained": 0.43158356 |
| }, |
| { |
| "epoch": 0.24962768597971774, |
| "grad_norm": 32.86565399169922, |
| "loss": 4.405, |
| "lr": 0.000946993006993007, |
| "step": 880, |
| "tokens_trained": 0.432570584 |
| }, |
| { |
| "epoch": 0.25019502162967167, |
| "grad_norm": 23.443584442138672, |
| "loss": 4.4218, |
| "lr": 0.0009467132867132868, |
| "step": 882, |
| "tokens_trained": 0.433557672 |
| }, |
| { |
| "epoch": 0.25076235727962554, |
| "grad_norm": 28.315975189208984, |
| "loss": 4.4019, |
| "lr": 0.0009464335664335665, |
| "step": 884, |
| "tokens_trained": 0.434542736 |
| }, |
| { |
| "epoch": 0.25132969292957946, |
| "grad_norm": 31.056642532348633, |
| "loss": 4.4027, |
| "lr": 0.0009461538461538461, |
| "step": 886, |
| "tokens_trained": 0.43553112 |
| }, |
| { |
| "epoch": 0.2518970285795334, |
| "grad_norm": 13.661805152893066, |
| "loss": 4.3745, |
| "lr": 0.0009458741258741259, |
| "step": 888, |
| "tokens_trained": 0.436511584 |
| }, |
| { |
| "epoch": 0.25246436422948726, |
| "grad_norm": 47.04901885986328, |
| "loss": 4.4875, |
| "lr": 0.0009455944055944056, |
| "step": 890, |
| "tokens_trained": 0.43749464 |
| }, |
| { |
| "epoch": 0.2530316998794412, |
| "grad_norm": 84.91446685791016, |
| "loss": 4.5185, |
| "lr": 0.0009453146853146854, |
| "step": 892, |
| "tokens_trained": 0.43847764 |
| }, |
| { |
| "epoch": 0.25359903552939506, |
| "grad_norm": 40.9110107421875, |
| "loss": 4.5735, |
| "lr": 0.000945034965034965, |
| "step": 894, |
| "tokens_trained": 0.439461496 |
| }, |
| { |
| "epoch": 0.254166371179349, |
| "grad_norm": 58.98877716064453, |
| "loss": 4.5146, |
| "lr": 0.0009447552447552447, |
| "step": 896, |
| "tokens_trained": 0.440443656 |
| }, |
| { |
| "epoch": 0.2547337068293029, |
| "grad_norm": 34.037315368652344, |
| "loss": 4.4714, |
| "lr": 0.0009444755244755245, |
| "step": 898, |
| "tokens_trained": 0.441423496 |
| }, |
| { |
| "epoch": 0.2553010424792568, |
| "grad_norm": 24.91920280456543, |
| "loss": 4.4334, |
| "lr": 0.0009441958041958042, |
| "step": 900, |
| "tokens_trained": 0.442407408 |
| }, |
| { |
| "epoch": 0.2558683781292107, |
| "grad_norm": 30.612323760986328, |
| "loss": 4.4459, |
| "lr": 0.000943916083916084, |
| "step": 902, |
| "tokens_trained": 0.443383464 |
| }, |
| { |
| "epoch": 0.2564357137791646, |
| "grad_norm": 50.595577239990234, |
| "loss": 4.4848, |
| "lr": 0.0009436363636363636, |
| "step": 904, |
| "tokens_trained": 0.4443674 |
| }, |
| { |
| "epoch": 0.2570030494291185, |
| "grad_norm": 41.3300895690918, |
| "loss": 4.4445, |
| "lr": 0.0009433566433566434, |
| "step": 906, |
| "tokens_trained": 0.445346072 |
| }, |
| { |
| "epoch": 0.25757038507907243, |
| "grad_norm": 48.33689880371094, |
| "loss": 4.4058, |
| "lr": 0.0009430769230769231, |
| "step": 908, |
| "tokens_trained": 0.446329872 |
| }, |
| { |
| "epoch": 0.2581377207290263, |
| "grad_norm": 39.081382751464844, |
| "loss": 4.4321, |
| "lr": 0.0009427972027972029, |
| "step": 910, |
| "tokens_trained": 0.447309544 |
| }, |
| { |
| "epoch": 0.2587050563789802, |
| "grad_norm": 62.18062210083008, |
| "loss": 4.4672, |
| "lr": 0.0009425174825174825, |
| "step": 912, |
| "tokens_trained": 0.448295056 |
| }, |
| { |
| "epoch": 0.2592723920289341, |
| "grad_norm": 28.725404739379883, |
| "loss": 4.4786, |
| "lr": 0.0009422377622377622, |
| "step": 914, |
| "tokens_trained": 0.449274208 |
| }, |
| { |
| "epoch": 0.259839727678888, |
| "grad_norm": 47.55582809448242, |
| "loss": 4.4227, |
| "lr": 0.000941958041958042, |
| "step": 916, |
| "tokens_trained": 0.450256408 |
| }, |
| { |
| "epoch": 0.26040706332884195, |
| "grad_norm": 35.743125915527344, |
| "loss": 4.379, |
| "lr": 0.0009416783216783217, |
| "step": 918, |
| "tokens_trained": 0.45123684 |
| }, |
| { |
| "epoch": 0.2609743989787958, |
| "grad_norm": 31.489402770996094, |
| "loss": 4.3888, |
| "lr": 0.0009413986013986015, |
| "step": 920, |
| "tokens_trained": 0.45221748 |
| }, |
| { |
| "epoch": 0.26154173462874974, |
| "grad_norm": 36.46233367919922, |
| "loss": 4.3982, |
| "lr": 0.0009411188811188811, |
| "step": 922, |
| "tokens_trained": 0.453202064 |
| }, |
| { |
| "epoch": 0.2621090702787036, |
| "grad_norm": 41.6457633972168, |
| "loss": 4.385, |
| "lr": 0.0009408391608391608, |
| "step": 924, |
| "tokens_trained": 0.454183456 |
| }, |
| { |
| "epoch": 0.26267640592865754, |
| "grad_norm": 26.52242088317871, |
| "loss": 4.4091, |
| "lr": 0.0009405594405594406, |
| "step": 926, |
| "tokens_trained": 0.455165496 |
| }, |
| { |
| "epoch": 0.26324374157861147, |
| "grad_norm": 14.401509284973145, |
| "loss": 4.3549, |
| "lr": 0.0009402797202797203, |
| "step": 928, |
| "tokens_trained": 0.456150248 |
| }, |
| { |
| "epoch": 0.26381107722856534, |
| "grad_norm": 30.626131057739258, |
| "loss": 4.3325, |
| "lr": 0.00094, |
| "step": 930, |
| "tokens_trained": 0.457134184 |
| }, |
| { |
| "epoch": 0.26437841287851926, |
| "grad_norm": 63.74067687988281, |
| "loss": 4.442, |
| "lr": 0.0009397202797202797, |
| "step": 932, |
| "tokens_trained": 0.458118808 |
| }, |
| { |
| "epoch": 0.26494574852847314, |
| "grad_norm": 12.15156364440918, |
| "loss": 4.4658, |
| "lr": 0.0009394405594405595, |
| "step": 934, |
| "tokens_trained": 0.459103872 |
| }, |
| { |
| "epoch": 0.26551308417842706, |
| "grad_norm": 76.2789306640625, |
| "loss": 4.8153, |
| "lr": 0.0009391608391608392, |
| "step": 936, |
| "tokens_trained": 0.460087216 |
| }, |
| { |
| "epoch": 0.266080419828381, |
| "grad_norm": 63.919334411621094, |
| "loss": 4.5707, |
| "lr": 0.000938881118881119, |
| "step": 938, |
| "tokens_trained": 0.461070568 |
| }, |
| { |
| "epoch": 0.26664775547833486, |
| "grad_norm": 75.1481704711914, |
| "loss": 4.5931, |
| "lr": 0.0009386013986013986, |
| "step": 940, |
| "tokens_trained": 0.462055184 |
| }, |
| { |
| "epoch": 0.2672150911282888, |
| "grad_norm": 33.118961334228516, |
| "loss": 4.4723, |
| "lr": 0.0009383216783216783, |
| "step": 942, |
| "tokens_trained": 0.463034592 |
| }, |
| { |
| "epoch": 0.26778242677824265, |
| "grad_norm": 30.8759765625, |
| "loss": 4.4275, |
| "lr": 0.0009380419580419581, |
| "step": 944, |
| "tokens_trained": 0.464016816 |
| }, |
| { |
| "epoch": 0.2683497624281966, |
| "grad_norm": 41.05061340332031, |
| "loss": 4.4566, |
| "lr": 0.0009377622377622378, |
| "step": 946, |
| "tokens_trained": 0.465000872 |
| }, |
| { |
| "epoch": 0.2689170980781505, |
| "grad_norm": 30.93424415588379, |
| "loss": 4.3985, |
| "lr": 0.0009374825174825175, |
| "step": 948, |
| "tokens_trained": 0.465984096 |
| }, |
| { |
| "epoch": 0.2694844337281044, |
| "grad_norm": 29.477052688598633, |
| "loss": 4.3718, |
| "lr": 0.0009372027972027972, |
| "step": 950, |
| "tokens_trained": 0.466961752 |
| }, |
| { |
| "epoch": 0.2700517693780583, |
| "grad_norm": 21.568912506103516, |
| "loss": 4.3697, |
| "lr": 0.0009369230769230769, |
| "step": 952, |
| "tokens_trained": 0.467950088 |
| }, |
| { |
| "epoch": 0.2706191050280122, |
| "grad_norm": 41.66835021972656, |
| "loss": 4.4241, |
| "lr": 0.0009366433566433567, |
| "step": 954, |
| "tokens_trained": 0.468928736 |
| }, |
| { |
| "epoch": 0.2711864406779661, |
| "grad_norm": 68.04551696777344, |
| "loss": 4.3978, |
| "lr": 0.0009363636363636364, |
| "step": 956, |
| "tokens_trained": 0.469907496 |
| }, |
| { |
| "epoch": 0.27175377632792, |
| "grad_norm": 37.655181884765625, |
| "loss": 4.4497, |
| "lr": 0.0009360839160839161, |
| "step": 958, |
| "tokens_trained": 0.470889168 |
| }, |
| { |
| "epoch": 0.2723211119778739, |
| "grad_norm": 22.074953079223633, |
| "loss": 4.3918, |
| "lr": 0.0009358041958041958, |
| "step": 960, |
| "tokens_trained": 0.471871816 |
| }, |
| { |
| "epoch": 0.2728884476278278, |
| "grad_norm": 49.925777435302734, |
| "loss": 4.4745, |
| "lr": 0.0009355244755244755, |
| "step": 962, |
| "tokens_trained": 0.472856728 |
| }, |
| { |
| "epoch": 0.2734557832777817, |
| "grad_norm": 46.520851135253906, |
| "loss": 4.403, |
| "lr": 0.0009352447552447553, |
| "step": 964, |
| "tokens_trained": 0.473838544 |
| }, |
| { |
| "epoch": 0.2740231189277356, |
| "grad_norm": 25.053146362304688, |
| "loss": 4.4247, |
| "lr": 0.0009349650349650349, |
| "step": 966, |
| "tokens_trained": 0.474819976 |
| }, |
| { |
| "epoch": 0.27459045457768955, |
| "grad_norm": 30.127140045166016, |
| "loss": 4.3834, |
| "lr": 0.0009346853146853147, |
| "step": 968, |
| "tokens_trained": 0.475800696 |
| }, |
| { |
| "epoch": 0.2751577902276434, |
| "grad_norm": 41.478328704833984, |
| "loss": 4.3978, |
| "lr": 0.0009344055944055944, |
| "step": 970, |
| "tokens_trained": 0.4767834 |
| }, |
| { |
| "epoch": 0.27572512587759734, |
| "grad_norm": 23.739456176757812, |
| "loss": 4.3698, |
| "lr": 0.0009341258741258742, |
| "step": 972, |
| "tokens_trained": 0.47776944 |
| }, |
| { |
| "epoch": 0.2762924615275512, |
| "grad_norm": 21.813220977783203, |
| "loss": 4.3902, |
| "lr": 0.0009338461538461539, |
| "step": 974, |
| "tokens_trained": 0.478757048 |
| }, |
| { |
| "epoch": 0.27685979717750514, |
| "grad_norm": 64.79598999023438, |
| "loss": 4.5237, |
| "lr": 0.0009335664335664336, |
| "step": 976, |
| "tokens_trained": 0.47973872 |
| }, |
| { |
| "epoch": 0.27742713282745907, |
| "grad_norm": 68.32705688476562, |
| "loss": 4.4461, |
| "lr": 0.0009332867132867133, |
| "step": 978, |
| "tokens_trained": 0.480721912 |
| }, |
| { |
| "epoch": 0.27799446847741294, |
| "grad_norm": 41.857582092285156, |
| "loss": 4.4663, |
| "lr": 0.0009330069930069929, |
| "step": 980, |
| "tokens_trained": 0.481704248 |
| }, |
| { |
| "epoch": 0.27856180412736686, |
| "grad_norm": 28.30609893798828, |
| "loss": 4.3461, |
| "lr": 0.0009327272727272728, |
| "step": 982, |
| "tokens_trained": 0.482689768 |
| }, |
| { |
| "epoch": 0.27912913977732073, |
| "grad_norm": 33.207950592041016, |
| "loss": 4.4185, |
| "lr": 0.0009324475524475524, |
| "step": 984, |
| "tokens_trained": 0.483670008 |
| }, |
| { |
| "epoch": 0.27969647542727466, |
| "grad_norm": 29.541227340698242, |
| "loss": 4.388, |
| "lr": 0.0009321678321678322, |
| "step": 986, |
| "tokens_trained": 0.48465836 |
| }, |
| { |
| "epoch": 0.2802638110772286, |
| "grad_norm": 16.23346710205078, |
| "loss": 4.3219, |
| "lr": 0.0009318881118881119, |
| "step": 988, |
| "tokens_trained": 0.4856402 |
| }, |
| { |
| "epoch": 0.28083114672718246, |
| "grad_norm": 20.036178588867188, |
| "loss": 4.3273, |
| "lr": 0.0009316083916083917, |
| "step": 990, |
| "tokens_trained": 0.486621648 |
| }, |
| { |
| "epoch": 0.2813984823771364, |
| "grad_norm": 49.25468063354492, |
| "loss": 4.4649, |
| "lr": 0.0009313286713286714, |
| "step": 992, |
| "tokens_trained": 0.48760744 |
| }, |
| { |
| "epoch": 0.28196581802709025, |
| "grad_norm": 48.59744644165039, |
| "loss": 4.3979, |
| "lr": 0.000931048951048951, |
| "step": 994, |
| "tokens_trained": 0.488590472 |
| }, |
| { |
| "epoch": 0.2825331536770442, |
| "grad_norm": 16.33649253845215, |
| "loss": 4.3945, |
| "lr": 0.0009307692307692308, |
| "step": 996, |
| "tokens_trained": 0.489570976 |
| }, |
| { |
| "epoch": 0.2831004893269981, |
| "grad_norm": 60.632591247558594, |
| "loss": 4.5581, |
| "lr": 0.0009304895104895104, |
| "step": 998, |
| "tokens_trained": 0.490552296 |
| }, |
| { |
| "epoch": 0.283667824976952, |
| "grad_norm": 52.75735092163086, |
| "loss": 4.424, |
| "lr": 0.0009302097902097903, |
| "step": 1000, |
| "tokens_trained": 0.49153744 |
| }, |
| { |
| "epoch": 0.283667824976952, |
| "eval_loss": 1.1363450288772583, |
| "eval_runtime": 20.7491, |
| "step": 1000, |
| "tokens_trained": 0.49153744 |
| }, |
| { |
| "epoch": 0.2842351606269059, |
| "grad_norm": 20.506614685058594, |
| "loss": 4.4241, |
| "lr": 0.0009299300699300699, |
| "step": 1002, |
| "tokens_trained": 0.492522608 |
| }, |
| { |
| "epoch": 0.2848024962768598, |
| "grad_norm": 23.148601531982422, |
| "loss": 4.3975, |
| "lr": 0.0009296503496503497, |
| "step": 1004, |
| "tokens_trained": 0.493501384 |
| }, |
| { |
| "epoch": 0.2853698319268137, |
| "grad_norm": 9.550869941711426, |
| "loss": 4.3952, |
| "lr": 0.0009293706293706294, |
| "step": 1006, |
| "tokens_trained": 0.494482544 |
| }, |
| { |
| "epoch": 0.2859371675767676, |
| "grad_norm": 80.31155395507812, |
| "loss": 4.7614, |
| "lr": 0.0009290909090909091, |
| "step": 1008, |
| "tokens_trained": 0.495459416 |
| }, |
| { |
| "epoch": 0.2865045032267215, |
| "grad_norm": 61.021026611328125, |
| "loss": 4.4396, |
| "lr": 0.0009288111888111889, |
| "step": 1010, |
| "tokens_trained": 0.4964418 |
| }, |
| { |
| "epoch": 0.2870718388766754, |
| "grad_norm": 35.23258972167969, |
| "loss": 4.5548, |
| "lr": 0.0009285314685314685, |
| "step": 1012, |
| "tokens_trained": 0.497428288 |
| }, |
| { |
| "epoch": 0.2876391745266293, |
| "grad_norm": 36.45478057861328, |
| "loss": 4.46, |
| "lr": 0.0009282517482517483, |
| "step": 1014, |
| "tokens_trained": 0.498416832 |
| }, |
| { |
| "epoch": 0.2882065101765832, |
| "grad_norm": 46.622982025146484, |
| "loss": 4.3554, |
| "lr": 0.0009279720279720279, |
| "step": 1016, |
| "tokens_trained": 0.499399792 |
| }, |
| { |
| "epoch": 0.28877384582653715, |
| "grad_norm": 87.00289154052734, |
| "loss": 4.5276, |
| "lr": 0.0009276923076923078, |
| "step": 1018, |
| "tokens_trained": 0.500383776 |
| }, |
| { |
| "epoch": 0.289341181476491, |
| "grad_norm": 11.444964408874512, |
| "loss": 4.5483, |
| "lr": 0.0009274125874125874, |
| "step": 1020, |
| "tokens_trained": 0.50136468 |
| }, |
| { |
| "epoch": 0.28990851712644494, |
| "grad_norm": 89.05914306640625, |
| "loss": 4.8957, |
| "lr": 0.0009271328671328671, |
| "step": 1022, |
| "tokens_trained": 0.50235172 |
| }, |
| { |
| "epoch": 0.2904758527763988, |
| "grad_norm": 26.915477752685547, |
| "loss": 4.6184, |
| "lr": 0.0009268531468531469, |
| "step": 1024, |
| "tokens_trained": 0.50333208 |
| }, |
| { |
| "epoch": 0.29104318842635274, |
| "grad_norm": 44.32100296020508, |
| "loss": 4.5263, |
| "lr": 0.0009265734265734266, |
| "step": 1026, |
| "tokens_trained": 0.504314656 |
| }, |
| { |
| "epoch": 0.29161052407630667, |
| "grad_norm": 26.699670791625977, |
| "loss": 4.3871, |
| "lr": 0.0009262937062937064, |
| "step": 1028, |
| "tokens_trained": 0.505296568 |
| }, |
| { |
| "epoch": 0.29217785972626054, |
| "grad_norm": 27.469482421875, |
| "loss": 4.3558, |
| "lr": 0.000926013986013986, |
| "step": 1030, |
| "tokens_trained": 0.506280416 |
| }, |
| { |
| "epoch": 0.29274519537621446, |
| "grad_norm": 26.149612426757812, |
| "loss": 4.3368, |
| "lr": 0.0009257342657342658, |
| "step": 1032, |
| "tokens_trained": 0.507261224 |
| }, |
| { |
| "epoch": 0.29331253102616833, |
| "grad_norm": 8.754459381103516, |
| "loss": 4.3447, |
| "lr": 0.0009254545454545454, |
| "step": 1034, |
| "tokens_trained": 0.508243288 |
| }, |
| { |
| "epoch": 0.29387986667612226, |
| "grad_norm": 32.17164611816406, |
| "loss": 4.4174, |
| "lr": 0.0009251748251748252, |
| "step": 1036, |
| "tokens_trained": 0.509224176 |
| }, |
| { |
| "epoch": 0.2944472023260762, |
| "grad_norm": 41.17238235473633, |
| "loss": 4.4221, |
| "lr": 0.0009248951048951049, |
| "step": 1038, |
| "tokens_trained": 0.510203568 |
| }, |
| { |
| "epoch": 0.29501453797603006, |
| "grad_norm": 44.97213363647461, |
| "loss": 4.3594, |
| "lr": 0.0009246153846153846, |
| "step": 1040, |
| "tokens_trained": 0.511186464 |
| }, |
| { |
| "epoch": 0.295581873625984, |
| "grad_norm": 42.23421859741211, |
| "loss": 4.4159, |
| "lr": 0.0009243356643356644, |
| "step": 1042, |
| "tokens_trained": 0.51216944 |
| }, |
| { |
| "epoch": 0.29614920927593785, |
| "grad_norm": 36.13594436645508, |
| "loss": 4.4105, |
| "lr": 0.0009240559440559441, |
| "step": 1044, |
| "tokens_trained": 0.513153144 |
| }, |
| { |
| "epoch": 0.2967165449258918, |
| "grad_norm": 36.89309310913086, |
| "loss": 4.3947, |
| "lr": 0.0009237762237762239, |
| "step": 1046, |
| "tokens_trained": 0.51413388 |
| }, |
| { |
| "epoch": 0.2972838805758457, |
| "grad_norm": 58.599700927734375, |
| "loss": 4.3988, |
| "lr": 0.0009234965034965035, |
| "step": 1048, |
| "tokens_trained": 0.515119288 |
| }, |
| { |
| "epoch": 0.2978512162257996, |
| "grad_norm": 13.725994110107422, |
| "loss": 4.412, |
| "lr": 0.0009232167832167832, |
| "step": 1050, |
| "tokens_trained": 0.51610284 |
| }, |
| { |
| "epoch": 0.2984185518757535, |
| "grad_norm": 105.28518676757812, |
| "loss": 4.7305, |
| "lr": 0.0009229370629370629, |
| "step": 1052, |
| "tokens_trained": 0.517085576 |
| }, |
| { |
| "epoch": 0.2989858875257074, |
| "grad_norm": 29.499713897705078, |
| "loss": 4.5106, |
| "lr": 0.0009226573426573427, |
| "step": 1054, |
| "tokens_trained": 0.518064224 |
| }, |
| { |
| "epoch": 0.2995532231756613, |
| "grad_norm": 60.907203674316406, |
| "loss": 4.5249, |
| "lr": 0.0009223776223776224, |
| "step": 1056, |
| "tokens_trained": 0.51905084 |
| }, |
| { |
| "epoch": 0.3001205588256152, |
| "grad_norm": 39.825069427490234, |
| "loss": 4.3695, |
| "lr": 0.0009220979020979021, |
| "step": 1058, |
| "tokens_trained": 0.5200318 |
| }, |
| { |
| "epoch": 0.3006878944755691, |
| "grad_norm": 42.77061462402344, |
| "loss": 4.4094, |
| "lr": 0.0009218181818181819, |
| "step": 1060, |
| "tokens_trained": 0.521013568 |
| }, |
| { |
| "epoch": 0.301255230125523, |
| "grad_norm": 37.05888748168945, |
| "loss": 4.3684, |
| "lr": 0.0009215384615384616, |
| "step": 1062, |
| "tokens_trained": 0.521997624 |
| }, |
| { |
| "epoch": 0.3018225657754769, |
| "grad_norm": 42.28252029418945, |
| "loss": 4.3489, |
| "lr": 0.0009212587412587413, |
| "step": 1064, |
| "tokens_trained": 0.522986184 |
| }, |
| { |
| "epoch": 0.3023899014254308, |
| "grad_norm": 40.95197677612305, |
| "loss": 4.3564, |
| "lr": 0.000920979020979021, |
| "step": 1066, |
| "tokens_trained": 0.523970984 |
| }, |
| { |
| "epoch": 0.30295723707538474, |
| "grad_norm": 25.469568252563477, |
| "loss": 4.3833, |
| "lr": 0.0009206993006993007, |
| "step": 1068, |
| "tokens_trained": 0.524952808 |
| }, |
| { |
| "epoch": 0.3035245727253386, |
| "grad_norm": 29.921735763549805, |
| "loss": 4.3579, |
| "lr": 0.0009204195804195804, |
| "step": 1070, |
| "tokens_trained": 0.525935696 |
| }, |
| { |
| "epoch": 0.30409190837529254, |
| "grad_norm": 26.038026809692383, |
| "loss": 4.2898, |
| "lr": 0.0009201398601398602, |
| "step": 1072, |
| "tokens_trained": 0.526916904 |
| }, |
| { |
| "epoch": 0.3046592440252464, |
| "grad_norm": 32.59503936767578, |
| "loss": 4.3335, |
| "lr": 0.0009198601398601398, |
| "step": 1074, |
| "tokens_trained": 0.527899864 |
| }, |
| { |
| "epoch": 0.30522657967520034, |
| "grad_norm": 14.04964828491211, |
| "loss": 4.3171, |
| "lr": 0.0009195804195804196, |
| "step": 1076, |
| "tokens_trained": 0.528878176 |
| }, |
| { |
| "epoch": 0.30579391532515426, |
| "grad_norm": 15.936906814575195, |
| "loss": 4.3005, |
| "lr": 0.0009193006993006993, |
| "step": 1078, |
| "tokens_trained": 0.529859952 |
| }, |
| { |
| "epoch": 0.30636125097510813, |
| "grad_norm": 9.73235034942627, |
| "loss": 4.3287, |
| "lr": 0.0009190209790209791, |
| "step": 1080, |
| "tokens_trained": 0.530838192 |
| }, |
| { |
| "epoch": 0.30692858662506206, |
| "grad_norm": 45.44027328491211, |
| "loss": 4.4384, |
| "lr": 0.0009187412587412588, |
| "step": 1082, |
| "tokens_trained": 0.531818376 |
| }, |
| { |
| "epoch": 0.30749592227501593, |
| "grad_norm": 55.65925598144531, |
| "loss": 4.3772, |
| "lr": 0.0009184615384615385, |
| "step": 1084, |
| "tokens_trained": 0.532802048 |
| }, |
| { |
| "epoch": 0.30806325792496986, |
| "grad_norm": 33.47093200683594, |
| "loss": 4.4257, |
| "lr": 0.0009181818181818182, |
| "step": 1086, |
| "tokens_trained": 0.533785376 |
| }, |
| { |
| "epoch": 0.3086305935749238, |
| "grad_norm": 39.709224700927734, |
| "loss": 4.4177, |
| "lr": 0.0009179020979020978, |
| "step": 1088, |
| "tokens_trained": 0.5347698 |
| }, |
| { |
| "epoch": 0.30919792922487765, |
| "grad_norm": 34.25212097167969, |
| "loss": 4.3518, |
| "lr": 0.0009176223776223777, |
| "step": 1090, |
| "tokens_trained": 0.53575108 |
| }, |
| { |
| "epoch": 0.3097652648748316, |
| "grad_norm": 29.156312942504883, |
| "loss": 4.3596, |
| "lr": 0.0009173426573426573, |
| "step": 1092, |
| "tokens_trained": 0.536735544 |
| }, |
| { |
| "epoch": 0.31033260052478545, |
| "grad_norm": 31.714128494262695, |
| "loss": 4.3736, |
| "lr": 0.0009170629370629371, |
| "step": 1094, |
| "tokens_trained": 0.537718008 |
| }, |
| { |
| "epoch": 0.3108999361747394, |
| "grad_norm": 12.244729042053223, |
| "loss": 4.3472, |
| "lr": 0.0009167832167832168, |
| "step": 1096, |
| "tokens_trained": 0.538693512 |
| }, |
| { |
| "epoch": 0.3114672718246933, |
| "grad_norm": 10.271063804626465, |
| "loss": 4.301, |
| "lr": 0.0009165034965034966, |
| "step": 1098, |
| "tokens_trained": 0.539681376 |
| }, |
| { |
| "epoch": 0.3120346074746472, |
| "grad_norm": 35.79754638671875, |
| "loss": 4.3912, |
| "lr": 0.0009162237762237763, |
| "step": 1100, |
| "tokens_trained": 0.540661392 |
| }, |
| { |
| "epoch": 0.3126019431246011, |
| "grad_norm": 24.1260986328125, |
| "loss": 4.3303, |
| "lr": 0.0009159440559440559, |
| "step": 1102, |
| "tokens_trained": 0.541646968 |
| }, |
| { |
| "epoch": 0.31316927877455497, |
| "grad_norm": 24.501169204711914, |
| "loss": 4.3205, |
| "lr": 0.0009156643356643357, |
| "step": 1104, |
| "tokens_trained": 0.542629392 |
| }, |
| { |
| "epoch": 0.3137366144245089, |
| "grad_norm": 17.031600952148438, |
| "loss": 4.2521, |
| "lr": 0.0009153846153846153, |
| "step": 1106, |
| "tokens_trained": 0.54361348 |
| }, |
| { |
| "epoch": 0.3143039500744628, |
| "grad_norm": 19.506216049194336, |
| "loss": 4.3225, |
| "lr": 0.0009151048951048952, |
| "step": 1108, |
| "tokens_trained": 0.544595336 |
| }, |
| { |
| "epoch": 0.3148712857244167, |
| "grad_norm": 20.822546005249023, |
| "loss": 4.2711, |
| "lr": 0.0009148251748251748, |
| "step": 1110, |
| "tokens_trained": 0.545578256 |
| }, |
| { |
| "epoch": 0.3154386213743706, |
| "grad_norm": 29.967998504638672, |
| "loss": 4.2868, |
| "lr": 0.0009145454545454546, |
| "step": 1112, |
| "tokens_trained": 0.546561024 |
| }, |
| { |
| "epoch": 0.3160059570243245, |
| "grad_norm": 24.06121063232422, |
| "loss": 4.2701, |
| "lr": 0.0009142657342657343, |
| "step": 1114, |
| "tokens_trained": 0.547544616 |
| }, |
| { |
| "epoch": 0.3165732926742784, |
| "grad_norm": 15.868765830993652, |
| "loss": 4.3233, |
| "lr": 0.000913986013986014, |
| "step": 1116, |
| "tokens_trained": 0.548526216 |
| }, |
| { |
| "epoch": 0.31714062832423234, |
| "grad_norm": 27.47897720336914, |
| "loss": 4.2813, |
| "lr": 0.0009137062937062938, |
| "step": 1118, |
| "tokens_trained": 0.549506544 |
| }, |
| { |
| "epoch": 0.3177079639741862, |
| "grad_norm": 15.343204498291016, |
| "loss": 4.3002, |
| "lr": 0.0009134265734265734, |
| "step": 1120, |
| "tokens_trained": 0.550488496 |
| }, |
| { |
| "epoch": 0.31827529962414014, |
| "grad_norm": 4.320124626159668, |
| "loss": 4.2622, |
| "lr": 0.0009131468531468532, |
| "step": 1122, |
| "tokens_trained": 0.551471792 |
| }, |
| { |
| "epoch": 0.318842635274094, |
| "grad_norm": 34.520050048828125, |
| "loss": 4.366, |
| "lr": 0.0009128671328671328, |
| "step": 1124, |
| "tokens_trained": 0.552457008 |
| }, |
| { |
| "epoch": 0.319126303099071, |
| "eval_loss": 1.096465826034546, |
| "eval_runtime": 20.7643, |
| "step": 1125, |
| "tokens_trained": 0.552948064 |
| }, |
| { |
| "epoch": 0.31940997092404794, |
| "grad_norm": 39.718719482421875, |
| "loss": 4.3317, |
| "lr": 0.0009125874125874127, |
| "step": 1126, |
| "tokens_trained": 0.5534394 |
| }, |
| { |
| "epoch": 0.31997730657400186, |
| "grad_norm": 20.843252182006836, |
| "loss": 4.3883, |
| "lr": 0.0009123076923076923, |
| "step": 1128, |
| "tokens_trained": 0.554419184 |
| }, |
| { |
| "epoch": 0.32054464222395573, |
| "grad_norm": 12.916360855102539, |
| "loss": 4.3119, |
| "lr": 0.000912027972027972, |
| "step": 1130, |
| "tokens_trained": 0.555401952 |
| }, |
| { |
| "epoch": 0.32111197787390966, |
| "grad_norm": 48.54426956176758, |
| "loss": 4.4155, |
| "lr": 0.0009117482517482518, |
| "step": 1132, |
| "tokens_trained": 0.556385024 |
| }, |
| { |
| "epoch": 0.32167931352386353, |
| "grad_norm": 41.00883483886719, |
| "loss": 4.362, |
| "lr": 0.0009114685314685315, |
| "step": 1134, |
| "tokens_trained": 0.557368472 |
| }, |
| { |
| "epoch": 0.32224664917381746, |
| "grad_norm": 28.0487060546875, |
| "loss": 4.3504, |
| "lr": 0.0009111888111888113, |
| "step": 1136, |
| "tokens_trained": 0.55835288 |
| }, |
| { |
| "epoch": 0.3228139848237714, |
| "grad_norm": 22.05229377746582, |
| "loss": 4.331, |
| "lr": 0.0009109090909090909, |
| "step": 1138, |
| "tokens_trained": 0.559337064 |
| }, |
| { |
| "epoch": 0.32338132047372525, |
| "grad_norm": 16.770631790161133, |
| "loss": 4.3008, |
| "lr": 0.0009106293706293707, |
| "step": 1140, |
| "tokens_trained": 0.560317984 |
| }, |
| { |
| "epoch": 0.3239486561236792, |
| "grad_norm": 35.300262451171875, |
| "loss": 4.4083, |
| "lr": 0.0009103496503496503, |
| "step": 1142, |
| "tokens_trained": 0.561299688 |
| }, |
| { |
| "epoch": 0.32451599177363305, |
| "grad_norm": 23.788284301757812, |
| "loss": 4.2772, |
| "lr": 0.0009100699300699301, |
| "step": 1144, |
| "tokens_trained": 0.562285664 |
| }, |
| { |
| "epoch": 0.325083327423587, |
| "grad_norm": 23.085710525512695, |
| "loss": 4.3185, |
| "lr": 0.0009097902097902098, |
| "step": 1146, |
| "tokens_trained": 0.563267832 |
| }, |
| { |
| "epoch": 0.3256506630735409, |
| "grad_norm": 13.11314582824707, |
| "loss": 4.2711, |
| "lr": 0.0009095104895104895, |
| "step": 1148, |
| "tokens_trained": 0.564248928 |
| }, |
| { |
| "epoch": 0.3262179987234948, |
| "grad_norm": 31.297805786132812, |
| "loss": 4.3096, |
| "lr": 0.0009092307692307692, |
| "step": 1150, |
| "tokens_trained": 0.56522952 |
| }, |
| { |
| "epoch": 0.3267853343734487, |
| "grad_norm": 11.668539047241211, |
| "loss": 4.2667, |
| "lr": 0.000908951048951049, |
| "step": 1152, |
| "tokens_trained": 0.566212392 |
| }, |
| { |
| "epoch": 0.32735267002340257, |
| "grad_norm": 23.359189987182617, |
| "loss": 4.3156, |
| "lr": 0.0009086713286713288, |
| "step": 1154, |
| "tokens_trained": 0.567192216 |
| }, |
| { |
| "epoch": 0.3279200056733565, |
| "grad_norm": 31.09916114807129, |
| "loss": 4.3367, |
| "lr": 0.0009083916083916084, |
| "step": 1156, |
| "tokens_trained": 0.568177088 |
| }, |
| { |
| "epoch": 0.3284873413233104, |
| "grad_norm": 24.03261947631836, |
| "loss": 4.3504, |
| "lr": 0.0009081118881118881, |
| "step": 1158, |
| "tokens_trained": 0.56915868 |
| }, |
| { |
| "epoch": 0.3290546769732643, |
| "grad_norm": 16.029443740844727, |
| "loss": 4.3192, |
| "lr": 0.0009078321678321678, |
| "step": 1160, |
| "tokens_trained": 0.570142976 |
| }, |
| { |
| "epoch": 0.3296220126232182, |
| "grad_norm": 53.486724853515625, |
| "loss": 4.3921, |
| "lr": 0.0009075524475524476, |
| "step": 1162, |
| "tokens_trained": 0.57112748 |
| }, |
| { |
| "epoch": 0.3301893482731721, |
| "grad_norm": 37.42267608642578, |
| "loss": 4.2821, |
| "lr": 0.0009072727272727273, |
| "step": 1164, |
| "tokens_trained": 0.57211356 |
| }, |
| { |
| "epoch": 0.330756683923126, |
| "grad_norm": 28.862472534179688, |
| "loss": 4.3002, |
| "lr": 0.000906993006993007, |
| "step": 1166, |
| "tokens_trained": 0.57309492 |
| }, |
| { |
| "epoch": 0.33132401957307994, |
| "grad_norm": 22.26299476623535, |
| "loss": 4.2729, |
| "lr": 0.0009067132867132866, |
| "step": 1168, |
| "tokens_trained": 0.5740806 |
| }, |
| { |
| "epoch": 0.3318913552230338, |
| "grad_norm": 21.635013580322266, |
| "loss": 4.2866, |
| "lr": 0.0009064335664335665, |
| "step": 1170, |
| "tokens_trained": 0.575061664 |
| }, |
| { |
| "epoch": 0.33245869087298774, |
| "grad_norm": 18.995012283325195, |
| "loss": 4.2814, |
| "lr": 0.0009061538461538462, |
| "step": 1172, |
| "tokens_trained": 0.576046304 |
| }, |
| { |
| "epoch": 0.3330260265229416, |
| "grad_norm": 22.621299743652344, |
| "loss": 4.2739, |
| "lr": 0.0009058741258741259, |
| "step": 1174, |
| "tokens_trained": 0.577032376 |
| }, |
| { |
| "epoch": 0.33359336217289554, |
| "grad_norm": 21.758216857910156, |
| "loss": 4.263, |
| "lr": 0.0009055944055944056, |
| "step": 1176, |
| "tokens_trained": 0.578013896 |
| }, |
| { |
| "epoch": 0.33416069782284946, |
| "grad_norm": 32.38374710083008, |
| "loss": 4.2713, |
| "lr": 0.0009053146853146853, |
| "step": 1178, |
| "tokens_trained": 0.57900508 |
| }, |
| { |
| "epoch": 0.33472803347280333, |
| "grad_norm": 35.57462692260742, |
| "loss": 4.2986, |
| "lr": 0.0009050349650349651, |
| "step": 1180, |
| "tokens_trained": 0.57999512 |
| }, |
| { |
| "epoch": 0.33529536912275726, |
| "grad_norm": 11.77812385559082, |
| "loss": 4.3085, |
| "lr": 0.0009047552447552448, |
| "step": 1182, |
| "tokens_trained": 0.580982752 |
| }, |
| { |
| "epoch": 0.33586270477271113, |
| "grad_norm": 51.48725509643555, |
| "loss": 4.4003, |
| "lr": 0.0009044755244755245, |
| "step": 1184, |
| "tokens_trained": 0.581964936 |
| }, |
| { |
| "epoch": 0.33643004042266506, |
| "grad_norm": 47.01481628417969, |
| "loss": 4.3182, |
| "lr": 0.0009041958041958041, |
| "step": 1186, |
| "tokens_trained": 0.582949944 |
| }, |
| { |
| "epoch": 0.336997376072619, |
| "grad_norm": 22.935691833496094, |
| "loss": 4.3432, |
| "lr": 0.000903916083916084, |
| "step": 1188, |
| "tokens_trained": 0.583934776 |
| }, |
| { |
| "epoch": 0.33756471172257285, |
| "grad_norm": 45.21054458618164, |
| "loss": 4.4674, |
| "lr": 0.0009036363636363637, |
| "step": 1190, |
| "tokens_trained": 0.584918344 |
| }, |
| { |
| "epoch": 0.3381320473725268, |
| "grad_norm": 27.012706756591797, |
| "loss": 4.2889, |
| "lr": 0.0009033566433566434, |
| "step": 1192, |
| "tokens_trained": 0.585897632 |
| }, |
| { |
| "epoch": 0.33869938302248065, |
| "grad_norm": 16.68247413635254, |
| "loss": 4.2896, |
| "lr": 0.0009030769230769231, |
| "step": 1194, |
| "tokens_trained": 0.586879408 |
| }, |
| { |
| "epoch": 0.3392667186724346, |
| "grad_norm": 20.664148330688477, |
| "loss": 4.304, |
| "lr": 0.0009027972027972027, |
| "step": 1196, |
| "tokens_trained": 0.587859392 |
| }, |
| { |
| "epoch": 0.3398340543223885, |
| "grad_norm": 22.954742431640625, |
| "loss": 4.2853, |
| "lr": 0.0009025174825174826, |
| "step": 1198, |
| "tokens_trained": 0.588845408 |
| }, |
| { |
| "epoch": 0.34040138997234237, |
| "grad_norm": 23.226943969726562, |
| "loss": 4.2597, |
| "lr": 0.0009022377622377622, |
| "step": 1200, |
| "tokens_trained": 0.589832736 |
| }, |
| { |
| "epoch": 0.3409687256222963, |
| "grad_norm": 7.963059902191162, |
| "loss": 4.261, |
| "lr": 0.000901958041958042, |
| "step": 1202, |
| "tokens_trained": 0.590816568 |
| }, |
| { |
| "epoch": 0.34153606127225017, |
| "grad_norm": 25.160730361938477, |
| "loss": 4.3288, |
| "lr": 0.0009016783216783216, |
| "step": 1204, |
| "tokens_trained": 0.59179692 |
| }, |
| { |
| "epoch": 0.3421033969222041, |
| "grad_norm": 38.45030212402344, |
| "loss": 4.3371, |
| "lr": 0.0009013986013986014, |
| "step": 1206, |
| "tokens_trained": 0.592780968 |
| }, |
| { |
| "epoch": 0.342670732572158, |
| "grad_norm": 52.66873550415039, |
| "loss": 4.2805, |
| "lr": 0.0009011188811188812, |
| "step": 1208, |
| "tokens_trained": 0.593760896 |
| }, |
| { |
| "epoch": 0.3432380682221119, |
| "grad_norm": 28.104921340942383, |
| "loss": 4.3885, |
| "lr": 0.0009008391608391609, |
| "step": 1210, |
| "tokens_trained": 0.59474304 |
| }, |
| { |
| "epoch": 0.3438054038720658, |
| "grad_norm": 49.20989990234375, |
| "loss": 4.346, |
| "lr": 0.0009005594405594406, |
| "step": 1212, |
| "tokens_trained": 0.59572768 |
| }, |
| { |
| "epoch": 0.3443727395220197, |
| "grad_norm": 20.652427673339844, |
| "loss": 4.2368, |
| "lr": 0.0009002797202797202, |
| "step": 1214, |
| "tokens_trained": 0.59671092 |
| }, |
| { |
| "epoch": 0.3449400751719736, |
| "grad_norm": 17.821596145629883, |
| "loss": 4.3041, |
| "lr": 0.0009000000000000001, |
| "step": 1216, |
| "tokens_trained": 0.597697344 |
| }, |
| { |
| "epoch": 0.34550741082192754, |
| "grad_norm": 48.594932556152344, |
| "loss": 4.3668, |
| "lr": 0.0008997202797202797, |
| "step": 1218, |
| "tokens_trained": 0.598677288 |
| }, |
| { |
| "epoch": 0.3460747464718814, |
| "grad_norm": 27.70078468322754, |
| "loss": 4.2939, |
| "lr": 0.0008994405594405595, |
| "step": 1220, |
| "tokens_trained": 0.599662488 |
| }, |
| { |
| "epoch": 0.34664208212183534, |
| "grad_norm": 25.498798370361328, |
| "loss": 4.2891, |
| "lr": 0.0008991608391608391, |
| "step": 1222, |
| "tokens_trained": 0.600646904 |
| }, |
| { |
| "epoch": 0.3472094177717892, |
| "grad_norm": 13.455835342407227, |
| "loss": 4.2881, |
| "lr": 0.0008988811188811188, |
| "step": 1224, |
| "tokens_trained": 0.601628112 |
| }, |
| { |
| "epoch": 0.34777675342174313, |
| "grad_norm": 17.518342971801758, |
| "loss": 4.2977, |
| "lr": 0.0008986013986013987, |
| "step": 1226, |
| "tokens_trained": 0.602612336 |
| }, |
| { |
| "epoch": 0.34834408907169706, |
| "grad_norm": 20.642597198486328, |
| "loss": 4.2921, |
| "lr": 0.0008983216783216783, |
| "step": 1228, |
| "tokens_trained": 0.603595 |
| }, |
| { |
| "epoch": 0.34891142472165093, |
| "grad_norm": 14.464616775512695, |
| "loss": 4.233, |
| "lr": 0.0008980419580419581, |
| "step": 1230, |
| "tokens_trained": 0.604576592 |
| }, |
| { |
| "epoch": 0.34947876037160486, |
| "grad_norm": 13.204504013061523, |
| "loss": 4.2707, |
| "lr": 0.0008977622377622377, |
| "step": 1232, |
| "tokens_trained": 0.60555656 |
| }, |
| { |
| "epoch": 0.35004609602155873, |
| "grad_norm": 12.241665840148926, |
| "loss": 4.2506, |
| "lr": 0.0008974825174825176, |
| "step": 1234, |
| "tokens_trained": 0.606536024 |
| }, |
| { |
| "epoch": 0.35061343167151265, |
| "grad_norm": 18.187660217285156, |
| "loss": 4.2659, |
| "lr": 0.0008972027972027972, |
| "step": 1236, |
| "tokens_trained": 0.607522576 |
| }, |
| { |
| "epoch": 0.3511807673214666, |
| "grad_norm": 8.911888122558594, |
| "loss": 4.2505, |
| "lr": 0.000896923076923077, |
| "step": 1238, |
| "tokens_trained": 0.608507736 |
| }, |
| { |
| "epoch": 0.35174810297142045, |
| "grad_norm": 21.351713180541992, |
| "loss": 4.2291, |
| "lr": 0.0008966433566433566, |
| "step": 1240, |
| "tokens_trained": 0.609486688 |
| }, |
| { |
| "epoch": 0.3523154386213744, |
| "grad_norm": 47.81566619873047, |
| "loss": 4.2725, |
| "lr": 0.0008963636363636363, |
| "step": 1242, |
| "tokens_trained": 0.610470272 |
| }, |
| { |
| "epoch": 0.35288277427132825, |
| "grad_norm": 33.53351974487305, |
| "loss": 4.3237, |
| "lr": 0.0008960839160839162, |
| "step": 1244, |
| "tokens_trained": 0.611455176 |
| }, |
| { |
| "epoch": 0.3534501099212822, |
| "grad_norm": 15.252607345581055, |
| "loss": 4.2868, |
| "lr": 0.0008958041958041958, |
| "step": 1246, |
| "tokens_trained": 0.612437888 |
| }, |
| { |
| "epoch": 0.3540174455712361, |
| "grad_norm": 24.129865646362305, |
| "loss": 4.2626, |
| "lr": 0.0008955244755244756, |
| "step": 1248, |
| "tokens_trained": 0.613420728 |
| }, |
| { |
| "epoch": 0.35458478122118997, |
| "grad_norm": 34.814605712890625, |
| "loss": 4.2627, |
| "lr": 0.0008952447552447552, |
| "step": 1250, |
| "tokens_trained": 0.614405904 |
| }, |
| { |
| "epoch": 0.35458478122118997, |
| "eval_loss": 1.078355312347412, |
| "eval_runtime": 20.4723, |
| "step": 1250, |
| "tokens_trained": 0.614405904 |
| }, |
| { |
| "epoch": 0.3551521168711439, |
| "grad_norm": 18.26809310913086, |
| "loss": 4.2986, |
| "lr": 0.000894965034965035, |
| "step": 1252, |
| "tokens_trained": 0.615386288 |
| }, |
| { |
| "epoch": 0.35571945252109777, |
| "grad_norm": 24.68335723876953, |
| "loss": 4.3146, |
| "lr": 0.0008946853146853147, |
| "step": 1254, |
| "tokens_trained": 0.616370576 |
| }, |
| { |
| "epoch": 0.3562867881710517, |
| "grad_norm": 35.34586715698242, |
| "loss": 4.2905, |
| "lr": 0.0008944055944055944, |
| "step": 1256, |
| "tokens_trained": 0.617351944 |
| }, |
| { |
| "epoch": 0.3568541238210056, |
| "grad_norm": 22.668407440185547, |
| "loss": 4.2607, |
| "lr": 0.0008941258741258741, |
| "step": 1258, |
| "tokens_trained": 0.618334816 |
| }, |
| { |
| "epoch": 0.3574214594709595, |
| "grad_norm": 14.068164825439453, |
| "loss": 4.2459, |
| "lr": 0.0008938461538461538, |
| "step": 1260, |
| "tokens_trained": 0.619319736 |
| }, |
| { |
| "epoch": 0.3579887951209134, |
| "grad_norm": 8.274995803833008, |
| "loss": 4.2713, |
| "lr": 0.0008935664335664337, |
| "step": 1262, |
| "tokens_trained": 0.620299344 |
| }, |
| { |
| "epoch": 0.3585561307708673, |
| "grad_norm": 22.12897491455078, |
| "loss": 4.2841, |
| "lr": 0.0008932867132867133, |
| "step": 1264, |
| "tokens_trained": 0.621282592 |
| }, |
| { |
| "epoch": 0.3591234664208212, |
| "grad_norm": 26.171052932739258, |
| "loss": 4.2505, |
| "lr": 0.000893006993006993, |
| "step": 1266, |
| "tokens_trained": 0.622266136 |
| }, |
| { |
| "epoch": 0.35969080207077514, |
| "grad_norm": 14.768603324890137, |
| "loss": 4.271, |
| "lr": 0.0008927272727272727, |
| "step": 1268, |
| "tokens_trained": 0.623247816 |
| }, |
| { |
| "epoch": 0.360258137720729, |
| "grad_norm": 13.065408706665039, |
| "loss": 4.2387, |
| "lr": 0.0008924475524475525, |
| "step": 1270, |
| "tokens_trained": 0.624234848 |
| }, |
| { |
| "epoch": 0.36082547337068294, |
| "grad_norm": 14.043888092041016, |
| "loss": 4.2601, |
| "lr": 0.0008921678321678322, |
| "step": 1272, |
| "tokens_trained": 0.625214176 |
| }, |
| { |
| "epoch": 0.3613928090206368, |
| "grad_norm": 13.734328269958496, |
| "loss": 4.2426, |
| "lr": 0.0008918881118881119, |
| "step": 1274, |
| "tokens_trained": 0.626197608 |
| }, |
| { |
| "epoch": 0.36196014467059073, |
| "grad_norm": 10.075374603271484, |
| "loss": 4.2259, |
| "lr": 0.0008916083916083916, |
| "step": 1276, |
| "tokens_trained": 0.62717884 |
| }, |
| { |
| "epoch": 0.36252748032054466, |
| "grad_norm": 33.92001724243164, |
| "loss": 4.3054, |
| "lr": 0.0008913286713286713, |
| "step": 1278, |
| "tokens_trained": 0.628166888 |
| }, |
| { |
| "epoch": 0.36309481597049853, |
| "grad_norm": 31.1391544342041, |
| "loss": 4.3066, |
| "lr": 0.0008910489510489512, |
| "step": 1280, |
| "tokens_trained": 0.629152528 |
| }, |
| { |
| "epoch": 0.36366215162045246, |
| "grad_norm": 10.888711929321289, |
| "loss": 4.2348, |
| "lr": 0.0008907692307692308, |
| "step": 1282, |
| "tokens_trained": 0.630132584 |
| }, |
| { |
| "epoch": 0.3642294872704063, |
| "grad_norm": 27.298410415649414, |
| "loss": 4.3225, |
| "lr": 0.0008904895104895105, |
| "step": 1284, |
| "tokens_trained": 0.63111212 |
| }, |
| { |
| "epoch": 0.36479682292036025, |
| "grad_norm": 23.396818161010742, |
| "loss": 4.3177, |
| "lr": 0.0008902097902097902, |
| "step": 1286, |
| "tokens_trained": 0.632094984 |
| }, |
| { |
| "epoch": 0.3653641585703142, |
| "grad_norm": 18.824432373046875, |
| "loss": 4.2235, |
| "lr": 0.00088993006993007, |
| "step": 1288, |
| "tokens_trained": 0.633076832 |
| }, |
| { |
| "epoch": 0.36593149422026805, |
| "grad_norm": 8.04826545715332, |
| "loss": 4.2268, |
| "lr": 0.0008896503496503497, |
| "step": 1290, |
| "tokens_trained": 0.63405868 |
| }, |
| { |
| "epoch": 0.366498829870222, |
| "grad_norm": 32.26673889160156, |
| "loss": 4.3113, |
| "lr": 0.0008893706293706294, |
| "step": 1292, |
| "tokens_trained": 0.635045096 |
| }, |
| { |
| "epoch": 0.36706616552017585, |
| "grad_norm": 29.91358184814453, |
| "loss": 4.2971, |
| "lr": 0.000889090909090909, |
| "step": 1294, |
| "tokens_trained": 0.63603008 |
| }, |
| { |
| "epoch": 0.3676335011701298, |
| "grad_norm": 12.093538284301758, |
| "loss": 4.2502, |
| "lr": 0.0008888111888111888, |
| "step": 1296, |
| "tokens_trained": 0.637014016 |
| }, |
| { |
| "epoch": 0.3682008368200837, |
| "grad_norm": 8.252509117126465, |
| "loss": 4.2905, |
| "lr": 0.0008885314685314686, |
| "step": 1298, |
| "tokens_trained": 0.637997752 |
| }, |
| { |
| "epoch": 0.36876817247003757, |
| "grad_norm": 61.22240447998047, |
| "loss": 4.4753, |
| "lr": 0.0008882517482517483, |
| "step": 1300, |
| "tokens_trained": 0.638981552 |
| }, |
| { |
| "epoch": 0.3693355081199915, |
| "grad_norm": 47.58195877075195, |
| "loss": 4.2769, |
| "lr": 0.000887972027972028, |
| "step": 1302, |
| "tokens_trained": 0.639963512 |
| }, |
| { |
| "epoch": 0.36990284376994537, |
| "grad_norm": 28.806411743164062, |
| "loss": 4.3728, |
| "lr": 0.0008876923076923077, |
| "step": 1304, |
| "tokens_trained": 0.640948392 |
| }, |
| { |
| "epoch": 0.3704701794198993, |
| "grad_norm": 38.960853576660156, |
| "loss": 4.338, |
| "lr": 0.0008874125874125875, |
| "step": 1306, |
| "tokens_trained": 0.641935304 |
| }, |
| { |
| "epoch": 0.3710375150698532, |
| "grad_norm": 25.05726432800293, |
| "loss": 4.3002, |
| "lr": 0.0008871328671328671, |
| "step": 1308, |
| "tokens_trained": 0.642924168 |
| }, |
| { |
| "epoch": 0.3716048507198071, |
| "grad_norm": 39.84127426147461, |
| "loss": 4.3593, |
| "lr": 0.0008868531468531469, |
| "step": 1310, |
| "tokens_trained": 0.64390412 |
| }, |
| { |
| "epoch": 0.372172186369761, |
| "grad_norm": 15.03055191040039, |
| "loss": 4.223, |
| "lr": 0.0008865734265734265, |
| "step": 1312, |
| "tokens_trained": 0.644882104 |
| }, |
| { |
| "epoch": 0.3727395220197149, |
| "grad_norm": 41.85628890991211, |
| "loss": 4.3819, |
| "lr": 0.0008862937062937063, |
| "step": 1314, |
| "tokens_trained": 0.645866912 |
| }, |
| { |
| "epoch": 0.3733068576696688, |
| "grad_norm": 29.014118194580078, |
| "loss": 4.2843, |
| "lr": 0.0008860139860139861, |
| "step": 1316, |
| "tokens_trained": 0.646850376 |
| }, |
| { |
| "epoch": 0.37387419331962274, |
| "grad_norm": 24.407743453979492, |
| "loss": 4.2598, |
| "lr": 0.0008857342657342658, |
| "step": 1318, |
| "tokens_trained": 0.647832272 |
| }, |
| { |
| "epoch": 0.3744415289695766, |
| "grad_norm": 23.28154182434082, |
| "loss": 4.2162, |
| "lr": 0.0008854545454545455, |
| "step": 1320, |
| "tokens_trained": 0.64881652 |
| }, |
| { |
| "epoch": 0.37500886461953054, |
| "grad_norm": 17.70418930053711, |
| "loss": 4.2386, |
| "lr": 0.0008851748251748251, |
| "step": 1322, |
| "tokens_trained": 0.649794936 |
| }, |
| { |
| "epoch": 0.37557620026948446, |
| "grad_norm": 22.582124710083008, |
| "loss": 4.2358, |
| "lr": 0.000884895104895105, |
| "step": 1324, |
| "tokens_trained": 0.650777784 |
| }, |
| { |
| "epoch": 0.37614353591943833, |
| "grad_norm": 16.77848243713379, |
| "loss": 4.2536, |
| "lr": 0.0008846153846153846, |
| "step": 1326, |
| "tokens_trained": 0.651762472 |
| }, |
| { |
| "epoch": 0.37671087156939226, |
| "grad_norm": 14.382417678833008, |
| "loss": 4.2403, |
| "lr": 0.0008843356643356644, |
| "step": 1328, |
| "tokens_trained": 0.652741832 |
| }, |
| { |
| "epoch": 0.37727820721934613, |
| "grad_norm": 22.420886993408203, |
| "loss": 4.1977, |
| "lr": 0.000884055944055944, |
| "step": 1330, |
| "tokens_trained": 0.653725792 |
| }, |
| { |
| "epoch": 0.37784554286930006, |
| "grad_norm": 9.768660545349121, |
| "loss": 4.2148, |
| "lr": 0.0008837762237762238, |
| "step": 1332, |
| "tokens_trained": 0.654704648 |
| }, |
| { |
| "epoch": 0.378412878519254, |
| "grad_norm": 5.091487407684326, |
| "loss": 4.2062, |
| "lr": 0.0008834965034965036, |
| "step": 1334, |
| "tokens_trained": 0.65569176 |
| }, |
| { |
| "epoch": 0.37898021416920785, |
| "grad_norm": 53.520957946777344, |
| "loss": 4.4082, |
| "lr": 0.0008832167832167832, |
| "step": 1336, |
| "tokens_trained": 0.656679344 |
| }, |
| { |
| "epoch": 0.3795475498191618, |
| "grad_norm": 32.17420959472656, |
| "loss": 4.2911, |
| "lr": 0.000882937062937063, |
| "step": 1338, |
| "tokens_trained": 0.657665136 |
| }, |
| { |
| "epoch": 0.38011488546911565, |
| "grad_norm": 14.12790584564209, |
| "loss": 4.2899, |
| "lr": 0.0008826573426573426, |
| "step": 1340, |
| "tokens_trained": 0.658651576 |
| }, |
| { |
| "epoch": 0.3806822211190696, |
| "grad_norm": 51.74199676513672, |
| "loss": 4.3901, |
| "lr": 0.0008823776223776225, |
| "step": 1342, |
| "tokens_trained": 0.659631792 |
| }, |
| { |
| "epoch": 0.3812495567690235, |
| "grad_norm": 48.99909973144531, |
| "loss": 4.298, |
| "lr": 0.0008820979020979021, |
| "step": 1344, |
| "tokens_trained": 0.660616912 |
| }, |
| { |
| "epoch": 0.38181689241897737, |
| "grad_norm": 28.356245040893555, |
| "loss": 4.3171, |
| "lr": 0.0008818181818181819, |
| "step": 1346, |
| "tokens_trained": 0.66159872 |
| }, |
| { |
| "epoch": 0.3823842280689313, |
| "grad_norm": 45.081703186035156, |
| "loss": 4.3067, |
| "lr": 0.0008815384615384615, |
| "step": 1348, |
| "tokens_trained": 0.662582152 |
| }, |
| { |
| "epoch": 0.38295156371888517, |
| "grad_norm": 37.175052642822266, |
| "loss": 4.241, |
| "lr": 0.0008812587412587412, |
| "step": 1350, |
| "tokens_trained": 0.663561176 |
| }, |
| { |
| "epoch": 0.3835188993688391, |
| "grad_norm": 49.46076965332031, |
| "loss": 4.2896, |
| "lr": 0.0008809790209790211, |
| "step": 1352, |
| "tokens_trained": 0.664545144 |
| }, |
| { |
| "epoch": 0.384086235018793, |
| "grad_norm": 22.20182991027832, |
| "loss": 4.323, |
| "lr": 0.0008806993006993007, |
| "step": 1354, |
| "tokens_trained": 0.66553092 |
| }, |
| { |
| "epoch": 0.3846535706687469, |
| "grad_norm": 34.111549377441406, |
| "loss": 4.3138, |
| "lr": 0.0008804195804195805, |
| "step": 1356, |
| "tokens_trained": 0.666517568 |
| }, |
| { |
| "epoch": 0.3852209063187008, |
| "grad_norm": 47.01582336425781, |
| "loss": 4.3009, |
| "lr": 0.0008801398601398601, |
| "step": 1358, |
| "tokens_trained": 0.667498192 |
| }, |
| { |
| "epoch": 0.3857882419686547, |
| "grad_norm": 18.845388412475586, |
| "loss": 4.3176, |
| "lr": 0.00087986013986014, |
| "step": 1360, |
| "tokens_trained": 0.668479008 |
| }, |
| { |
| "epoch": 0.3863555776186086, |
| "grad_norm": 53.68927764892578, |
| "loss": 4.4024, |
| "lr": 0.0008795804195804196, |
| "step": 1362, |
| "tokens_trained": 0.669462472 |
| }, |
| { |
| "epoch": 0.38692291326856254, |
| "grad_norm": 29.88358497619629, |
| "loss": 4.286, |
| "lr": 0.0008793006993006993, |
| "step": 1364, |
| "tokens_trained": 0.67044392 |
| }, |
| { |
| "epoch": 0.3874902489185164, |
| "grad_norm": 11.12879753112793, |
| "loss": 4.3024, |
| "lr": 0.000879020979020979, |
| "step": 1366, |
| "tokens_trained": 0.671424552 |
| }, |
| { |
| "epoch": 0.38805758456847034, |
| "grad_norm": 23.573301315307617, |
| "loss": 4.2662, |
| "lr": 0.0008787412587412587, |
| "step": 1368, |
| "tokens_trained": 0.672409992 |
| }, |
| { |
| "epoch": 0.3886249202184242, |
| "grad_norm": 24.749160766601562, |
| "loss": 4.274, |
| "lr": 0.0008784615384615386, |
| "step": 1370, |
| "tokens_trained": 0.67339824 |
| }, |
| { |
| "epoch": 0.38919225586837813, |
| "grad_norm": 33.26881408691406, |
| "loss": 4.2588, |
| "lr": 0.0008781818181818182, |
| "step": 1372, |
| "tokens_trained": 0.67438204 |
| }, |
| { |
| "epoch": 0.38975959151833206, |
| "grad_norm": 24.466472625732422, |
| "loss": 4.2837, |
| "lr": 0.000877902097902098, |
| "step": 1374, |
| "tokens_trained": 0.67536356 |
| }, |
| { |
| "epoch": 0.39004325934330897, |
| "eval_loss": 1.0616238117218018, |
| "eval_runtime": 20.3698, |
| "step": 1375, |
| "tokens_trained": 0.675855672 |
| }, |
| { |
| "epoch": 0.39032692716828593, |
| "grad_norm": 24.48844337463379, |
| "loss": 4.259, |
| "lr": 0.0008776223776223776, |
| "step": 1376, |
| "tokens_trained": 0.676346368 |
| }, |
| { |
| "epoch": 0.39089426281823986, |
| "grad_norm": 30.594989776611328, |
| "loss": 4.1894, |
| "lr": 0.0008773426573426574, |
| "step": 1378, |
| "tokens_trained": 0.677329312 |
| }, |
| { |
| "epoch": 0.3914615984681937, |
| "grad_norm": 19.835350036621094, |
| "loss": 4.2718, |
| "lr": 0.0008770629370629371, |
| "step": 1380, |
| "tokens_trained": 0.678312272 |
| }, |
| { |
| "epoch": 0.39202893411814765, |
| "grad_norm": 14.570358276367188, |
| "loss": 4.2419, |
| "lr": 0.0008767832167832168, |
| "step": 1382, |
| "tokens_trained": 0.679291216 |
| }, |
| { |
| "epoch": 0.3925962697681016, |
| "grad_norm": 11.608271598815918, |
| "loss": 4.1917, |
| "lr": 0.0008765034965034965, |
| "step": 1384, |
| "tokens_trained": 0.680273296 |
| }, |
| { |
| "epoch": 0.39316360541805545, |
| "grad_norm": 26.094860076904297, |
| "loss": 4.2762, |
| "lr": 0.0008762237762237762, |
| "step": 1386, |
| "tokens_trained": 0.681249464 |
| }, |
| { |
| "epoch": 0.3937309410680094, |
| "grad_norm": 12.754049301147461, |
| "loss": 4.2032, |
| "lr": 0.0008759440559440561, |
| "step": 1388, |
| "tokens_trained": 0.682234168 |
| }, |
| { |
| "epoch": 0.39429827671796325, |
| "grad_norm": 5.951663970947266, |
| "loss": 4.1921, |
| "lr": 0.0008756643356643357, |
| "step": 1390, |
| "tokens_trained": 0.683217176 |
| }, |
| { |
| "epoch": 0.3948656123679172, |
| "grad_norm": 26.907669067382812, |
| "loss": 4.24, |
| "lr": 0.0008753846153846154, |
| "step": 1392, |
| "tokens_trained": 0.68419888 |
| }, |
| { |
| "epoch": 0.3954329480178711, |
| "grad_norm": 25.04796600341797, |
| "loss": 4.2656, |
| "lr": 0.0008751048951048951, |
| "step": 1394, |
| "tokens_trained": 0.685178784 |
| }, |
| { |
| "epoch": 0.39600028366782497, |
| "grad_norm": 19.600811004638672, |
| "loss": 4.2683, |
| "lr": 0.0008748251748251749, |
| "step": 1396, |
| "tokens_trained": 0.686161632 |
| }, |
| { |
| "epoch": 0.3965676193177789, |
| "grad_norm": 14.087088584899902, |
| "loss": 4.2658, |
| "lr": 0.0008745454545454546, |
| "step": 1398, |
| "tokens_trained": 0.687139992 |
| }, |
| { |
| "epoch": 0.39713495496773277, |
| "grad_norm": 9.257765769958496, |
| "loss": 4.2021, |
| "lr": 0.0008742657342657343, |
| "step": 1400, |
| "tokens_trained": 0.688117912 |
| }, |
| { |
| "epoch": 0.3977022906176867, |
| "grad_norm": 18.830154418945312, |
| "loss": 4.2249, |
| "lr": 0.0008739860139860139, |
| "step": 1402, |
| "tokens_trained": 0.689098776 |
| }, |
| { |
| "epoch": 0.3982696262676406, |
| "grad_norm": 24.81566619873047, |
| "loss": 4.246, |
| "lr": 0.0008737062937062937, |
| "step": 1404, |
| "tokens_trained": 0.690085432 |
| }, |
| { |
| "epoch": 0.3988369619175945, |
| "grad_norm": 14.071616172790527, |
| "loss": 4.2531, |
| "lr": 0.0008734265734265734, |
| "step": 1406, |
| "tokens_trained": 0.691069232 |
| }, |
| { |
| "epoch": 0.3994042975675484, |
| "grad_norm": 21.414424896240234, |
| "loss": 4.2192, |
| "lr": 0.0008731468531468532, |
| "step": 1408, |
| "tokens_trained": 0.692051224 |
| }, |
| { |
| "epoch": 0.3999716332175023, |
| "grad_norm": 38.74683380126953, |
| "loss": 4.2421, |
| "lr": 0.0008728671328671329, |
| "step": 1410, |
| "tokens_trained": 0.693029976 |
| }, |
| { |
| "epoch": 0.4005389688674562, |
| "grad_norm": 12.595442771911621, |
| "loss": 4.2569, |
| "lr": 0.0008725874125874126, |
| "step": 1412, |
| "tokens_trained": 0.694013304 |
| }, |
| { |
| "epoch": 0.40110630451741014, |
| "grad_norm": 55.233673095703125, |
| "loss": 4.3422, |
| "lr": 0.0008723076923076924, |
| "step": 1414, |
| "tokens_trained": 0.694997536 |
| }, |
| { |
| "epoch": 0.401673640167364, |
| "grad_norm": 24.717113494873047, |
| "loss": 4.2567, |
| "lr": 0.000872027972027972, |
| "step": 1416, |
| "tokens_trained": 0.695982632 |
| }, |
| { |
| "epoch": 0.40224097581731794, |
| "grad_norm": 20.552875518798828, |
| "loss": 4.2464, |
| "lr": 0.0008717482517482518, |
| "step": 1418, |
| "tokens_trained": 0.696966408 |
| }, |
| { |
| "epoch": 0.4028083114672718, |
| "grad_norm": 25.569900512695312, |
| "loss": 4.21, |
| "lr": 0.0008714685314685314, |
| "step": 1420, |
| "tokens_trained": 0.697948224 |
| }, |
| { |
| "epoch": 0.40337564711722573, |
| "grad_norm": 24.538320541381836, |
| "loss": 4.2605, |
| "lr": 0.0008711888111888112, |
| "step": 1422, |
| "tokens_trained": 0.698934688 |
| }, |
| { |
| "epoch": 0.40394298276717966, |
| "grad_norm": 9.585651397705078, |
| "loss": 4.2524, |
| "lr": 0.0008709090909090909, |
| "step": 1424, |
| "tokens_trained": 0.699921976 |
| }, |
| { |
| "epoch": 0.40451031841713353, |
| "grad_norm": 11.886672973632812, |
| "loss": 4.1934, |
| "lr": 0.0008706293706293707, |
| "step": 1426, |
| "tokens_trained": 0.70090396 |
| }, |
| { |
| "epoch": 0.40507765406708746, |
| "grad_norm": 26.162124633789062, |
| "loss": 4.2412, |
| "lr": 0.0008703496503496504, |
| "step": 1428, |
| "tokens_trained": 0.701888448 |
| }, |
| { |
| "epoch": 0.4056449897170413, |
| "grad_norm": 5.03931188583374, |
| "loss": 4.202, |
| "lr": 0.00087006993006993, |
| "step": 1430, |
| "tokens_trained": 0.702864336 |
| }, |
| { |
| "epoch": 0.40621232536699525, |
| "grad_norm": 33.67579650878906, |
| "loss": 4.3087, |
| "lr": 0.0008697902097902099, |
| "step": 1432, |
| "tokens_trained": 0.703847784 |
| }, |
| { |
| "epoch": 0.4067796610169492, |
| "grad_norm": 34.38542556762695, |
| "loss": 4.2807, |
| "lr": 0.0008695104895104895, |
| "step": 1434, |
| "tokens_trained": 0.704827288 |
| }, |
| { |
| "epoch": 0.40734699666690305, |
| "grad_norm": 13.319886207580566, |
| "loss": 4.3332, |
| "lr": 0.0008692307692307693, |
| "step": 1436, |
| "tokens_trained": 0.705815392 |
| }, |
| { |
| "epoch": 0.407914332316857, |
| "grad_norm": 36.58311080932617, |
| "loss": 4.3318, |
| "lr": 0.0008689510489510489, |
| "step": 1438, |
| "tokens_trained": 0.7067914 |
| }, |
| { |
| "epoch": 0.40848166796681085, |
| "grad_norm": 29.63648223876953, |
| "loss": 4.2962, |
| "lr": 0.0008686713286713287, |
| "step": 1440, |
| "tokens_trained": 0.70777396 |
| }, |
| { |
| "epoch": 0.4090490036167648, |
| "grad_norm": 9.55128002166748, |
| "loss": 4.2773, |
| "lr": 0.0008683916083916084, |
| "step": 1442, |
| "tokens_trained": 0.708750496 |
| }, |
| { |
| "epoch": 0.4096163392667187, |
| "grad_norm": 53.83981704711914, |
| "loss": 4.3875, |
| "lr": 0.0008681118881118881, |
| "step": 1444, |
| "tokens_trained": 0.709730168 |
| }, |
| { |
| "epoch": 0.41018367491667257, |
| "grad_norm": 54.59236526489258, |
| "loss": 4.3582, |
| "lr": 0.0008678321678321679, |
| "step": 1446, |
| "tokens_trained": 0.710709704 |
| }, |
| { |
| "epoch": 0.4107510105666265, |
| "grad_norm": 13.964411735534668, |
| "loss": 4.3065, |
| "lr": 0.0008675524475524475, |
| "step": 1448, |
| "tokens_trained": 0.711690136 |
| }, |
| { |
| "epoch": 0.41131834621658037, |
| "grad_norm": 25.506649017333984, |
| "loss": 4.2686, |
| "lr": 0.0008672727272727273, |
| "step": 1450, |
| "tokens_trained": 0.712668056 |
| }, |
| { |
| "epoch": 0.4118856818665343, |
| "grad_norm": 21.1628360748291, |
| "loss": 4.2485, |
| "lr": 0.000866993006993007, |
| "step": 1452, |
| "tokens_trained": 0.71365004 |
| }, |
| { |
| "epoch": 0.4124530175164882, |
| "grad_norm": 15.751238822937012, |
| "loss": 4.2078, |
| "lr": 0.0008667132867132868, |
| "step": 1454, |
| "tokens_trained": 0.714632032 |
| }, |
| { |
| "epoch": 0.4130203531664421, |
| "grad_norm": 15.838552474975586, |
| "loss": 4.1944, |
| "lr": 0.0008664335664335664, |
| "step": 1456, |
| "tokens_trained": 0.715611376 |
| }, |
| { |
| "epoch": 0.413587688816396, |
| "grad_norm": 15.968609809875488, |
| "loss": 4.1768, |
| "lr": 0.0008661538461538461, |
| "step": 1458, |
| "tokens_trained": 0.716591112 |
| }, |
| { |
| "epoch": 0.4141550244663499, |
| "grad_norm": 15.419891357421875, |
| "loss": 4.1978, |
| "lr": 0.0008658741258741259, |
| "step": 1460, |
| "tokens_trained": 0.717575952 |
| }, |
| { |
| "epoch": 0.4147223601163038, |
| "grad_norm": 15.088132858276367, |
| "loss": 4.2361, |
| "lr": 0.0008655944055944056, |
| "step": 1462, |
| "tokens_trained": 0.718563696 |
| }, |
| { |
| "epoch": 0.41528969576625774, |
| "grad_norm": 4.839190483093262, |
| "loss": 4.2089, |
| "lr": 0.0008653146853146854, |
| "step": 1464, |
| "tokens_trained": 0.71954848 |
| }, |
| { |
| "epoch": 0.4158570314162116, |
| "grad_norm": 22.192466735839844, |
| "loss": 4.2109, |
| "lr": 0.000865034965034965, |
| "step": 1466, |
| "tokens_trained": 0.720533304 |
| }, |
| { |
| "epoch": 0.41642436706616553, |
| "grad_norm": 28.983531951904297, |
| "loss": 4.2402, |
| "lr": 0.0008647552447552448, |
| "step": 1468, |
| "tokens_trained": 0.721518176 |
| }, |
| { |
| "epoch": 0.4169917027161194, |
| "grad_norm": 21.010780334472656, |
| "loss": 4.1732, |
| "lr": 0.0008644755244755245, |
| "step": 1470, |
| "tokens_trained": 0.72250176 |
| }, |
| { |
| "epoch": 0.41755903836607333, |
| "grad_norm": 14.59277057647705, |
| "loss": 4.1847, |
| "lr": 0.0008641958041958042, |
| "step": 1472, |
| "tokens_trained": 0.723486664 |
| }, |
| { |
| "epoch": 0.41812637401602726, |
| "grad_norm": 13.688531875610352, |
| "loss": 4.1577, |
| "lr": 0.0008639160839160839, |
| "step": 1474, |
| "tokens_trained": 0.724469328 |
| }, |
| { |
| "epoch": 0.41869370966598113, |
| "grad_norm": 15.879347801208496, |
| "loss": 4.1721, |
| "lr": 0.0008636363636363636, |
| "step": 1476, |
| "tokens_trained": 0.725454968 |
| }, |
| { |
| "epoch": 0.41926104531593505, |
| "grad_norm": 10.225201606750488, |
| "loss": 4.1999, |
| "lr": 0.0008633566433566434, |
| "step": 1478, |
| "tokens_trained": 0.7264426 |
| }, |
| { |
| "epoch": 0.4198283809658889, |
| "grad_norm": 17.007728576660156, |
| "loss": 4.2229, |
| "lr": 0.0008630769230769231, |
| "step": 1480, |
| "tokens_trained": 0.727422056 |
| }, |
| { |
| "epoch": 0.42039571661584285, |
| "grad_norm": 13.517934799194336, |
| "loss": 4.2241, |
| "lr": 0.0008627972027972029, |
| "step": 1482, |
| "tokens_trained": 0.728403688 |
| }, |
| { |
| "epoch": 0.4209630522657968, |
| "grad_norm": 17.132064819335938, |
| "loss": 4.1679, |
| "lr": 0.0008625174825174825, |
| "step": 1484, |
| "tokens_trained": 0.729386248 |
| }, |
| { |
| "epoch": 0.42153038791575065, |
| "grad_norm": 19.782320022583008, |
| "loss": 4.1817, |
| "lr": 0.0008622377622377622, |
| "step": 1486, |
| "tokens_trained": 0.730368752 |
| }, |
| { |
| "epoch": 0.4220977235657046, |
| "grad_norm": 3.388552188873291, |
| "loss": 4.1726, |
| "lr": 0.000861958041958042, |
| "step": 1488, |
| "tokens_trained": 0.731354304 |
| }, |
| { |
| "epoch": 0.42266505921565845, |
| "grad_norm": 28.33499526977539, |
| "loss": 4.2623, |
| "lr": 0.0008616783216783217, |
| "step": 1490, |
| "tokens_trained": 0.732337296 |
| }, |
| { |
| "epoch": 0.42323239486561237, |
| "grad_norm": 24.927406311035156, |
| "loss": 4.2422, |
| "lr": 0.0008613986013986014, |
| "step": 1492, |
| "tokens_trained": 0.733319824 |
| }, |
| { |
| "epoch": 0.4237997305155663, |
| "grad_norm": 25.996028900146484, |
| "loss": 4.2227, |
| "lr": 0.0008611188811188811, |
| "step": 1494, |
| "tokens_trained": 0.73430636 |
| }, |
| { |
| "epoch": 0.42436706616552017, |
| "grad_norm": 14.625783920288086, |
| "loss": 4.2268, |
| "lr": 0.0008608391608391609, |
| "step": 1496, |
| "tokens_trained": 0.735285848 |
| }, |
| { |
| "epoch": 0.4249344018154741, |
| "grad_norm": 12.556640625, |
| "loss": 4.2352, |
| "lr": 0.0008605594405594406, |
| "step": 1498, |
| "tokens_trained": 0.736270632 |
| }, |
| { |
| "epoch": 0.42550173746542796, |
| "grad_norm": 18.579416275024414, |
| "loss": 4.2377, |
| "lr": 0.0008602797202797203, |
| "step": 1500, |
| "tokens_trained": 0.737255104 |
| }, |
| { |
| "epoch": 0.42550173746542796, |
| "eval_loss": 1.052606463432312, |
| "eval_runtime": 20.5089, |
| "step": 1500, |
| "tokens_trained": 0.737255104 |
| }, |
| { |
| "epoch": 0.4260690731153819, |
| "grad_norm": 16.550657272338867, |
| "loss": 4.182, |
| "lr": 0.00086, |
| "step": 1502, |
| "tokens_trained": 0.738240848 |
| }, |
| { |
| "epoch": 0.4266364087653358, |
| "grad_norm": 24.4381046295166, |
| "loss": 4.2093, |
| "lr": 0.0008597202797202797, |
| "step": 1504, |
| "tokens_trained": 0.73922592 |
| }, |
| { |
| "epoch": 0.4272037444152897, |
| "grad_norm": 13.155163764953613, |
| "loss": 4.239, |
| "lr": 0.0008594405594405595, |
| "step": 1506, |
| "tokens_trained": 0.740208896 |
| }, |
| { |
| "epoch": 0.4277710800652436, |
| "grad_norm": 27.667949676513672, |
| "loss": 4.2607, |
| "lr": 0.0008591608391608392, |
| "step": 1508, |
| "tokens_trained": 0.741189312 |
| }, |
| { |
| "epoch": 0.4283384157151975, |
| "grad_norm": 35.897743225097656, |
| "loss": 4.2153, |
| "lr": 0.0008588811188811188, |
| "step": 1510, |
| "tokens_trained": 0.742170456 |
| }, |
| { |
| "epoch": 0.4289057513651514, |
| "grad_norm": 18.16407012939453, |
| "loss": 4.2753, |
| "lr": 0.0008586013986013986, |
| "step": 1512, |
| "tokens_trained": 0.743152504 |
| }, |
| { |
| "epoch": 0.42947308701510534, |
| "grad_norm": 27.447364807128906, |
| "loss": 4.2321, |
| "lr": 0.0008583216783216783, |
| "step": 1514, |
| "tokens_trained": 0.744139768 |
| }, |
| { |
| "epoch": 0.4300404226650592, |
| "grad_norm": 21.115859985351562, |
| "loss": 4.2048, |
| "lr": 0.0008580419580419581, |
| "step": 1516, |
| "tokens_trained": 0.745122368 |
| }, |
| { |
| "epoch": 0.43060775831501313, |
| "grad_norm": 5.949585914611816, |
| "loss": 4.1787, |
| "lr": 0.0008577622377622378, |
| "step": 1518, |
| "tokens_trained": 0.746104936 |
| }, |
| { |
| "epoch": 0.431175093964967, |
| "grad_norm": 6.631585121154785, |
| "loss": 4.2035, |
| "lr": 0.0008574825174825175, |
| "step": 1520, |
| "tokens_trained": 0.747086264 |
| }, |
| { |
| "epoch": 0.43174242961492093, |
| "grad_norm": 38.91585159301758, |
| "loss": 4.354, |
| "lr": 0.0008572027972027972, |
| "step": 1522, |
| "tokens_trained": 0.74806844 |
| }, |
| { |
| "epoch": 0.43230976526487486, |
| "grad_norm": 37.53727722167969, |
| "loss": 4.228, |
| "lr": 0.000856923076923077, |
| "step": 1524, |
| "tokens_trained": 0.749052432 |
| }, |
| { |
| "epoch": 0.4328771009148287, |
| "grad_norm": 19.87713623046875, |
| "loss": 4.2696, |
| "lr": 0.0008566433566433567, |
| "step": 1526, |
| "tokens_trained": 0.750037072 |
| }, |
| { |
| "epoch": 0.43344443656478265, |
| "grad_norm": 25.615995407104492, |
| "loss": 4.2676, |
| "lr": 0.0008563636363636363, |
| "step": 1528, |
| "tokens_trained": 0.751020584 |
| }, |
| { |
| "epoch": 0.4340117722147365, |
| "grad_norm": 16.643299102783203, |
| "loss": 4.201, |
| "lr": 0.0008560839160839161, |
| "step": 1530, |
| "tokens_trained": 0.75200224 |
| }, |
| { |
| "epoch": 0.43457910786469045, |
| "grad_norm": 16.207853317260742, |
| "loss": 4.1944, |
| "lr": 0.0008558041958041958, |
| "step": 1532, |
| "tokens_trained": 0.752981624 |
| }, |
| { |
| "epoch": 0.4351464435146444, |
| "grad_norm": 27.054973602294922, |
| "loss": 4.2188, |
| "lr": 0.0008555244755244756, |
| "step": 1534, |
| "tokens_trained": 0.753968464 |
| }, |
| { |
| "epoch": 0.43571377916459825, |
| "grad_norm": 33.468238830566406, |
| "loss": 4.2052, |
| "lr": 0.0008552447552447553, |
| "step": 1536, |
| "tokens_trained": 0.754950976 |
| }, |
| { |
| "epoch": 0.4362811148145522, |
| "grad_norm": 21.083576202392578, |
| "loss": 4.2514, |
| "lr": 0.000854965034965035, |
| "step": 1538, |
| "tokens_trained": 0.755938272 |
| }, |
| { |
| "epoch": 0.43684845046450604, |
| "grad_norm": 19.927122116088867, |
| "loss": 4.2493, |
| "lr": 0.0008546853146853147, |
| "step": 1540, |
| "tokens_trained": 0.756916784 |
| }, |
| { |
| "epoch": 0.43741578611445997, |
| "grad_norm": 22.105287551879883, |
| "loss": 4.2264, |
| "lr": 0.0008544055944055944, |
| "step": 1542, |
| "tokens_trained": 0.757901152 |
| }, |
| { |
| "epoch": 0.4379831217644139, |
| "grad_norm": 22.448705673217773, |
| "loss": 4.1987, |
| "lr": 0.0008541258741258742, |
| "step": 1544, |
| "tokens_trained": 0.758886048 |
| }, |
| { |
| "epoch": 0.43855045741436777, |
| "grad_norm": 17.740005493164062, |
| "loss": 4.1918, |
| "lr": 0.0008538461538461538, |
| "step": 1546, |
| "tokens_trained": 0.759864304 |
| }, |
| { |
| "epoch": 0.4391177930643217, |
| "grad_norm": 20.58041763305664, |
| "loss": 4.2144, |
| "lr": 0.0008535664335664336, |
| "step": 1548, |
| "tokens_trained": 0.760844312 |
| }, |
| { |
| "epoch": 0.43968512871427556, |
| "grad_norm": 21.937252044677734, |
| "loss": 4.2129, |
| "lr": 0.0008532867132867133, |
| "step": 1550, |
| "tokens_trained": 0.761827256 |
| }, |
| { |
| "epoch": 0.4402524643642295, |
| "grad_norm": 26.883426666259766, |
| "loss": 4.2244, |
| "lr": 0.000853006993006993, |
| "step": 1552, |
| "tokens_trained": 0.7628098 |
| }, |
| { |
| "epoch": 0.4408198000141834, |
| "grad_norm": 10.297266960144043, |
| "loss": 4.1724, |
| "lr": 0.0008527272727272728, |
| "step": 1554, |
| "tokens_trained": 0.763792488 |
| }, |
| { |
| "epoch": 0.4413871356641373, |
| "grad_norm": 12.119601249694824, |
| "loss": 4.1828, |
| "lr": 0.0008524475524475524, |
| "step": 1556, |
| "tokens_trained": 0.764769936 |
| }, |
| { |
| "epoch": 0.4419544713140912, |
| "grad_norm": 16.565885543823242, |
| "loss": 4.2113, |
| "lr": 0.0008521678321678322, |
| "step": 1558, |
| "tokens_trained": 0.765752376 |
| }, |
| { |
| "epoch": 0.4425218069640451, |
| "grad_norm": 18.860309600830078, |
| "loss": 4.1864, |
| "lr": 0.0008518881118881119, |
| "step": 1560, |
| "tokens_trained": 0.766736256 |
| }, |
| { |
| "epoch": 0.443089142613999, |
| "grad_norm": 4.049737453460693, |
| "loss": 4.2108, |
| "lr": 0.0008516083916083917, |
| "step": 1562, |
| "tokens_trained": 0.767720568 |
| }, |
| { |
| "epoch": 0.44365647826395294, |
| "grad_norm": 15.730945587158203, |
| "loss": 4.2339, |
| "lr": 0.0008513286713286713, |
| "step": 1564, |
| "tokens_trained": 0.768701288 |
| }, |
| { |
| "epoch": 0.4442238139139068, |
| "grad_norm": 18.64398956298828, |
| "loss": 4.2132, |
| "lr": 0.000851048951048951, |
| "step": 1566, |
| "tokens_trained": 0.769681336 |
| }, |
| { |
| "epoch": 0.44479114956386073, |
| "grad_norm": 22.01759147644043, |
| "loss": 4.2211, |
| "lr": 0.0008507692307692308, |
| "step": 1568, |
| "tokens_trained": 0.770661168 |
| }, |
| { |
| "epoch": 0.4453584852138146, |
| "grad_norm": 3.097306489944458, |
| "loss": 4.2114, |
| "lr": 0.0008504895104895105, |
| "step": 1570, |
| "tokens_trained": 0.7716424 |
| }, |
| { |
| "epoch": 0.44592582086376853, |
| "grad_norm": 35.901546478271484, |
| "loss": 4.3, |
| "lr": 0.0008502097902097903, |
| "step": 1572, |
| "tokens_trained": 0.772627536 |
| }, |
| { |
| "epoch": 0.44649315651372246, |
| "grad_norm": 20.762710571289062, |
| "loss": 4.2465, |
| "lr": 0.0008499300699300699, |
| "step": 1574, |
| "tokens_trained": 0.77361008 |
| }, |
| { |
| "epoch": 0.4470604921636763, |
| "grad_norm": 13.54304027557373, |
| "loss": 4.221, |
| "lr": 0.0008496503496503497, |
| "step": 1576, |
| "tokens_trained": 0.774591184 |
| }, |
| { |
| "epoch": 0.44762782781363025, |
| "grad_norm": 18.83641242980957, |
| "loss": 4.2228, |
| "lr": 0.0008493706293706294, |
| "step": 1578, |
| "tokens_trained": 0.775574136 |
| }, |
| { |
| "epoch": 0.4481951634635841, |
| "grad_norm": 12.294941902160645, |
| "loss": 4.1768, |
| "lr": 0.0008490909090909091, |
| "step": 1580, |
| "tokens_trained": 0.776554752 |
| }, |
| { |
| "epoch": 0.44876249911353805, |
| "grad_norm": 5.768923759460449, |
| "loss": 4.2255, |
| "lr": 0.0008488111888111888, |
| "step": 1582, |
| "tokens_trained": 0.777539368 |
| }, |
| { |
| "epoch": 0.449329834763492, |
| "grad_norm": 7.9961137771606445, |
| "loss": 4.2218, |
| "lr": 0.0008485314685314685, |
| "step": 1584, |
| "tokens_trained": 0.778522344 |
| }, |
| { |
| "epoch": 0.44989717041344585, |
| "grad_norm": 22.005645751953125, |
| "loss": 4.2452, |
| "lr": 0.0008482517482517483, |
| "step": 1586, |
| "tokens_trained": 0.77950768 |
| }, |
| { |
| "epoch": 0.45046450606339977, |
| "grad_norm": 27.313426971435547, |
| "loss": 4.1875, |
| "lr": 0.000847972027972028, |
| "step": 1588, |
| "tokens_trained": 0.780490984 |
| }, |
| { |
| "epoch": 0.45103184171335364, |
| "grad_norm": 10.344687461853027, |
| "loss": 4.2356, |
| "lr": 0.0008476923076923078, |
| "step": 1590, |
| "tokens_trained": 0.781469 |
| }, |
| { |
| "epoch": 0.45159917736330757, |
| "grad_norm": 27.348726272583008, |
| "loss": 4.2962, |
| "lr": 0.0008474125874125874, |
| "step": 1592, |
| "tokens_trained": 0.782450304 |
| }, |
| { |
| "epoch": 0.4521665130132615, |
| "grad_norm": 32.965911865234375, |
| "loss": 4.2736, |
| "lr": 0.0008471328671328671, |
| "step": 1594, |
| "tokens_trained": 0.783431416 |
| }, |
| { |
| "epoch": 0.45273384866321537, |
| "grad_norm": 7.752636909484863, |
| "loss": 4.2074, |
| "lr": 0.0008468531468531469, |
| "step": 1596, |
| "tokens_trained": 0.784409568 |
| }, |
| { |
| "epoch": 0.4533011843131693, |
| "grad_norm": 38.85223388671875, |
| "loss": 4.3261, |
| "lr": 0.0008465734265734266, |
| "step": 1598, |
| "tokens_trained": 0.785399368 |
| }, |
| { |
| "epoch": 0.45386851996312316, |
| "grad_norm": 38.017967224121094, |
| "loss": 4.2646, |
| "lr": 0.0008462937062937063, |
| "step": 1600, |
| "tokens_trained": 0.786376072 |
| }, |
| { |
| "epoch": 0.4544358556130771, |
| "grad_norm": 7.856576442718506, |
| "loss": 4.191, |
| "lr": 0.000846013986013986, |
| "step": 1602, |
| "tokens_trained": 0.787362072 |
| }, |
| { |
| "epoch": 0.455003191263031, |
| "grad_norm": 37.902870178222656, |
| "loss": 4.2651, |
| "lr": 0.0008457342657342658, |
| "step": 1604, |
| "tokens_trained": 0.788345104 |
| }, |
| { |
| "epoch": 0.4555705269129849, |
| "grad_norm": 7.724793434143066, |
| "loss": 4.1994, |
| "lr": 0.0008454545454545455, |
| "step": 1606, |
| "tokens_trained": 0.7893314 |
| }, |
| { |
| "epoch": 0.4561378625629388, |
| "grad_norm": 26.484699249267578, |
| "loss": 4.2276, |
| "lr": 0.0008451748251748252, |
| "step": 1608, |
| "tokens_trained": 0.790309344 |
| }, |
| { |
| "epoch": 0.4567051982128927, |
| "grad_norm": 23.137874603271484, |
| "loss": 4.2082, |
| "lr": 0.0008448951048951049, |
| "step": 1610, |
| "tokens_trained": 0.791295784 |
| }, |
| { |
| "epoch": 0.4572725338628466, |
| "grad_norm": 13.902606964111328, |
| "loss": 4.2035, |
| "lr": 0.0008446153846153846, |
| "step": 1612, |
| "tokens_trained": 0.79228076 |
| }, |
| { |
| "epoch": 0.45783986951280053, |
| "grad_norm": 8.438498497009277, |
| "loss": 4.1713, |
| "lr": 0.0008443356643356644, |
| "step": 1614, |
| "tokens_trained": 0.793265456 |
| }, |
| { |
| "epoch": 0.4584072051627544, |
| "grad_norm": 11.60899829864502, |
| "loss": 4.1971, |
| "lr": 0.0008440559440559441, |
| "step": 1616, |
| "tokens_trained": 0.794245896 |
| }, |
| { |
| "epoch": 0.45897454081270833, |
| "grad_norm": 19.33312225341797, |
| "loss": 4.2328, |
| "lr": 0.0008437762237762238, |
| "step": 1618, |
| "tokens_trained": 0.795229016 |
| }, |
| { |
| "epoch": 0.4595418764626622, |
| "grad_norm": 16.45014190673828, |
| "loss": 4.2277, |
| "lr": 0.0008434965034965035, |
| "step": 1620, |
| "tokens_trained": 0.79620792 |
| }, |
| { |
| "epoch": 0.46010921211261613, |
| "grad_norm": 9.818867683410645, |
| "loss": 4.1494, |
| "lr": 0.0008432167832167832, |
| "step": 1622, |
| "tokens_trained": 0.797192352 |
| }, |
| { |
| "epoch": 0.46067654776257005, |
| "grad_norm": 7.920058250427246, |
| "loss": 4.2027, |
| "lr": 0.000842937062937063, |
| "step": 1624, |
| "tokens_trained": 0.798174104 |
| }, |
| { |
| "epoch": 0.46096021558754696, |
| "eval_loss": 1.044265627861023, |
| "eval_runtime": 20.5617, |
| "step": 1625, |
| "tokens_trained": 0.798668072 |
| }, |
| { |
| "epoch": 0.4612438834125239, |
| "grad_norm": 10.734235763549805, |
| "loss": 4.1505, |
| "lr": 0.0008426573426573427, |
| "step": 1626, |
| "tokens_trained": 0.799160304 |
| }, |
| { |
| "epoch": 0.46181121906247785, |
| "grad_norm": 23.376392364501953, |
| "loss": 4.195, |
| "lr": 0.0008423776223776224, |
| "step": 1628, |
| "tokens_trained": 0.800144144 |
| }, |
| { |
| "epoch": 0.4623785547124317, |
| "grad_norm": 23.567371368408203, |
| "loss": 4.2367, |
| "lr": 0.0008420979020979021, |
| "step": 1630, |
| "tokens_trained": 0.801131184 |
| }, |
| { |
| "epoch": 0.46294589036238565, |
| "grad_norm": 19.271820068359375, |
| "loss": 4.1899, |
| "lr": 0.0008418181818181819, |
| "step": 1632, |
| "tokens_trained": 0.802111296 |
| }, |
| { |
| "epoch": 0.4635132260123396, |
| "grad_norm": 17.468698501586914, |
| "loss": 4.1941, |
| "lr": 0.0008415384615384616, |
| "step": 1634, |
| "tokens_trained": 0.803095112 |
| }, |
| { |
| "epoch": 0.46408056166229344, |
| "grad_norm": 22.298749923706055, |
| "loss": 4.2083, |
| "lr": 0.0008412587412587412, |
| "step": 1636, |
| "tokens_trained": 0.804080456 |
| }, |
| { |
| "epoch": 0.46464789731224737, |
| "grad_norm": 12.506179809570312, |
| "loss": 4.1953, |
| "lr": 0.000840979020979021, |
| "step": 1638, |
| "tokens_trained": 0.805062464 |
| }, |
| { |
| "epoch": 0.46521523296220124, |
| "grad_norm": 11.819656372070312, |
| "loss": 4.2047, |
| "lr": 0.0008406993006993006, |
| "step": 1640, |
| "tokens_trained": 0.806045504 |
| }, |
| { |
| "epoch": 0.46578256861215517, |
| "grad_norm": 15.925740242004395, |
| "loss": 4.1565, |
| "lr": 0.0008404195804195805, |
| "step": 1642, |
| "tokens_trained": 0.80702736 |
| }, |
| { |
| "epoch": 0.4663499042621091, |
| "grad_norm": 15.869892120361328, |
| "loss": 4.2134, |
| "lr": 0.0008401398601398602, |
| "step": 1644, |
| "tokens_trained": 0.808009192 |
| }, |
| { |
| "epoch": 0.46691723991206296, |
| "grad_norm": 10.851021766662598, |
| "loss": 4.2041, |
| "lr": 0.0008398601398601399, |
| "step": 1646, |
| "tokens_trained": 0.808994728 |
| }, |
| { |
| "epoch": 0.4674845755620169, |
| "grad_norm": 8.271230697631836, |
| "loss": 4.1739, |
| "lr": 0.0008395804195804196, |
| "step": 1648, |
| "tokens_trained": 0.809976448 |
| }, |
| { |
| "epoch": 0.46805191121197076, |
| "grad_norm": 13.768092155456543, |
| "loss": 4.1761, |
| "lr": 0.0008393006993006993, |
| "step": 1650, |
| "tokens_trained": 0.810958392 |
| }, |
| { |
| "epoch": 0.4686192468619247, |
| "grad_norm": 7.760485649108887, |
| "loss": 4.1826, |
| "lr": 0.0008390209790209791, |
| "step": 1652, |
| "tokens_trained": 0.81194136 |
| }, |
| { |
| "epoch": 0.4691865825118786, |
| "grad_norm": 13.28488540649414, |
| "loss": 4.1659, |
| "lr": 0.0008387412587412587, |
| "step": 1654, |
| "tokens_trained": 0.812924984 |
| }, |
| { |
| "epoch": 0.4697539181618325, |
| "grad_norm": 10.466367721557617, |
| "loss": 4.1432, |
| "lr": 0.0008384615384615385, |
| "step": 1656, |
| "tokens_trained": 0.813907424 |
| }, |
| { |
| "epoch": 0.4703212538117864, |
| "grad_norm": 15.40854549407959, |
| "loss": 4.1625, |
| "lr": 0.0008381818181818181, |
| "step": 1658, |
| "tokens_trained": 0.814888712 |
| }, |
| { |
| "epoch": 0.4708885894617403, |
| "grad_norm": 20.580612182617188, |
| "loss": 4.1636, |
| "lr": 0.000837902097902098, |
| "step": 1660, |
| "tokens_trained": 0.815869152 |
| }, |
| { |
| "epoch": 0.4714559251116942, |
| "grad_norm": 14.908403396606445, |
| "loss": 4.1763, |
| "lr": 0.0008376223776223776, |
| "step": 1662, |
| "tokens_trained": 0.816852664 |
| }, |
| { |
| "epoch": 0.47202326076164813, |
| "grad_norm": 10.217529296875, |
| "loss": 4.1934, |
| "lr": 0.0008373426573426573, |
| "step": 1664, |
| "tokens_trained": 0.817832792 |
| }, |
| { |
| "epoch": 0.472590596411602, |
| "grad_norm": 15.74150276184082, |
| "loss": 4.1714, |
| "lr": 0.0008370629370629371, |
| "step": 1666, |
| "tokens_trained": 0.81881728 |
| }, |
| { |
| "epoch": 0.47315793206155593, |
| "grad_norm": 15.39499282836914, |
| "loss": 4.2005, |
| "lr": 0.0008367832167832168, |
| "step": 1668, |
| "tokens_trained": 0.819800824 |
| }, |
| { |
| "epoch": 0.4737252677115098, |
| "grad_norm": 11.585809707641602, |
| "loss": 4.136, |
| "lr": 0.0008365034965034966, |
| "step": 1670, |
| "tokens_trained": 0.8207856 |
| }, |
| { |
| "epoch": 0.4742926033614637, |
| "grad_norm": 16.053237915039062, |
| "loss": 4.1827, |
| "lr": 0.0008362237762237762, |
| "step": 1672, |
| "tokens_trained": 0.821766576 |
| }, |
| { |
| "epoch": 0.47485993901141765, |
| "grad_norm": 9.23779582977295, |
| "loss": 4.1159, |
| "lr": 0.000835944055944056, |
| "step": 1674, |
| "tokens_trained": 0.822749696 |
| }, |
| { |
| "epoch": 0.4754272746613715, |
| "grad_norm": 11.395891189575195, |
| "loss": 4.17, |
| "lr": 0.0008356643356643356, |
| "step": 1676, |
| "tokens_trained": 0.82373032 |
| }, |
| { |
| "epoch": 0.47599461031132545, |
| "grad_norm": 17.745365142822266, |
| "loss": 4.1696, |
| "lr": 0.0008353846153846154, |
| "step": 1678, |
| "tokens_trained": 0.824712192 |
| }, |
| { |
| "epoch": 0.4765619459612793, |
| "grad_norm": 6.7816572189331055, |
| "loss": 4.1933, |
| "lr": 0.0008351048951048951, |
| "step": 1680, |
| "tokens_trained": 0.825691208 |
| }, |
| { |
| "epoch": 0.47712928161123325, |
| "grad_norm": 20.552772521972656, |
| "loss": 4.1625, |
| "lr": 0.0008348251748251748, |
| "step": 1682, |
| "tokens_trained": 0.826672584 |
| }, |
| { |
| "epoch": 0.4776966172611872, |
| "grad_norm": 21.632352828979492, |
| "loss": 4.2061, |
| "lr": 0.0008345454545454546, |
| "step": 1684, |
| "tokens_trained": 0.827654368 |
| }, |
| { |
| "epoch": 0.47826395291114104, |
| "grad_norm": 17.754596710205078, |
| "loss": 4.222, |
| "lr": 0.0008342657342657343, |
| "step": 1686, |
| "tokens_trained": 0.828639392 |
| }, |
| { |
| "epoch": 0.47883128856109497, |
| "grad_norm": 20.73906707763672, |
| "loss": 4.1679, |
| "lr": 0.0008339860139860141, |
| "step": 1688, |
| "tokens_trained": 0.829627232 |
| }, |
| { |
| "epoch": 0.47939862421104884, |
| "grad_norm": 28.157238006591797, |
| "loss": 4.1658, |
| "lr": 0.0008337062937062937, |
| "step": 1690, |
| "tokens_trained": 0.830610904 |
| }, |
| { |
| "epoch": 0.47996595986100277, |
| "grad_norm": 12.728020668029785, |
| "loss": 4.1892, |
| "lr": 0.0008334265734265734, |
| "step": 1692, |
| "tokens_trained": 0.831602544 |
| }, |
| { |
| "epoch": 0.4805332955109567, |
| "grad_norm": 20.21622657775879, |
| "loss": 4.1453, |
| "lr": 0.0008331468531468531, |
| "step": 1694, |
| "tokens_trained": 0.832584656 |
| }, |
| { |
| "epoch": 0.48110063116091056, |
| "grad_norm": 18.5329647064209, |
| "loss": 4.2145, |
| "lr": 0.0008328671328671329, |
| "step": 1696, |
| "tokens_trained": 0.833570472 |
| }, |
| { |
| "epoch": 0.4816679668108645, |
| "grad_norm": 12.47617244720459, |
| "loss": 4.1944, |
| "lr": 0.0008325874125874126, |
| "step": 1698, |
| "tokens_trained": 0.834556104 |
| }, |
| { |
| "epoch": 0.48223530246081836, |
| "grad_norm": 21.34851837158203, |
| "loss": 4.1754, |
| "lr": 0.0008323076923076923, |
| "step": 1700, |
| "tokens_trained": 0.835540592 |
| }, |
| { |
| "epoch": 0.4828026381107723, |
| "grad_norm": 13.20995807647705, |
| "loss": 4.1657, |
| "lr": 0.000832027972027972, |
| "step": 1702, |
| "tokens_trained": 0.836525136 |
| }, |
| { |
| "epoch": 0.4833699737607262, |
| "grad_norm": 16.77725601196289, |
| "loss": 4.1905, |
| "lr": 0.0008317482517482518, |
| "step": 1704, |
| "tokens_trained": 0.837509224 |
| }, |
| { |
| "epoch": 0.4839373094106801, |
| "grad_norm": 15.17611312866211, |
| "loss": 4.1823, |
| "lr": 0.0008314685314685315, |
| "step": 1706, |
| "tokens_trained": 0.838492472 |
| }, |
| { |
| "epoch": 0.484504645060634, |
| "grad_norm": 13.06942081451416, |
| "loss": 4.1732, |
| "lr": 0.0008311888111888112, |
| "step": 1708, |
| "tokens_trained": 0.839471696 |
| }, |
| { |
| "epoch": 0.4850719807105879, |
| "grad_norm": 10.456578254699707, |
| "loss": 4.1862, |
| "lr": 0.0008309090909090909, |
| "step": 1710, |
| "tokens_trained": 0.840452808 |
| }, |
| { |
| "epoch": 0.4856393163605418, |
| "grad_norm": 13.80197525024414, |
| "loss": 4.1663, |
| "lr": 0.0008306293706293706, |
| "step": 1712, |
| "tokens_trained": 0.841434224 |
| }, |
| { |
| "epoch": 0.48620665201049573, |
| "grad_norm": 20.076507568359375, |
| "loss": 4.1436, |
| "lr": 0.0008303496503496504, |
| "step": 1714, |
| "tokens_trained": 0.842415304 |
| }, |
| { |
| "epoch": 0.4867739876604496, |
| "grad_norm": 5.629086971282959, |
| "loss": 4.149, |
| "lr": 0.00083006993006993, |
| "step": 1716, |
| "tokens_trained": 0.84339416 |
| }, |
| { |
| "epoch": 0.48734132331040353, |
| "grad_norm": 13.932148933410645, |
| "loss": 4.1785, |
| "lr": 0.0008297902097902098, |
| "step": 1718, |
| "tokens_trained": 0.844380472 |
| }, |
| { |
| "epoch": 0.4879086589603574, |
| "grad_norm": 18.951047897338867, |
| "loss": 4.216, |
| "lr": 0.0008295104895104895, |
| "step": 1720, |
| "tokens_trained": 0.845366896 |
| }, |
| { |
| "epoch": 0.4884759946103113, |
| "grad_norm": 21.042476654052734, |
| "loss": 4.1634, |
| "lr": 0.0008292307692307693, |
| "step": 1722, |
| "tokens_trained": 0.846344792 |
| }, |
| { |
| "epoch": 0.48904333026026525, |
| "grad_norm": 23.94416618347168, |
| "loss": 4.1613, |
| "lr": 0.000828951048951049, |
| "step": 1724, |
| "tokens_trained": 0.847323608 |
| }, |
| { |
| "epoch": 0.4896106659102191, |
| "grad_norm": 5.057071208953857, |
| "loss": 4.1729, |
| "lr": 0.0008286713286713287, |
| "step": 1726, |
| "tokens_trained": 0.848304856 |
| }, |
| { |
| "epoch": 0.49017800156017305, |
| "grad_norm": 18.068674087524414, |
| "loss": 4.2194, |
| "lr": 0.0008283916083916084, |
| "step": 1728, |
| "tokens_trained": 0.849287712 |
| }, |
| { |
| "epoch": 0.4907453372101269, |
| "grad_norm": 11.621233940124512, |
| "loss": 4.2232, |
| "lr": 0.000828111888111888, |
| "step": 1730, |
| "tokens_trained": 0.850268968 |
| }, |
| { |
| "epoch": 0.49131267286008085, |
| "grad_norm": 12.939676284790039, |
| "loss": 4.2003, |
| "lr": 0.0008278321678321679, |
| "step": 1732, |
| "tokens_trained": 0.851256528 |
| }, |
| { |
| "epoch": 0.49188000851003477, |
| "grad_norm": 10.638157844543457, |
| "loss": 4.1975, |
| "lr": 0.0008275524475524475, |
| "step": 1734, |
| "tokens_trained": 0.852240824 |
| }, |
| { |
| "epoch": 0.49244734415998864, |
| "grad_norm": 6.2671003341674805, |
| "loss": 4.1617, |
| "lr": 0.0008272727272727273, |
| "step": 1736, |
| "tokens_trained": 0.853224768 |
| }, |
| { |
| "epoch": 0.49301467980994257, |
| "grad_norm": 12.318375587463379, |
| "loss": 4.1939, |
| "lr": 0.000826993006993007, |
| "step": 1738, |
| "tokens_trained": 0.8542062 |
| }, |
| { |
| "epoch": 0.49358201545989644, |
| "grad_norm": 17.275348663330078, |
| "loss": 4.1911, |
| "lr": 0.0008267132867132868, |
| "step": 1740, |
| "tokens_trained": 0.855192024 |
| }, |
| { |
| "epoch": 0.49414935110985037, |
| "grad_norm": 11.122747421264648, |
| "loss": 4.17, |
| "lr": 0.0008264335664335665, |
| "step": 1742, |
| "tokens_trained": 0.856172136 |
| }, |
| { |
| "epoch": 0.4947166867598043, |
| "grad_norm": 6.223485469818115, |
| "loss": 4.1774, |
| "lr": 0.0008261538461538461, |
| "step": 1744, |
| "tokens_trained": 0.857156312 |
| }, |
| { |
| "epoch": 0.49528402240975816, |
| "grad_norm": 14.62152099609375, |
| "loss": 4.1607, |
| "lr": 0.0008258741258741259, |
| "step": 1746, |
| "tokens_trained": 0.858140152 |
| }, |
| { |
| "epoch": 0.4958513580597121, |
| "grad_norm": 15.991989135742188, |
| "loss": 4.1825, |
| "lr": 0.0008255944055944055, |
| "step": 1748, |
| "tokens_trained": 0.85912524 |
| }, |
| { |
| "epoch": 0.49641869370966596, |
| "grad_norm": 28.88335418701172, |
| "loss": 4.2244, |
| "lr": 0.0008253146853146854, |
| "step": 1750, |
| "tokens_trained": 0.860105784 |
| }, |
| { |
| "epoch": 0.49641869370966596, |
| "eval_loss": 1.061833143234253, |
| "eval_runtime": 20.4841, |
| "step": 1750, |
| "tokens_trained": 0.860105784 |
| }, |
| { |
| "epoch": 0.4969860293596199, |
| "grad_norm": 14.708030700683594, |
| "loss": 4.2036, |
| "lr": 0.000825034965034965, |
| "step": 1752, |
| "tokens_trained": 0.861089272 |
| }, |
| { |
| "epoch": 0.4975533650095738, |
| "grad_norm": 24.67535400390625, |
| "loss": 4.2405, |
| "lr": 0.0008247552447552448, |
| "step": 1754, |
| "tokens_trained": 0.862066656 |
| }, |
| { |
| "epoch": 0.4981207006595277, |
| "grad_norm": 10.923722267150879, |
| "loss": 4.1713, |
| "lr": 0.0008244755244755245, |
| "step": 1756, |
| "tokens_trained": 0.863049256 |
| }, |
| { |
| "epoch": 0.4986880363094816, |
| "grad_norm": 8.88796615600586, |
| "loss": 4.1834, |
| "lr": 0.0008241958041958042, |
| "step": 1758, |
| "tokens_trained": 0.864029352 |
| }, |
| { |
| "epoch": 0.4992553719594355, |
| "grad_norm": 34.90485382080078, |
| "loss": 4.2338, |
| "lr": 0.000823916083916084, |
| "step": 1760, |
| "tokens_trained": 0.865013008 |
| }, |
| { |
| "epoch": 0.4998227076093894, |
| "grad_norm": 36.34440612792969, |
| "loss": 4.2012, |
| "lr": 0.0008236363636363636, |
| "step": 1762, |
| "tokens_trained": 0.86599204 |
| }, |
| { |
| "epoch": 0.5003900432593433, |
| "grad_norm": 27.913984298706055, |
| "loss": 4.269, |
| "lr": 0.0008233566433566434, |
| "step": 1764, |
| "tokens_trained": 0.866975456 |
| }, |
| { |
| "epoch": 0.5009573789092973, |
| "grad_norm": 28.236122131347656, |
| "loss": 4.2413, |
| "lr": 0.000823076923076923, |
| "step": 1766, |
| "tokens_trained": 0.867963912 |
| }, |
| { |
| "epoch": 0.5015247145592511, |
| "grad_norm": 18.181337356567383, |
| "loss": 4.2088, |
| "lr": 0.0008227972027972029, |
| "step": 1768, |
| "tokens_trained": 0.86894656 |
| }, |
| { |
| "epoch": 0.502092050209205, |
| "grad_norm": 17.403850555419922, |
| "loss": 4.1854, |
| "lr": 0.0008225174825174825, |
| "step": 1770, |
| "tokens_trained": 0.869932592 |
| }, |
| { |
| "epoch": 0.5026593858591589, |
| "grad_norm": 15.002805709838867, |
| "loss": 4.1897, |
| "lr": 0.0008222377622377622, |
| "step": 1772, |
| "tokens_trained": 0.87091592 |
| }, |
| { |
| "epoch": 0.5032267215091129, |
| "grad_norm": 6.787586688995361, |
| "loss": 4.1625, |
| "lr": 0.000821958041958042, |
| "step": 1774, |
| "tokens_trained": 0.871899144 |
| }, |
| { |
| "epoch": 0.5037940571590668, |
| "grad_norm": 6.255197525024414, |
| "loss": 4.1682, |
| "lr": 0.0008216783216783217, |
| "step": 1776, |
| "tokens_trained": 0.872874824 |
| }, |
| { |
| "epoch": 0.5043613928090206, |
| "grad_norm": 25.828433990478516, |
| "loss": 4.2354, |
| "lr": 0.0008213986013986015, |
| "step": 1778, |
| "tokens_trained": 0.873858424 |
| }, |
| { |
| "epoch": 0.5049287284589745, |
| "grad_norm": 20.261323928833008, |
| "loss": 4.2373, |
| "lr": 0.0008211188811188811, |
| "step": 1780, |
| "tokens_trained": 0.87483884 |
| }, |
| { |
| "epoch": 0.5054960641089284, |
| "grad_norm": 9.670608520507812, |
| "loss": 4.191, |
| "lr": 0.0008208391608391609, |
| "step": 1782, |
| "tokens_trained": 0.875820792 |
| }, |
| { |
| "epoch": 0.5060633997588824, |
| "grad_norm": 23.33945655822754, |
| "loss": 4.2319, |
| "lr": 0.0008205594405594405, |
| "step": 1784, |
| "tokens_trained": 0.876804368 |
| }, |
| { |
| "epoch": 0.5066307354088363, |
| "grad_norm": 32.22544479370117, |
| "loss": 4.1799, |
| "lr": 0.0008202797202797203, |
| "step": 1786, |
| "tokens_trained": 0.877784816 |
| }, |
| { |
| "epoch": 0.5071980710587901, |
| "grad_norm": 21.048891067504883, |
| "loss": 4.2635, |
| "lr": 0.00082, |
| "step": 1788, |
| "tokens_trained": 0.878768256 |
| }, |
| { |
| "epoch": 0.507765406708744, |
| "grad_norm": 28.73198699951172, |
| "loss": 4.2436, |
| "lr": 0.0008197202797202797, |
| "step": 1790, |
| "tokens_trained": 0.879751288 |
| }, |
| { |
| "epoch": 0.508332742358698, |
| "grad_norm": 27.627851486206055, |
| "loss": 4.2118, |
| "lr": 0.0008194405594405595, |
| "step": 1792, |
| "tokens_trained": 0.880732072 |
| }, |
| { |
| "epoch": 0.5089000780086519, |
| "grad_norm": 21.16539192199707, |
| "loss": 4.2123, |
| "lr": 0.0008191608391608392, |
| "step": 1794, |
| "tokens_trained": 0.88171332 |
| }, |
| { |
| "epoch": 0.5094674136586058, |
| "grad_norm": 11.402868270874023, |
| "loss": 4.1524, |
| "lr": 0.000818881118881119, |
| "step": 1796, |
| "tokens_trained": 0.882695464 |
| }, |
| { |
| "epoch": 0.5100347493085596, |
| "grad_norm": 11.958270072937012, |
| "loss": 4.2091, |
| "lr": 0.0008186013986013986, |
| "step": 1798, |
| "tokens_trained": 0.883678736 |
| }, |
| { |
| "epoch": 0.5106020849585136, |
| "grad_norm": 15.902670860290527, |
| "loss": 4.1687, |
| "lr": 0.0008183216783216783, |
| "step": 1800, |
| "tokens_trained": 0.8846604 |
| }, |
| { |
| "epoch": 0.5111694206084675, |
| "grad_norm": 19.732566833496094, |
| "loss": 4.1302, |
| "lr": 0.000818041958041958, |
| "step": 1802, |
| "tokens_trained": 0.885641384 |
| }, |
| { |
| "epoch": 0.5117367562584214, |
| "grad_norm": 15.119332313537598, |
| "loss": 4.1546, |
| "lr": 0.0008177622377622378, |
| "step": 1804, |
| "tokens_trained": 0.8866262 |
| }, |
| { |
| "epoch": 0.5123040919083753, |
| "grad_norm": 9.641027450561523, |
| "loss": 4.1748, |
| "lr": 0.0008174825174825175, |
| "step": 1806, |
| "tokens_trained": 0.887604504 |
| }, |
| { |
| "epoch": 0.5128714275583292, |
| "grad_norm": 11.642073631286621, |
| "loss": 4.1879, |
| "lr": 0.0008172027972027972, |
| "step": 1808, |
| "tokens_trained": 0.888584152 |
| }, |
| { |
| "epoch": 0.5134387632082831, |
| "grad_norm": 12.05164909362793, |
| "loss": 4.1332, |
| "lr": 0.000816923076923077, |
| "step": 1810, |
| "tokens_trained": 0.889568448 |
| }, |
| { |
| "epoch": 0.514006098858237, |
| "grad_norm": 13.54423999786377, |
| "loss": 4.1398, |
| "lr": 0.0008166433566433567, |
| "step": 1812, |
| "tokens_trained": 0.890550896 |
| }, |
| { |
| "epoch": 0.5145734345081909, |
| "grad_norm": 21.94988441467285, |
| "loss": 4.1523, |
| "lr": 0.0008163636363636364, |
| "step": 1814, |
| "tokens_trained": 0.89153436 |
| }, |
| { |
| "epoch": 0.5151407701581449, |
| "grad_norm": 8.613338470458984, |
| "loss": 4.1428, |
| "lr": 0.0008160839160839161, |
| "step": 1816, |
| "tokens_trained": 0.89251064 |
| }, |
| { |
| "epoch": 0.5157081058080987, |
| "grad_norm": 27.448917388916016, |
| "loss": 4.2014, |
| "lr": 0.0008158041958041958, |
| "step": 1818, |
| "tokens_trained": 0.893493904 |
| }, |
| { |
| "epoch": 0.5162754414580526, |
| "grad_norm": 16.226577758789062, |
| "loss": 4.1787, |
| "lr": 0.0008155244755244755, |
| "step": 1820, |
| "tokens_trained": 0.894476344 |
| }, |
| { |
| "epoch": 0.5168427771080065, |
| "grad_norm": 16.967891693115234, |
| "loss": 4.1898, |
| "lr": 0.0008152447552447553, |
| "step": 1822, |
| "tokens_trained": 0.895460064 |
| }, |
| { |
| "epoch": 0.5174101127579604, |
| "grad_norm": 13.723483085632324, |
| "loss": 4.2058, |
| "lr": 0.000814965034965035, |
| "step": 1824, |
| "tokens_trained": 0.896443272 |
| }, |
| { |
| "epoch": 0.5179774484079144, |
| "grad_norm": 16.789636611938477, |
| "loss": 4.1669, |
| "lr": 0.0008146853146853147, |
| "step": 1826, |
| "tokens_trained": 0.897426712 |
| }, |
| { |
| "epoch": 0.5185447840578682, |
| "grad_norm": 11.26768684387207, |
| "loss": 4.1401, |
| "lr": 0.0008144055944055944, |
| "step": 1828, |
| "tokens_trained": 0.89840672 |
| }, |
| { |
| "epoch": 0.5191121197078221, |
| "grad_norm": 9.25829029083252, |
| "loss": 4.1581, |
| "lr": 0.0008141258741258742, |
| "step": 1830, |
| "tokens_trained": 0.89939132 |
| }, |
| { |
| "epoch": 0.519679455357776, |
| "grad_norm": 12.006930351257324, |
| "loss": 4.1768, |
| "lr": 0.0008138461538461539, |
| "step": 1832, |
| "tokens_trained": 0.900373704 |
| }, |
| { |
| "epoch": 0.52024679100773, |
| "grad_norm": 18.766008377075195, |
| "loss": 4.1419, |
| "lr": 0.0008135664335664336, |
| "step": 1834, |
| "tokens_trained": 0.901356176 |
| }, |
| { |
| "epoch": 0.5208141266576839, |
| "grad_norm": 17.483421325683594, |
| "loss": 4.1382, |
| "lr": 0.0008132867132867133, |
| "step": 1836, |
| "tokens_trained": 0.902344088 |
| }, |
| { |
| "epoch": 0.5213814623076377, |
| "grad_norm": 10.484652519226074, |
| "loss": 4.1571, |
| "lr": 0.000813006993006993, |
| "step": 1838, |
| "tokens_trained": 0.903328896 |
| }, |
| { |
| "epoch": 0.5219487979575916, |
| "grad_norm": 13.653974533081055, |
| "loss": 4.1638, |
| "lr": 0.0008127272727272728, |
| "step": 1840, |
| "tokens_trained": 0.904309368 |
| }, |
| { |
| "epoch": 0.5225161336075456, |
| "grad_norm": 12.48718547821045, |
| "loss": 4.1226, |
| "lr": 0.0008124475524475524, |
| "step": 1842, |
| "tokens_trained": 0.905293112 |
| }, |
| { |
| "epoch": 0.5230834692574995, |
| "grad_norm": 8.086355209350586, |
| "loss": 4.1303, |
| "lr": 0.0008121678321678322, |
| "step": 1844, |
| "tokens_trained": 0.906275632 |
| }, |
| { |
| "epoch": 0.5236508049074534, |
| "grad_norm": 10.940073013305664, |
| "loss": 4.1634, |
| "lr": 0.0008118881118881119, |
| "step": 1846, |
| "tokens_trained": 0.907255808 |
| }, |
| { |
| "epoch": 0.5242181405574072, |
| "grad_norm": 13.844099044799805, |
| "loss": 4.1505, |
| "lr": 0.0008116083916083917, |
| "step": 1848, |
| "tokens_trained": 0.908238664 |
| }, |
| { |
| "epoch": 0.5247854762073612, |
| "grad_norm": 6.305738925933838, |
| "loss": 4.1463, |
| "lr": 0.0008113286713286714, |
| "step": 1850, |
| "tokens_trained": 0.909221424 |
| }, |
| { |
| "epoch": 0.5253528118573151, |
| "grad_norm": 8.957951545715332, |
| "loss": 4.1785, |
| "lr": 0.000811048951048951, |
| "step": 1852, |
| "tokens_trained": 0.910204472 |
| }, |
| { |
| "epoch": 0.525920147507269, |
| "grad_norm": 12.665373802185059, |
| "loss": 4.1776, |
| "lr": 0.0008107692307692308, |
| "step": 1854, |
| "tokens_trained": 0.911186456 |
| }, |
| { |
| "epoch": 0.5264874831572229, |
| "grad_norm": 13.7921781539917, |
| "loss": 4.2058, |
| "lr": 0.0008104895104895104, |
| "step": 1856, |
| "tokens_trained": 0.912163912 |
| }, |
| { |
| "epoch": 0.5270548188071768, |
| "grad_norm": 18.400495529174805, |
| "loss": 4.1378, |
| "lr": 0.0008102097902097903, |
| "step": 1858, |
| "tokens_trained": 0.913143416 |
| }, |
| { |
| "epoch": 0.5276221544571307, |
| "grad_norm": 10.095234870910645, |
| "loss": 4.1673, |
| "lr": 0.0008099300699300699, |
| "step": 1860, |
| "tokens_trained": 0.914125056 |
| }, |
| { |
| "epoch": 0.5281894901070846, |
| "grad_norm": 9.396644592285156, |
| "loss": 4.1226, |
| "lr": 0.0008096503496503497, |
| "step": 1862, |
| "tokens_trained": 0.915109128 |
| }, |
| { |
| "epoch": 0.5287568257570385, |
| "grad_norm": 12.686080932617188, |
| "loss": 4.1356, |
| "lr": 0.0008093706293706294, |
| "step": 1864, |
| "tokens_trained": 0.916092096 |
| }, |
| { |
| "epoch": 0.5293241614069925, |
| "grad_norm": 15.91020679473877, |
| "loss": 4.1276, |
| "lr": 0.0008090909090909092, |
| "step": 1866, |
| "tokens_trained": 0.917077264 |
| }, |
| { |
| "epoch": 0.5298914970569463, |
| "grad_norm": 21.305110931396484, |
| "loss": 4.1492, |
| "lr": 0.0008088111888111889, |
| "step": 1868, |
| "tokens_trained": 0.918060288 |
| }, |
| { |
| "epoch": 0.5304588327069002, |
| "grad_norm": 9.242319107055664, |
| "loss": 4.1457, |
| "lr": 0.0008085314685314685, |
| "step": 1870, |
| "tokens_trained": 0.91904616 |
| }, |
| { |
| "epoch": 0.5310261683568541, |
| "grad_norm": 17.556922912597656, |
| "loss": 4.1698, |
| "lr": 0.0008082517482517483, |
| "step": 1872, |
| "tokens_trained": 0.920028192 |
| }, |
| { |
| "epoch": 0.531593504006808, |
| "grad_norm": 24.155885696411133, |
| "loss": 4.193, |
| "lr": 0.0008079720279720279, |
| "step": 1874, |
| "tokens_trained": 0.921010456 |
| }, |
| { |
| "epoch": 0.531877171831785, |
| "eval_loss": 1.0404243469238281, |
| "eval_runtime": 21.451, |
| "step": 1875, |
| "tokens_trained": 0.921502192 |
| }, |
| { |
| "epoch": 0.532160839656762, |
| "grad_norm": 4.985994338989258, |
| "loss": 4.1649, |
| "lr": 0.0008076923076923078, |
| "step": 1876, |
| "tokens_trained": 0.921994216 |
| }, |
| { |
| "epoch": 0.5327281753067158, |
| "grad_norm": 19.2642765045166, |
| "loss": 4.1883, |
| "lr": 0.0008074125874125874, |
| "step": 1878, |
| "tokens_trained": 0.922978112 |
| }, |
| { |
| "epoch": 0.5332955109566697, |
| "grad_norm": 15.012572288513184, |
| "loss": 4.1944, |
| "lr": 0.0008071328671328671, |
| "step": 1880, |
| "tokens_trained": 0.923962952 |
| }, |
| { |
| "epoch": 0.5338628466066236, |
| "grad_norm": 21.37204360961914, |
| "loss": 4.1708, |
| "lr": 0.0008068531468531469, |
| "step": 1882, |
| "tokens_trained": 0.92494744 |
| }, |
| { |
| "epoch": 0.5344301822565776, |
| "grad_norm": 6.402398586273193, |
| "loss": 4.1921, |
| "lr": 0.0008065734265734265, |
| "step": 1884, |
| "tokens_trained": 0.925927984 |
| }, |
| { |
| "epoch": 0.5349975179065315, |
| "grad_norm": 27.606822967529297, |
| "loss": 4.2033, |
| "lr": 0.0008062937062937064, |
| "step": 1886, |
| "tokens_trained": 0.926911352 |
| }, |
| { |
| "epoch": 0.5355648535564853, |
| "grad_norm": 16.434572219848633, |
| "loss": 4.1504, |
| "lr": 0.000806013986013986, |
| "step": 1888, |
| "tokens_trained": 0.927894056 |
| }, |
| { |
| "epoch": 0.5361321892064392, |
| "grad_norm": 8.066178321838379, |
| "loss": 4.1674, |
| "lr": 0.0008057342657342658, |
| "step": 1890, |
| "tokens_trained": 0.928879504 |
| }, |
| { |
| "epoch": 0.5366995248563932, |
| "grad_norm": 6.167456150054932, |
| "loss": 4.1207, |
| "lr": 0.0008054545454545454, |
| "step": 1892, |
| "tokens_trained": 0.92986424 |
| }, |
| { |
| "epoch": 0.5372668605063471, |
| "grad_norm": 3.584982395172119, |
| "loss": 4.1051, |
| "lr": 0.0008051748251748253, |
| "step": 1894, |
| "tokens_trained": 0.930846696 |
| }, |
| { |
| "epoch": 0.537834196156301, |
| "grad_norm": 14.988295555114746, |
| "loss": 4.1199, |
| "lr": 0.0008048951048951049, |
| "step": 1896, |
| "tokens_trained": 0.931831112 |
| }, |
| { |
| "epoch": 0.5384015318062548, |
| "grad_norm": 12.735363960266113, |
| "loss": 4.1368, |
| "lr": 0.0008046153846153846, |
| "step": 1898, |
| "tokens_trained": 0.932816952 |
| }, |
| { |
| "epoch": 0.5389688674562088, |
| "grad_norm": 7.701294422149658, |
| "loss": 4.1205, |
| "lr": 0.0008043356643356644, |
| "step": 1900, |
| "tokens_trained": 0.93380264 |
| }, |
| { |
| "epoch": 0.5395362031061627, |
| "grad_norm": 9.15809440612793, |
| "loss": 4.1567, |
| "lr": 0.000804055944055944, |
| "step": 1902, |
| "tokens_trained": 0.934785848 |
| }, |
| { |
| "epoch": 0.5401035387561166, |
| "grad_norm": 10.8292875289917, |
| "loss": 4.1645, |
| "lr": 0.0008037762237762239, |
| "step": 1904, |
| "tokens_trained": 0.935766912 |
| }, |
| { |
| "epoch": 0.5406708744060705, |
| "grad_norm": 10.906803131103516, |
| "loss": 4.1398, |
| "lr": 0.0008034965034965035, |
| "step": 1906, |
| "tokens_trained": 0.936749352 |
| }, |
| { |
| "epoch": 0.5412382100560243, |
| "grad_norm": 10.140864372253418, |
| "loss": 4.1754, |
| "lr": 0.0008032167832167832, |
| "step": 1908, |
| "tokens_trained": 0.9377304 |
| }, |
| { |
| "epoch": 0.5418055457059783, |
| "grad_norm": 10.061383247375488, |
| "loss": 4.1485, |
| "lr": 0.0008029370629370629, |
| "step": 1910, |
| "tokens_trained": 0.938712336 |
| }, |
| { |
| "epoch": 0.5423728813559322, |
| "grad_norm": 8.252259254455566, |
| "loss": 4.1502, |
| "lr": 0.0008026573426573427, |
| "step": 1912, |
| "tokens_trained": 0.939693304 |
| }, |
| { |
| "epoch": 0.5429402170058861, |
| "grad_norm": 15.104400634765625, |
| "loss": 4.182, |
| "lr": 0.0008023776223776224, |
| "step": 1914, |
| "tokens_trained": 0.940679832 |
| }, |
| { |
| "epoch": 0.54350755265584, |
| "grad_norm": 21.167285919189453, |
| "loss": 4.1241, |
| "lr": 0.0008020979020979021, |
| "step": 1916, |
| "tokens_trained": 0.941665088 |
| }, |
| { |
| "epoch": 0.5440748883057939, |
| "grad_norm": 17.936481475830078, |
| "loss": 4.1846, |
| "lr": 0.0008018181818181818, |
| "step": 1918, |
| "tokens_trained": 0.942651632 |
| }, |
| { |
| "epoch": 0.5446422239557478, |
| "grad_norm": 9.773019790649414, |
| "loss": 4.1164, |
| "lr": 0.0008015384615384615, |
| "step": 1920, |
| "tokens_trained": 0.943635928 |
| }, |
| { |
| "epoch": 0.5452095596057017, |
| "grad_norm": 14.120475769042969, |
| "loss": 4.1556, |
| "lr": 0.0008012587412587414, |
| "step": 1922, |
| "tokens_trained": 0.944618336 |
| }, |
| { |
| "epoch": 0.5457768952556556, |
| "grad_norm": 10.898097038269043, |
| "loss": 4.1521, |
| "lr": 0.000800979020979021, |
| "step": 1924, |
| "tokens_trained": 0.945608216 |
| }, |
| { |
| "epoch": 0.5463442309056096, |
| "grad_norm": 8.271462440490723, |
| "loss": 4.0785, |
| "lr": 0.0008006993006993007, |
| "step": 1926, |
| "tokens_trained": 0.946593504 |
| }, |
| { |
| "epoch": 0.5469115665555634, |
| "grad_norm": 17.28820037841797, |
| "loss": 4.0998, |
| "lr": 0.0008004195804195804, |
| "step": 1928, |
| "tokens_trained": 0.947575288 |
| }, |
| { |
| "epoch": 0.5474789022055173, |
| "grad_norm": 17.754959106445312, |
| "loss": 4.1652, |
| "lr": 0.0008001398601398602, |
| "step": 1930, |
| "tokens_trained": 0.948562968 |
| }, |
| { |
| "epoch": 0.5480462378554712, |
| "grad_norm": 10.576292037963867, |
| "loss": 4.1754, |
| "lr": 0.0007998601398601399, |
| "step": 1932, |
| "tokens_trained": 0.949545728 |
| }, |
| { |
| "epoch": 0.5486135735054252, |
| "grad_norm": 14.297791481018066, |
| "loss": 4.1597, |
| "lr": 0.0007995804195804196, |
| "step": 1934, |
| "tokens_trained": 0.950528952 |
| }, |
| { |
| "epoch": 0.5491809091553791, |
| "grad_norm": 23.882539749145508, |
| "loss": 4.1366, |
| "lr": 0.0007993006993006992, |
| "step": 1936, |
| "tokens_trained": 0.951513448 |
| }, |
| { |
| "epoch": 0.5497482448053329, |
| "grad_norm": 5.12502908706665, |
| "loss": 4.1441, |
| "lr": 0.000799020979020979, |
| "step": 1938, |
| "tokens_trained": 0.952497048 |
| }, |
| { |
| "epoch": 0.5503155804552868, |
| "grad_norm": 26.879070281982422, |
| "loss": 4.2595, |
| "lr": 0.0007987412587412588, |
| "step": 1940, |
| "tokens_trained": 0.953475816 |
| }, |
| { |
| "epoch": 0.5508829161052408, |
| "grad_norm": 23.032690048217773, |
| "loss": 4.1841, |
| "lr": 0.0007984615384615385, |
| "step": 1942, |
| "tokens_trained": 0.954459984 |
| }, |
| { |
| "epoch": 0.5514502517551947, |
| "grad_norm": 8.810720443725586, |
| "loss": 4.1329, |
| "lr": 0.0007981818181818182, |
| "step": 1944, |
| "tokens_trained": 0.95544252 |
| }, |
| { |
| "epoch": 0.5520175874051486, |
| "grad_norm": 31.051185607910156, |
| "loss": 4.2278, |
| "lr": 0.0007979020979020979, |
| "step": 1946, |
| "tokens_trained": 0.956428016 |
| }, |
| { |
| "epoch": 0.5525849230551024, |
| "grad_norm": 22.537412643432617, |
| "loss": 4.1729, |
| "lr": 0.0007976223776223777, |
| "step": 1948, |
| "tokens_trained": 0.957406024 |
| }, |
| { |
| "epoch": 0.5531522587050564, |
| "grad_norm": 10.596793174743652, |
| "loss": 4.1636, |
| "lr": 0.0007973426573426573, |
| "step": 1950, |
| "tokens_trained": 0.958391232 |
| }, |
| { |
| "epoch": 0.5537195943550103, |
| "grad_norm": 16.45500373840332, |
| "loss": 4.1591, |
| "lr": 0.0007970629370629371, |
| "step": 1952, |
| "tokens_trained": 0.959378448 |
| }, |
| { |
| "epoch": 0.5542869300049642, |
| "grad_norm": 15.090359687805176, |
| "loss": 4.1516, |
| "lr": 0.0007967832167832167, |
| "step": 1954, |
| "tokens_trained": 0.960363384 |
| }, |
| { |
| "epoch": 0.5548542656549181, |
| "grad_norm": 28.482192993164062, |
| "loss": 4.1211, |
| "lr": 0.0007965034965034965, |
| "step": 1956, |
| "tokens_trained": 0.961348752 |
| }, |
| { |
| "epoch": 0.555421601304872, |
| "grad_norm": 9.402368545532227, |
| "loss": 4.178, |
| "lr": 0.0007962237762237763, |
| "step": 1958, |
| "tokens_trained": 0.962332976 |
| }, |
| { |
| "epoch": 0.5559889369548259, |
| "grad_norm": 33.001346588134766, |
| "loss": 4.218, |
| "lr": 0.000795944055944056, |
| "step": 1960, |
| "tokens_trained": 0.963316928 |
| }, |
| { |
| "epoch": 0.5565562726047798, |
| "grad_norm": 29.695520401000977, |
| "loss": 4.2071, |
| "lr": 0.0007956643356643357, |
| "step": 1962, |
| "tokens_trained": 0.964301728 |
| }, |
| { |
| "epoch": 0.5571236082547337, |
| "grad_norm": 22.22412109375, |
| "loss": 4.2158, |
| "lr": 0.0007953846153846153, |
| "step": 1964, |
| "tokens_trained": 0.96528524 |
| }, |
| { |
| "epoch": 0.5576909439046877, |
| "grad_norm": 15.590829849243164, |
| "loss": 4.1681, |
| "lr": 0.0007951048951048952, |
| "step": 1966, |
| "tokens_trained": 0.966268264 |
| }, |
| { |
| "epoch": 0.5582582795546415, |
| "grad_norm": 16.011110305786133, |
| "loss": 4.1591, |
| "lr": 0.0007948251748251748, |
| "step": 1968, |
| "tokens_trained": 0.967252016 |
| }, |
| { |
| "epoch": 0.5588256152045954, |
| "grad_norm": 15.24573040008545, |
| "loss": 4.1446, |
| "lr": 0.0007945454545454546, |
| "step": 1970, |
| "tokens_trained": 0.96823396 |
| }, |
| { |
| "epoch": 0.5593929508545493, |
| "grad_norm": 15.718021392822266, |
| "loss": 4.1846, |
| "lr": 0.0007942657342657342, |
| "step": 1972, |
| "tokens_trained": 0.969217792 |
| }, |
| { |
| "epoch": 0.5599602865045032, |
| "grad_norm": 8.648459434509277, |
| "loss": 4.1655, |
| "lr": 0.000793986013986014, |
| "step": 1974, |
| "tokens_trained": 0.970200776 |
| }, |
| { |
| "epoch": 0.5605276221544572, |
| "grad_norm": 7.273077487945557, |
| "loss": 4.1397, |
| "lr": 0.0007937062937062938, |
| "step": 1976, |
| "tokens_trained": 0.971181376 |
| }, |
| { |
| "epoch": 0.561094957804411, |
| "grad_norm": 25.027616500854492, |
| "loss": 4.1918, |
| "lr": 0.0007934265734265734, |
| "step": 1978, |
| "tokens_trained": 0.972165496 |
| }, |
| { |
| "epoch": 0.5616622934543649, |
| "grad_norm": 25.485851287841797, |
| "loss": 4.1896, |
| "lr": 0.0007931468531468532, |
| "step": 1980, |
| "tokens_trained": 0.973145616 |
| }, |
| { |
| "epoch": 0.5622296291043188, |
| "grad_norm": 18.065462112426758, |
| "loss": 4.1876, |
| "lr": 0.0007928671328671328, |
| "step": 1982, |
| "tokens_trained": 0.974131104 |
| }, |
| { |
| "epoch": 0.5627969647542728, |
| "grad_norm": 20.412248611450195, |
| "loss": 4.1556, |
| "lr": 0.0007925874125874127, |
| "step": 1984, |
| "tokens_trained": 0.975111232 |
| }, |
| { |
| "epoch": 0.5633643004042267, |
| "grad_norm": 15.51710319519043, |
| "loss": 4.1391, |
| "lr": 0.0007923076923076923, |
| "step": 1986, |
| "tokens_trained": 0.976098968 |
| }, |
| { |
| "epoch": 0.5639316360541805, |
| "grad_norm": 8.650726318359375, |
| "loss": 4.1421, |
| "lr": 0.000792027972027972, |
| "step": 1988, |
| "tokens_trained": 0.977082992 |
| }, |
| { |
| "epoch": 0.5644989717041344, |
| "grad_norm": 19.833505630493164, |
| "loss": 4.1505, |
| "lr": 0.0007917482517482517, |
| "step": 1990, |
| "tokens_trained": 0.978068896 |
| }, |
| { |
| "epoch": 0.5650663073540884, |
| "grad_norm": 26.585390090942383, |
| "loss": 4.1661, |
| "lr": 0.0007914685314685314, |
| "step": 1992, |
| "tokens_trained": 0.979048504 |
| }, |
| { |
| "epoch": 0.5656336430040423, |
| "grad_norm": 20.827394485473633, |
| "loss": 4.1987, |
| "lr": 0.0007911888111888113, |
| "step": 1994, |
| "tokens_trained": 0.98003104 |
| }, |
| { |
| "epoch": 0.5662009786539962, |
| "grad_norm": 23.700273513793945, |
| "loss": 4.1773, |
| "lr": 0.0007909090909090909, |
| "step": 1996, |
| "tokens_trained": 0.981013384 |
| }, |
| { |
| "epoch": 0.56676831430395, |
| "grad_norm": 15.673397064208984, |
| "loss": 4.12, |
| "lr": 0.0007906293706293707, |
| "step": 1998, |
| "tokens_trained": 0.981999776 |
| }, |
| { |
| "epoch": 0.567335649953904, |
| "grad_norm": 11.268630981445312, |
| "loss": 4.1373, |
| "lr": 0.0007903496503496503, |
| "step": 2000, |
| "tokens_trained": 0.982980936 |
| }, |
| { |
| "epoch": 0.567335649953904, |
| "eval_loss": 1.0422048568725586, |
| "eval_runtime": 20.3928, |
| "step": 2000, |
| "tokens_trained": 0.982980936 |
| }, |
| { |
| "epoch": 0.5679029856038579, |
| "grad_norm": 18.37994384765625, |
| "loss": 4.1536, |
| "lr": 0.0007900699300699302, |
| "step": 2002, |
| "tokens_trained": 0.983969536 |
| }, |
| { |
| "epoch": 0.5684703212538118, |
| "grad_norm": 23.911537170410156, |
| "loss": 4.1652, |
| "lr": 0.0007897902097902098, |
| "step": 2004, |
| "tokens_trained": 0.98495052 |
| }, |
| { |
| "epoch": 0.5690376569037657, |
| "grad_norm": 7.355772018432617, |
| "loss": 4.1846, |
| "lr": 0.0007895104895104895, |
| "step": 2006, |
| "tokens_trained": 0.98593252 |
| }, |
| { |
| "epoch": 0.5696049925537195, |
| "grad_norm": 35.29991149902344, |
| "loss": 4.2145, |
| "lr": 0.0007892307692307692, |
| "step": 2008, |
| "tokens_trained": 0.986922392 |
| }, |
| { |
| "epoch": 0.5701723282036735, |
| "grad_norm": 14.28709602355957, |
| "loss": 4.1629, |
| "lr": 0.0007889510489510489, |
| "step": 2010, |
| "tokens_trained": 0.987905712 |
| }, |
| { |
| "epoch": 0.5707396638536274, |
| "grad_norm": 22.50174331665039, |
| "loss": 4.1907, |
| "lr": 0.0007886713286713288, |
| "step": 2012, |
| "tokens_trained": 0.988887536 |
| }, |
| { |
| "epoch": 0.5713069995035813, |
| "grad_norm": 14.588640213012695, |
| "loss": 4.1523, |
| "lr": 0.0007883916083916084, |
| "step": 2014, |
| "tokens_trained": 0.989872712 |
| }, |
| { |
| "epoch": 0.5718743351535353, |
| "grad_norm": 2.776369094848633, |
| "loss": 4.1548, |
| "lr": 0.0007881118881118882, |
| "step": 2016, |
| "tokens_trained": 0.990854072 |
| }, |
| { |
| "epoch": 0.5724416708034891, |
| "grad_norm": 16.00047492980957, |
| "loss": 4.1319, |
| "lr": 0.0007878321678321678, |
| "step": 2018, |
| "tokens_trained": 0.991834552 |
| }, |
| { |
| "epoch": 0.573009006453443, |
| "grad_norm": 21.678735733032227, |
| "loss": 4.1986, |
| "lr": 0.0007875524475524476, |
| "step": 2020, |
| "tokens_trained": 0.992818256 |
| }, |
| { |
| "epoch": 0.5735763421033969, |
| "grad_norm": 4.835119724273682, |
| "loss": 4.1625, |
| "lr": 0.0007872727272727273, |
| "step": 2022, |
| "tokens_trained": 0.993801376 |
| }, |
| { |
| "epoch": 0.5741436777533508, |
| "grad_norm": 19.427467346191406, |
| "loss": 4.1594, |
| "lr": 0.000786993006993007, |
| "step": 2024, |
| "tokens_trained": 0.994788568 |
| }, |
| { |
| "epoch": 0.5747110134033048, |
| "grad_norm": 15.458346366882324, |
| "loss": 4.1829, |
| "lr": 0.0007867132867132867, |
| "step": 2026, |
| "tokens_trained": 0.995769976 |
| }, |
| { |
| "epoch": 0.5752783490532586, |
| "grad_norm": 11.073614120483398, |
| "loss": 4.1303, |
| "lr": 0.0007864335664335664, |
| "step": 2028, |
| "tokens_trained": 0.996751464 |
| }, |
| { |
| "epoch": 0.5758456847032125, |
| "grad_norm": 4.685436248779297, |
| "loss": 4.1368, |
| "lr": 0.0007861538461538463, |
| "step": 2030, |
| "tokens_trained": 0.997733952 |
| }, |
| { |
| "epoch": 0.5764130203531664, |
| "grad_norm": 15.977241516113281, |
| "loss": 4.1584, |
| "lr": 0.0007858741258741259, |
| "step": 2032, |
| "tokens_trained": 0.998716976 |
| }, |
| { |
| "epoch": 0.5769803560031204, |
| "grad_norm": 11.305732727050781, |
| "loss": 4.102, |
| "lr": 0.0007855944055944056, |
| "step": 2034, |
| "tokens_trained": 0.999703632 |
| }, |
| { |
| "epoch": 0.5775476916530743, |
| "grad_norm": 7.794003963470459, |
| "loss": 4.161, |
| "lr": 0.0007853146853146853, |
| "step": 2036, |
| "tokens_trained": 1.000687488 |
| }, |
| { |
| "epoch": 0.5781150273030281, |
| "grad_norm": 7.609982013702393, |
| "loss": 4.1546, |
| "lr": 0.0007850349650349651, |
| "step": 2038, |
| "tokens_trained": 1.0016692 |
| }, |
| { |
| "epoch": 0.578682362952982, |
| "grad_norm": 7.622653961181641, |
| "loss": 4.1246, |
| "lr": 0.0007847552447552448, |
| "step": 2040, |
| "tokens_trained": 1.002653352 |
| }, |
| { |
| "epoch": 0.579249698602936, |
| "grad_norm": 9.98919677734375, |
| "loss": 4.1319, |
| "lr": 0.0007844755244755245, |
| "step": 2042, |
| "tokens_trained": 1.003639528 |
| }, |
| { |
| "epoch": 0.5798170342528899, |
| "grad_norm": 9.557628631591797, |
| "loss": 4.1105, |
| "lr": 0.0007841958041958041, |
| "step": 2044, |
| "tokens_trained": 1.004623776 |
| }, |
| { |
| "epoch": 0.5803843699028438, |
| "grad_norm": 14.172621726989746, |
| "loss": 4.1339, |
| "lr": 0.0007839160839160839, |
| "step": 2046, |
| "tokens_trained": 1.005604008 |
| }, |
| { |
| "epoch": 0.5809517055527976, |
| "grad_norm": 8.185248374938965, |
| "loss": 4.1142, |
| "lr": 0.0007836363636363637, |
| "step": 2048, |
| "tokens_trained": 1.006585704 |
| }, |
| { |
| "epoch": 0.5815190412027516, |
| "grad_norm": 10.642661094665527, |
| "loss": 4.131, |
| "lr": 0.0007833566433566434, |
| "step": 2050, |
| "tokens_trained": 1.00757132 |
| }, |
| { |
| "epoch": 0.5820863768527055, |
| "grad_norm": 7.868969917297363, |
| "loss": 4.1477, |
| "lr": 0.0007830769230769231, |
| "step": 2052, |
| "tokens_trained": 1.008556824 |
| }, |
| { |
| "epoch": 0.5826537125026594, |
| "grad_norm": 2.8441150188446045, |
| "loss": 4.1156, |
| "lr": 0.0007827972027972028, |
| "step": 2054, |
| "tokens_trained": 1.00954056 |
| }, |
| { |
| "epoch": 0.5832210481526133, |
| "grad_norm": 5.2797932624816895, |
| "loss": 4.1058, |
| "lr": 0.0007825174825174826, |
| "step": 2056, |
| "tokens_trained": 1.010526488 |
| }, |
| { |
| "epoch": 0.5837883838025671, |
| "grad_norm": 11.850811004638672, |
| "loss": 4.165, |
| "lr": 0.0007822377622377622, |
| "step": 2058, |
| "tokens_trained": 1.011507584 |
| }, |
| { |
| "epoch": 0.5843557194525211, |
| "grad_norm": 11.073920249938965, |
| "loss": 4.1509, |
| "lr": 0.000781958041958042, |
| "step": 2060, |
| "tokens_trained": 1.012491648 |
| }, |
| { |
| "epoch": 0.584923055102475, |
| "grad_norm": 8.282343864440918, |
| "loss": 4.0656, |
| "lr": 0.0007816783216783216, |
| "step": 2062, |
| "tokens_trained": 1.013475224 |
| }, |
| { |
| "epoch": 0.5854903907524289, |
| "grad_norm": 10.414461135864258, |
| "loss": 4.1285, |
| "lr": 0.0007813986013986014, |
| "step": 2064, |
| "tokens_trained": 1.014458144 |
| }, |
| { |
| "epoch": 0.5860577264023829, |
| "grad_norm": 9.988463401794434, |
| "loss": 4.1234, |
| "lr": 0.0007811188811188812, |
| "step": 2066, |
| "tokens_trained": 1.015444112 |
| }, |
| { |
| "epoch": 0.5866250620523367, |
| "grad_norm": 8.713189125061035, |
| "loss": 4.129, |
| "lr": 0.0007808391608391609, |
| "step": 2068, |
| "tokens_trained": 1.016427568 |
| }, |
| { |
| "epoch": 0.5871923977022906, |
| "grad_norm": 3.4149773120880127, |
| "loss": 4.155, |
| "lr": 0.0007805594405594406, |
| "step": 2070, |
| "tokens_trained": 1.017412264 |
| }, |
| { |
| "epoch": 0.5877597333522445, |
| "grad_norm": 12.33522891998291, |
| "loss": 4.1856, |
| "lr": 0.0007802797202797202, |
| "step": 2072, |
| "tokens_trained": 1.018402216 |
| }, |
| { |
| "epoch": 0.5883270690021984, |
| "grad_norm": 12.155695915222168, |
| "loss": 4.1468, |
| "lr": 0.0007800000000000001, |
| "step": 2074, |
| "tokens_trained": 1.019387096 |
| }, |
| { |
| "epoch": 0.5888944046521524, |
| "grad_norm": 7.73326301574707, |
| "loss": 4.1239, |
| "lr": 0.0007797202797202797, |
| "step": 2076, |
| "tokens_trained": 1.020370008 |
| }, |
| { |
| "epoch": 0.5894617403021062, |
| "grad_norm": 6.425852298736572, |
| "loss": 4.1101, |
| "lr": 0.0007794405594405595, |
| "step": 2078, |
| "tokens_trained": 1.02135716 |
| }, |
| { |
| "epoch": 0.5900290759520601, |
| "grad_norm": 18.360816955566406, |
| "loss": 4.1726, |
| "lr": 0.0007791608391608391, |
| "step": 2080, |
| "tokens_trained": 1.022338024 |
| }, |
| { |
| "epoch": 0.590596411602014, |
| "grad_norm": 28.31681251525879, |
| "loss": 4.1341, |
| "lr": 0.0007788811188811189, |
| "step": 2082, |
| "tokens_trained": 1.023318008 |
| }, |
| { |
| "epoch": 0.591163747251968, |
| "grad_norm": 10.673089027404785, |
| "loss": 4.1268, |
| "lr": 0.0007786013986013987, |
| "step": 2084, |
| "tokens_trained": 1.02430432 |
| }, |
| { |
| "epoch": 0.5917310829019219, |
| "grad_norm": 26.656522750854492, |
| "loss": 4.1703, |
| "lr": 0.0007783216783216783, |
| "step": 2086, |
| "tokens_trained": 1.025288272 |
| }, |
| { |
| "epoch": 0.5922984185518757, |
| "grad_norm": 20.022029876708984, |
| "loss": 4.1532, |
| "lr": 0.0007780419580419581, |
| "step": 2088, |
| "tokens_trained": 1.026272984 |
| }, |
| { |
| "epoch": 0.5928657542018296, |
| "grad_norm": 7.2955121994018555, |
| "loss": 4.1992, |
| "lr": 0.0007777622377622377, |
| "step": 2090, |
| "tokens_trained": 1.02725572 |
| }, |
| { |
| "epoch": 0.5934330898517836, |
| "grad_norm": 28.561243057250977, |
| "loss": 4.2098, |
| "lr": 0.0007774825174825176, |
| "step": 2092, |
| "tokens_trained": 1.028238456 |
| }, |
| { |
| "epoch": 0.5940004255017375, |
| "grad_norm": 16.715425491333008, |
| "loss": 4.1509, |
| "lr": 0.0007772027972027972, |
| "step": 2094, |
| "tokens_trained": 1.029226048 |
| }, |
| { |
| "epoch": 0.5945677611516914, |
| "grad_norm": 6.325936317443848, |
| "loss": 4.1221, |
| "lr": 0.000776923076923077, |
| "step": 2096, |
| "tokens_trained": 1.030210528 |
| }, |
| { |
| "epoch": 0.5951350968016452, |
| "grad_norm": 12.83181381225586, |
| "loss": 4.1808, |
| "lr": 0.0007766433566433566, |
| "step": 2098, |
| "tokens_trained": 1.031193456 |
| }, |
| { |
| "epoch": 0.5957024324515992, |
| "grad_norm": 12.183184623718262, |
| "loss": 4.1292, |
| "lr": 0.0007763636363636363, |
| "step": 2100, |
| "tokens_trained": 1.032173528 |
| }, |
| { |
| "epoch": 0.5962697681015531, |
| "grad_norm": 8.247485160827637, |
| "loss": 4.1425, |
| "lr": 0.0007760839160839162, |
| "step": 2102, |
| "tokens_trained": 1.033158144 |
| }, |
| { |
| "epoch": 0.596837103751507, |
| "grad_norm": 10.814559936523438, |
| "loss": 4.1167, |
| "lr": 0.0007758041958041958, |
| "step": 2104, |
| "tokens_trained": 1.034141216 |
| }, |
| { |
| "epoch": 0.5974044394014609, |
| "grad_norm": 12.589309692382812, |
| "loss": 4.0916, |
| "lr": 0.0007755244755244756, |
| "step": 2106, |
| "tokens_trained": 1.035121888 |
| }, |
| { |
| "epoch": 0.5979717750514147, |
| "grad_norm": 11.65658187866211, |
| "loss": 4.0776, |
| "lr": 0.0007752447552447552, |
| "step": 2108, |
| "tokens_trained": 1.036103688 |
| }, |
| { |
| "epoch": 0.5985391107013687, |
| "grad_norm": 18.0120792388916, |
| "loss": 4.1588, |
| "lr": 0.0007749650349650351, |
| "step": 2110, |
| "tokens_trained": 1.03708248 |
| }, |
| { |
| "epoch": 0.5991064463513226, |
| "grad_norm": 5.742938995361328, |
| "loss": 4.151, |
| "lr": 0.0007746853146853147, |
| "step": 2112, |
| "tokens_trained": 1.038068792 |
| }, |
| { |
| "epoch": 0.5996737820012765, |
| "grad_norm": 36.54581832885742, |
| "loss": 4.2239, |
| "lr": 0.0007744055944055944, |
| "step": 2114, |
| "tokens_trained": 1.03904728 |
| }, |
| { |
| "epoch": 0.6002411176512304, |
| "grad_norm": 13.304069519042969, |
| "loss": 4.152, |
| "lr": 0.0007741258741258741, |
| "step": 2116, |
| "tokens_trained": 1.040031312 |
| }, |
| { |
| "epoch": 0.6008084533011843, |
| "grad_norm": 18.68927001953125, |
| "loss": 4.1413, |
| "lr": 0.0007738461538461538, |
| "step": 2118, |
| "tokens_trained": 1.041018376 |
| }, |
| { |
| "epoch": 0.6013757889511382, |
| "grad_norm": 16.946630477905273, |
| "loss": 4.1122, |
| "lr": 0.0007735664335664337, |
| "step": 2120, |
| "tokens_trained": 1.0420056 |
| }, |
| { |
| "epoch": 0.6019431246010921, |
| "grad_norm": 4.236926078796387, |
| "loss": 4.1146, |
| "lr": 0.0007732867132867133, |
| "step": 2122, |
| "tokens_trained": 1.042990376 |
| }, |
| { |
| "epoch": 0.602510460251046, |
| "grad_norm": 12.148641586303711, |
| "loss": 4.1472, |
| "lr": 0.0007730069930069931, |
| "step": 2124, |
| "tokens_trained": 1.0439754 |
| }, |
| { |
| "epoch": 0.602794128076023, |
| "eval_loss": 1.039306640625, |
| "eval_runtime": 20.6138, |
| "step": 2125, |
| "tokens_trained": 1.044467008 |
| }, |
| { |
| "epoch": 0.603077795901, |
| "grad_norm": 17.051687240600586, |
| "loss": 4.1572, |
| "lr": 0.0007727272727272727, |
| "step": 2126, |
| "tokens_trained": 1.044957456 |
| }, |
| { |
| "epoch": 0.6036451315509538, |
| "grad_norm": 14.019828796386719, |
| "loss": 4.1464, |
| "lr": 0.0007724475524475525, |
| "step": 2128, |
| "tokens_trained": 1.04593944 |
| }, |
| { |
| "epoch": 0.6042124672009077, |
| "grad_norm": 11.22962760925293, |
| "loss": 4.1345, |
| "lr": 0.0007721678321678322, |
| "step": 2130, |
| "tokens_trained": 1.046919592 |
| }, |
| { |
| "epoch": 0.6047798028508616, |
| "grad_norm": 11.524348258972168, |
| "loss": 4.1233, |
| "lr": 0.0007718881118881119, |
| "step": 2132, |
| "tokens_trained": 1.047904744 |
| }, |
| { |
| "epoch": 0.6053471385008156, |
| "grad_norm": 7.174457550048828, |
| "loss": 4.1201, |
| "lr": 0.0007716083916083916, |
| "step": 2134, |
| "tokens_trained": 1.048885328 |
| }, |
| { |
| "epoch": 0.6059144741507695, |
| "grad_norm": 6.847499847412109, |
| "loss": 4.1313, |
| "lr": 0.0007713286713286713, |
| "step": 2136, |
| "tokens_trained": 1.049868776 |
| }, |
| { |
| "epoch": 0.6064818098007233, |
| "grad_norm": 8.44458293914795, |
| "loss": 4.1236, |
| "lr": 0.0007710489510489512, |
| "step": 2138, |
| "tokens_trained": 1.050852704 |
| }, |
| { |
| "epoch": 0.6070491454506772, |
| "grad_norm": 15.415260314941406, |
| "loss": 4.1424, |
| "lr": 0.0007707692307692308, |
| "step": 2140, |
| "tokens_trained": 1.051837736 |
| }, |
| { |
| "epoch": 0.6076164811006312, |
| "grad_norm": 16.845874786376953, |
| "loss": 4.1037, |
| "lr": 0.0007704895104895105, |
| "step": 2142, |
| "tokens_trained": 1.05282172 |
| }, |
| { |
| "epoch": 0.6081838167505851, |
| "grad_norm": 1.3947086334228516, |
| "loss": 4.1389, |
| "lr": 0.0007702097902097902, |
| "step": 2144, |
| "tokens_trained": 1.053802928 |
| }, |
| { |
| "epoch": 0.608751152400539, |
| "grad_norm": 3.4119038581848145, |
| "loss": 4.16, |
| "lr": 0.0007699300699300699, |
| "step": 2146, |
| "tokens_trained": 1.054784368 |
| }, |
| { |
| "epoch": 0.6093184880504928, |
| "grad_norm": 9.26860523223877, |
| "loss": 4.1841, |
| "lr": 0.0007696503496503497, |
| "step": 2148, |
| "tokens_trained": 1.05576888 |
| }, |
| { |
| "epoch": 0.6098858237004467, |
| "grad_norm": 8.744836807250977, |
| "loss": 4.1043, |
| "lr": 0.0007693706293706294, |
| "step": 2150, |
| "tokens_trained": 1.056751336 |
| }, |
| { |
| "epoch": 0.6104531593504007, |
| "grad_norm": 8.805045127868652, |
| "loss": 4.1032, |
| "lr": 0.000769090909090909, |
| "step": 2152, |
| "tokens_trained": 1.057734 |
| }, |
| { |
| "epoch": 0.6110204950003546, |
| "grad_norm": 4.785625457763672, |
| "loss": 4.1817, |
| "lr": 0.0007688111888111888, |
| "step": 2154, |
| "tokens_trained": 1.058716328 |
| }, |
| { |
| "epoch": 0.6115878306503085, |
| "grad_norm": 2.2137513160705566, |
| "loss": 4.1514, |
| "lr": 0.0007685314685314686, |
| "step": 2156, |
| "tokens_trained": 1.059696248 |
| }, |
| { |
| "epoch": 0.6121551663002623, |
| "grad_norm": 7.164271354675293, |
| "loss": 4.1433, |
| "lr": 0.0007682517482517483, |
| "step": 2158, |
| "tokens_trained": 1.060676648 |
| }, |
| { |
| "epoch": 0.6127225019502163, |
| "grad_norm": 9.481597900390625, |
| "loss": 4.0971, |
| "lr": 0.000767972027972028, |
| "step": 2160, |
| "tokens_trained": 1.061656688 |
| }, |
| { |
| "epoch": 0.6132898376001702, |
| "grad_norm": 11.28831672668457, |
| "loss": 4.149, |
| "lr": 0.0007676923076923077, |
| "step": 2162, |
| "tokens_trained": 1.062640576 |
| }, |
| { |
| "epoch": 0.6138571732501241, |
| "grad_norm": 17.21572494506836, |
| "loss": 4.098, |
| "lr": 0.0007674125874125874, |
| "step": 2164, |
| "tokens_trained": 1.063617688 |
| }, |
| { |
| "epoch": 0.614424508900078, |
| "grad_norm": 14.486310005187988, |
| "loss": 4.123, |
| "lr": 0.0007671328671328672, |
| "step": 2166, |
| "tokens_trained": 1.06460584 |
| }, |
| { |
| "epoch": 0.6149918445500319, |
| "grad_norm": 10.582398414611816, |
| "loss": 4.1243, |
| "lr": 0.0007668531468531469, |
| "step": 2168, |
| "tokens_trained": 1.065589064 |
| }, |
| { |
| "epoch": 0.6155591801999858, |
| "grad_norm": 12.923002243041992, |
| "loss": 4.0928, |
| "lr": 0.0007665734265734265, |
| "step": 2170, |
| "tokens_trained": 1.06657224 |
| }, |
| { |
| "epoch": 0.6161265158499397, |
| "grad_norm": 12.445414543151855, |
| "loss": 4.1697, |
| "lr": 0.0007662937062937063, |
| "step": 2172, |
| "tokens_trained": 1.067556952 |
| }, |
| { |
| "epoch": 0.6166938514998936, |
| "grad_norm": 3.562396287918091, |
| "loss": 4.0763, |
| "lr": 0.000766013986013986, |
| "step": 2174, |
| "tokens_trained": 1.068538248 |
| }, |
| { |
| "epoch": 0.6172611871498476, |
| "grad_norm": 12.62887954711914, |
| "loss": 4.1203, |
| "lr": 0.0007657342657342658, |
| "step": 2176, |
| "tokens_trained": 1.06952032 |
| }, |
| { |
| "epoch": 0.6178285227998014, |
| "grad_norm": 9.387356758117676, |
| "loss": 4.1318, |
| "lr": 0.0007654545454545455, |
| "step": 2178, |
| "tokens_trained": 1.070503872 |
| }, |
| { |
| "epoch": 0.6183958584497553, |
| "grad_norm": 8.885710716247559, |
| "loss": 4.1609, |
| "lr": 0.0007651748251748251, |
| "step": 2180, |
| "tokens_trained": 1.071486328 |
| }, |
| { |
| "epoch": 0.6189631940997092, |
| "grad_norm": 7.174533843994141, |
| "loss": 4.0824, |
| "lr": 0.0007648951048951049, |
| "step": 2182, |
| "tokens_trained": 1.07246928 |
| }, |
| { |
| "epoch": 0.6195305297496632, |
| "grad_norm": 15.866931915283203, |
| "loss": 4.1461, |
| "lr": 0.0007646153846153846, |
| "step": 2184, |
| "tokens_trained": 1.07345252 |
| }, |
| { |
| "epoch": 0.6200978653996171, |
| "grad_norm": 4.892337799072266, |
| "loss": 4.1418, |
| "lr": 0.0007643356643356644, |
| "step": 2186, |
| "tokens_trained": 1.07443796 |
| }, |
| { |
| "epoch": 0.6206652010495709, |
| "grad_norm": 4.796551704406738, |
| "loss": 4.1394, |
| "lr": 0.000764055944055944, |
| "step": 2188, |
| "tokens_trained": 1.075421392 |
| }, |
| { |
| "epoch": 0.6212325366995248, |
| "grad_norm": 10.585665702819824, |
| "loss": 4.1046, |
| "lr": 0.0007637762237762238, |
| "step": 2190, |
| "tokens_trained": 1.076404848 |
| }, |
| { |
| "epoch": 0.6217998723494788, |
| "grad_norm": 8.71747875213623, |
| "loss": 4.1819, |
| "lr": 0.0007634965034965035, |
| "step": 2192, |
| "tokens_trained": 1.077386672 |
| }, |
| { |
| "epoch": 0.6223672079994327, |
| "grad_norm": 10.74347972869873, |
| "loss": 4.1231, |
| "lr": 0.0007632167832167833, |
| "step": 2194, |
| "tokens_trained": 1.078365112 |
| }, |
| { |
| "epoch": 0.6229345436493866, |
| "grad_norm": 12.079446792602539, |
| "loss": 4.1132, |
| "lr": 0.000762937062937063, |
| "step": 2196, |
| "tokens_trained": 1.07935376 |
| }, |
| { |
| "epoch": 0.6235018792993404, |
| "grad_norm": 7.8133649826049805, |
| "loss": 4.0915, |
| "lr": 0.0007626573426573426, |
| "step": 2198, |
| "tokens_trained": 1.080332872 |
| }, |
| { |
| "epoch": 0.6240692149492943, |
| "grad_norm": 4.51243782043457, |
| "loss": 4.1108, |
| "lr": 0.0007623776223776224, |
| "step": 2200, |
| "tokens_trained": 1.081316664 |
| }, |
| { |
| "epoch": 0.6246365505992483, |
| "grad_norm": 12.625933647155762, |
| "loss": 4.1552, |
| "lr": 0.0007620979020979021, |
| "step": 2202, |
| "tokens_trained": 1.08230448 |
| }, |
| { |
| "epoch": 0.6252038862492022, |
| "grad_norm": 9.984200477600098, |
| "loss": 4.1199, |
| "lr": 0.0007618181818181819, |
| "step": 2204, |
| "tokens_trained": 1.083288992 |
| }, |
| { |
| "epoch": 0.6257712218991561, |
| "grad_norm": 11.338666915893555, |
| "loss": 4.0821, |
| "lr": 0.0007615384615384615, |
| "step": 2206, |
| "tokens_trained": 1.084273864 |
| }, |
| { |
| "epoch": 0.6263385575491099, |
| "grad_norm": 6.808894634246826, |
| "loss": 4.1202, |
| "lr": 0.0007612587412587412, |
| "step": 2208, |
| "tokens_trained": 1.085254584 |
| }, |
| { |
| "epoch": 0.6269058931990639, |
| "grad_norm": 4.182394027709961, |
| "loss": 4.1072, |
| "lr": 0.000760979020979021, |
| "step": 2210, |
| "tokens_trained": 1.086237312 |
| }, |
| { |
| "epoch": 0.6274732288490178, |
| "grad_norm": 13.04654312133789, |
| "loss": 4.1611, |
| "lr": 0.0007606993006993007, |
| "step": 2212, |
| "tokens_trained": 1.087220136 |
| }, |
| { |
| "epoch": 0.6280405644989717, |
| "grad_norm": 8.223962783813477, |
| "loss": 4.1094, |
| "lr": 0.0007604195804195805, |
| "step": 2214, |
| "tokens_trained": 1.088203464 |
| }, |
| { |
| "epoch": 0.6286079001489256, |
| "grad_norm": 7.974697589874268, |
| "loss": 4.1061, |
| "lr": 0.0007601398601398601, |
| "step": 2216, |
| "tokens_trained": 1.089188056 |
| }, |
| { |
| "epoch": 0.6291752357988795, |
| "grad_norm": 9.93747329711914, |
| "loss": 4.1625, |
| "lr": 0.0007598601398601399, |
| "step": 2218, |
| "tokens_trained": 1.090168464 |
| }, |
| { |
| "epoch": 0.6297425714488334, |
| "grad_norm": 14.117332458496094, |
| "loss": 4.1386, |
| "lr": 0.0007595804195804196, |
| "step": 2220, |
| "tokens_trained": 1.09115228 |
| }, |
| { |
| "epoch": 0.6303099070987873, |
| "grad_norm": 8.045380592346191, |
| "loss": 4.0962, |
| "lr": 0.0007593006993006993, |
| "step": 2222, |
| "tokens_trained": 1.0921348 |
| }, |
| { |
| "epoch": 0.6308772427487412, |
| "grad_norm": 7.286352634429932, |
| "loss": 4.1456, |
| "lr": 0.000759020979020979, |
| "step": 2224, |
| "tokens_trained": 1.0931198 |
| }, |
| { |
| "epoch": 0.6314445783986952, |
| "grad_norm": 7.278292179107666, |
| "loss": 4.1155, |
| "lr": 0.0007587412587412587, |
| "step": 2226, |
| "tokens_trained": 1.094107536 |
| }, |
| { |
| "epoch": 0.632011914048649, |
| "grad_norm": 5.973489761352539, |
| "loss": 4.1403, |
| "lr": 0.0007584615384615385, |
| "step": 2228, |
| "tokens_trained": 1.095090384 |
| }, |
| { |
| "epoch": 0.6325792496986029, |
| "grad_norm": 11.78962230682373, |
| "loss": 4.1322, |
| "lr": 0.0007581818181818182, |
| "step": 2230, |
| "tokens_trained": 1.096072192 |
| }, |
| { |
| "epoch": 0.6331465853485568, |
| "grad_norm": 9.853010177612305, |
| "loss": 4.0905, |
| "lr": 0.000757902097902098, |
| "step": 2232, |
| "tokens_trained": 1.097057368 |
| }, |
| { |
| "epoch": 0.6337139209985108, |
| "grad_norm": 12.578025817871094, |
| "loss": 4.0871, |
| "lr": 0.0007576223776223776, |
| "step": 2234, |
| "tokens_trained": 1.0980418 |
| }, |
| { |
| "epoch": 0.6342812566484647, |
| "grad_norm": 8.467657089233398, |
| "loss": 4.0972, |
| "lr": 0.0007573426573426573, |
| "step": 2236, |
| "tokens_trained": 1.099023032 |
| }, |
| { |
| "epoch": 0.6348485922984185, |
| "grad_norm": 10.768691062927246, |
| "loss": 4.0683, |
| "lr": 0.0007570629370629371, |
| "step": 2238, |
| "tokens_trained": 1.1000078 |
| }, |
| { |
| "epoch": 0.6354159279483724, |
| "grad_norm": 8.509350776672363, |
| "loss": 4.1319, |
| "lr": 0.0007567832167832168, |
| "step": 2240, |
| "tokens_trained": 1.100990904 |
| }, |
| { |
| "epoch": 0.6359832635983264, |
| "grad_norm": 9.473450660705566, |
| "loss": 4.0971, |
| "lr": 0.0007565034965034965, |
| "step": 2242, |
| "tokens_trained": 1.101971112 |
| }, |
| { |
| "epoch": 0.6365505992482803, |
| "grad_norm": 5.248406887054443, |
| "loss": 4.1212, |
| "lr": 0.0007562237762237762, |
| "step": 2244, |
| "tokens_trained": 1.10295244 |
| }, |
| { |
| "epoch": 0.6371179348982342, |
| "grad_norm": 2.8849964141845703, |
| "loss": 4.0914, |
| "lr": 0.000755944055944056, |
| "step": 2246, |
| "tokens_trained": 1.103935728 |
| }, |
| { |
| "epoch": 0.637685270548188, |
| "grad_norm": 10.757996559143066, |
| "loss": 4.0711, |
| "lr": 0.0007556643356643357, |
| "step": 2248, |
| "tokens_trained": 1.104917112 |
| }, |
| { |
| "epoch": 0.638252606198142, |
| "grad_norm": 14.822528839111328, |
| "loss": 4.1311, |
| "lr": 0.0007553846153846154, |
| "step": 2250, |
| "tokens_trained": 1.105899872 |
| }, |
| { |
| "epoch": 0.638252606198142, |
| "eval_loss": 1.0298579931259155, |
| "eval_runtime": 20.7482, |
| "step": 2250, |
| "tokens_trained": 1.105899872 |
| }, |
| { |
| "epoch": 0.6388199418480959, |
| "grad_norm": 12.402534484863281, |
| "loss": 4.0729, |
| "lr": 0.0007551048951048951, |
| "step": 2252, |
| "tokens_trained": 1.106885776 |
| }, |
| { |
| "epoch": 0.6393872774980498, |
| "grad_norm": 8.585915565490723, |
| "loss": 4.1026, |
| "lr": 0.0007548251748251748, |
| "step": 2254, |
| "tokens_trained": 1.107867784 |
| }, |
| { |
| "epoch": 0.6399546131480037, |
| "grad_norm": 9.298388481140137, |
| "loss": 4.1033, |
| "lr": 0.0007545454545454546, |
| "step": 2256, |
| "tokens_trained": 1.108846136 |
| }, |
| { |
| "epoch": 0.6405219487979575, |
| "grad_norm": 10.894235610961914, |
| "loss": 4.1212, |
| "lr": 0.0007542657342657343, |
| "step": 2258, |
| "tokens_trained": 1.10982972 |
| }, |
| { |
| "epoch": 0.6410892844479115, |
| "grad_norm": 7.488401889801025, |
| "loss": 4.1268, |
| "lr": 0.000753986013986014, |
| "step": 2260, |
| "tokens_trained": 1.110815128 |
| }, |
| { |
| "epoch": 0.6416566200978654, |
| "grad_norm": 10.087981224060059, |
| "loss": 4.0819, |
| "lr": 0.0007537062937062937, |
| "step": 2262, |
| "tokens_trained": 1.111796896 |
| }, |
| { |
| "epoch": 0.6422239557478193, |
| "grad_norm": 8.851993560791016, |
| "loss": 4.0903, |
| "lr": 0.0007534265734265734, |
| "step": 2264, |
| "tokens_trained": 1.112779032 |
| }, |
| { |
| "epoch": 0.6427912913977732, |
| "grad_norm": 7.973280429840088, |
| "loss": 4.1251, |
| "lr": 0.0007531468531468532, |
| "step": 2266, |
| "tokens_trained": 1.11376248 |
| }, |
| { |
| "epoch": 0.6433586270477271, |
| "grad_norm": 10.600922584533691, |
| "loss": 4.1062, |
| "lr": 0.0007528671328671329, |
| "step": 2268, |
| "tokens_trained": 1.11474752 |
| }, |
| { |
| "epoch": 0.643925962697681, |
| "grad_norm": 6.029149532318115, |
| "loss": 4.1174, |
| "lr": 0.0007525874125874126, |
| "step": 2270, |
| "tokens_trained": 1.115730304 |
| }, |
| { |
| "epoch": 0.6444932983476349, |
| "grad_norm": 5.804802417755127, |
| "loss": 4.0634, |
| "lr": 0.0007523076923076923, |
| "step": 2272, |
| "tokens_trained": 1.116712712 |
| }, |
| { |
| "epoch": 0.6450606339975888, |
| "grad_norm": 12.601567268371582, |
| "loss": 4.111, |
| "lr": 0.0007520279720279721, |
| "step": 2274, |
| "tokens_trained": 1.117692824 |
| }, |
| { |
| "epoch": 0.6456279696475428, |
| "grad_norm": 6.2783203125, |
| "loss": 4.1375, |
| "lr": 0.0007517482517482518, |
| "step": 2276, |
| "tokens_trained": 1.118681616 |
| }, |
| { |
| "epoch": 0.6461953052974966, |
| "grad_norm": 3.368333339691162, |
| "loss": 4.096, |
| "lr": 0.0007514685314685314, |
| "step": 2278, |
| "tokens_trained": 1.119662896 |
| }, |
| { |
| "epoch": 0.6467626409474505, |
| "grad_norm": 28.135610580444336, |
| "loss": 4.1362, |
| "lr": 0.0007511888111888112, |
| "step": 2280, |
| "tokens_trained": 1.120644592 |
| }, |
| { |
| "epoch": 0.6473299765974044, |
| "grad_norm": 31.932798385620117, |
| "loss": 4.177, |
| "lr": 0.0007509090909090909, |
| "step": 2282, |
| "tokens_trained": 1.1216274 |
| }, |
| { |
| "epoch": 0.6478973122473584, |
| "grad_norm": 18.303653717041016, |
| "loss": 4.2105, |
| "lr": 0.0007506293706293707, |
| "step": 2284, |
| "tokens_trained": 1.122610568 |
| }, |
| { |
| "epoch": 0.6484646478973123, |
| "grad_norm": 24.33900260925293, |
| "loss": 4.1685, |
| "lr": 0.0007503496503496504, |
| "step": 2286, |
| "tokens_trained": 1.1235948 |
| }, |
| { |
| "epoch": 0.6490319835472661, |
| "grad_norm": 14.718119621276855, |
| "loss": 4.1309, |
| "lr": 0.00075006993006993, |
| "step": 2288, |
| "tokens_trained": 1.124576952 |
| }, |
| { |
| "epoch": 0.64959931919722, |
| "grad_norm": 10.44218921661377, |
| "loss": 4.1178, |
| "lr": 0.0007497902097902098, |
| "step": 2290, |
| "tokens_trained": 1.12555812 |
| }, |
| { |
| "epoch": 0.650166654847174, |
| "grad_norm": 12.619060516357422, |
| "loss": 4.088, |
| "lr": 0.0007495104895104895, |
| "step": 2292, |
| "tokens_trained": 1.126542504 |
| }, |
| { |
| "epoch": 0.6507339904971279, |
| "grad_norm": 12.677931785583496, |
| "loss": 4.1146, |
| "lr": 0.0007492307692307693, |
| "step": 2294, |
| "tokens_trained": 1.127527144 |
| }, |
| { |
| "epoch": 0.6513013261470818, |
| "grad_norm": 9.913066864013672, |
| "loss": 4.1376, |
| "lr": 0.0007489510489510489, |
| "step": 2296, |
| "tokens_trained": 1.128511472 |
| }, |
| { |
| "epoch": 0.6518686617970356, |
| "grad_norm": 10.902573585510254, |
| "loss": 4.1184, |
| "lr": 0.0007486713286713287, |
| "step": 2298, |
| "tokens_trained": 1.129493144 |
| }, |
| { |
| "epoch": 0.6524359974469895, |
| "grad_norm": 11.475235939025879, |
| "loss": 4.098, |
| "lr": 0.0007483916083916084, |
| "step": 2300, |
| "tokens_trained": 1.13047816 |
| }, |
| { |
| "epoch": 0.6530033330969435, |
| "grad_norm": 11.541910171508789, |
| "loss": 4.106, |
| "lr": 0.0007481118881118882, |
| "step": 2302, |
| "tokens_trained": 1.131461952 |
| }, |
| { |
| "epoch": 0.6535706687468974, |
| "grad_norm": 8.055131912231445, |
| "loss": 4.0913, |
| "lr": 0.0007478321678321679, |
| "step": 2304, |
| "tokens_trained": 1.132445928 |
| }, |
| { |
| "epoch": 0.6541380043968513, |
| "grad_norm": 11.786042213439941, |
| "loss": 4.14, |
| "lr": 0.0007475524475524475, |
| "step": 2306, |
| "tokens_trained": 1.133430104 |
| }, |
| { |
| "epoch": 0.6547053400468051, |
| "grad_norm": 7.311541557312012, |
| "loss": 4.0989, |
| "lr": 0.0007472727272727273, |
| "step": 2308, |
| "tokens_trained": 1.1344128 |
| }, |
| { |
| "epoch": 0.6552726756967591, |
| "grad_norm": 5.909560680389404, |
| "loss": 4.1226, |
| "lr": 0.000746993006993007, |
| "step": 2310, |
| "tokens_trained": 1.135395456 |
| }, |
| { |
| "epoch": 0.655840011346713, |
| "grad_norm": 15.199941635131836, |
| "loss": 4.1003, |
| "lr": 0.0007467132867132868, |
| "step": 2312, |
| "tokens_trained": 1.136377952 |
| }, |
| { |
| "epoch": 0.6564073469966669, |
| "grad_norm": 11.078165054321289, |
| "loss": 4.1273, |
| "lr": 0.0007464335664335664, |
| "step": 2314, |
| "tokens_trained": 1.137364488 |
| }, |
| { |
| "epoch": 0.6569746826466208, |
| "grad_norm": 14.202346801757812, |
| "loss": 4.074, |
| "lr": 0.0007461538461538462, |
| "step": 2316, |
| "tokens_trained": 1.138348624 |
| }, |
| { |
| "epoch": 0.6575420182965747, |
| "grad_norm": 12.573927879333496, |
| "loss": 4.0749, |
| "lr": 0.0007458741258741259, |
| "step": 2318, |
| "tokens_trained": 1.139332304 |
| }, |
| { |
| "epoch": 0.6581093539465286, |
| "grad_norm": 4.582006454467773, |
| "loss": 4.1204, |
| "lr": 0.0007455944055944056, |
| "step": 2320, |
| "tokens_trained": 1.140317248 |
| }, |
| { |
| "epoch": 0.6586766895964825, |
| "grad_norm": 12.172183990478516, |
| "loss": 4.1045, |
| "lr": 0.0007453146853146854, |
| "step": 2322, |
| "tokens_trained": 1.141300976 |
| }, |
| { |
| "epoch": 0.6592440252464364, |
| "grad_norm": 8.110429763793945, |
| "loss": 4.1081, |
| "lr": 0.000745034965034965, |
| "step": 2324, |
| "tokens_trained": 1.142283576 |
| }, |
| { |
| "epoch": 0.6598113608963904, |
| "grad_norm": 7.653029918670654, |
| "loss": 4.1272, |
| "lr": 0.0007447552447552448, |
| "step": 2326, |
| "tokens_trained": 1.143264144 |
| }, |
| { |
| "epoch": 0.6603786965463442, |
| "grad_norm": 8.91545295715332, |
| "loss": 4.0604, |
| "lr": 0.0007444755244755245, |
| "step": 2328, |
| "tokens_trained": 1.144248336 |
| }, |
| { |
| "epoch": 0.6609460321962981, |
| "grad_norm": 8.173501014709473, |
| "loss": 4.1033, |
| "lr": 0.0007441958041958043, |
| "step": 2330, |
| "tokens_trained": 1.145231936 |
| }, |
| { |
| "epoch": 0.661513367846252, |
| "grad_norm": 6.748053550720215, |
| "loss": 4.1, |
| "lr": 0.0007439160839160839, |
| "step": 2332, |
| "tokens_trained": 1.146214208 |
| }, |
| { |
| "epoch": 0.662080703496206, |
| "grad_norm": 8.997527122497559, |
| "loss": 4.0642, |
| "lr": 0.0007436363636363636, |
| "step": 2334, |
| "tokens_trained": 1.147203592 |
| }, |
| { |
| "epoch": 0.6626480391461599, |
| "grad_norm": 5.39633321762085, |
| "loss": 4.0531, |
| "lr": 0.0007433566433566433, |
| "step": 2336, |
| "tokens_trained": 1.148189176 |
| }, |
| { |
| "epoch": 0.6632153747961137, |
| "grad_norm": 11.717559814453125, |
| "loss": 4.1069, |
| "lr": 0.0007430769230769231, |
| "step": 2338, |
| "tokens_trained": 1.14917232 |
| }, |
| { |
| "epoch": 0.6637827104460676, |
| "grad_norm": 4.895142078399658, |
| "loss": 4.1119, |
| "lr": 0.0007427972027972029, |
| "step": 2340, |
| "tokens_trained": 1.150150104 |
| }, |
| { |
| "epoch": 0.6643500460960216, |
| "grad_norm": 7.677682399749756, |
| "loss": 4.0787, |
| "lr": 0.0007425174825174825, |
| "step": 2342, |
| "tokens_trained": 1.15113228 |
| }, |
| { |
| "epoch": 0.6649173817459755, |
| "grad_norm": 9.910654067993164, |
| "loss": 4.114, |
| "lr": 0.0007422377622377622, |
| "step": 2344, |
| "tokens_trained": 1.152119112 |
| }, |
| { |
| "epoch": 0.6654847173959294, |
| "grad_norm": 7.880978107452393, |
| "loss": 4.1188, |
| "lr": 0.000741958041958042, |
| "step": 2346, |
| "tokens_trained": 1.153100688 |
| }, |
| { |
| "epoch": 0.6660520530458832, |
| "grad_norm": 3.284940242767334, |
| "loss": 4.0736, |
| "lr": 0.0007416783216783217, |
| "step": 2348, |
| "tokens_trained": 1.1540818 |
| }, |
| { |
| "epoch": 0.6666193886958371, |
| "grad_norm": 13.524490356445312, |
| "loss": 4.0621, |
| "lr": 0.0007413986013986014, |
| "step": 2350, |
| "tokens_trained": 1.155065608 |
| }, |
| { |
| "epoch": 0.6671867243457911, |
| "grad_norm": 5.8569135665893555, |
| "loss": 4.0904, |
| "lr": 0.0007411188811188811, |
| "step": 2352, |
| "tokens_trained": 1.156048544 |
| }, |
| { |
| "epoch": 0.667754059995745, |
| "grad_norm": 7.1157450675964355, |
| "loss": 4.0774, |
| "lr": 0.0007408391608391608, |
| "step": 2354, |
| "tokens_trained": 1.157030432 |
| }, |
| { |
| "epoch": 0.6683213956456989, |
| "grad_norm": 7.612982273101807, |
| "loss": 4.0829, |
| "lr": 0.0007405594405594406, |
| "step": 2356, |
| "tokens_trained": 1.158012728 |
| }, |
| { |
| "epoch": 0.6688887312956527, |
| "grad_norm": 8.317691802978516, |
| "loss": 4.1176, |
| "lr": 0.0007402797202797204, |
| "step": 2358, |
| "tokens_trained": 1.158993632 |
| }, |
| { |
| "epoch": 0.6694560669456067, |
| "grad_norm": 5.272528648376465, |
| "loss": 4.0977, |
| "lr": 0.00074, |
| "step": 2360, |
| "tokens_trained": 1.159976328 |
| }, |
| { |
| "epoch": 0.6700234025955606, |
| "grad_norm": 11.313931465148926, |
| "loss": 4.0792, |
| "lr": 0.0007397202797202797, |
| "step": 2362, |
| "tokens_trained": 1.160962072 |
| }, |
| { |
| "epoch": 0.6705907382455145, |
| "grad_norm": 12.588369369506836, |
| "loss": 4.0491, |
| "lr": 0.0007394405594405595, |
| "step": 2364, |
| "tokens_trained": 1.161947664 |
| }, |
| { |
| "epoch": 0.6711580738954684, |
| "grad_norm": 23.921968460083008, |
| "loss": 4.1085, |
| "lr": 0.0007391608391608392, |
| "step": 2366, |
| "tokens_trained": 1.16292872 |
| }, |
| { |
| "epoch": 0.6717254095454223, |
| "grad_norm": 9.100578308105469, |
| "loss": 4.1305, |
| "lr": 0.0007388811188811189, |
| "step": 2368, |
| "tokens_trained": 1.163913888 |
| }, |
| { |
| "epoch": 0.6722927451953762, |
| "grad_norm": 35.22720718383789, |
| "loss": 4.1538, |
| "lr": 0.0007386013986013986, |
| "step": 2370, |
| "tokens_trained": 1.164894912 |
| }, |
| { |
| "epoch": 0.6728600808453301, |
| "grad_norm": 16.7394962310791, |
| "loss": 4.1449, |
| "lr": 0.0007383216783216782, |
| "step": 2372, |
| "tokens_trained": 1.165879832 |
| }, |
| { |
| "epoch": 0.673427416495284, |
| "grad_norm": 11.066312789916992, |
| "loss": 4.1172, |
| "lr": 0.0007380419580419581, |
| "step": 2374, |
| "tokens_trained": 1.166864736 |
| }, |
| { |
| "epoch": 0.6737110843202609, |
| "eval_loss": 1.0303717851638794, |
| "eval_runtime": 20.7454, |
| "step": 2375, |
| "tokens_trained": 1.167358632 |
| }, |
| { |
| "epoch": 0.673994752145238, |
| "grad_norm": 12.827569007873535, |
| "loss": 4.1377, |
| "lr": 0.0007377622377622378, |
| "step": 2376, |
| "tokens_trained": 1.16784964 |
| }, |
| { |
| "epoch": 0.6745620877951918, |
| "grad_norm": 13.321866035461426, |
| "loss": 4.0747, |
| "lr": 0.0007374825174825175, |
| "step": 2378, |
| "tokens_trained": 1.168834992 |
| }, |
| { |
| "epoch": 0.6751294234451457, |
| "grad_norm": 15.812009811401367, |
| "loss": 4.1107, |
| "lr": 0.0007372027972027972, |
| "step": 2380, |
| "tokens_trained": 1.169817608 |
| }, |
| { |
| "epoch": 0.6756967590950996, |
| "grad_norm": 16.37995719909668, |
| "loss": 4.1556, |
| "lr": 0.000736923076923077, |
| "step": 2382, |
| "tokens_trained": 1.170800952 |
| }, |
| { |
| "epoch": 0.6762640947450536, |
| "grad_norm": 3.3421339988708496, |
| "loss": 4.1199, |
| "lr": 0.0007366433566433567, |
| "step": 2384, |
| "tokens_trained": 1.1717818 |
| }, |
| { |
| "epoch": 0.6768314303950075, |
| "grad_norm": 9.120339393615723, |
| "loss": 4.0834, |
| "lr": 0.0007363636363636363, |
| "step": 2386, |
| "tokens_trained": 1.172767384 |
| }, |
| { |
| "epoch": 0.6773987660449613, |
| "grad_norm": 12.614449501037598, |
| "loss": 4.0852, |
| "lr": 0.0007360839160839161, |
| "step": 2388, |
| "tokens_trained": 1.173755008 |
| }, |
| { |
| "epoch": 0.6779661016949152, |
| "grad_norm": 4.983767986297607, |
| "loss": 4.0881, |
| "lr": 0.0007358041958041957, |
| "step": 2390, |
| "tokens_trained": 1.174738528 |
| }, |
| { |
| "epoch": 0.6785334373448692, |
| "grad_norm": 4.194960117340088, |
| "loss": 4.1279, |
| "lr": 0.0007355244755244756, |
| "step": 2392, |
| "tokens_trained": 1.175724848 |
| }, |
| { |
| "epoch": 0.6791007729948231, |
| "grad_norm": 5.257171154022217, |
| "loss": 4.1044, |
| "lr": 0.0007352447552447553, |
| "step": 2394, |
| "tokens_trained": 1.176708808 |
| }, |
| { |
| "epoch": 0.679668108644777, |
| "grad_norm": 10.38420295715332, |
| "loss": 4.124, |
| "lr": 0.000734965034965035, |
| "step": 2396, |
| "tokens_trained": 1.177695552 |
| }, |
| { |
| "epoch": 0.6802354442947308, |
| "grad_norm": 8.629493713378906, |
| "loss": 4.0992, |
| "lr": 0.0007346853146853147, |
| "step": 2398, |
| "tokens_trained": 1.17868064 |
| }, |
| { |
| "epoch": 0.6808027799446847, |
| "grad_norm": 9.099041938781738, |
| "loss": 4.1047, |
| "lr": 0.0007344055944055944, |
| "step": 2400, |
| "tokens_trained": 1.179664536 |
| }, |
| { |
| "epoch": 0.6813701155946387, |
| "grad_norm": 11.343080520629883, |
| "loss": 4.1027, |
| "lr": 0.0007341258741258742, |
| "step": 2402, |
| "tokens_trained": 1.180644264 |
| }, |
| { |
| "epoch": 0.6819374512445926, |
| "grad_norm": 5.834907054901123, |
| "loss": 4.098, |
| "lr": 0.0007338461538461538, |
| "step": 2404, |
| "tokens_trained": 1.181629672 |
| }, |
| { |
| "epoch": 0.6825047868945465, |
| "grad_norm": 4.648270606994629, |
| "loss": 4.0775, |
| "lr": 0.0007335664335664336, |
| "step": 2406, |
| "tokens_trained": 1.182614064 |
| }, |
| { |
| "epoch": 0.6830721225445003, |
| "grad_norm": 6.934843063354492, |
| "loss": 4.1206, |
| "lr": 0.0007332867132867132, |
| "step": 2408, |
| "tokens_trained": 1.183597056 |
| }, |
| { |
| "epoch": 0.6836394581944543, |
| "grad_norm": 9.745563507080078, |
| "loss": 4.0921, |
| "lr": 0.0007330069930069931, |
| "step": 2410, |
| "tokens_trained": 1.184579832 |
| }, |
| { |
| "epoch": 0.6842067938444082, |
| "grad_norm": 7.189306259155273, |
| "loss": 4.095, |
| "lr": 0.0007327272727272728, |
| "step": 2412, |
| "tokens_trained": 1.185567912 |
| }, |
| { |
| "epoch": 0.6847741294943621, |
| "grad_norm": 6.303226947784424, |
| "loss": 4.0462, |
| "lr": 0.0007324475524475524, |
| "step": 2414, |
| "tokens_trained": 1.186550184 |
| }, |
| { |
| "epoch": 0.685341465144316, |
| "grad_norm": 6.373469352722168, |
| "loss": 4.1126, |
| "lr": 0.0007321678321678322, |
| "step": 2416, |
| "tokens_trained": 1.1875374 |
| }, |
| { |
| "epoch": 0.6859088007942699, |
| "grad_norm": 7.8680853843688965, |
| "loss": 4.0954, |
| "lr": 0.0007318881118881119, |
| "step": 2418, |
| "tokens_trained": 1.188519808 |
| }, |
| { |
| "epoch": 0.6864761364442238, |
| "grad_norm": 6.305267810821533, |
| "loss": 4.0951, |
| "lr": 0.0007316083916083917, |
| "step": 2420, |
| "tokens_trained": 1.18950228 |
| }, |
| { |
| "epoch": 0.6870434720941777, |
| "grad_norm": 9.990362167358398, |
| "loss": 4.0902, |
| "lr": 0.0007313286713286713, |
| "step": 2422, |
| "tokens_trained": 1.190483872 |
| }, |
| { |
| "epoch": 0.6876108077441316, |
| "grad_norm": 7.421126365661621, |
| "loss": 4.082, |
| "lr": 0.0007310489510489511, |
| "step": 2424, |
| "tokens_trained": 1.191465424 |
| }, |
| { |
| "epoch": 0.6881781433940856, |
| "grad_norm": 7.08989953994751, |
| "loss": 4.057, |
| "lr": 0.0007307692307692307, |
| "step": 2426, |
| "tokens_trained": 1.192446 |
| }, |
| { |
| "epoch": 0.6887454790440394, |
| "grad_norm": 16.008317947387695, |
| "loss": 4.0857, |
| "lr": 0.0007304895104895105, |
| "step": 2428, |
| "tokens_trained": 1.193428632 |
| }, |
| { |
| "epoch": 0.6893128146939933, |
| "grad_norm": 14.471416473388672, |
| "loss": 4.127, |
| "lr": 0.0007302097902097902, |
| "step": 2430, |
| "tokens_trained": 1.194413624 |
| }, |
| { |
| "epoch": 0.6898801503439472, |
| "grad_norm": 8.250576972961426, |
| "loss": 4.1244, |
| "lr": 0.0007299300699300699, |
| "step": 2432, |
| "tokens_trained": 1.195396768 |
| }, |
| { |
| "epoch": 0.6904474859939012, |
| "grad_norm": 17.120845794677734, |
| "loss": 4.107, |
| "lr": 0.0007296503496503497, |
| "step": 2434, |
| "tokens_trained": 1.196377144 |
| }, |
| { |
| "epoch": 0.6910148216438551, |
| "grad_norm": 24.250490188598633, |
| "loss": 4.1443, |
| "lr": 0.0007293706293706294, |
| "step": 2436, |
| "tokens_trained": 1.197361496 |
| }, |
| { |
| "epoch": 0.6915821572938089, |
| "grad_norm": 9.916406631469727, |
| "loss": 4.1308, |
| "lr": 0.0007290909090909092, |
| "step": 2438, |
| "tokens_trained": 1.198343376 |
| }, |
| { |
| "epoch": 0.6921494929437628, |
| "grad_norm": 29.035507202148438, |
| "loss": 4.1809, |
| "lr": 0.0007288111888111888, |
| "step": 2440, |
| "tokens_trained": 1.19932396 |
| }, |
| { |
| "epoch": 0.6927168285937167, |
| "grad_norm": 26.963102340698242, |
| "loss": 4.1343, |
| "lr": 0.0007285314685314685, |
| "step": 2442, |
| "tokens_trained": 1.200310088 |
| }, |
| { |
| "epoch": 0.6932841642436707, |
| "grad_norm": 9.7550048828125, |
| "loss": 4.0746, |
| "lr": 0.0007282517482517482, |
| "step": 2444, |
| "tokens_trained": 1.201291576 |
| }, |
| { |
| "epoch": 0.6938514998936246, |
| "grad_norm": 18.56088638305664, |
| "loss": 4.1634, |
| "lr": 0.000727972027972028, |
| "step": 2446, |
| "tokens_trained": 1.202271312 |
| }, |
| { |
| "epoch": 0.6944188355435784, |
| "grad_norm": 20.842105865478516, |
| "loss": 4.128, |
| "lr": 0.0007276923076923077, |
| "step": 2448, |
| "tokens_trained": 1.203252912 |
| }, |
| { |
| "epoch": 0.6949861711935323, |
| "grad_norm": 21.38428497314453, |
| "loss": 4.1263, |
| "lr": 0.0007274125874125874, |
| "step": 2450, |
| "tokens_trained": 1.204231328 |
| }, |
| { |
| "epoch": 0.6955535068434863, |
| "grad_norm": 9.129469871520996, |
| "loss": 4.0964, |
| "lr": 0.0007271328671328672, |
| "step": 2452, |
| "tokens_trained": 1.205215552 |
| }, |
| { |
| "epoch": 0.6961208424934402, |
| "grad_norm": 25.37588882446289, |
| "loss": 4.1568, |
| "lr": 0.0007268531468531469, |
| "step": 2454, |
| "tokens_trained": 1.206202536 |
| }, |
| { |
| "epoch": 0.6966881781433941, |
| "grad_norm": 17.409656524658203, |
| "loss": 4.1214, |
| "lr": 0.0007265734265734266, |
| "step": 2456, |
| "tokens_trained": 1.207182664 |
| }, |
| { |
| "epoch": 0.6972555137933479, |
| "grad_norm": 12.378538131713867, |
| "loss": 4.1235, |
| "lr": 0.0007262937062937063, |
| "step": 2458, |
| "tokens_trained": 1.208164408 |
| }, |
| { |
| "epoch": 0.6978228494433019, |
| "grad_norm": 15.208183288574219, |
| "loss": 4.0724, |
| "lr": 0.000726013986013986, |
| "step": 2460, |
| "tokens_trained": 1.209151056 |
| }, |
| { |
| "epoch": 0.6983901850932558, |
| "grad_norm": 15.311476707458496, |
| "loss": 4.1146, |
| "lr": 0.0007257342657342657, |
| "step": 2462, |
| "tokens_trained": 1.210135672 |
| }, |
| { |
| "epoch": 0.6989575207432097, |
| "grad_norm": 8.551816940307617, |
| "loss": 4.0944, |
| "lr": 0.0007254545454545455, |
| "step": 2464, |
| "tokens_trained": 1.211118992 |
| }, |
| { |
| "epoch": 0.6995248563931636, |
| "grad_norm": 5.893448829650879, |
| "loss": 4.0777, |
| "lr": 0.0007251748251748252, |
| "step": 2466, |
| "tokens_trained": 1.212102 |
| }, |
| { |
| "epoch": 0.7000921920431175, |
| "grad_norm": 12.23680591583252, |
| "loss": 4.0998, |
| "lr": 0.0007248951048951049, |
| "step": 2468, |
| "tokens_trained": 1.213078936 |
| }, |
| { |
| "epoch": 0.7006595276930714, |
| "grad_norm": 6.285398006439209, |
| "loss": 4.0691, |
| "lr": 0.0007246153846153846, |
| "step": 2470, |
| "tokens_trained": 1.214058832 |
| }, |
| { |
| "epoch": 0.7012268633430253, |
| "grad_norm": 5.049949645996094, |
| "loss": 4.0849, |
| "lr": 0.0007243356643356644, |
| "step": 2472, |
| "tokens_trained": 1.215045384 |
| }, |
| { |
| "epoch": 0.7017941989929792, |
| "grad_norm": 8.333894729614258, |
| "loss": 4.1072, |
| "lr": 0.0007240559440559441, |
| "step": 2474, |
| "tokens_trained": 1.216029416 |
| }, |
| { |
| "epoch": 0.7023615346429332, |
| "grad_norm": 10.236394882202148, |
| "loss": 4.1144, |
| "lr": 0.0007237762237762238, |
| "step": 2476, |
| "tokens_trained": 1.217012872 |
| }, |
| { |
| "epoch": 0.702928870292887, |
| "grad_norm": 7.674532413482666, |
| "loss": 4.0948, |
| "lr": 0.0007234965034965035, |
| "step": 2478, |
| "tokens_trained": 1.2179988 |
| }, |
| { |
| "epoch": 0.7034962059428409, |
| "grad_norm": 8.445834159851074, |
| "loss": 4.0937, |
| "lr": 0.0007232167832167831, |
| "step": 2480, |
| "tokens_trained": 1.218980608 |
| }, |
| { |
| "epoch": 0.7040635415927948, |
| "grad_norm": 6.923468112945557, |
| "loss": 4.0756, |
| "lr": 0.000722937062937063, |
| "step": 2482, |
| "tokens_trained": 1.219966912 |
| }, |
| { |
| "epoch": 0.7046308772427488, |
| "grad_norm": 5.95997428894043, |
| "loss": 4.0618, |
| "lr": 0.0007226573426573426, |
| "step": 2484, |
| "tokens_trained": 1.220952696 |
| }, |
| { |
| "epoch": 0.7051982128927027, |
| "grad_norm": 3.7207870483398438, |
| "loss": 4.0869, |
| "lr": 0.0007223776223776224, |
| "step": 2486, |
| "tokens_trained": 1.22193476 |
| }, |
| { |
| "epoch": 0.7057655485426565, |
| "grad_norm": 8.434130668640137, |
| "loss": 4.0965, |
| "lr": 0.0007220979020979021, |
| "step": 2488, |
| "tokens_trained": 1.222914616 |
| }, |
| { |
| "epoch": 0.7063328841926104, |
| "grad_norm": 10.180377006530762, |
| "loss": 4.0871, |
| "lr": 0.0007218181818181819, |
| "step": 2490, |
| "tokens_trained": 1.22389764 |
| }, |
| { |
| "epoch": 0.7069002198425643, |
| "grad_norm": 8.211799621582031, |
| "loss": 4.0811, |
| "lr": 0.0007215384615384616, |
| "step": 2492, |
| "tokens_trained": 1.224875448 |
| }, |
| { |
| "epoch": 0.7074675554925183, |
| "grad_norm": 5.268981456756592, |
| "loss": 4.0926, |
| "lr": 0.0007212587412587412, |
| "step": 2494, |
| "tokens_trained": 1.225858112 |
| }, |
| { |
| "epoch": 0.7080348911424722, |
| "grad_norm": 7.387131690979004, |
| "loss": 4.1097, |
| "lr": 0.000720979020979021, |
| "step": 2496, |
| "tokens_trained": 1.226838472 |
| }, |
| { |
| "epoch": 0.708602226792426, |
| "grad_norm": 7.289080619812012, |
| "loss": 4.0566, |
| "lr": 0.0007206993006993006, |
| "step": 2498, |
| "tokens_trained": 1.227821848 |
| }, |
| { |
| "epoch": 0.7091695624423799, |
| "grad_norm": 6.981493949890137, |
| "loss": 4.062, |
| "lr": 0.0007204195804195805, |
| "step": 2500, |
| "tokens_trained": 1.228806208 |
| }, |
| { |
| "epoch": 0.7091695624423799, |
| "eval_loss": 1.0222537517547607, |
| "eval_runtime": 20.7945, |
| "step": 2500, |
| "tokens_trained": 1.228806208 |
| }, |
| { |
| "epoch": 0.7097368980923339, |
| "grad_norm": 6.244803428649902, |
| "loss": 4.1417, |
| "lr": 0.0007201398601398601, |
| "step": 2502, |
| "tokens_trained": 1.229787872 |
| }, |
| { |
| "epoch": 0.7103042337422878, |
| "grad_norm": 4.354197978973389, |
| "loss": 4.0663, |
| "lr": 0.0007198601398601399, |
| "step": 2504, |
| "tokens_trained": 1.23077076 |
| }, |
| { |
| "epoch": 0.7108715693922417, |
| "grad_norm": 4.971379280090332, |
| "loss": 4.0495, |
| "lr": 0.0007195804195804196, |
| "step": 2506, |
| "tokens_trained": 1.231752344 |
| }, |
| { |
| "epoch": 0.7114389050421955, |
| "grad_norm": 5.990703582763672, |
| "loss": 4.0837, |
| "lr": 0.0007193006993006994, |
| "step": 2508, |
| "tokens_trained": 1.232733864 |
| }, |
| { |
| "epoch": 0.7120062406921495, |
| "grad_norm": 8.498222351074219, |
| "loss": 4.0379, |
| "lr": 0.0007190209790209791, |
| "step": 2510, |
| "tokens_trained": 1.233716744 |
| }, |
| { |
| "epoch": 0.7125735763421034, |
| "grad_norm": 13.36562442779541, |
| "loss": 4.0187, |
| "lr": 0.0007187412587412587, |
| "step": 2512, |
| "tokens_trained": 1.234699872 |
| }, |
| { |
| "epoch": 0.7131409119920573, |
| "grad_norm": 8.733027458190918, |
| "loss": 4.092, |
| "lr": 0.0007184615384615385, |
| "step": 2514, |
| "tokens_trained": 1.235684584 |
| }, |
| { |
| "epoch": 0.7137082476420112, |
| "grad_norm": 4.150378227233887, |
| "loss": 4.1277, |
| "lr": 0.0007181818181818181, |
| "step": 2516, |
| "tokens_trained": 1.236669584 |
| }, |
| { |
| "epoch": 0.714275583291965, |
| "grad_norm": 5.051011085510254, |
| "loss": 4.0942, |
| "lr": 0.000717902097902098, |
| "step": 2518, |
| "tokens_trained": 1.237654456 |
| }, |
| { |
| "epoch": 0.714842918941919, |
| "grad_norm": 19.51820945739746, |
| "loss": 4.0784, |
| "lr": 0.0007176223776223776, |
| "step": 2520, |
| "tokens_trained": 1.238634888 |
| }, |
| { |
| "epoch": 0.7154102545918729, |
| "grad_norm": 12.287970542907715, |
| "loss": 4.1096, |
| "lr": 0.0007173426573426573, |
| "step": 2522, |
| "tokens_trained": 1.239617096 |
| }, |
| { |
| "epoch": 0.7159775902418268, |
| "grad_norm": 7.280889511108398, |
| "loss": 4.1173, |
| "lr": 0.0007170629370629371, |
| "step": 2524, |
| "tokens_trained": 1.240599456 |
| }, |
| { |
| "epoch": 0.7165449258917808, |
| "grad_norm": 7.321331024169922, |
| "loss": 4.1011, |
| "lr": 0.0007167832167832168, |
| "step": 2526, |
| "tokens_trained": 1.2415852 |
| }, |
| { |
| "epoch": 0.7171122615417346, |
| "grad_norm": 12.695849418640137, |
| "loss": 4.0652, |
| "lr": 0.0007165034965034966, |
| "step": 2528, |
| "tokens_trained": 1.242566296 |
| }, |
| { |
| "epoch": 0.7176795971916885, |
| "grad_norm": 10.30766487121582, |
| "loss": 4.0683, |
| "lr": 0.0007162237762237762, |
| "step": 2530, |
| "tokens_trained": 1.24354928 |
| }, |
| { |
| "epoch": 0.7182469328416424, |
| "grad_norm": 6.451354503631592, |
| "loss": 4.0712, |
| "lr": 0.000715944055944056, |
| "step": 2532, |
| "tokens_trained": 1.244534464 |
| }, |
| { |
| "epoch": 0.7188142684915964, |
| "grad_norm": 13.049304962158203, |
| "loss": 4.0662, |
| "lr": 0.0007156643356643356, |
| "step": 2534, |
| "tokens_trained": 1.245514976 |
| }, |
| { |
| "epoch": 0.7193816041415503, |
| "grad_norm": 6.242895603179932, |
| "loss": 4.089, |
| "lr": 0.0007153846153846155, |
| "step": 2536, |
| "tokens_trained": 1.246499648 |
| }, |
| { |
| "epoch": 0.7199489397915041, |
| "grad_norm": 9.09418773651123, |
| "loss": 4.0727, |
| "lr": 0.0007151048951048951, |
| "step": 2538, |
| "tokens_trained": 1.247482424 |
| }, |
| { |
| "epoch": 0.720516275441458, |
| "grad_norm": 5.704024791717529, |
| "loss": 4.0973, |
| "lr": 0.0007148251748251748, |
| "step": 2540, |
| "tokens_trained": 1.248465776 |
| }, |
| { |
| "epoch": 0.721083611091412, |
| "grad_norm": 1.818793535232544, |
| "loss": 4.0928, |
| "lr": 0.0007145454545454546, |
| "step": 2542, |
| "tokens_trained": 1.249446792 |
| }, |
| { |
| "epoch": 0.7216509467413659, |
| "grad_norm": 8.157804489135742, |
| "loss": 4.1082, |
| "lr": 0.0007142657342657343, |
| "step": 2544, |
| "tokens_trained": 1.25042832 |
| }, |
| { |
| "epoch": 0.7222182823913198, |
| "grad_norm": 12.176240921020508, |
| "loss": 4.0472, |
| "lr": 0.0007139860139860141, |
| "step": 2546, |
| "tokens_trained": 1.251411112 |
| }, |
| { |
| "epoch": 0.7227856180412736, |
| "grad_norm": 9.750322341918945, |
| "loss": 4.0892, |
| "lr": 0.0007137062937062937, |
| "step": 2548, |
| "tokens_trained": 1.25239148 |
| }, |
| { |
| "epoch": 0.7233529536912275, |
| "grad_norm": 7.636045455932617, |
| "loss": 4.0939, |
| "lr": 0.0007134265734265734, |
| "step": 2550, |
| "tokens_trained": 1.253374936 |
| }, |
| { |
| "epoch": 0.7239202893411815, |
| "grad_norm": 9.795125007629395, |
| "loss": 4.0542, |
| "lr": 0.0007131468531468531, |
| "step": 2552, |
| "tokens_trained": 1.254359048 |
| }, |
| { |
| "epoch": 0.7244876249911354, |
| "grad_norm": 7.851208686828613, |
| "loss": 4.0546, |
| "lr": 0.0007128671328671329, |
| "step": 2554, |
| "tokens_trained": 1.255343552 |
| }, |
| { |
| "epoch": 0.7250549606410893, |
| "grad_norm": 7.749396800994873, |
| "loss": 4.0834, |
| "lr": 0.0007125874125874126, |
| "step": 2556, |
| "tokens_trained": 1.256332976 |
| }, |
| { |
| "epoch": 0.7256222962910431, |
| "grad_norm": 7.826572418212891, |
| "loss": 4.0914, |
| "lr": 0.0007123076923076923, |
| "step": 2558, |
| "tokens_trained": 1.257315376 |
| }, |
| { |
| "epoch": 0.7261896319409971, |
| "grad_norm": 7.173867225646973, |
| "loss": 4.0721, |
| "lr": 0.0007120279720279721, |
| "step": 2560, |
| "tokens_trained": 1.258296944 |
| }, |
| { |
| "epoch": 0.726756967590951, |
| "grad_norm": 7.722167015075684, |
| "loss": 4.092, |
| "lr": 0.0007117482517482518, |
| "step": 2562, |
| "tokens_trained": 1.259278984 |
| }, |
| { |
| "epoch": 0.7273243032409049, |
| "grad_norm": 5.8100690841674805, |
| "loss": 4.0592, |
| "lr": 0.0007114685314685315, |
| "step": 2564, |
| "tokens_trained": 1.260261648 |
| }, |
| { |
| "epoch": 0.7278916388908588, |
| "grad_norm": 6.633793830871582, |
| "loss": 4.0871, |
| "lr": 0.0007111888111888112, |
| "step": 2566, |
| "tokens_trained": 1.261235168 |
| }, |
| { |
| "epoch": 0.7284589745408127, |
| "grad_norm": 9.645057678222656, |
| "loss": 4.0707, |
| "lr": 0.0007109090909090909, |
| "step": 2568, |
| "tokens_trained": 1.26221864 |
| }, |
| { |
| "epoch": 0.7290263101907666, |
| "grad_norm": 8.770727157592773, |
| "loss": 4.0757, |
| "lr": 0.0007106293706293706, |
| "step": 2570, |
| "tokens_trained": 1.263199256 |
| }, |
| { |
| "epoch": 0.7295936458407205, |
| "grad_norm": 6.190083980560303, |
| "loss": 4.0911, |
| "lr": 0.0007103496503496504, |
| "step": 2572, |
| "tokens_trained": 1.264180424 |
| }, |
| { |
| "epoch": 0.7301609814906744, |
| "grad_norm": 11.070337295532227, |
| "loss": 4.0566, |
| "lr": 0.0007100699300699301, |
| "step": 2574, |
| "tokens_trained": 1.265164384 |
| }, |
| { |
| "epoch": 0.7307283171406284, |
| "grad_norm": 8.301725387573242, |
| "loss": 4.0636, |
| "lr": 0.0007097902097902098, |
| "step": 2576, |
| "tokens_trained": 1.266148592 |
| }, |
| { |
| "epoch": 0.7312956527905822, |
| "grad_norm": 5.524992942810059, |
| "loss": 4.0974, |
| "lr": 0.0007095104895104895, |
| "step": 2578, |
| "tokens_trained": 1.26712948 |
| }, |
| { |
| "epoch": 0.7318629884405361, |
| "grad_norm": 11.42268180847168, |
| "loss": 4.0858, |
| "lr": 0.0007092307692307692, |
| "step": 2580, |
| "tokens_trained": 1.268107968 |
| }, |
| { |
| "epoch": 0.73243032409049, |
| "grad_norm": 6.110471725463867, |
| "loss": 4.0563, |
| "lr": 0.000708951048951049, |
| "step": 2582, |
| "tokens_trained": 1.26909272 |
| }, |
| { |
| "epoch": 0.732997659740444, |
| "grad_norm": 4.583469867706299, |
| "loss": 4.0907, |
| "lr": 0.0007086713286713287, |
| "step": 2584, |
| "tokens_trained": 1.270074432 |
| }, |
| { |
| "epoch": 0.7335649953903979, |
| "grad_norm": 4.348790645599365, |
| "loss": 4.0768, |
| "lr": 0.0007083916083916084, |
| "step": 2586, |
| "tokens_trained": 1.271059184 |
| }, |
| { |
| "epoch": 0.7341323310403517, |
| "grad_norm": 9.383113861083984, |
| "loss": 4.0829, |
| "lr": 0.000708111888111888, |
| "step": 2588, |
| "tokens_trained": 1.272044288 |
| }, |
| { |
| "epoch": 0.7346996666903056, |
| "grad_norm": 8.594022750854492, |
| "loss": 4.097, |
| "lr": 0.0007078321678321679, |
| "step": 2590, |
| "tokens_trained": 1.273026808 |
| }, |
| { |
| "epoch": 0.7352670023402595, |
| "grad_norm": 8.971443176269531, |
| "loss": 4.0689, |
| "lr": 0.0007075524475524475, |
| "step": 2592, |
| "tokens_trained": 1.274011272 |
| }, |
| { |
| "epoch": 0.7358343379902135, |
| "grad_norm": 14.21872615814209, |
| "loss": 4.0892, |
| "lr": 0.0007072727272727273, |
| "step": 2594, |
| "tokens_trained": 1.274995728 |
| }, |
| { |
| "epoch": 0.7364016736401674, |
| "grad_norm": 5.579262733459473, |
| "loss": 4.1151, |
| "lr": 0.000706993006993007, |
| "step": 2596, |
| "tokens_trained": 1.27598244 |
| }, |
| { |
| "epoch": 0.7369690092901212, |
| "grad_norm": 7.760303974151611, |
| "loss": 4.0923, |
| "lr": 0.0007067132867132867, |
| "step": 2598, |
| "tokens_trained": 1.276966176 |
| }, |
| { |
| "epoch": 0.7375363449400751, |
| "grad_norm": 8.493928909301758, |
| "loss": 4.1002, |
| "lr": 0.0007064335664335665, |
| "step": 2600, |
| "tokens_trained": 1.277946064 |
| }, |
| { |
| "epoch": 0.7381036805900291, |
| "grad_norm": 7.7460126876831055, |
| "loss": 4.0464, |
| "lr": 0.0007061538461538462, |
| "step": 2602, |
| "tokens_trained": 1.278928016 |
| }, |
| { |
| "epoch": 0.738671016239983, |
| "grad_norm": 14.752384185791016, |
| "loss": 4.0694, |
| "lr": 0.0007058741258741259, |
| "step": 2604, |
| "tokens_trained": 1.27991464 |
| }, |
| { |
| "epoch": 0.7392383518899369, |
| "grad_norm": 4.13566255569458, |
| "loss": 4.0852, |
| "lr": 0.0007055944055944055, |
| "step": 2606, |
| "tokens_trained": 1.280898424 |
| }, |
| { |
| "epoch": 0.7398056875398907, |
| "grad_norm": 9.910110473632812, |
| "loss": 4.0819, |
| "lr": 0.0007053146853146854, |
| "step": 2608, |
| "tokens_trained": 1.281880448 |
| }, |
| { |
| "epoch": 0.7403730231898447, |
| "grad_norm": 8.776302337646484, |
| "loss": 4.0908, |
| "lr": 0.000705034965034965, |
| "step": 2610, |
| "tokens_trained": 1.282866224 |
| }, |
| { |
| "epoch": 0.7409403588397986, |
| "grad_norm": 7.437447547912598, |
| "loss": 4.0914, |
| "lr": 0.0007047552447552448, |
| "step": 2612, |
| "tokens_trained": 1.283846848 |
| }, |
| { |
| "epoch": 0.7415076944897525, |
| "grad_norm": 5.371145248413086, |
| "loss": 4.0601, |
| "lr": 0.0007044755244755245, |
| "step": 2614, |
| "tokens_trained": 1.284828288 |
| }, |
| { |
| "epoch": 0.7420750301397064, |
| "grad_norm": 5.754990100860596, |
| "loss": 4.034, |
| "lr": 0.0007041958041958041, |
| "step": 2616, |
| "tokens_trained": 1.285813632 |
| }, |
| { |
| "epoch": 0.7426423657896603, |
| "grad_norm": 12.21330738067627, |
| "loss": 4.0893, |
| "lr": 0.000703916083916084, |
| "step": 2618, |
| "tokens_trained": 1.286796048 |
| }, |
| { |
| "epoch": 0.7432097014396142, |
| "grad_norm": 6.313106060028076, |
| "loss": 4.1348, |
| "lr": 0.0007036363636363636, |
| "step": 2620, |
| "tokens_trained": 1.287779984 |
| }, |
| { |
| "epoch": 0.7437770370895681, |
| "grad_norm": 3.671832323074341, |
| "loss": 4.0892, |
| "lr": 0.0007033566433566434, |
| "step": 2622, |
| "tokens_trained": 1.288763704 |
| }, |
| { |
| "epoch": 0.744344372739522, |
| "grad_norm": 7.610039710998535, |
| "loss": 4.0544, |
| "lr": 0.000703076923076923, |
| "step": 2624, |
| "tokens_trained": 1.289748608 |
| }, |
| { |
| "epoch": 0.7446280405644989, |
| "eval_loss": 1.0216281414031982, |
| "eval_runtime": 21.3239, |
| "step": 2625, |
| "tokens_trained": 1.290237248 |
| }, |
| { |
| "epoch": 0.744911708389476, |
| "grad_norm": 10.805936813354492, |
| "loss": 4.0702, |
| "lr": 0.0007027972027972029, |
| "step": 2626, |
| "tokens_trained": 1.290726104 |
| }, |
| { |
| "epoch": 0.7454790440394298, |
| "grad_norm": 8.497400283813477, |
| "loss": 4.056, |
| "lr": 0.0007025174825174825, |
| "step": 2628, |
| "tokens_trained": 1.291710888 |
| }, |
| { |
| "epoch": 0.7460463796893837, |
| "grad_norm": 7.71652364730835, |
| "loss": 4.0428, |
| "lr": 0.0007022377622377623, |
| "step": 2630, |
| "tokens_trained": 1.2926998 |
| }, |
| { |
| "epoch": 0.7466137153393376, |
| "grad_norm": 11.314064979553223, |
| "loss": 4.0442, |
| "lr": 0.000701958041958042, |
| "step": 2632, |
| "tokens_trained": 1.293681648 |
| }, |
| { |
| "epoch": 0.7471810509892916, |
| "grad_norm": 8.498956680297852, |
| "loss": 4.0806, |
| "lr": 0.0007016783216783216, |
| "step": 2634, |
| "tokens_trained": 1.29466332 |
| }, |
| { |
| "epoch": 0.7477483866392455, |
| "grad_norm": 8.315062522888184, |
| "loss": 4.0496, |
| "lr": 0.0007013986013986015, |
| "step": 2636, |
| "tokens_trained": 1.29565108 |
| }, |
| { |
| "epoch": 0.7483157222891993, |
| "grad_norm": 7.541136264801025, |
| "loss": 4.0901, |
| "lr": 0.0007011188811188811, |
| "step": 2638, |
| "tokens_trained": 1.296633192 |
| }, |
| { |
| "epoch": 0.7488830579391532, |
| "grad_norm": 5.977221965789795, |
| "loss": 4.0612, |
| "lr": 0.0007008391608391609, |
| "step": 2640, |
| "tokens_trained": 1.297621272 |
| }, |
| { |
| "epoch": 0.7494503935891071, |
| "grad_norm": 5.02126932144165, |
| "loss": 4.0944, |
| "lr": 0.0007005594405594405, |
| "step": 2642, |
| "tokens_trained": 1.298601744 |
| }, |
| { |
| "epoch": 0.7500177292390611, |
| "grad_norm": 6.345284938812256, |
| "loss": 4.0578, |
| "lr": 0.0007002797202797204, |
| "step": 2644, |
| "tokens_trained": 1.299583072 |
| }, |
| { |
| "epoch": 0.750585064889015, |
| "grad_norm": 7.036267280578613, |
| "loss": 4.0472, |
| "lr": 0.0007, |
| "step": 2646, |
| "tokens_trained": 1.300567448 |
| }, |
| { |
| "epoch": 0.7511524005389689, |
| "grad_norm": 2.7125253677368164, |
| "loss": 4.0534, |
| "lr": 0.0006997202797202797, |
| "step": 2648, |
| "tokens_trained": 1.301554096 |
| }, |
| { |
| "epoch": 0.7517197361889227, |
| "grad_norm": 3.862492322921753, |
| "loss": 4.0696, |
| "lr": 0.0006994405594405595, |
| "step": 2650, |
| "tokens_trained": 1.302540112 |
| }, |
| { |
| "epoch": 0.7522870718388767, |
| "grad_norm": 2.0384063720703125, |
| "loss": 4.0662, |
| "lr": 0.0006991608391608391, |
| "step": 2652, |
| "tokens_trained": 1.30352596 |
| }, |
| { |
| "epoch": 0.7528544074888306, |
| "grad_norm": 5.195199966430664, |
| "loss": 4.0819, |
| "lr": 0.000698881118881119, |
| "step": 2654, |
| "tokens_trained": 1.30450616 |
| }, |
| { |
| "epoch": 0.7534217431387845, |
| "grad_norm": 14.55208969116211, |
| "loss": 4.0757, |
| "lr": 0.0006986013986013986, |
| "step": 2656, |
| "tokens_trained": 1.305488752 |
| }, |
| { |
| "epoch": 0.7539890787887384, |
| "grad_norm": 10.982531547546387, |
| "loss": 4.0474, |
| "lr": 0.0006983216783216784, |
| "step": 2658, |
| "tokens_trained": 1.306474856 |
| }, |
| { |
| "epoch": 0.7545564144386923, |
| "grad_norm": 7.926928997039795, |
| "loss": 4.0497, |
| "lr": 0.000698041958041958, |
| "step": 2660, |
| "tokens_trained": 1.307456136 |
| }, |
| { |
| "epoch": 0.7551237500886462, |
| "grad_norm": 5.156681537628174, |
| "loss": 4.098, |
| "lr": 0.0006977622377622378, |
| "step": 2662, |
| "tokens_trained": 1.308442664 |
| }, |
| { |
| "epoch": 0.7556910857386001, |
| "grad_norm": 8.156705856323242, |
| "loss": 4.0828, |
| "lr": 0.0006974825174825175, |
| "step": 2664, |
| "tokens_trained": 1.309422976 |
| }, |
| { |
| "epoch": 0.756258421388554, |
| "grad_norm": 8.489871978759766, |
| "loss": 4.0668, |
| "lr": 0.0006972027972027972, |
| "step": 2666, |
| "tokens_trained": 1.310406152 |
| }, |
| { |
| "epoch": 0.756825757038508, |
| "grad_norm": 13.065528869628906, |
| "loss": 4.0915, |
| "lr": 0.000696923076923077, |
| "step": 2668, |
| "tokens_trained": 1.311392576 |
| }, |
| { |
| "epoch": 0.7573930926884618, |
| "grad_norm": 7.475847244262695, |
| "loss": 4.0308, |
| "lr": 0.0006966433566433566, |
| "step": 2670, |
| "tokens_trained": 1.312378776 |
| }, |
| { |
| "epoch": 0.7579604283384157, |
| "grad_norm": 7.049544334411621, |
| "loss": 4.0662, |
| "lr": 0.0006963636363636365, |
| "step": 2672, |
| "tokens_trained": 1.313358848 |
| }, |
| { |
| "epoch": 0.7585277639883696, |
| "grad_norm": 5.037269115447998, |
| "loss": 4.1016, |
| "lr": 0.0006960839160839161, |
| "step": 2674, |
| "tokens_trained": 1.3143412 |
| }, |
| { |
| "epoch": 0.7590950996383236, |
| "grad_norm": 10.421965599060059, |
| "loss": 4.0655, |
| "lr": 0.0006958041958041958, |
| "step": 2676, |
| "tokens_trained": 1.315322968 |
| }, |
| { |
| "epoch": 0.7596624352882775, |
| "grad_norm": 8.08486557006836, |
| "loss": 4.0933, |
| "lr": 0.0006955244755244755, |
| "step": 2678, |
| "tokens_trained": 1.316306592 |
| }, |
| { |
| "epoch": 0.7602297709382313, |
| "grad_norm": 10.121665954589844, |
| "loss": 4.0673, |
| "lr": 0.0006952447552447553, |
| "step": 2680, |
| "tokens_trained": 1.317292536 |
| }, |
| { |
| "epoch": 0.7607971065881852, |
| "grad_norm": 4.840561389923096, |
| "loss": 4.089, |
| "lr": 0.000694965034965035, |
| "step": 2682, |
| "tokens_trained": 1.318278512 |
| }, |
| { |
| "epoch": 0.7613644422381391, |
| "grad_norm": 5.03504753112793, |
| "loss": 4.0696, |
| "lr": 0.0006946853146853147, |
| "step": 2684, |
| "tokens_trained": 1.319263032 |
| }, |
| { |
| "epoch": 0.7619317778880931, |
| "grad_norm": 12.180596351623535, |
| "loss": 4.1166, |
| "lr": 0.0006944055944055943, |
| "step": 2686, |
| "tokens_trained": 1.320252752 |
| }, |
| { |
| "epoch": 0.762499113538047, |
| "grad_norm": 8.842597007751465, |
| "loss": 4.0946, |
| "lr": 0.0006941258741258741, |
| "step": 2688, |
| "tokens_trained": 1.321239648 |
| }, |
| { |
| "epoch": 0.7630664491880008, |
| "grad_norm": 4.742710113525391, |
| "loss": 4.0894, |
| "lr": 0.0006938461538461539, |
| "step": 2690, |
| "tokens_trained": 1.322224872 |
| }, |
| { |
| "epoch": 0.7636337848379547, |
| "grad_norm": 2.7827649116516113, |
| "loss": 4.0453, |
| "lr": 0.0006935664335664336, |
| "step": 2692, |
| "tokens_trained": 1.323211432 |
| }, |
| { |
| "epoch": 0.7642011204879087, |
| "grad_norm": 8.263550758361816, |
| "loss": 4.0034, |
| "lr": 0.0006932867132867133, |
| "step": 2694, |
| "tokens_trained": 1.324190272 |
| }, |
| { |
| "epoch": 0.7647684561378626, |
| "grad_norm": 14.927130699157715, |
| "loss": 4.0243, |
| "lr": 0.000693006993006993, |
| "step": 2696, |
| "tokens_trained": 1.325175184 |
| }, |
| { |
| "epoch": 0.7653357917878165, |
| "grad_norm": 9.046390533447266, |
| "loss": 4.0646, |
| "lr": 0.0006927272727272728, |
| "step": 2698, |
| "tokens_trained": 1.326156856 |
| }, |
| { |
| "epoch": 0.7659031274377703, |
| "grad_norm": 7.640266418457031, |
| "loss": 4.0581, |
| "lr": 0.0006924475524475524, |
| "step": 2700, |
| "tokens_trained": 1.327134224 |
| }, |
| { |
| "epoch": 0.7664704630877243, |
| "grad_norm": 11.179667472839355, |
| "loss": 4.0286, |
| "lr": 0.0006921678321678322, |
| "step": 2702, |
| "tokens_trained": 1.328119376 |
| }, |
| { |
| "epoch": 0.7670377987376782, |
| "grad_norm": 13.961971282958984, |
| "loss": 4.072, |
| "lr": 0.0006918881118881118, |
| "step": 2704, |
| "tokens_trained": 1.329097248 |
| }, |
| { |
| "epoch": 0.7676051343876321, |
| "grad_norm": 5.873361110687256, |
| "loss": 4.1069, |
| "lr": 0.0006916083916083916, |
| "step": 2706, |
| "tokens_trained": 1.330079272 |
| }, |
| { |
| "epoch": 0.768172470037586, |
| "grad_norm": 5.7134623527526855, |
| "loss": 4.0483, |
| "lr": 0.0006913286713286714, |
| "step": 2708, |
| "tokens_trained": 1.331062968 |
| }, |
| { |
| "epoch": 0.7687398056875399, |
| "grad_norm": 8.088322639465332, |
| "loss": 4.0806, |
| "lr": 0.0006910489510489511, |
| "step": 2710, |
| "tokens_trained": 1.3320508 |
| }, |
| { |
| "epoch": 0.7693071413374938, |
| "grad_norm": 12.358318328857422, |
| "loss": 4.0281, |
| "lr": 0.0006907692307692308, |
| "step": 2712, |
| "tokens_trained": 1.333034392 |
| }, |
| { |
| "epoch": 0.7698744769874477, |
| "grad_norm": 6.448056221008301, |
| "loss": 4.0449, |
| "lr": 0.0006904895104895104, |
| "step": 2714, |
| "tokens_trained": 1.334018424 |
| }, |
| { |
| "epoch": 0.7704418126374016, |
| "grad_norm": 10.305964469909668, |
| "loss": 4.0611, |
| "lr": 0.0006902097902097903, |
| "step": 2716, |
| "tokens_trained": 1.33500044 |
| }, |
| { |
| "epoch": 0.7710091482873556, |
| "grad_norm": 8.82204532623291, |
| "loss": 4.0697, |
| "lr": 0.0006899300699300699, |
| "step": 2718, |
| "tokens_trained": 1.335985304 |
| }, |
| { |
| "epoch": 0.7715764839373094, |
| "grad_norm": 11.34217643737793, |
| "loss": 4.0471, |
| "lr": 0.0006896503496503497, |
| "step": 2720, |
| "tokens_trained": 1.336971752 |
| }, |
| { |
| "epoch": 0.7721438195872633, |
| "grad_norm": 9.843841552734375, |
| "loss": 4.1015, |
| "lr": 0.0006893706293706293, |
| "step": 2722, |
| "tokens_trained": 1.337955296 |
| }, |
| { |
| "epoch": 0.7727111552372172, |
| "grad_norm": 8.029809951782227, |
| "loss": 4.0432, |
| "lr": 0.0006890909090909091, |
| "step": 2724, |
| "tokens_trained": 1.338936912 |
| }, |
| { |
| "epoch": 0.7732784908871712, |
| "grad_norm": 8.858033180236816, |
| "loss": 4.0841, |
| "lr": 0.0006888111888111889, |
| "step": 2726, |
| "tokens_trained": 1.339920296 |
| }, |
| { |
| "epoch": 0.7738458265371251, |
| "grad_norm": 6.917725086212158, |
| "loss": 4.0701, |
| "lr": 0.0006885314685314685, |
| "step": 2728, |
| "tokens_trained": 1.340910088 |
| }, |
| { |
| "epoch": 0.7744131621870789, |
| "grad_norm": 9.695552825927734, |
| "loss": 4.0818, |
| "lr": 0.0006882517482517483, |
| "step": 2730, |
| "tokens_trained": 1.341895264 |
| }, |
| { |
| "epoch": 0.7749804978370328, |
| "grad_norm": 8.998181343078613, |
| "loss": 4.0734, |
| "lr": 0.0006879720279720279, |
| "step": 2732, |
| "tokens_trained": 1.342875544 |
| }, |
| { |
| "epoch": 0.7755478334869867, |
| "grad_norm": 7.250143527984619, |
| "loss": 4.0511, |
| "lr": 0.0006876923076923078, |
| "step": 2734, |
| "tokens_trained": 1.34386044 |
| }, |
| { |
| "epoch": 0.7761151691369407, |
| "grad_norm": 8.95149040222168, |
| "loss": 4.0671, |
| "lr": 0.0006874125874125874, |
| "step": 2736, |
| "tokens_trained": 1.344844568 |
| }, |
| { |
| "epoch": 0.7766825047868946, |
| "grad_norm": 9.469155311584473, |
| "loss": 4.0549, |
| "lr": 0.0006871328671328672, |
| "step": 2738, |
| "tokens_trained": 1.3458226 |
| }, |
| { |
| "epoch": 0.7772498404368484, |
| "grad_norm": 6.303086757659912, |
| "loss": 4.0808, |
| "lr": 0.0006868531468531468, |
| "step": 2740, |
| "tokens_trained": 1.346809256 |
| }, |
| { |
| "epoch": 0.7778171760868023, |
| "grad_norm": 6.282865524291992, |
| "loss": 4.0425, |
| "lr": 0.0006865734265734265, |
| "step": 2742, |
| "tokens_trained": 1.347790504 |
| }, |
| { |
| "epoch": 0.7783845117367563, |
| "grad_norm": 6.448110103607178, |
| "loss": 4.0512, |
| "lr": 0.0006862937062937064, |
| "step": 2744, |
| "tokens_trained": 1.348770416 |
| }, |
| { |
| "epoch": 0.7789518473867102, |
| "grad_norm": 3.967651128768921, |
| "loss": 4.0189, |
| "lr": 0.000686013986013986, |
| "step": 2746, |
| "tokens_trained": 1.34975288 |
| }, |
| { |
| "epoch": 0.7795191830366641, |
| "grad_norm": 4.253781318664551, |
| "loss": 4.0774, |
| "lr": 0.0006857342657342658, |
| "step": 2748, |
| "tokens_trained": 1.350729672 |
| }, |
| { |
| "epoch": 0.7800865186866179, |
| "grad_norm": 15.237231254577637, |
| "loss": 4.0929, |
| "lr": 0.0006854545454545454, |
| "step": 2750, |
| "tokens_trained": 1.351711184 |
| }, |
| { |
| "epoch": 0.7800865186866179, |
| "eval_loss": 1.0141865015029907, |
| "eval_runtime": 20.7754, |
| "step": 2750, |
| "tokens_trained": 1.351711184 |
| }, |
| { |
| "epoch": 0.7806538543365719, |
| "grad_norm": 14.367753028869629, |
| "loss": 4.0422, |
| "lr": 0.0006851748251748253, |
| "step": 2752, |
| "tokens_trained": 1.352694296 |
| }, |
| { |
| "epoch": 0.7812211899865258, |
| "grad_norm": 4.344571590423584, |
| "loss": 4.018, |
| "lr": 0.0006848951048951049, |
| "step": 2754, |
| "tokens_trained": 1.353678976 |
| }, |
| { |
| "epoch": 0.7817885256364797, |
| "grad_norm": 4.031637191772461, |
| "loss": 4.0568, |
| "lr": 0.0006846153846153846, |
| "step": 2756, |
| "tokens_trained": 1.354661624 |
| }, |
| { |
| "epoch": 0.7823558612864336, |
| "grad_norm": 11.08716106414795, |
| "loss": 4.0717, |
| "lr": 0.0006843356643356643, |
| "step": 2758, |
| "tokens_trained": 1.355644416 |
| }, |
| { |
| "epoch": 0.7829231969363875, |
| "grad_norm": 10.119296073913574, |
| "loss": 4.0726, |
| "lr": 0.000684055944055944, |
| "step": 2760, |
| "tokens_trained": 1.356625632 |
| }, |
| { |
| "epoch": 0.7834905325863414, |
| "grad_norm": 14.678930282592773, |
| "loss": 4.065, |
| "lr": 0.0006837762237762239, |
| "step": 2762, |
| "tokens_trained": 1.357605968 |
| }, |
| { |
| "epoch": 0.7840578682362953, |
| "grad_norm": 2.6932129859924316, |
| "loss": 4.0831, |
| "lr": 0.0006834965034965035, |
| "step": 2764, |
| "tokens_trained": 1.358590808 |
| }, |
| { |
| "epoch": 0.7846252038862492, |
| "grad_norm": 22.138845443725586, |
| "loss": 4.1011, |
| "lr": 0.0006832167832167833, |
| "step": 2766, |
| "tokens_trained": 1.359570928 |
| }, |
| { |
| "epoch": 0.7851925395362032, |
| "grad_norm": 17.627702713012695, |
| "loss": 4.1441, |
| "lr": 0.0006829370629370629, |
| "step": 2768, |
| "tokens_trained": 1.36055716 |
| }, |
| { |
| "epoch": 0.785759875186157, |
| "grad_norm": 9.9471435546875, |
| "loss": 4.122, |
| "lr": 0.0006826573426573427, |
| "step": 2770, |
| "tokens_trained": 1.361539352 |
| }, |
| { |
| "epoch": 0.7863272108361109, |
| "grad_norm": 11.452835083007812, |
| "loss": 4.0928, |
| "lr": 0.0006823776223776224, |
| "step": 2772, |
| "tokens_trained": 1.362519 |
| }, |
| { |
| "epoch": 0.7868945464860648, |
| "grad_norm": 15.566934585571289, |
| "loss": 4.0816, |
| "lr": 0.0006820979020979021, |
| "step": 2774, |
| "tokens_trained": 1.363505808 |
| }, |
| { |
| "epoch": 0.7874618821360188, |
| "grad_norm": 8.46238899230957, |
| "loss": 4.0924, |
| "lr": 0.0006818181818181818, |
| "step": 2776, |
| "tokens_trained": 1.364484496 |
| }, |
| { |
| "epoch": 0.7880292177859727, |
| "grad_norm": 4.6673688888549805, |
| "loss": 4.0732, |
| "lr": 0.0006815384615384615, |
| "step": 2778, |
| "tokens_trained": 1.365468696 |
| }, |
| { |
| "epoch": 0.7885965534359265, |
| "grad_norm": 10.422809600830078, |
| "loss": 4.0285, |
| "lr": 0.0006812587412587414, |
| "step": 2780, |
| "tokens_trained": 1.36645104 |
| }, |
| { |
| "epoch": 0.7891638890858804, |
| "grad_norm": 11.707451820373535, |
| "loss": 4.0645, |
| "lr": 0.000680979020979021, |
| "step": 2782, |
| "tokens_trained": 1.367433136 |
| }, |
| { |
| "epoch": 0.7897312247358343, |
| "grad_norm": 6.887526988983154, |
| "loss": 4.0591, |
| "lr": 0.0006806993006993007, |
| "step": 2784, |
| "tokens_trained": 1.368420024 |
| }, |
| { |
| "epoch": 0.7902985603857883, |
| "grad_norm": 7.914979457855225, |
| "loss": 4.0641, |
| "lr": 0.0006804195804195804, |
| "step": 2786, |
| "tokens_trained": 1.369401936 |
| }, |
| { |
| "epoch": 0.7908658960357422, |
| "grad_norm": 7.964488506317139, |
| "loss": 4.0462, |
| "lr": 0.0006801398601398602, |
| "step": 2788, |
| "tokens_trained": 1.370384896 |
| }, |
| { |
| "epoch": 0.791433231685696, |
| "grad_norm": 7.16652774810791, |
| "loss": 4.026, |
| "lr": 0.0006798601398601399, |
| "step": 2790, |
| "tokens_trained": 1.371365304 |
| }, |
| { |
| "epoch": 0.7920005673356499, |
| "grad_norm": 8.604512214660645, |
| "loss": 4.0407, |
| "lr": 0.0006795804195804196, |
| "step": 2792, |
| "tokens_trained": 1.372349584 |
| }, |
| { |
| "epoch": 0.7925679029856039, |
| "grad_norm": 6.616272449493408, |
| "loss": 4.0417, |
| "lr": 0.0006793006993006992, |
| "step": 2794, |
| "tokens_trained": 1.373330584 |
| }, |
| { |
| "epoch": 0.7931352386355578, |
| "grad_norm": 3.8474340438842773, |
| "loss": 4.0322, |
| "lr": 0.000679020979020979, |
| "step": 2796, |
| "tokens_trained": 1.374312888 |
| }, |
| { |
| "epoch": 0.7937025742855117, |
| "grad_norm": 11.628402709960938, |
| "loss": 4.0378, |
| "lr": 0.0006787412587412588, |
| "step": 2798, |
| "tokens_trained": 1.375294704 |
| }, |
| { |
| "epoch": 0.7942699099354655, |
| "grad_norm": 7.480481147766113, |
| "loss": 4.1031, |
| "lr": 0.0006784615384615385, |
| "step": 2800, |
| "tokens_trained": 1.376279072 |
| }, |
| { |
| "epoch": 0.7948372455854195, |
| "grad_norm": 6.449431896209717, |
| "loss": 4.0397, |
| "lr": 0.0006781818181818182, |
| "step": 2802, |
| "tokens_trained": 1.377265568 |
| }, |
| { |
| "epoch": 0.7954045812353734, |
| "grad_norm": 5.179644584655762, |
| "loss": 4.0826, |
| "lr": 0.0006779020979020979, |
| "step": 2804, |
| "tokens_trained": 1.378250776 |
| }, |
| { |
| "epoch": 0.7959719168853273, |
| "grad_norm": 8.918203353881836, |
| "loss": 4.0358, |
| "lr": 0.0006776223776223777, |
| "step": 2806, |
| "tokens_trained": 1.379235464 |
| }, |
| { |
| "epoch": 0.7965392525352812, |
| "grad_norm": 6.065394878387451, |
| "loss": 4.0754, |
| "lr": 0.0006773426573426574, |
| "step": 2808, |
| "tokens_trained": 1.380215248 |
| }, |
| { |
| "epoch": 0.797106588185235, |
| "grad_norm": 3.9142706394195557, |
| "loss": 4.0274, |
| "lr": 0.0006770629370629371, |
| "step": 2810, |
| "tokens_trained": 1.381197872 |
| }, |
| { |
| "epoch": 0.797673923835189, |
| "grad_norm": 12.86207103729248, |
| "loss": 4.0471, |
| "lr": 0.0006767832167832167, |
| "step": 2812, |
| "tokens_trained": 1.38218364 |
| }, |
| { |
| "epoch": 0.7982412594851429, |
| "grad_norm": 10.052533149719238, |
| "loss": 4.0628, |
| "lr": 0.0006765034965034965, |
| "step": 2814, |
| "tokens_trained": 1.383170176 |
| }, |
| { |
| "epoch": 0.7988085951350968, |
| "grad_norm": 5.910792827606201, |
| "loss": 4.0358, |
| "lr": 0.0006762237762237763, |
| "step": 2816, |
| "tokens_trained": 1.384154592 |
| }, |
| { |
| "epoch": 0.7993759307850508, |
| "grad_norm": 13.312492370605469, |
| "loss": 4.0694, |
| "lr": 0.000675944055944056, |
| "step": 2818, |
| "tokens_trained": 1.385138352 |
| }, |
| { |
| "epoch": 0.7999432664350046, |
| "grad_norm": 12.467507362365723, |
| "loss": 4.0705, |
| "lr": 0.0006756643356643357, |
| "step": 2820, |
| "tokens_trained": 1.386123232 |
| }, |
| { |
| "epoch": 0.8005106020849585, |
| "grad_norm": 4.8490824699401855, |
| "loss": 4.0387, |
| "lr": 0.0006753846153846153, |
| "step": 2822, |
| "tokens_trained": 1.387107008 |
| }, |
| { |
| "epoch": 0.8010779377349124, |
| "grad_norm": 13.596024513244629, |
| "loss": 4.0505, |
| "lr": 0.0006751048951048951, |
| "step": 2824, |
| "tokens_trained": 1.388091632 |
| }, |
| { |
| "epoch": 0.8016452733848664, |
| "grad_norm": 13.633816719055176, |
| "loss": 4.0894, |
| "lr": 0.0006748251748251748, |
| "step": 2826, |
| "tokens_trained": 1.389077456 |
| }, |
| { |
| "epoch": 0.8022126090348203, |
| "grad_norm": 4.448362827301025, |
| "loss": 4.0623, |
| "lr": 0.0006745454545454546, |
| "step": 2828, |
| "tokens_trained": 1.39006124 |
| }, |
| { |
| "epoch": 0.8027799446847741, |
| "grad_norm": 21.12818717956543, |
| "loss": 4.1275, |
| "lr": 0.0006742657342657342, |
| "step": 2830, |
| "tokens_trained": 1.391043016 |
| }, |
| { |
| "epoch": 0.803347280334728, |
| "grad_norm": 10.096168518066406, |
| "loss": 4.0858, |
| "lr": 0.000673986013986014, |
| "step": 2832, |
| "tokens_trained": 1.392026656 |
| }, |
| { |
| "epoch": 0.803914615984682, |
| "grad_norm": 4.614907264709473, |
| "loss": 4.0075, |
| "lr": 0.0006737062937062938, |
| "step": 2834, |
| "tokens_trained": 1.393006784 |
| }, |
| { |
| "epoch": 0.8044819516346359, |
| "grad_norm": 13.106852531433105, |
| "loss": 4.1113, |
| "lr": 0.0006734265734265734, |
| "step": 2836, |
| "tokens_trained": 1.393990424 |
| }, |
| { |
| "epoch": 0.8050492872845898, |
| "grad_norm": 4.287477493286133, |
| "loss": 4.0818, |
| "lr": 0.0006731468531468532, |
| "step": 2838, |
| "tokens_trained": 1.39497072 |
| }, |
| { |
| "epoch": 0.8056166229345436, |
| "grad_norm": 9.295431137084961, |
| "loss": 4.0652, |
| "lr": 0.0006728671328671328, |
| "step": 2840, |
| "tokens_trained": 1.395951488 |
| }, |
| { |
| "epoch": 0.8061839585844975, |
| "grad_norm": 12.001997947692871, |
| "loss": 4.1061, |
| "lr": 0.0006725874125874126, |
| "step": 2842, |
| "tokens_trained": 1.396933744 |
| }, |
| { |
| "epoch": 0.8067512942344515, |
| "grad_norm": 15.18830680847168, |
| "loss": 4.0483, |
| "lr": 0.0006723076923076923, |
| "step": 2844, |
| "tokens_trained": 1.397915696 |
| }, |
| { |
| "epoch": 0.8073186298844054, |
| "grad_norm": 9.936029434204102, |
| "loss": 4.0559, |
| "lr": 0.0006720279720279721, |
| "step": 2846, |
| "tokens_trained": 1.398900048 |
| }, |
| { |
| "epoch": 0.8078859655343593, |
| "grad_norm": 4.903693199157715, |
| "loss": 4.0474, |
| "lr": 0.0006717482517482517, |
| "step": 2848, |
| "tokens_trained": 1.399885336 |
| }, |
| { |
| "epoch": 0.8084533011843131, |
| "grad_norm": 6.753813743591309, |
| "loss": 4.0365, |
| "lr": 0.0006714685314685314, |
| "step": 2850, |
| "tokens_trained": 1.400867432 |
| }, |
| { |
| "epoch": 0.8090206368342671, |
| "grad_norm": 10.53545093536377, |
| "loss": 4.0697, |
| "lr": 0.0006711888111888113, |
| "step": 2852, |
| "tokens_trained": 1.401849552 |
| }, |
| { |
| "epoch": 0.809587972484221, |
| "grad_norm": 7.666012763977051, |
| "loss": 3.9955, |
| "lr": 0.0006709090909090909, |
| "step": 2854, |
| "tokens_trained": 1.402832496 |
| }, |
| { |
| "epoch": 0.8101553081341749, |
| "grad_norm": 11.65257740020752, |
| "loss": 4.0377, |
| "lr": 0.0006706293706293707, |
| "step": 2856, |
| "tokens_trained": 1.403816768 |
| }, |
| { |
| "epoch": 0.8107226437841288, |
| "grad_norm": 10.997775077819824, |
| "loss": 4.0145, |
| "lr": 0.0006703496503496503, |
| "step": 2858, |
| "tokens_trained": 1.404804968 |
| }, |
| { |
| "epoch": 0.8112899794340827, |
| "grad_norm": 3.699673652648926, |
| "loss": 4.1053, |
| "lr": 0.0006700699300699301, |
| "step": 2860, |
| "tokens_trained": 1.40578656 |
| }, |
| { |
| "epoch": 0.8118573150840366, |
| "grad_norm": 17.54732894897461, |
| "loss": 4.121, |
| "lr": 0.0006697902097902098, |
| "step": 2862, |
| "tokens_trained": 1.406773056 |
| }, |
| { |
| "epoch": 0.8124246507339905, |
| "grad_norm": 10.354470252990723, |
| "loss": 4.0353, |
| "lr": 0.0006695104895104895, |
| "step": 2864, |
| "tokens_trained": 1.407756592 |
| }, |
| { |
| "epoch": 0.8129919863839444, |
| "grad_norm": 7.760607719421387, |
| "loss": 4.0529, |
| "lr": 0.0006692307692307692, |
| "step": 2866, |
| "tokens_trained": 1.408742176 |
| }, |
| { |
| "epoch": 0.8135593220338984, |
| "grad_norm": 11.074470520019531, |
| "loss": 4.0223, |
| "lr": 0.0006689510489510489, |
| "step": 2868, |
| "tokens_trained": 1.409727856 |
| }, |
| { |
| "epoch": 0.8141266576838522, |
| "grad_norm": 12.221083641052246, |
| "loss": 4.0228, |
| "lr": 0.0006686713286713288, |
| "step": 2870, |
| "tokens_trained": 1.410712016 |
| }, |
| { |
| "epoch": 0.8146939933338061, |
| "grad_norm": 8.933589935302734, |
| "loss": 4.1234, |
| "lr": 0.0006683916083916084, |
| "step": 2872, |
| "tokens_trained": 1.411694496 |
| }, |
| { |
| "epoch": 0.81526132898376, |
| "grad_norm": 12.326020240783691, |
| "loss": 4.0772, |
| "lr": 0.0006681118881118882, |
| "step": 2874, |
| "tokens_trained": 1.412676992 |
| }, |
| { |
| "epoch": 0.8155449968087369, |
| "eval_loss": 1.015201449394226, |
| "eval_runtime": 20.3991, |
| "step": 2875, |
| "tokens_trained": 1.413169416 |
| }, |
| { |
| "epoch": 0.815828664633714, |
| "grad_norm": 8.320648193359375, |
| "loss": 4.0045, |
| "lr": 0.0006678321678321678, |
| "step": 2876, |
| "tokens_trained": 1.413657912 |
| }, |
| { |
| "epoch": 0.8163960002836679, |
| "grad_norm": 4.708253383636475, |
| "loss": 4.022, |
| "lr": 0.0006675524475524475, |
| "step": 2878, |
| "tokens_trained": 1.414641576 |
| }, |
| { |
| "epoch": 0.8169633359336217, |
| "grad_norm": 13.005586624145508, |
| "loss": 4.0305, |
| "lr": 0.0006672727272727273, |
| "step": 2880, |
| "tokens_trained": 1.415624992 |
| }, |
| { |
| "epoch": 0.8175306715835756, |
| "grad_norm": 8.445854187011719, |
| "loss": 4.0723, |
| "lr": 0.000666993006993007, |
| "step": 2882, |
| "tokens_trained": 1.416605936 |
| }, |
| { |
| "epoch": 0.8180980072335295, |
| "grad_norm": 5.153830528259277, |
| "loss": 4.0766, |
| "lr": 0.0006667132867132867, |
| "step": 2884, |
| "tokens_trained": 1.417593408 |
| }, |
| { |
| "epoch": 0.8186653428834835, |
| "grad_norm": 13.989762306213379, |
| "loss": 4.043, |
| "lr": 0.0006664335664335664, |
| "step": 2886, |
| "tokens_trained": 1.418577984 |
| }, |
| { |
| "epoch": 0.8192326785334374, |
| "grad_norm": 6.2893805503845215, |
| "loss": 4.0576, |
| "lr": 0.0006661538461538463, |
| "step": 2888, |
| "tokens_trained": 1.419557304 |
| }, |
| { |
| "epoch": 0.8198000141833912, |
| "grad_norm": 3.1825716495513916, |
| "loss": 4.0216, |
| "lr": 0.0006658741258741259, |
| "step": 2890, |
| "tokens_trained": 1.420538736 |
| }, |
| { |
| "epoch": 0.8203673498333451, |
| "grad_norm": 13.280265808105469, |
| "loss": 4.0665, |
| "lr": 0.0006655944055944056, |
| "step": 2892, |
| "tokens_trained": 1.421523048 |
| }, |
| { |
| "epoch": 0.8209346854832991, |
| "grad_norm": 8.963871955871582, |
| "loss": 4.0996, |
| "lr": 0.0006653146853146853, |
| "step": 2894, |
| "tokens_trained": 1.422504352 |
| }, |
| { |
| "epoch": 0.821502021133253, |
| "grad_norm": 9.463395118713379, |
| "loss": 4.0638, |
| "lr": 0.000665034965034965, |
| "step": 2896, |
| "tokens_trained": 1.423490256 |
| }, |
| { |
| "epoch": 0.8220693567832069, |
| "grad_norm": 10.848092079162598, |
| "loss": 4.0767, |
| "lr": 0.0006647552447552448, |
| "step": 2898, |
| "tokens_trained": 1.424473728 |
| }, |
| { |
| "epoch": 0.8226366924331607, |
| "grad_norm": 9.271900177001953, |
| "loss": 4.0675, |
| "lr": 0.0006644755244755245, |
| "step": 2900, |
| "tokens_trained": 1.425456216 |
| }, |
| { |
| "epoch": 0.8232040280831147, |
| "grad_norm": 8.910347938537598, |
| "loss": 4.031, |
| "lr": 0.0006641958041958042, |
| "step": 2902, |
| "tokens_trained": 1.426442408 |
| }, |
| { |
| "epoch": 0.8237713637330686, |
| "grad_norm": 6.92717981338501, |
| "loss": 4.1025, |
| "lr": 0.0006639160839160839, |
| "step": 2904, |
| "tokens_trained": 1.42742624 |
| }, |
| { |
| "epoch": 0.8243386993830225, |
| "grad_norm": 6.383159637451172, |
| "loss": 4.0057, |
| "lr": 0.0006636363636363638, |
| "step": 2906, |
| "tokens_trained": 1.428414912 |
| }, |
| { |
| "epoch": 0.8249060350329764, |
| "grad_norm": 5.782074451446533, |
| "loss": 4.0169, |
| "lr": 0.0006633566433566434, |
| "step": 2908, |
| "tokens_trained": 1.42939668 |
| }, |
| { |
| "epoch": 0.8254733706829303, |
| "grad_norm": 10.663660049438477, |
| "loss": 4.0504, |
| "lr": 0.0006630769230769231, |
| "step": 2910, |
| "tokens_trained": 1.430382648 |
| }, |
| { |
| "epoch": 0.8260407063328842, |
| "grad_norm": 11.806394577026367, |
| "loss": 4.065, |
| "lr": 0.0006627972027972028, |
| "step": 2912, |
| "tokens_trained": 1.43136304 |
| }, |
| { |
| "epoch": 0.8266080419828381, |
| "grad_norm": 5.7375617027282715, |
| "loss": 4.0133, |
| "lr": 0.0006625174825174825, |
| "step": 2914, |
| "tokens_trained": 1.432347472 |
| }, |
| { |
| "epoch": 0.827175377632792, |
| "grad_norm": 6.814542293548584, |
| "loss": 4.0656, |
| "lr": 0.0006622377622377623, |
| "step": 2916, |
| "tokens_trained": 1.433329632 |
| }, |
| { |
| "epoch": 0.827742713282746, |
| "grad_norm": 8.265726089477539, |
| "loss": 4.0206, |
| "lr": 0.000661958041958042, |
| "step": 2918, |
| "tokens_trained": 1.434312216 |
| }, |
| { |
| "epoch": 0.8283100489326998, |
| "grad_norm": 6.937063694000244, |
| "loss": 4.0372, |
| "lr": 0.0006616783216783216, |
| "step": 2920, |
| "tokens_trained": 1.435294504 |
| }, |
| { |
| "epoch": 0.8288773845826537, |
| "grad_norm": 6.773707866668701, |
| "loss": 4.0496, |
| "lr": 0.0006613986013986014, |
| "step": 2922, |
| "tokens_trained": 1.436276344 |
| }, |
| { |
| "epoch": 0.8294447202326076, |
| "grad_norm": 8.471631050109863, |
| "loss": 4.0834, |
| "lr": 0.0006611188811188812, |
| "step": 2924, |
| "tokens_trained": 1.43725852 |
| }, |
| { |
| "epoch": 0.8300120558825616, |
| "grad_norm": 10.602453231811523, |
| "loss": 4.0445, |
| "lr": 0.0006608391608391609, |
| "step": 2926, |
| "tokens_trained": 1.438239768 |
| }, |
| { |
| "epoch": 0.8305793915325155, |
| "grad_norm": 8.173192977905273, |
| "loss": 4.0423, |
| "lr": 0.0006605594405594406, |
| "step": 2928, |
| "tokens_trained": 1.43921892 |
| }, |
| { |
| "epoch": 0.8311467271824693, |
| "grad_norm": 9.510146141052246, |
| "loss": 4.0012, |
| "lr": 0.0006602797202797203, |
| "step": 2930, |
| "tokens_trained": 1.440203128 |
| }, |
| { |
| "epoch": 0.8317140628324232, |
| "grad_norm": 4.894539833068848, |
| "loss": 4.0574, |
| "lr": 0.00066, |
| "step": 2932, |
| "tokens_trained": 1.441187856 |
| }, |
| { |
| "epoch": 0.8322813984823771, |
| "grad_norm": 4.4945149421691895, |
| "loss": 4.0107, |
| "lr": 0.0006597202797202797, |
| "step": 2934, |
| "tokens_trained": 1.442164056 |
| }, |
| { |
| "epoch": 0.8328487341323311, |
| "grad_norm": 7.323387145996094, |
| "loss": 4.0779, |
| "lr": 0.0006594405594405595, |
| "step": 2936, |
| "tokens_trained": 1.44314688 |
| }, |
| { |
| "epoch": 0.833416069782285, |
| "grad_norm": 9.858680725097656, |
| "loss": 4.03, |
| "lr": 0.0006591608391608391, |
| "step": 2938, |
| "tokens_trained": 1.444127552 |
| }, |
| { |
| "epoch": 0.8339834054322388, |
| "grad_norm": 8.214831352233887, |
| "loss": 4.0591, |
| "lr": 0.0006588811188811189, |
| "step": 2940, |
| "tokens_trained": 1.445109336 |
| }, |
| { |
| "epoch": 0.8345507410821927, |
| "grad_norm": 6.628262996673584, |
| "loss": 4.0834, |
| "lr": 0.0006586013986013986, |
| "step": 2942, |
| "tokens_trained": 1.4460904 |
| }, |
| { |
| "epoch": 0.8351180767321467, |
| "grad_norm": 11.043391227722168, |
| "loss": 4.0516, |
| "lr": 0.0006583216783216784, |
| "step": 2944, |
| "tokens_trained": 1.447068776 |
| }, |
| { |
| "epoch": 0.8356854123821006, |
| "grad_norm": 8.013843536376953, |
| "loss": 4.0309, |
| "lr": 0.0006580419580419581, |
| "step": 2946, |
| "tokens_trained": 1.448046952 |
| }, |
| { |
| "epoch": 0.8362527480320545, |
| "grad_norm": 4.856717586517334, |
| "loss": 4.0547, |
| "lr": 0.0006577622377622377, |
| "step": 2948, |
| "tokens_trained": 1.449033752 |
| }, |
| { |
| "epoch": 0.8368200836820083, |
| "grad_norm": 4.799930572509766, |
| "loss": 4.0044, |
| "lr": 0.0006574825174825175, |
| "step": 2950, |
| "tokens_trained": 1.450019912 |
| }, |
| { |
| "epoch": 0.8373874193319623, |
| "grad_norm": 8.492339134216309, |
| "loss": 4.0368, |
| "lr": 0.0006572027972027972, |
| "step": 2952, |
| "tokens_trained": 1.451002976 |
| }, |
| { |
| "epoch": 0.8379547549819162, |
| "grad_norm": 7.098823547363281, |
| "loss": 3.9807, |
| "lr": 0.000656923076923077, |
| "step": 2954, |
| "tokens_trained": 1.45198412 |
| }, |
| { |
| "epoch": 0.8385220906318701, |
| "grad_norm": 8.705301284790039, |
| "loss": 4.0749, |
| "lr": 0.0006566433566433566, |
| "step": 2956, |
| "tokens_trained": 1.452963832 |
| }, |
| { |
| "epoch": 0.839089426281824, |
| "grad_norm": 2.8292014598846436, |
| "loss": 4.0241, |
| "lr": 0.0006563636363636364, |
| "step": 2958, |
| "tokens_trained": 1.453947688 |
| }, |
| { |
| "epoch": 0.8396567619317779, |
| "grad_norm": 3.7414586544036865, |
| "loss": 4.0554, |
| "lr": 0.0006560839160839161, |
| "step": 2960, |
| "tokens_trained": 1.45492676 |
| }, |
| { |
| "epoch": 0.8402240975817318, |
| "grad_norm": 11.956228256225586, |
| "loss": 4.0343, |
| "lr": 0.0006558041958041958, |
| "step": 2962, |
| "tokens_trained": 1.455907464 |
| }, |
| { |
| "epoch": 0.8407914332316857, |
| "grad_norm": 11.086222648620605, |
| "loss": 4.0324, |
| "lr": 0.0006555244755244756, |
| "step": 2964, |
| "tokens_trained": 1.456891688 |
| }, |
| { |
| "epoch": 0.8413587688816396, |
| "grad_norm": 8.380780220031738, |
| "loss": 4.0335, |
| "lr": 0.0006552447552447552, |
| "step": 2966, |
| "tokens_trained": 1.457880016 |
| }, |
| { |
| "epoch": 0.8419261045315936, |
| "grad_norm": 8.568910598754883, |
| "loss": 4.0431, |
| "lr": 0.000654965034965035, |
| "step": 2968, |
| "tokens_trained": 1.458866944 |
| }, |
| { |
| "epoch": 0.8424934401815474, |
| "grad_norm": 10.840734481811523, |
| "loss": 4.0275, |
| "lr": 0.0006546853146853147, |
| "step": 2970, |
| "tokens_trained": 1.459849096 |
| }, |
| { |
| "epoch": 0.8430607758315013, |
| "grad_norm": 5.364732265472412, |
| "loss": 4.0464, |
| "lr": 0.0006544055944055945, |
| "step": 2972, |
| "tokens_trained": 1.460833976 |
| }, |
| { |
| "epoch": 0.8436281114814552, |
| "grad_norm": 8.918869018554688, |
| "loss": 4.0501, |
| "lr": 0.0006541258741258741, |
| "step": 2974, |
| "tokens_trained": 1.461811472 |
| }, |
| { |
| "epoch": 0.8441954471314091, |
| "grad_norm": 10.94211483001709, |
| "loss": 4.0284, |
| "lr": 0.0006538461538461538, |
| "step": 2976, |
| "tokens_trained": 1.462798528 |
| }, |
| { |
| "epoch": 0.8447627827813631, |
| "grad_norm": 14.475136756896973, |
| "loss": 4.0597, |
| "lr": 0.0006535664335664336, |
| "step": 2978, |
| "tokens_trained": 1.46378116 |
| }, |
| { |
| "epoch": 0.8453301184313169, |
| "grad_norm": 8.219613075256348, |
| "loss": 4.0499, |
| "lr": 0.0006532867132867133, |
| "step": 2980, |
| "tokens_trained": 1.464758752 |
| }, |
| { |
| "epoch": 0.8458974540812708, |
| "grad_norm": 8.898524284362793, |
| "loss": 4.0472, |
| "lr": 0.0006530069930069931, |
| "step": 2982, |
| "tokens_trained": 1.465737992 |
| }, |
| { |
| "epoch": 0.8464647897312247, |
| "grad_norm": 6.673952579498291, |
| "loss": 3.9971, |
| "lr": 0.0006527272727272727, |
| "step": 2984, |
| "tokens_trained": 1.466724672 |
| }, |
| { |
| "epoch": 0.8470321253811787, |
| "grad_norm": 6.514251708984375, |
| "loss": 4.0245, |
| "lr": 0.0006524475524475524, |
| "step": 2986, |
| "tokens_trained": 1.46770572 |
| }, |
| { |
| "epoch": 0.8475994610311326, |
| "grad_norm": 8.130202293395996, |
| "loss": 4.0332, |
| "lr": 0.0006521678321678322, |
| "step": 2988, |
| "tokens_trained": 1.468690624 |
| }, |
| { |
| "epoch": 0.8481667966810864, |
| "grad_norm": 4.283686637878418, |
| "loss": 4.0551, |
| "lr": 0.0006518881118881119, |
| "step": 2990, |
| "tokens_trained": 1.469674696 |
| }, |
| { |
| "epoch": 0.8487341323310403, |
| "grad_norm": 4.8144426345825195, |
| "loss": 4.0408, |
| "lr": 0.0006516083916083916, |
| "step": 2992, |
| "tokens_trained": 1.470659816 |
| }, |
| { |
| "epoch": 0.8493014679809943, |
| "grad_norm": 11.117393493652344, |
| "loss": 4.0423, |
| "lr": 0.0006513286713286713, |
| "step": 2994, |
| "tokens_trained": 1.47164192 |
| }, |
| { |
| "epoch": 0.8498688036309482, |
| "grad_norm": 8.022162437438965, |
| "loss": 4.064, |
| "lr": 0.0006510489510489511, |
| "step": 2996, |
| "tokens_trained": 1.472624344 |
| }, |
| { |
| "epoch": 0.8504361392809021, |
| "grad_norm": 5.267605304718018, |
| "loss": 3.9804, |
| "lr": 0.0006507692307692308, |
| "step": 2998, |
| "tokens_trained": 1.473606552 |
| }, |
| { |
| "epoch": 0.8510034749308559, |
| "grad_norm": 9.365017890930176, |
| "loss": 4.0223, |
| "lr": 0.0006504895104895106, |
| "step": 3000, |
| "tokens_trained": 1.474586552 |
| }, |
| { |
| "epoch": 0.8510034749308559, |
| "eval_loss": 1.0078805685043335, |
| "eval_runtime": 20.7752, |
| "step": 3000, |
| "tokens_trained": 1.474586552 |
| }, |
| { |
| "epoch": 0.8515708105808099, |
| "grad_norm": 10.311480522155762, |
| "loss": 3.969, |
| "lr": 0.0006502097902097902, |
| "step": 3002, |
| "tokens_trained": 1.475564304 |
| }, |
| { |
| "epoch": 0.8521381462307638, |
| "grad_norm": 5.622078895568848, |
| "loss": 3.9803, |
| "lr": 0.0006499300699300699, |
| "step": 3004, |
| "tokens_trained": 1.476547088 |
| }, |
| { |
| "epoch": 0.8527054818807177, |
| "grad_norm": 6.005502223968506, |
| "loss": 4.0584, |
| "lr": 0.0006496503496503497, |
| "step": 3006, |
| "tokens_trained": 1.477531352 |
| }, |
| { |
| "epoch": 0.8532728175306716, |
| "grad_norm": 5.769370079040527, |
| "loss": 4.0332, |
| "lr": 0.0006493706293706294, |
| "step": 3008, |
| "tokens_trained": 1.478512136 |
| }, |
| { |
| "epoch": 0.8538401531806254, |
| "grad_norm": 4.246579647064209, |
| "loss": 3.9848, |
| "lr": 0.0006490909090909091, |
| "step": 3010, |
| "tokens_trained": 1.47949464 |
| }, |
| { |
| "epoch": 0.8544074888305794, |
| "grad_norm": 3.3972086906433105, |
| "loss": 3.9969, |
| "lr": 0.0006488111888111888, |
| "step": 3012, |
| "tokens_trained": 1.4804812 |
| }, |
| { |
| "epoch": 0.8549748244805333, |
| "grad_norm": 4.793631553649902, |
| "loss": 3.9748, |
| "lr": 0.0006485314685314685, |
| "step": 3014, |
| "tokens_trained": 1.481469176 |
| }, |
| { |
| "epoch": 0.8555421601304872, |
| "grad_norm": 7.709076881408691, |
| "loss": 4.0399, |
| "lr": 0.0006482517482517483, |
| "step": 3016, |
| "tokens_trained": 1.482450232 |
| }, |
| { |
| "epoch": 0.8561094957804412, |
| "grad_norm": 9.06294059753418, |
| "loss": 4.0279, |
| "lr": 0.000647972027972028, |
| "step": 3018, |
| "tokens_trained": 1.48343416 |
| }, |
| { |
| "epoch": 0.856676831430395, |
| "grad_norm": 7.496627330780029, |
| "loss": 4.047, |
| "lr": 0.0006476923076923077, |
| "step": 3020, |
| "tokens_trained": 1.484423072 |
| }, |
| { |
| "epoch": 0.8572441670803489, |
| "grad_norm": 6.635293006896973, |
| "loss": 4.0583, |
| "lr": 0.0006474125874125874, |
| "step": 3022, |
| "tokens_trained": 1.485406296 |
| }, |
| { |
| "epoch": 0.8578115027303028, |
| "grad_norm": 6.3066864013671875, |
| "loss": 3.9902, |
| "lr": 0.0006471328671328672, |
| "step": 3024, |
| "tokens_trained": 1.486391472 |
| }, |
| { |
| "epoch": 0.8583788383802567, |
| "grad_norm": 1.1249172687530518, |
| "loss": 4.0032, |
| "lr": 0.0006468531468531469, |
| "step": 3026, |
| "tokens_trained": 1.487377128 |
| }, |
| { |
| "epoch": 0.8589461740302107, |
| "grad_norm": 2.966470241546631, |
| "loss": 3.9859, |
| "lr": 0.0006465734265734265, |
| "step": 3028, |
| "tokens_trained": 1.488359656 |
| }, |
| { |
| "epoch": 0.8595135096801645, |
| "grad_norm": 6.611581325531006, |
| "loss": 4.0259, |
| "lr": 0.0006462937062937063, |
| "step": 3030, |
| "tokens_trained": 1.489340552 |
| }, |
| { |
| "epoch": 0.8600808453301184, |
| "grad_norm": 7.76756477355957, |
| "loss": 4.0223, |
| "lr": 0.0006460139860139859, |
| "step": 3032, |
| "tokens_trained": 1.49032648 |
| }, |
| { |
| "epoch": 0.8606481809800723, |
| "grad_norm": 10.86517333984375, |
| "loss": 4.0457, |
| "lr": 0.0006457342657342658, |
| "step": 3034, |
| "tokens_trained": 1.491312608 |
| }, |
| { |
| "epoch": 0.8612155166300263, |
| "grad_norm": 4.524630546569824, |
| "loss": 4.0882, |
| "lr": 0.0006454545454545455, |
| "step": 3036, |
| "tokens_trained": 1.49229724 |
| }, |
| { |
| "epoch": 0.8617828522799802, |
| "grad_norm": 10.601529121398926, |
| "loss": 4.0466, |
| "lr": 0.0006451748251748252, |
| "step": 3038, |
| "tokens_trained": 1.49327952 |
| }, |
| { |
| "epoch": 0.862350187929934, |
| "grad_norm": 10.691457748413086, |
| "loss": 4.0239, |
| "lr": 0.0006448951048951049, |
| "step": 3040, |
| "tokens_trained": 1.494263528 |
| }, |
| { |
| "epoch": 0.8629175235798879, |
| "grad_norm": 5.371310710906982, |
| "loss": 4.0864, |
| "lr": 0.0006446153846153846, |
| "step": 3042, |
| "tokens_trained": 1.49524708 |
| }, |
| { |
| "epoch": 0.8634848592298419, |
| "grad_norm": 5.7418999671936035, |
| "loss": 4.0618, |
| "lr": 0.0006443356643356644, |
| "step": 3044, |
| "tokens_trained": 1.496229136 |
| }, |
| { |
| "epoch": 0.8640521948797958, |
| "grad_norm": 7.521689414978027, |
| "loss": 4.0235, |
| "lr": 0.000644055944055944, |
| "step": 3046, |
| "tokens_trained": 1.497212944 |
| }, |
| { |
| "epoch": 0.8646195305297497, |
| "grad_norm": 6.966773509979248, |
| "loss": 4.0187, |
| "lr": 0.0006437762237762238, |
| "step": 3048, |
| "tokens_trained": 1.498198992 |
| }, |
| { |
| "epoch": 0.8651868661797035, |
| "grad_norm": 12.514280319213867, |
| "loss": 4.0306, |
| "lr": 0.0006434965034965034, |
| "step": 3050, |
| "tokens_trained": 1.499181312 |
| }, |
| { |
| "epoch": 0.8657542018296575, |
| "grad_norm": 4.849910736083984, |
| "loss": 4.033, |
| "lr": 0.0006432167832167833, |
| "step": 3052, |
| "tokens_trained": 1.500163288 |
| }, |
| { |
| "epoch": 0.8663215374796114, |
| "grad_norm": 9.553950309753418, |
| "loss": 4.0465, |
| "lr": 0.000642937062937063, |
| "step": 3054, |
| "tokens_trained": 1.501147464 |
| }, |
| { |
| "epoch": 0.8668888731295653, |
| "grad_norm": 8.58786678314209, |
| "loss": 4.0584, |
| "lr": 0.0006426573426573426, |
| "step": 3056, |
| "tokens_trained": 1.50212956 |
| }, |
| { |
| "epoch": 0.8674562087795192, |
| "grad_norm": 11.174147605895996, |
| "loss": 4.0152, |
| "lr": 0.0006423776223776224, |
| "step": 3058, |
| "tokens_trained": 1.503112168 |
| }, |
| { |
| "epoch": 0.868023544429473, |
| "grad_norm": 1.879528522491455, |
| "loss": 3.999, |
| "lr": 0.0006420979020979021, |
| "step": 3060, |
| "tokens_trained": 1.504099584 |
| }, |
| { |
| "epoch": 0.868590880079427, |
| "grad_norm": 19.370494842529297, |
| "loss": 4.1039, |
| "lr": 0.0006418181818181819, |
| "step": 3062, |
| "tokens_trained": 1.50508356 |
| }, |
| { |
| "epoch": 0.8691582157293809, |
| "grad_norm": 10.598268508911133, |
| "loss": 4.0542, |
| "lr": 0.0006415384615384615, |
| "step": 3064, |
| "tokens_trained": 1.506063304 |
| }, |
| { |
| "epoch": 0.8697255513793348, |
| "grad_norm": 8.537477493286133, |
| "loss": 4.0529, |
| "lr": 0.0006412587412587413, |
| "step": 3066, |
| "tokens_trained": 1.507046368 |
| }, |
| { |
| "epoch": 0.8702928870292888, |
| "grad_norm": 8.395747184753418, |
| "loss": 3.9941, |
| "lr": 0.0006409790209790209, |
| "step": 3068, |
| "tokens_trained": 1.508029128 |
| }, |
| { |
| "epoch": 0.8708602226792426, |
| "grad_norm": 5.918806552886963, |
| "loss": 4.0078, |
| "lr": 0.0006406993006993007, |
| "step": 3070, |
| "tokens_trained": 1.5090132 |
| }, |
| { |
| "epoch": 0.8714275583291965, |
| "grad_norm": 3.845099925994873, |
| "loss": 4.0564, |
| "lr": 0.0006404195804195805, |
| "step": 3072, |
| "tokens_trained": 1.509994832 |
| }, |
| { |
| "epoch": 0.8719948939791504, |
| "grad_norm": 3.3807923793792725, |
| "loss": 4.0438, |
| "lr": 0.0006401398601398601, |
| "step": 3074, |
| "tokens_trained": 1.510975552 |
| }, |
| { |
| "epoch": 0.8725622296291043, |
| "grad_norm": 4.468081951141357, |
| "loss": 4.066, |
| "lr": 0.0006398601398601399, |
| "step": 3076, |
| "tokens_trained": 1.511959576 |
| }, |
| { |
| "epoch": 0.8731295652790583, |
| "grad_norm": 1.8455613851547241, |
| "loss": 4.0247, |
| "lr": 0.0006395804195804196, |
| "step": 3078, |
| "tokens_trained": 1.512939112 |
| }, |
| { |
| "epoch": 0.8736969009290121, |
| "grad_norm": 7.184399127960205, |
| "loss": 4.081, |
| "lr": 0.0006393006993006994, |
| "step": 3080, |
| "tokens_trained": 1.513924792 |
| }, |
| { |
| "epoch": 0.874264236578966, |
| "grad_norm": 8.416154861450195, |
| "loss": 4.0372, |
| "lr": 0.000639020979020979, |
| "step": 3082, |
| "tokens_trained": 1.514905096 |
| }, |
| { |
| "epoch": 0.8748315722289199, |
| "grad_norm": 6.620309829711914, |
| "loss": 4.0822, |
| "lr": 0.0006387412587412587, |
| "step": 3084, |
| "tokens_trained": 1.51588724 |
| }, |
| { |
| "epoch": 0.8753989078788739, |
| "grad_norm": 7.424724102020264, |
| "loss": 4.053, |
| "lr": 0.0006384615384615384, |
| "step": 3086, |
| "tokens_trained": 1.516871792 |
| }, |
| { |
| "epoch": 0.8759662435288278, |
| "grad_norm": 7.8764448165893555, |
| "loss": 4.059, |
| "lr": 0.0006381818181818182, |
| "step": 3088, |
| "tokens_trained": 1.517857872 |
| }, |
| { |
| "epoch": 0.8765335791787816, |
| "grad_norm": 7.330927848815918, |
| "loss": 4.0182, |
| "lr": 0.000637902097902098, |
| "step": 3090, |
| "tokens_trained": 1.518840616 |
| }, |
| { |
| "epoch": 0.8771009148287355, |
| "grad_norm": 8.612639427185059, |
| "loss": 4.0181, |
| "lr": 0.0006376223776223776, |
| "step": 3092, |
| "tokens_trained": 1.519826616 |
| }, |
| { |
| "epoch": 0.8776682504786895, |
| "grad_norm": 9.889811515808105, |
| "loss": 4.0434, |
| "lr": 0.0006373426573426574, |
| "step": 3094, |
| "tokens_trained": 1.520805784 |
| }, |
| { |
| "epoch": 0.8782355861286434, |
| "grad_norm": 5.421345233917236, |
| "loss": 4.0237, |
| "lr": 0.0006370629370629371, |
| "step": 3096, |
| "tokens_trained": 1.521789344 |
| }, |
| { |
| "epoch": 0.8788029217785973, |
| "grad_norm": 4.9160990715026855, |
| "loss": 4.0497, |
| "lr": 0.0006367832167832168, |
| "step": 3098, |
| "tokens_trained": 1.522772664 |
| }, |
| { |
| "epoch": 0.8793702574285511, |
| "grad_norm": 8.828028678894043, |
| "loss": 4.0381, |
| "lr": 0.0006365034965034965, |
| "step": 3100, |
| "tokens_trained": 1.523755712 |
| }, |
| { |
| "epoch": 0.879937593078505, |
| "grad_norm": 5.6704182624816895, |
| "loss": 4.0017, |
| "lr": 0.0006362237762237762, |
| "step": 3102, |
| "tokens_trained": 1.52473876 |
| }, |
| { |
| "epoch": 0.880504928728459, |
| "grad_norm": 4.982235908508301, |
| "loss": 3.9826, |
| "lr": 0.0006359440559440559, |
| "step": 3104, |
| "tokens_trained": 1.52571756 |
| }, |
| { |
| "epoch": 0.8810722643784129, |
| "grad_norm": 8.639644622802734, |
| "loss": 4.0177, |
| "lr": 0.0006356643356643357, |
| "step": 3106, |
| "tokens_trained": 1.526695632 |
| }, |
| { |
| "epoch": 0.8816396000283668, |
| "grad_norm": 6.1896820068359375, |
| "loss": 4.0248, |
| "lr": 0.0006353846153846155, |
| "step": 3108, |
| "tokens_trained": 1.527678296 |
| }, |
| { |
| "epoch": 0.8822069356783206, |
| "grad_norm": 3.787477731704712, |
| "loss": 4.0489, |
| "lr": 0.0006351048951048951, |
| "step": 3110, |
| "tokens_trained": 1.528665456 |
| }, |
| { |
| "epoch": 0.8827742713282746, |
| "grad_norm": 4.418561935424805, |
| "loss": 4.0422, |
| "lr": 0.0006348251748251748, |
| "step": 3112, |
| "tokens_trained": 1.529648584 |
| }, |
| { |
| "epoch": 0.8833416069782285, |
| "grad_norm": 8.951369285583496, |
| "loss": 4.028, |
| "lr": 0.0006345454545454546, |
| "step": 3114, |
| "tokens_trained": 1.530628808 |
| }, |
| { |
| "epoch": 0.8839089426281824, |
| "grad_norm": 4.903277397155762, |
| "loss": 4.0772, |
| "lr": 0.0006342657342657343, |
| "step": 3116, |
| "tokens_trained": 1.531612144 |
| }, |
| { |
| "epoch": 0.8844762782781364, |
| "grad_norm": 4.366726875305176, |
| "loss": 3.9975, |
| "lr": 0.000633986013986014, |
| "step": 3118, |
| "tokens_trained": 1.532595304 |
| }, |
| { |
| "epoch": 0.8850436139280902, |
| "grad_norm": 6.9316911697387695, |
| "loss": 4.0019, |
| "lr": 0.0006337062937062937, |
| "step": 3120, |
| "tokens_trained": 1.533578888 |
| }, |
| { |
| "epoch": 0.8856109495780441, |
| "grad_norm": 8.896012306213379, |
| "loss": 4.04, |
| "lr": 0.0006334265734265733, |
| "step": 3122, |
| "tokens_trained": 1.534557552 |
| }, |
| { |
| "epoch": 0.886178285227998, |
| "grad_norm": 5.350147724151611, |
| "loss": 4.0229, |
| "lr": 0.0006331468531468532, |
| "step": 3124, |
| "tokens_trained": 1.535539672 |
| }, |
| { |
| "epoch": 0.8864619530529749, |
| "eval_loss": 1.007444143295288, |
| "eval_runtime": 20.5976, |
| "step": 3125, |
| "tokens_trained": 1.53603052 |
| }, |
| { |
| "epoch": 0.886745620877952, |
| "grad_norm": 5.331796646118164, |
| "loss": 4.0331, |
| "lr": 0.0006328671328671329, |
| "step": 3126, |
| "tokens_trained": 1.536525432 |
| }, |
| { |
| "epoch": 0.8873129565279059, |
| "grad_norm": 11.335051536560059, |
| "loss": 4.041, |
| "lr": 0.0006325874125874126, |
| "step": 3128, |
| "tokens_trained": 1.537508928 |
| }, |
| { |
| "epoch": 0.8878802921778597, |
| "grad_norm": 8.185080528259277, |
| "loss": 4.0299, |
| "lr": 0.0006323076923076923, |
| "step": 3130, |
| "tokens_trained": 1.53848672 |
| }, |
| { |
| "epoch": 0.8884476278278136, |
| "grad_norm": 4.136550426483154, |
| "loss": 4.0268, |
| "lr": 0.0006320279720279721, |
| "step": 3132, |
| "tokens_trained": 1.5394682 |
| }, |
| { |
| "epoch": 0.8890149634777675, |
| "grad_norm": 4.993428707122803, |
| "loss": 3.9808, |
| "lr": 0.0006317482517482518, |
| "step": 3134, |
| "tokens_trained": 1.540449416 |
| }, |
| { |
| "epoch": 0.8895822991277215, |
| "grad_norm": 5.485887050628662, |
| "loss": 4.0201, |
| "lr": 0.0006314685314685314, |
| "step": 3136, |
| "tokens_trained": 1.541436136 |
| }, |
| { |
| "epoch": 0.8901496347776754, |
| "grad_norm": 4.517815589904785, |
| "loss": 3.9985, |
| "lr": 0.0006311888111888112, |
| "step": 3138, |
| "tokens_trained": 1.542421992 |
| }, |
| { |
| "epoch": 0.8907169704276292, |
| "grad_norm": 3.8219170570373535, |
| "loss": 4.0299, |
| "lr": 0.0006309090909090908, |
| "step": 3140, |
| "tokens_trained": 1.543399648 |
| }, |
| { |
| "epoch": 0.8912843060775831, |
| "grad_norm": 7.318249702453613, |
| "loss": 4.0377, |
| "lr": 0.0006306293706293707, |
| "step": 3142, |
| "tokens_trained": 1.54438384 |
| }, |
| { |
| "epoch": 0.8918516417275371, |
| "grad_norm": 9.09650707244873, |
| "loss": 4.0572, |
| "lr": 0.0006303496503496504, |
| "step": 3144, |
| "tokens_trained": 1.545367632 |
| }, |
| { |
| "epoch": 0.892418977377491, |
| "grad_norm": 6.241589069366455, |
| "loss": 4.025, |
| "lr": 0.0006300699300699301, |
| "step": 3146, |
| "tokens_trained": 1.546355136 |
| }, |
| { |
| "epoch": 0.8929863130274449, |
| "grad_norm": 6.9915385246276855, |
| "loss": 4.0177, |
| "lr": 0.0006297902097902098, |
| "step": 3148, |
| "tokens_trained": 1.547340304 |
| }, |
| { |
| "epoch": 0.8935536486773987, |
| "grad_norm": 5.599451541900635, |
| "loss": 3.9892, |
| "lr": 0.0006295104895104896, |
| "step": 3150, |
| "tokens_trained": 1.54832164 |
| }, |
| { |
| "epoch": 0.8941209843273527, |
| "grad_norm": 7.765986442565918, |
| "loss": 4.0232, |
| "lr": 0.0006292307692307693, |
| "step": 3152, |
| "tokens_trained": 1.54930228 |
| }, |
| { |
| "epoch": 0.8946883199773066, |
| "grad_norm": 10.365357398986816, |
| "loss": 4.0254, |
| "lr": 0.0006289510489510489, |
| "step": 3154, |
| "tokens_trained": 1.550282888 |
| }, |
| { |
| "epoch": 0.8952556556272605, |
| "grad_norm": 7.8539276123046875, |
| "loss": 4.008, |
| "lr": 0.0006286713286713287, |
| "step": 3156, |
| "tokens_trained": 1.551265008 |
| }, |
| { |
| "epoch": 0.8958229912772144, |
| "grad_norm": 8.106318473815918, |
| "loss": 4.0351, |
| "lr": 0.0006283916083916083, |
| "step": 3158, |
| "tokens_trained": 1.552245928 |
| }, |
| { |
| "epoch": 0.8963903269271682, |
| "grad_norm": 10.22494125366211, |
| "loss": 3.9873, |
| "lr": 0.0006281118881118882, |
| "step": 3160, |
| "tokens_trained": 1.553227848 |
| }, |
| { |
| "epoch": 0.8969576625771222, |
| "grad_norm": 2.8810367584228516, |
| "loss": 4.0399, |
| "lr": 0.0006278321678321679, |
| "step": 3162, |
| "tokens_trained": 1.554208112 |
| }, |
| { |
| "epoch": 0.8975249982270761, |
| "grad_norm": 10.036259651184082, |
| "loss": 4.0072, |
| "lr": 0.0006275524475524475, |
| "step": 3164, |
| "tokens_trained": 1.555186496 |
| }, |
| { |
| "epoch": 0.89809233387703, |
| "grad_norm": 6.596704006195068, |
| "loss": 4.0306, |
| "lr": 0.0006272727272727273, |
| "step": 3166, |
| "tokens_trained": 1.556170896 |
| }, |
| { |
| "epoch": 0.898659669526984, |
| "grad_norm": 4.411632537841797, |
| "loss": 4.035, |
| "lr": 0.000626993006993007, |
| "step": 3168, |
| "tokens_trained": 1.55715312 |
| }, |
| { |
| "epoch": 0.8992270051769378, |
| "grad_norm": 4.391601085662842, |
| "loss": 3.9973, |
| "lr": 0.0006267132867132868, |
| "step": 3170, |
| "tokens_trained": 1.558133552 |
| }, |
| { |
| "epoch": 0.8997943408268917, |
| "grad_norm": 9.456700325012207, |
| "loss": 4.0255, |
| "lr": 0.0006264335664335664, |
| "step": 3172, |
| "tokens_trained": 1.559115752 |
| }, |
| { |
| "epoch": 0.9003616764768456, |
| "grad_norm": 8.490089416503906, |
| "loss": 4.0368, |
| "lr": 0.0006261538461538462, |
| "step": 3174, |
| "tokens_trained": 1.560095384 |
| }, |
| { |
| "epoch": 0.9009290121267995, |
| "grad_norm": 7.3357744216918945, |
| "loss": 4.0528, |
| "lr": 0.0006258741258741258, |
| "step": 3176, |
| "tokens_trained": 1.561078856 |
| }, |
| { |
| "epoch": 0.9014963477767535, |
| "grad_norm": 6.7389092445373535, |
| "loss": 4.0457, |
| "lr": 0.0006255944055944057, |
| "step": 3178, |
| "tokens_trained": 1.562063936 |
| }, |
| { |
| "epoch": 0.9020636834267073, |
| "grad_norm": 7.586348056793213, |
| "loss": 4.0516, |
| "lr": 0.0006253146853146854, |
| "step": 3180, |
| "tokens_trained": 1.5630424 |
| }, |
| { |
| "epoch": 0.9026310190766612, |
| "grad_norm": 5.646294116973877, |
| "loss": 4.0048, |
| "lr": 0.000625034965034965, |
| "step": 3182, |
| "tokens_trained": 1.564028064 |
| }, |
| { |
| "epoch": 0.9031983547266151, |
| "grad_norm": 7.30889368057251, |
| "loss": 3.9952, |
| "lr": 0.0006247552447552448, |
| "step": 3184, |
| "tokens_trained": 1.565010296 |
| }, |
| { |
| "epoch": 0.9037656903765691, |
| "grad_norm": 6.234517574310303, |
| "loss": 4.0267, |
| "lr": 0.0006244755244755245, |
| "step": 3186, |
| "tokens_trained": 1.565993536 |
| }, |
| { |
| "epoch": 0.904333026026523, |
| "grad_norm": 4.630068302154541, |
| "loss": 4.0638, |
| "lr": 0.0006241958041958043, |
| "step": 3188, |
| "tokens_trained": 1.566973648 |
| }, |
| { |
| "epoch": 0.9049003616764768, |
| "grad_norm": 10.530085563659668, |
| "loss": 4.056, |
| "lr": 0.0006239160839160839, |
| "step": 3190, |
| "tokens_trained": 1.567954192 |
| }, |
| { |
| "epoch": 0.9054676973264307, |
| "grad_norm": 6.909562110900879, |
| "loss": 4.0297, |
| "lr": 0.0006236363636363636, |
| "step": 3192, |
| "tokens_trained": 1.568941888 |
| }, |
| { |
| "epoch": 0.9060350329763847, |
| "grad_norm": 3.382798910140991, |
| "loss": 3.9554, |
| "lr": 0.0006233566433566433, |
| "step": 3194, |
| "tokens_trained": 1.569926344 |
| }, |
| { |
| "epoch": 0.9066023686263386, |
| "grad_norm": 6.318317890167236, |
| "loss": 4.0313, |
| "lr": 0.0006230769230769231, |
| "step": 3196, |
| "tokens_trained": 1.570909072 |
| }, |
| { |
| "epoch": 0.9071697042762925, |
| "grad_norm": 8.904982566833496, |
| "loss": 4.0422, |
| "lr": 0.0006227972027972028, |
| "step": 3198, |
| "tokens_trained": 1.571891864 |
| }, |
| { |
| "epoch": 0.9077370399262463, |
| "grad_norm": 4.008038520812988, |
| "loss": 4.0254, |
| "lr": 0.0006225174825174825, |
| "step": 3200, |
| "tokens_trained": 1.572877488 |
| }, |
| { |
| "epoch": 0.9083043755762003, |
| "grad_norm": 4.28498649597168, |
| "loss": 3.9916, |
| "lr": 0.0006222377622377623, |
| "step": 3202, |
| "tokens_trained": 1.57385788 |
| }, |
| { |
| "epoch": 0.9088717112261542, |
| "grad_norm": 7.385266304016113, |
| "loss": 3.9841, |
| "lr": 0.000621958041958042, |
| "step": 3204, |
| "tokens_trained": 1.574841232 |
| }, |
| { |
| "epoch": 0.9094390468761081, |
| "grad_norm": 6.1430134773254395, |
| "loss": 3.9886, |
| "lr": 0.0006216783216783217, |
| "step": 3206, |
| "tokens_trained": 1.5758212 |
| }, |
| { |
| "epoch": 0.910006382526062, |
| "grad_norm": 4.640578746795654, |
| "loss": 4.036, |
| "lr": 0.0006213986013986014, |
| "step": 3208, |
| "tokens_trained": 1.576803856 |
| }, |
| { |
| "epoch": 0.9105737181760158, |
| "grad_norm": 2.6749765872955322, |
| "loss": 3.9934, |
| "lr": 0.0006211188811188811, |
| "step": 3210, |
| "tokens_trained": 1.577788136 |
| }, |
| { |
| "epoch": 0.9111410538259698, |
| "grad_norm": 2.5117337703704834, |
| "loss": 3.9924, |
| "lr": 0.0006208391608391608, |
| "step": 3212, |
| "tokens_trained": 1.5787728 |
| }, |
| { |
| "epoch": 0.9117083894759237, |
| "grad_norm": 9.552038192749023, |
| "loss": 4.0141, |
| "lr": 0.0006205594405594406, |
| "step": 3214, |
| "tokens_trained": 1.579757576 |
| }, |
| { |
| "epoch": 0.9122757251258776, |
| "grad_norm": 4.317904949188232, |
| "loss": 4.0242, |
| "lr": 0.0006202797202797203, |
| "step": 3216, |
| "tokens_trained": 1.580737776 |
| }, |
| { |
| "epoch": 0.9128430607758315, |
| "grad_norm": 4.847869873046875, |
| "loss": 4.0037, |
| "lr": 0.00062, |
| "step": 3218, |
| "tokens_trained": 1.58172144 |
| }, |
| { |
| "epoch": 0.9134103964257854, |
| "grad_norm": 8.135149002075195, |
| "loss": 4.056, |
| "lr": 0.0006197202797202797, |
| "step": 3220, |
| "tokens_trained": 1.58270064 |
| }, |
| { |
| "epoch": 0.9139777320757393, |
| "grad_norm": 4.46032190322876, |
| "loss": 4.0037, |
| "lr": 0.0006194405594405595, |
| "step": 3222, |
| "tokens_trained": 1.58368244 |
| }, |
| { |
| "epoch": 0.9145450677256932, |
| "grad_norm": 4.710826873779297, |
| "loss": 4.0083, |
| "lr": 0.0006191608391608392, |
| "step": 3224, |
| "tokens_trained": 1.584669984 |
| }, |
| { |
| "epoch": 0.9151124033756471, |
| "grad_norm": 6.524029731750488, |
| "loss": 4.0394, |
| "lr": 0.0006188811188811189, |
| "step": 3226, |
| "tokens_trained": 1.585651952 |
| }, |
| { |
| "epoch": 0.9156797390256011, |
| "grad_norm": 8.807348251342773, |
| "loss": 4.0215, |
| "lr": 0.0006186013986013986, |
| "step": 3228, |
| "tokens_trained": 1.586634416 |
| }, |
| { |
| "epoch": 0.9162470746755549, |
| "grad_norm": 8.313971519470215, |
| "loss": 4.048, |
| "lr": 0.0006183216783216783, |
| "step": 3230, |
| "tokens_trained": 1.587616352 |
| }, |
| { |
| "epoch": 0.9168144103255088, |
| "grad_norm": 7.2862868309021, |
| "loss": 4.0326, |
| "lr": 0.0006180419580419581, |
| "step": 3232, |
| "tokens_trained": 1.588597696 |
| }, |
| { |
| "epoch": 0.9173817459754627, |
| "grad_norm": 6.1933746337890625, |
| "loss": 4.0232, |
| "lr": 0.0006177622377622377, |
| "step": 3234, |
| "tokens_trained": 1.589579384 |
| }, |
| { |
| "epoch": 0.9179490816254167, |
| "grad_norm": 6.848970890045166, |
| "loss": 4.0134, |
| "lr": 0.0006174825174825175, |
| "step": 3236, |
| "tokens_trained": 1.590563936 |
| }, |
| { |
| "epoch": 0.9185164172753706, |
| "grad_norm": 6.213261604309082, |
| "loss": 3.9622, |
| "lr": 0.0006172027972027972, |
| "step": 3238, |
| "tokens_trained": 1.591546488 |
| }, |
| { |
| "epoch": 0.9190837529253244, |
| "grad_norm": 11.642724990844727, |
| "loss": 4.0487, |
| "lr": 0.000616923076923077, |
| "step": 3240, |
| "tokens_trained": 1.592528992 |
| }, |
| { |
| "epoch": 0.9196510885752783, |
| "grad_norm": 2.465311288833618, |
| "loss": 3.9996, |
| "lr": 0.0006166433566433567, |
| "step": 3242, |
| "tokens_trained": 1.593514088 |
| }, |
| { |
| "epoch": 0.9202184242252323, |
| "grad_norm": 14.788623809814453, |
| "loss": 4.1041, |
| "lr": 0.0006163636363636364, |
| "step": 3244, |
| "tokens_trained": 1.594498768 |
| }, |
| { |
| "epoch": 0.9207857598751862, |
| "grad_norm": 11.614027976989746, |
| "loss": 3.99, |
| "lr": 0.0006160839160839161, |
| "step": 3246, |
| "tokens_trained": 1.595477496 |
| }, |
| { |
| "epoch": 0.9213530955251401, |
| "grad_norm": 8.917405128479004, |
| "loss": 4.0626, |
| "lr": 0.0006158041958041957, |
| "step": 3248, |
| "tokens_trained": 1.596459208 |
| }, |
| { |
| "epoch": 0.9219204311750939, |
| "grad_norm": 9.843046188354492, |
| "loss": 4.0256, |
| "lr": 0.0006155244755244756, |
| "step": 3250, |
| "tokens_trained": 1.59744676 |
| }, |
| { |
| "epoch": 0.9219204311750939, |
| "eval_loss": 1.0055779218673706, |
| "eval_runtime": 20.5405, |
| "step": 3250, |
| "tokens_trained": 1.59744676 |
| }, |
| { |
| "epoch": 0.9224877668250479, |
| "grad_norm": 5.153568267822266, |
| "loss": 3.9596, |
| "lr": 0.0006152447552447552, |
| "step": 3252, |
| "tokens_trained": 1.598428968 |
| }, |
| { |
| "epoch": 0.9230551024750018, |
| "grad_norm": 3.321300745010376, |
| "loss": 3.969, |
| "lr": 0.000614965034965035, |
| "step": 3254, |
| "tokens_trained": 1.599406304 |
| }, |
| { |
| "epoch": 0.9236224381249557, |
| "grad_norm": 5.910068511962891, |
| "loss": 3.9806, |
| "lr": 0.0006146853146853147, |
| "step": 3256, |
| "tokens_trained": 1.60038644 |
| }, |
| { |
| "epoch": 0.9241897737749096, |
| "grad_norm": 9.364005088806152, |
| "loss": 3.9919, |
| "lr": 0.0006144055944055945, |
| "step": 3258, |
| "tokens_trained": 1.601371288 |
| }, |
| { |
| "epoch": 0.9247571094248634, |
| "grad_norm": 9.865127563476562, |
| "loss": 3.9827, |
| "lr": 0.0006141258741258742, |
| "step": 3260, |
| "tokens_trained": 1.602351528 |
| }, |
| { |
| "epoch": 0.9253244450748174, |
| "grad_norm": 6.053020000457764, |
| "loss": 3.9769, |
| "lr": 0.0006138461538461538, |
| "step": 3262, |
| "tokens_trained": 1.603337336 |
| }, |
| { |
| "epoch": 0.9258917807247713, |
| "grad_norm": 5.632033348083496, |
| "loss": 4.061, |
| "lr": 0.0006135664335664336, |
| "step": 3264, |
| "tokens_trained": 1.6043186 |
| }, |
| { |
| "epoch": 0.9264591163747252, |
| "grad_norm": 6.253534317016602, |
| "loss": 3.9414, |
| "lr": 0.0006132867132867132, |
| "step": 3266, |
| "tokens_trained": 1.605300448 |
| }, |
| { |
| "epoch": 0.9270264520246791, |
| "grad_norm": 7.757418632507324, |
| "loss": 4.0119, |
| "lr": 0.0006130069930069931, |
| "step": 3268, |
| "tokens_trained": 1.60628376 |
| }, |
| { |
| "epoch": 0.927593787674633, |
| "grad_norm": 5.378245830535889, |
| "loss": 3.9746, |
| "lr": 0.0006127272727272727, |
| "step": 3270, |
| "tokens_trained": 1.607265384 |
| }, |
| { |
| "epoch": 0.9281611233245869, |
| "grad_norm": 5.998968124389648, |
| "loss": 4.0218, |
| "lr": 0.0006124475524475525, |
| "step": 3272, |
| "tokens_trained": 1.60824544 |
| }, |
| { |
| "epoch": 0.9287284589745408, |
| "grad_norm": 6.340670585632324, |
| "loss": 4.0204, |
| "lr": 0.0006121678321678322, |
| "step": 3274, |
| "tokens_trained": 1.609232632 |
| }, |
| { |
| "epoch": 0.9292957946244947, |
| "grad_norm": 6.357148170471191, |
| "loss": 3.9686, |
| "lr": 0.0006118881118881118, |
| "step": 3276, |
| "tokens_trained": 1.610216024 |
| }, |
| { |
| "epoch": 0.9298631302744487, |
| "grad_norm": 4.993794918060303, |
| "loss": 3.9812, |
| "lr": 0.0006116083916083917, |
| "step": 3278, |
| "tokens_trained": 1.611196872 |
| }, |
| { |
| "epoch": 0.9304304659244025, |
| "grad_norm": 7.559938430786133, |
| "loss": 4.0018, |
| "lr": 0.0006113286713286713, |
| "step": 3280, |
| "tokens_trained": 1.612184944 |
| }, |
| { |
| "epoch": 0.9309978015743564, |
| "grad_norm": 3.7233004570007324, |
| "loss": 3.9835, |
| "lr": 0.0006110489510489511, |
| "step": 3282, |
| "tokens_trained": 1.613170464 |
| }, |
| { |
| "epoch": 0.9315651372243103, |
| "grad_norm": 7.3292717933654785, |
| "loss": 3.977, |
| "lr": 0.0006107692307692307, |
| "step": 3284, |
| "tokens_trained": 1.614153168 |
| }, |
| { |
| "epoch": 0.9321324728742643, |
| "grad_norm": 8.804302215576172, |
| "loss": 3.962, |
| "lr": 0.0006104895104895106, |
| "step": 3286, |
| "tokens_trained": 1.615134208 |
| }, |
| { |
| "epoch": 0.9326998085242182, |
| "grad_norm": 5.557953834533691, |
| "loss": 3.9729, |
| "lr": 0.0006102097902097902, |
| "step": 3288, |
| "tokens_trained": 1.616116248 |
| }, |
| { |
| "epoch": 0.933267144174172, |
| "grad_norm": 5.135542869567871, |
| "loss": 3.9855, |
| "lr": 0.0006099300699300699, |
| "step": 3290, |
| "tokens_trained": 1.617100064 |
| }, |
| { |
| "epoch": 0.9338344798241259, |
| "grad_norm": 10.206086158752441, |
| "loss": 4.0058, |
| "lr": 0.0006096503496503497, |
| "step": 3292, |
| "tokens_trained": 1.61808084 |
| }, |
| { |
| "epoch": 0.9344018154740799, |
| "grad_norm": 6.490070819854736, |
| "loss": 4.0328, |
| "lr": 0.0006093706293706293, |
| "step": 3294, |
| "tokens_trained": 1.619061608 |
| }, |
| { |
| "epoch": 0.9349691511240338, |
| "grad_norm": 6.246134281158447, |
| "loss": 3.9858, |
| "lr": 0.0006090909090909092, |
| "step": 3296, |
| "tokens_trained": 1.620046896 |
| }, |
| { |
| "epoch": 0.9355364867739877, |
| "grad_norm": 6.82793664932251, |
| "loss": 3.9416, |
| "lr": 0.0006088111888111888, |
| "step": 3298, |
| "tokens_trained": 1.621030544 |
| }, |
| { |
| "epoch": 0.9361038224239415, |
| "grad_norm": 5.400341510772705, |
| "loss": 4.0048, |
| "lr": 0.0006085314685314686, |
| "step": 3300, |
| "tokens_trained": 1.622010024 |
| }, |
| { |
| "epoch": 0.9366711580738954, |
| "grad_norm": 2.7493224143981934, |
| "loss": 3.9987, |
| "lr": 0.0006082517482517482, |
| "step": 3302, |
| "tokens_trained": 1.622992736 |
| }, |
| { |
| "epoch": 0.9372384937238494, |
| "grad_norm": 8.426931381225586, |
| "loss": 4.0074, |
| "lr": 0.000607972027972028, |
| "step": 3304, |
| "tokens_trained": 1.623977336 |
| }, |
| { |
| "epoch": 0.9378058293738033, |
| "grad_norm": 6.779547691345215, |
| "loss": 4.0041, |
| "lr": 0.0006076923076923077, |
| "step": 3306, |
| "tokens_trained": 1.624958504 |
| }, |
| { |
| "epoch": 0.9383731650237572, |
| "grad_norm": 5.38230562210083, |
| "loss": 4.0297, |
| "lr": 0.0006074125874125874, |
| "step": 3308, |
| "tokens_trained": 1.625948568 |
| }, |
| { |
| "epoch": 0.938940500673711, |
| "grad_norm": 5.785275936126709, |
| "loss": 4.0112, |
| "lr": 0.0006071328671328672, |
| "step": 3310, |
| "tokens_trained": 1.626932696 |
| }, |
| { |
| "epoch": 0.939507836323665, |
| "grad_norm": 14.610711097717285, |
| "loss": 3.9558, |
| "lr": 0.0006068531468531468, |
| "step": 3312, |
| "tokens_trained": 1.62791704 |
| }, |
| { |
| "epoch": 0.9400751719736189, |
| "grad_norm": 2.3301351070404053, |
| "loss": 4.0155, |
| "lr": 0.0006065734265734267, |
| "step": 3314, |
| "tokens_trained": 1.628900096 |
| }, |
| { |
| "epoch": 0.9406425076235728, |
| "grad_norm": 17.020362854003906, |
| "loss": 4.0244, |
| "lr": 0.0006062937062937063, |
| "step": 3316, |
| "tokens_trained": 1.629885888 |
| }, |
| { |
| "epoch": 0.9412098432735267, |
| "grad_norm": 8.809579849243164, |
| "loss": 4.0622, |
| "lr": 0.000606013986013986, |
| "step": 3318, |
| "tokens_trained": 1.630868992 |
| }, |
| { |
| "epoch": 0.9417771789234806, |
| "grad_norm": 4.908751964569092, |
| "loss": 4.0464, |
| "lr": 0.0006057342657342657, |
| "step": 3320, |
| "tokens_trained": 1.631855664 |
| }, |
| { |
| "epoch": 0.9423445145734345, |
| "grad_norm": 9.65546989440918, |
| "loss": 4.013, |
| "lr": 0.0006054545454545455, |
| "step": 3322, |
| "tokens_trained": 1.632839496 |
| }, |
| { |
| "epoch": 0.9429118502233884, |
| "grad_norm": 5.595473766326904, |
| "loss": 4.0371, |
| "lr": 0.0006051748251748252, |
| "step": 3324, |
| "tokens_trained": 1.633827536 |
| }, |
| { |
| "epoch": 0.9434791858733423, |
| "grad_norm": 10.249938011169434, |
| "loss": 4.0702, |
| "lr": 0.0006048951048951049, |
| "step": 3326, |
| "tokens_trained": 1.634811888 |
| }, |
| { |
| "epoch": 0.9440465215232963, |
| "grad_norm": 12.086007118225098, |
| "loss": 4.0042, |
| "lr": 0.0006046153846153846, |
| "step": 3328, |
| "tokens_trained": 1.635792824 |
| }, |
| { |
| "epoch": 0.9446138571732501, |
| "grad_norm": 3.0745136737823486, |
| "loss": 4.0355, |
| "lr": 0.0006043356643356643, |
| "step": 3330, |
| "tokens_trained": 1.636776176 |
| }, |
| { |
| "epoch": 0.945181192823204, |
| "grad_norm": 4.060697078704834, |
| "loss": 4.0016, |
| "lr": 0.0006040559440559441, |
| "step": 3332, |
| "tokens_trained": 1.637758008 |
| }, |
| { |
| "epoch": 0.9457485284731579, |
| "grad_norm": 7.648933410644531, |
| "loss": 3.9939, |
| "lr": 0.0006037762237762238, |
| "step": 3334, |
| "tokens_trained": 1.638744408 |
| }, |
| { |
| "epoch": 0.9463158641231119, |
| "grad_norm": 5.033253192901611, |
| "loss": 4.0245, |
| "lr": 0.0006034965034965035, |
| "step": 3336, |
| "tokens_trained": 1.639724776 |
| }, |
| { |
| "epoch": 0.9468831997730658, |
| "grad_norm": 4.653557300567627, |
| "loss": 4.0169, |
| "lr": 0.0006032167832167832, |
| "step": 3338, |
| "tokens_trained": 1.640708864 |
| }, |
| { |
| "epoch": 0.9474505354230196, |
| "grad_norm": 6.682651042938232, |
| "loss": 4.0062, |
| "lr": 0.000602937062937063, |
| "step": 3340, |
| "tokens_trained": 1.641689864 |
| }, |
| { |
| "epoch": 0.9480178710729735, |
| "grad_norm": 5.059361934661865, |
| "loss": 3.9681, |
| "lr": 0.0006026573426573426, |
| "step": 3342, |
| "tokens_trained": 1.64267264 |
| }, |
| { |
| "epoch": 0.9485852067229275, |
| "grad_norm": 4.165974140167236, |
| "loss": 3.9941, |
| "lr": 0.0006023776223776224, |
| "step": 3344, |
| "tokens_trained": 1.643655624 |
| }, |
| { |
| "epoch": 0.9491525423728814, |
| "grad_norm": 6.669079780578613, |
| "loss": 4.0258, |
| "lr": 0.0006020979020979021, |
| "step": 3346, |
| "tokens_trained": 1.644635752 |
| }, |
| { |
| "epoch": 0.9497198780228353, |
| "grad_norm": 5.924664497375488, |
| "loss": 4.0589, |
| "lr": 0.0006018181818181818, |
| "step": 3348, |
| "tokens_trained": 1.64561992 |
| }, |
| { |
| "epoch": 0.9502872136727891, |
| "grad_norm": 1.662906527519226, |
| "loss": 3.9894, |
| "lr": 0.0006015384615384616, |
| "step": 3350, |
| "tokens_trained": 1.646605552 |
| }, |
| { |
| "epoch": 0.950854549322743, |
| "grad_norm": 3.1677517890930176, |
| "loss": 4.0062, |
| "lr": 0.0006012587412587413, |
| "step": 3352, |
| "tokens_trained": 1.647587824 |
| }, |
| { |
| "epoch": 0.951421884972697, |
| "grad_norm": 5.4521918296813965, |
| "loss": 4.0244, |
| "lr": 0.000600979020979021, |
| "step": 3354, |
| "tokens_trained": 1.648566792 |
| }, |
| { |
| "epoch": 0.9519892206226509, |
| "grad_norm": 7.839843273162842, |
| "loss": 3.9954, |
| "lr": 0.0006006993006993006, |
| "step": 3356, |
| "tokens_trained": 1.6495504 |
| }, |
| { |
| "epoch": 0.9525565562726048, |
| "grad_norm": 5.340535640716553, |
| "loss": 3.9915, |
| "lr": 0.0006004195804195805, |
| "step": 3358, |
| "tokens_trained": 1.65053064 |
| }, |
| { |
| "epoch": 0.9531238919225586, |
| "grad_norm": 3.9342992305755615, |
| "loss": 3.9507, |
| "lr": 0.0006001398601398601, |
| "step": 3360, |
| "tokens_trained": 1.651516704 |
| }, |
| { |
| "epoch": 0.9536912275725126, |
| "grad_norm": 3.879631519317627, |
| "loss": 4.0369, |
| "lr": 0.0005998601398601399, |
| "step": 3362, |
| "tokens_trained": 1.652501248 |
| }, |
| { |
| "epoch": 0.9542585632224665, |
| "grad_norm": 4.699181079864502, |
| "loss": 4.0151, |
| "lr": 0.0005995804195804196, |
| "step": 3364, |
| "tokens_trained": 1.653486632 |
| }, |
| { |
| "epoch": 0.9548258988724204, |
| "grad_norm": 7.259454250335693, |
| "loss": 3.9855, |
| "lr": 0.0005993006993006993, |
| "step": 3366, |
| "tokens_trained": 1.654473488 |
| }, |
| { |
| "epoch": 0.9553932345223743, |
| "grad_norm": 6.6725029945373535, |
| "loss": 3.9972, |
| "lr": 0.0005990209790209791, |
| "step": 3368, |
| "tokens_trained": 1.655456328 |
| }, |
| { |
| "epoch": 0.9559605701723282, |
| "grad_norm": 5.077842712402344, |
| "loss": 3.9706, |
| "lr": 0.0005987412587412587, |
| "step": 3370, |
| "tokens_trained": 1.656442256 |
| }, |
| { |
| "epoch": 0.9565279058222821, |
| "grad_norm": 7.882787704467773, |
| "loss": 4.0581, |
| "lr": 0.0005984615384615385, |
| "step": 3372, |
| "tokens_trained": 1.657425912 |
| }, |
| { |
| "epoch": 0.957095241472236, |
| "grad_norm": 7.118039608001709, |
| "loss": 3.9939, |
| "lr": 0.0005981818181818181, |
| "step": 3374, |
| "tokens_trained": 1.658406184 |
| }, |
| { |
| "epoch": 0.9573789092972129, |
| "eval_loss": 1.0043113231658936, |
| "eval_runtime": 20.471, |
| "step": 3375, |
| "tokens_trained": 1.658898224 |
| }, |
| { |
| "epoch": 0.9576625771221899, |
| "grad_norm": 11.206400871276855, |
| "loss": 4.0073, |
| "lr": 0.000597902097902098, |
| "step": 3376, |
| "tokens_trained": 1.65938968 |
| }, |
| { |
| "epoch": 0.9582299127721439, |
| "grad_norm": 3.2221481800079346, |
| "loss": 3.9924, |
| "lr": 0.0005976223776223776, |
| "step": 3378, |
| "tokens_trained": 1.660372856 |
| }, |
| { |
| "epoch": 0.9587972484220977, |
| "grad_norm": 15.000614166259766, |
| "loss": 4.0361, |
| "lr": 0.0005973426573426574, |
| "step": 3380, |
| "tokens_trained": 1.66135512 |
| }, |
| { |
| "epoch": 0.9593645840720516, |
| "grad_norm": 13.365633964538574, |
| "loss": 4.0258, |
| "lr": 0.0005970629370629371, |
| "step": 3382, |
| "tokens_trained": 1.662332728 |
| }, |
| { |
| "epoch": 0.9599319197220055, |
| "grad_norm": 6.362198829650879, |
| "loss": 3.9868, |
| "lr": 0.0005967832167832167, |
| "step": 3384, |
| "tokens_trained": 1.663311392 |
| }, |
| { |
| "epoch": 0.9604992553719595, |
| "grad_norm": 16.104549407958984, |
| "loss": 3.9893, |
| "lr": 0.0005965034965034966, |
| "step": 3386, |
| "tokens_trained": 1.664296088 |
| }, |
| { |
| "epoch": 0.9610665910219134, |
| "grad_norm": 32.109375, |
| "loss": 4.0635, |
| "lr": 0.0005962237762237762, |
| "step": 3388, |
| "tokens_trained": 1.665278232 |
| }, |
| { |
| "epoch": 0.9616339266718672, |
| "grad_norm": 14.814417839050293, |
| "loss": 4.0545, |
| "lr": 0.000595944055944056, |
| "step": 3390, |
| "tokens_trained": 1.666262952 |
| }, |
| { |
| "epoch": 0.9622012623218211, |
| "grad_norm": 8.69149398803711, |
| "loss": 4.0214, |
| "lr": 0.0005956643356643356, |
| "step": 3392, |
| "tokens_trained": 1.66724224 |
| }, |
| { |
| "epoch": 0.962768597971775, |
| "grad_norm": 6.150435447692871, |
| "loss": 4.0675, |
| "lr": 0.0005953846153846155, |
| "step": 3394, |
| "tokens_trained": 1.668222488 |
| }, |
| { |
| "epoch": 0.963335933621729, |
| "grad_norm": 14.53095817565918, |
| "loss": 4.0293, |
| "lr": 0.0005951048951048951, |
| "step": 3396, |
| "tokens_trained": 1.66920572 |
| }, |
| { |
| "epoch": 0.9639032692716829, |
| "grad_norm": 14.750361442565918, |
| "loss": 4.0345, |
| "lr": 0.0005948251748251748, |
| "step": 3398, |
| "tokens_trained": 1.670191456 |
| }, |
| { |
| "epoch": 0.9644706049216367, |
| "grad_norm": 10.563243865966797, |
| "loss": 4.0796, |
| "lr": 0.0005945454545454546, |
| "step": 3400, |
| "tokens_trained": 1.671174992 |
| }, |
| { |
| "epoch": 0.9650379405715906, |
| "grad_norm": 14.203415870666504, |
| "loss": 4.0078, |
| "lr": 0.0005942657342657342, |
| "step": 3402, |
| "tokens_trained": 1.672159048 |
| }, |
| { |
| "epoch": 0.9656052762215446, |
| "grad_norm": 7.918346405029297, |
| "loss": 4.0015, |
| "lr": 0.0005939860139860141, |
| "step": 3404, |
| "tokens_trained": 1.6731408 |
| }, |
| { |
| "epoch": 0.9661726118714985, |
| "grad_norm": 3.3628811836242676, |
| "loss": 4.0656, |
| "lr": 0.0005937062937062937, |
| "step": 3406, |
| "tokens_trained": 1.674120472 |
| }, |
| { |
| "epoch": 0.9667399475214524, |
| "grad_norm": 13.740876197814941, |
| "loss": 4.0296, |
| "lr": 0.0005934265734265735, |
| "step": 3408, |
| "tokens_trained": 1.67510176 |
| }, |
| { |
| "epoch": 0.9673072831714062, |
| "grad_norm": 8.178666114807129, |
| "loss": 3.9804, |
| "lr": 0.0005931468531468531, |
| "step": 3410, |
| "tokens_trained": 1.676087336 |
| }, |
| { |
| "epoch": 0.9678746188213602, |
| "grad_norm": 6.31284761428833, |
| "loss": 3.9905, |
| "lr": 0.000592867132867133, |
| "step": 3412, |
| "tokens_trained": 1.677069328 |
| }, |
| { |
| "epoch": 0.9684419544713141, |
| "grad_norm": 10.166040420532227, |
| "loss": 3.9962, |
| "lr": 0.0005925874125874126, |
| "step": 3414, |
| "tokens_trained": 1.678049672 |
| }, |
| { |
| "epoch": 0.969009290121268, |
| "grad_norm": 6.166718006134033, |
| "loss": 3.9966, |
| "lr": 0.0005923076923076923, |
| "step": 3416, |
| "tokens_trained": 1.679035104 |
| }, |
| { |
| "epoch": 0.969576625771222, |
| "grad_norm": 3.7397615909576416, |
| "loss": 4.0323, |
| "lr": 0.0005920279720279721, |
| "step": 3418, |
| "tokens_trained": 1.680018424 |
| }, |
| { |
| "epoch": 0.9701439614211758, |
| "grad_norm": 12.122432708740234, |
| "loss": 4.0143, |
| "lr": 0.0005917482517482517, |
| "step": 3420, |
| "tokens_trained": 1.681001112 |
| }, |
| { |
| "epoch": 0.9707112970711297, |
| "grad_norm": 5.118746280670166, |
| "loss": 3.9909, |
| "lr": 0.0005914685314685316, |
| "step": 3422, |
| "tokens_trained": 1.681987648 |
| }, |
| { |
| "epoch": 0.9712786327210836, |
| "grad_norm": 5.810860633850098, |
| "loss": 3.9675, |
| "lr": 0.0005911888111888112, |
| "step": 3424, |
| "tokens_trained": 1.68296972 |
| }, |
| { |
| "epoch": 0.9718459683710375, |
| "grad_norm": 7.637686252593994, |
| "loss": 3.9976, |
| "lr": 0.0005909090909090909, |
| "step": 3426, |
| "tokens_trained": 1.683952 |
| }, |
| { |
| "epoch": 0.9724133040209915, |
| "grad_norm": 5.637698173522949, |
| "loss": 3.9829, |
| "lr": 0.0005906293706293706, |
| "step": 3428, |
| "tokens_trained": 1.684933912 |
| }, |
| { |
| "epoch": 0.9729806396709453, |
| "grad_norm": 2.2650809288024902, |
| "loss": 3.9656, |
| "lr": 0.0005903496503496504, |
| "step": 3430, |
| "tokens_trained": 1.685915176 |
| }, |
| { |
| "epoch": 0.9735479753208992, |
| "grad_norm": 6.0117058753967285, |
| "loss": 4.0575, |
| "lr": 0.0005900699300699301, |
| "step": 3432, |
| "tokens_trained": 1.686901184 |
| }, |
| { |
| "epoch": 0.9741153109708531, |
| "grad_norm": 8.301697731018066, |
| "loss": 3.9869, |
| "lr": 0.0005897902097902098, |
| "step": 3434, |
| "tokens_trained": 1.687886888 |
| }, |
| { |
| "epoch": 0.9746826466208071, |
| "grad_norm": 6.436981678009033, |
| "loss": 4.01, |
| "lr": 0.0005895104895104896, |
| "step": 3436, |
| "tokens_trained": 1.68886904 |
| }, |
| { |
| "epoch": 0.975249982270761, |
| "grad_norm": 4.290571212768555, |
| "loss": 3.9953, |
| "lr": 0.0005892307692307692, |
| "step": 3438, |
| "tokens_trained": 1.689850264 |
| }, |
| { |
| "epoch": 0.9758173179207148, |
| "grad_norm": 4.618532657623291, |
| "loss": 3.9995, |
| "lr": 0.000588951048951049, |
| "step": 3440, |
| "tokens_trained": 1.69083728 |
| }, |
| { |
| "epoch": 0.9763846535706687, |
| "grad_norm": 8.481820106506348, |
| "loss": 4.0019, |
| "lr": 0.0005886713286713287, |
| "step": 3442, |
| "tokens_trained": 1.691819976 |
| }, |
| { |
| "epoch": 0.9769519892206227, |
| "grad_norm": 4.643980503082275, |
| "loss": 3.9974, |
| "lr": 0.0005883916083916084, |
| "step": 3444, |
| "tokens_trained": 1.692803784 |
| }, |
| { |
| "epoch": 0.9775193248705766, |
| "grad_norm": 6.828413009643555, |
| "loss": 3.9886, |
| "lr": 0.0005881118881118881, |
| "step": 3446, |
| "tokens_trained": 1.69378512 |
| }, |
| { |
| "epoch": 0.9780866605205305, |
| "grad_norm": 7.530898094177246, |
| "loss": 4.0318, |
| "lr": 0.0005878321678321679, |
| "step": 3448, |
| "tokens_trained": 1.694768152 |
| }, |
| { |
| "epoch": 0.9786539961704843, |
| "grad_norm": 6.020658493041992, |
| "loss": 4.0057, |
| "lr": 0.0005875524475524476, |
| "step": 3450, |
| "tokens_trained": 1.695752832 |
| }, |
| { |
| "epoch": 0.9792213318204382, |
| "grad_norm": 5.292300224304199, |
| "loss": 3.9915, |
| "lr": 0.0005872727272727273, |
| "step": 3452, |
| "tokens_trained": 1.696735104 |
| }, |
| { |
| "epoch": 0.9797886674703922, |
| "grad_norm": 4.932474613189697, |
| "loss": 4.0163, |
| "lr": 0.0005869930069930069, |
| "step": 3454, |
| "tokens_trained": 1.697718208 |
| }, |
| { |
| "epoch": 0.9803560031203461, |
| "grad_norm": 4.504141807556152, |
| "loss": 3.9875, |
| "lr": 0.0005867132867132867, |
| "step": 3456, |
| "tokens_trained": 1.698697752 |
| }, |
| { |
| "epoch": 0.9809233387703, |
| "grad_norm": 4.826939582824707, |
| "loss": 3.9326, |
| "lr": 0.0005864335664335665, |
| "step": 3458, |
| "tokens_trained": 1.699672392 |
| }, |
| { |
| "epoch": 0.9814906744202538, |
| "grad_norm": 7.805232524871826, |
| "loss": 3.9695, |
| "lr": 0.0005861538461538462, |
| "step": 3460, |
| "tokens_trained": 1.700656392 |
| }, |
| { |
| "epoch": 0.9820580100702078, |
| "grad_norm": 6.857801914215088, |
| "loss": 3.995, |
| "lr": 0.0005858741258741259, |
| "step": 3462, |
| "tokens_trained": 1.701644848 |
| }, |
| { |
| "epoch": 0.9826253457201617, |
| "grad_norm": 4.32315731048584, |
| "loss": 3.9701, |
| "lr": 0.0005855944055944055, |
| "step": 3464, |
| "tokens_trained": 1.702624688 |
| }, |
| { |
| "epoch": 0.9831926813701156, |
| "grad_norm": 6.007495880126953, |
| "loss": 3.9887, |
| "lr": 0.0005853146853146854, |
| "step": 3466, |
| "tokens_trained": 1.703607376 |
| }, |
| { |
| "epoch": 0.9837600170200695, |
| "grad_norm": 4.779850006103516, |
| "loss": 3.9852, |
| "lr": 0.000585034965034965, |
| "step": 3468, |
| "tokens_trained": 1.704589808 |
| }, |
| { |
| "epoch": 0.9843273526700234, |
| "grad_norm": 4.593331336975098, |
| "loss": 4.0136, |
| "lr": 0.0005847552447552448, |
| "step": 3470, |
| "tokens_trained": 1.705573184 |
| }, |
| { |
| "epoch": 0.9848946883199773, |
| "grad_norm": 5.466218948364258, |
| "loss": 3.9426, |
| "lr": 0.0005844755244755244, |
| "step": 3472, |
| "tokens_trained": 1.706555864 |
| }, |
| { |
| "epoch": 0.9854620239699312, |
| "grad_norm": 8.283979415893555, |
| "loss": 3.9788, |
| "lr": 0.0005841958041958042, |
| "step": 3474, |
| "tokens_trained": 1.70754036 |
| }, |
| { |
| "epoch": 0.9860293596198851, |
| "grad_norm": 2.4386069774627686, |
| "loss": 3.9413, |
| "lr": 0.000583916083916084, |
| "step": 3476, |
| "tokens_trained": 1.708525528 |
| }, |
| { |
| "epoch": 0.9865966952698391, |
| "grad_norm": 4.485580921173096, |
| "loss": 3.9695, |
| "lr": 0.0005836363636363636, |
| "step": 3478, |
| "tokens_trained": 1.709508232 |
| }, |
| { |
| "epoch": 0.9871640309197929, |
| "grad_norm": 6.725922584533691, |
| "loss": 4.0084, |
| "lr": 0.0005833566433566434, |
| "step": 3480, |
| "tokens_trained": 1.710493288 |
| }, |
| { |
| "epoch": 0.9877313665697468, |
| "grad_norm": 5.532742023468018, |
| "loss": 3.9571, |
| "lr": 0.000583076923076923, |
| "step": 3482, |
| "tokens_trained": 1.711478792 |
| }, |
| { |
| "epoch": 0.9882987022197007, |
| "grad_norm": 5.568683624267578, |
| "loss": 4.0178, |
| "lr": 0.0005827972027972029, |
| "step": 3484, |
| "tokens_trained": 1.712464864 |
| }, |
| { |
| "epoch": 0.9888660378696547, |
| "grad_norm": 5.192487716674805, |
| "loss": 4.0294, |
| "lr": 0.0005825174825174825, |
| "step": 3486, |
| "tokens_trained": 1.713448256 |
| }, |
| { |
| "epoch": 0.9894333735196086, |
| "grad_norm": 5.584596633911133, |
| "loss": 3.9992, |
| "lr": 0.0005822377622377623, |
| "step": 3488, |
| "tokens_trained": 1.714435472 |
| }, |
| { |
| "epoch": 0.9900007091695624, |
| "grad_norm": 5.044432163238525, |
| "loss": 4.0119, |
| "lr": 0.0005819580419580419, |
| "step": 3490, |
| "tokens_trained": 1.715418784 |
| }, |
| { |
| "epoch": 0.9905680448195163, |
| "grad_norm": 3.4799540042877197, |
| "loss": 4.0099, |
| "lr": 0.0005816783216783216, |
| "step": 3492, |
| "tokens_trained": 1.716402544 |
| }, |
| { |
| "epoch": 0.9911353804694703, |
| "grad_norm": 4.949790000915527, |
| "loss": 3.9372, |
| "lr": 0.0005813986013986015, |
| "step": 3494, |
| "tokens_trained": 1.71738848 |
| }, |
| { |
| "epoch": 0.9917027161194242, |
| "grad_norm": 6.527776718139648, |
| "loss": 3.9938, |
| "lr": 0.0005811188811188811, |
| "step": 3496, |
| "tokens_trained": 1.718371984 |
| }, |
| { |
| "epoch": 0.9922700517693781, |
| "grad_norm": 5.616584300994873, |
| "loss": 3.9352, |
| "lr": 0.0005808391608391609, |
| "step": 3498, |
| "tokens_trained": 1.719358256 |
| }, |
| { |
| "epoch": 0.9928373874193319, |
| "grad_norm": 7.028440952301025, |
| "loss": 3.9494, |
| "lr": 0.0005805594405594405, |
| "step": 3500, |
| "tokens_trained": 1.720339264 |
| }, |
| { |
| "epoch": 0.9928373874193319, |
| "eval_loss": 0.999991238117218, |
| "eval_runtime": 20.318, |
| "step": 3500, |
| "tokens_trained": 1.720339264 |
| }, |
| { |
| "epoch": 0.9934047230692858, |
| "grad_norm": 5.338140487670898, |
| "loss": 3.9748, |
| "lr": 0.0005802797202797204, |
| "step": 3502, |
| "tokens_trained": 1.72132272 |
| }, |
| { |
| "epoch": 0.9939720587192398, |
| "grad_norm": 3.3448476791381836, |
| "loss": 3.96, |
| "lr": 0.00058, |
| "step": 3504, |
| "tokens_trained": 1.722307576 |
| }, |
| { |
| "epoch": 0.9945393943691937, |
| "grad_norm": 10.660968780517578, |
| "loss": 4.0199, |
| "lr": 0.0005797202797202797, |
| "step": 3506, |
| "tokens_trained": 1.723288472 |
| }, |
| { |
| "epoch": 0.9951067300191476, |
| "grad_norm": 7.261615753173828, |
| "loss": 3.9889, |
| "lr": 0.0005794405594405594, |
| "step": 3508, |
| "tokens_trained": 1.724272744 |
| }, |
| { |
| "epoch": 0.9956740656691014, |
| "grad_norm": 5.103553295135498, |
| "loss": 4.0047, |
| "lr": 0.0005791608391608391, |
| "step": 3510, |
| "tokens_trained": 1.725255576 |
| }, |
| { |
| "epoch": 0.9962414013190554, |
| "grad_norm": 1.5151104927062988, |
| "loss": 4.0228, |
| "lr": 0.000578881118881119, |
| "step": 3512, |
| "tokens_trained": 1.72624092 |
| }, |
| { |
| "epoch": 0.9968087369690093, |
| "grad_norm": 6.042428493499756, |
| "loss": 3.9699, |
| "lr": 0.0005786013986013986, |
| "step": 3514, |
| "tokens_trained": 1.727227176 |
| }, |
| { |
| "epoch": 0.9973760726189632, |
| "grad_norm": 10.020720481872559, |
| "loss": 3.9961, |
| "lr": 0.0005783216783216784, |
| "step": 3516, |
| "tokens_trained": 1.728205072 |
| }, |
| { |
| "epoch": 0.9979434082689171, |
| "grad_norm": 9.385619163513184, |
| "loss": 3.9962, |
| "lr": 0.000578041958041958, |
| "step": 3518, |
| "tokens_trained": 1.729187536 |
| }, |
| { |
| "epoch": 0.998510743918871, |
| "grad_norm": 1.413792371749878, |
| "loss": 4.0256, |
| "lr": 0.0005777622377622377, |
| "step": 3520, |
| "tokens_trained": 1.730168968 |
| }, |
| { |
| "epoch": 0.9990780795688249, |
| "grad_norm": 2.8461780548095703, |
| "loss": 3.9616, |
| "lr": 0.0005774825174825175, |
| "step": 3522, |
| "tokens_trained": 1.731150472 |
| }, |
| { |
| "epoch": 0.9996454152187788, |
| "grad_norm": 4.164590835571289, |
| "loss": 3.9786, |
| "lr": 0.0005772027972027972, |
| "step": 3524, |
| "tokens_trained": 1.732130536 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.0116016864776611, |
| "loss": 2.5007, |
| "lr": 0.0005769230769230769, |
| "step": 3526, |
| "tokens_trained": 1.732744968 |
| }, |
| { |
| "epoch": 1.0005673356499538, |
| "grad_norm": 5.954165458679199, |
| "loss": 3.9598, |
| "lr": 0.0005766433566433566, |
| "step": 3528, |
| "tokens_trained": 1.733727424 |
| }, |
| { |
| "epoch": 1.0011346712999079, |
| "grad_norm": 8.648826599121094, |
| "loss": 3.9773, |
| "lr": 0.0005763636363636365, |
| "step": 3530, |
| "tokens_trained": 1.734708184 |
| }, |
| { |
| "epoch": 1.0017020069498617, |
| "grad_norm": 2.920509099960327, |
| "loss": 3.9745, |
| "lr": 0.0005760839160839161, |
| "step": 3532, |
| "tokens_trained": 1.735688616 |
| }, |
| { |
| "epoch": 1.0022693425998157, |
| "grad_norm": 9.963903427124023, |
| "loss": 3.9742, |
| "lr": 0.0005758041958041958, |
| "step": 3534, |
| "tokens_trained": 1.73667084 |
| }, |
| { |
| "epoch": 1.0028366782497695, |
| "grad_norm": 9.745009422302246, |
| "loss": 4.028, |
| "lr": 0.0005755244755244755, |
| "step": 3536, |
| "tokens_trained": 1.737656328 |
| }, |
| { |
| "epoch": 1.0034040138997233, |
| "grad_norm": 5.159154891967773, |
| "loss": 3.9812, |
| "lr": 0.0005752447552447552, |
| "step": 3538, |
| "tokens_trained": 1.738637688 |
| }, |
| { |
| "epoch": 1.0039713495496774, |
| "grad_norm": 10.829404830932617, |
| "loss": 3.9795, |
| "lr": 0.000574965034965035, |
| "step": 3540, |
| "tokens_trained": 1.739621688 |
| }, |
| { |
| "epoch": 1.0045386851996312, |
| "grad_norm": 8.493478775024414, |
| "loss": 3.9918, |
| "lr": 0.0005746853146853147, |
| "step": 3542, |
| "tokens_trained": 1.740604488 |
| }, |
| { |
| "epoch": 1.0051060208495852, |
| "grad_norm": 4.013627529144287, |
| "loss": 3.9928, |
| "lr": 0.0005744055944055944, |
| "step": 3544, |
| "tokens_trained": 1.74158764 |
| }, |
| { |
| "epoch": 1.005673356499539, |
| "grad_norm": 12.669920921325684, |
| "loss": 4.0114, |
| "lr": 0.0005741258741258741, |
| "step": 3546, |
| "tokens_trained": 1.742573592 |
| }, |
| { |
| "epoch": 1.0062406921494929, |
| "grad_norm": 6.349422931671143, |
| "loss": 4.0294, |
| "lr": 0.000573846153846154, |
| "step": 3548, |
| "tokens_trained": 1.743555672 |
| }, |
| { |
| "epoch": 1.006808027799447, |
| "grad_norm": 4.14855432510376, |
| "loss": 3.9963, |
| "lr": 0.0005735664335664336, |
| "step": 3550, |
| "tokens_trained": 1.744538384 |
| }, |
| { |
| "epoch": 1.0073753634494007, |
| "grad_norm": 9.063926696777344, |
| "loss": 3.9557, |
| "lr": 0.0005732867132867133, |
| "step": 3552, |
| "tokens_trained": 1.745523552 |
| }, |
| { |
| "epoch": 1.0079426990993547, |
| "grad_norm": 11.227505683898926, |
| "loss": 4.0087, |
| "lr": 0.000573006993006993, |
| "step": 3554, |
| "tokens_trained": 1.746510024 |
| }, |
| { |
| "epoch": 1.0085100347493086, |
| "grad_norm": 2.418097972869873, |
| "loss": 3.9942, |
| "lr": 0.0005727272727272727, |
| "step": 3556, |
| "tokens_trained": 1.747493048 |
| }, |
| { |
| "epoch": 1.0090773703992624, |
| "grad_norm": 14.376424789428711, |
| "loss": 3.999, |
| "lr": 0.0005724475524475525, |
| "step": 3558, |
| "tokens_trained": 1.748476808 |
| }, |
| { |
| "epoch": 1.0096447060492164, |
| "grad_norm": 9.035455703735352, |
| "loss": 4.063, |
| "lr": 0.0005721678321678322, |
| "step": 3560, |
| "tokens_trained": 1.749460504 |
| }, |
| { |
| "epoch": 1.0102120416991702, |
| "grad_norm": 3.8785758018493652, |
| "loss": 4.0269, |
| "lr": 0.0005718881118881118, |
| "step": 3562, |
| "tokens_trained": 1.750438936 |
| }, |
| { |
| "epoch": 1.0107793773491243, |
| "grad_norm": 15.488290786743164, |
| "loss": 4.0294, |
| "lr": 0.0005716083916083916, |
| "step": 3564, |
| "tokens_trained": 1.751420168 |
| }, |
| { |
| "epoch": 1.011346712999078, |
| "grad_norm": 10.785538673400879, |
| "loss": 4.0102, |
| "lr": 0.0005713286713286714, |
| "step": 3566, |
| "tokens_trained": 1.752405288 |
| }, |
| { |
| "epoch": 1.011914048649032, |
| "grad_norm": 5.724320888519287, |
| "loss": 4.0148, |
| "lr": 0.0005710489510489511, |
| "step": 3568, |
| "tokens_trained": 1.75338604 |
| }, |
| { |
| "epoch": 1.012481384298986, |
| "grad_norm": 11.051252365112305, |
| "loss": 4.022, |
| "lr": 0.0005707692307692308, |
| "step": 3570, |
| "tokens_trained": 1.75436632 |
| }, |
| { |
| "epoch": 1.0130487199489397, |
| "grad_norm": 10.290446281433105, |
| "loss": 3.9781, |
| "lr": 0.0005704895104895105, |
| "step": 3572, |
| "tokens_trained": 1.755349944 |
| }, |
| { |
| "epoch": 1.0136160555988938, |
| "grad_norm": 4.81416130065918, |
| "loss": 4.0393, |
| "lr": 0.0005702097902097902, |
| "step": 3574, |
| "tokens_trained": 1.756337976 |
| }, |
| { |
| "epoch": 1.0141833912488476, |
| "grad_norm": 14.237113952636719, |
| "loss": 4.087, |
| "lr": 0.0005699300699300699, |
| "step": 3576, |
| "tokens_trained": 1.75732372 |
| }, |
| { |
| "epoch": 1.0147507268988014, |
| "grad_norm": 3.973662853240967, |
| "loss": 3.9692, |
| "lr": 0.0005696503496503497, |
| "step": 3578, |
| "tokens_trained": 1.7583098 |
| }, |
| { |
| "epoch": 1.0153180625487555, |
| "grad_norm": 5.629733562469482, |
| "loss": 4.0003, |
| "lr": 0.0005693706293706293, |
| "step": 3580, |
| "tokens_trained": 1.759300416 |
| }, |
| { |
| "epoch": 1.0158853981987093, |
| "grad_norm": 7.505983352661133, |
| "loss": 4.011, |
| "lr": 0.0005690909090909091, |
| "step": 3582, |
| "tokens_trained": 1.760288632 |
| }, |
| { |
| "epoch": 1.0164527338486633, |
| "grad_norm": 5.501095294952393, |
| "loss": 3.994, |
| "lr": 0.0005688111888111889, |
| "step": 3584, |
| "tokens_trained": 1.761270328 |
| }, |
| { |
| "epoch": 1.0170200694986171, |
| "grad_norm": 4.74052619934082, |
| "loss": 4.0241, |
| "lr": 0.0005685314685314686, |
| "step": 3586, |
| "tokens_trained": 1.762252432 |
| }, |
| { |
| "epoch": 1.017587405148571, |
| "grad_norm": 8.409584045410156, |
| "loss": 4.0137, |
| "lr": 0.0005682517482517483, |
| "step": 3588, |
| "tokens_trained": 1.76323772 |
| }, |
| { |
| "epoch": 1.018154740798525, |
| "grad_norm": 5.391080379486084, |
| "loss": 3.9424, |
| "lr": 0.0005679720279720279, |
| "step": 3590, |
| "tokens_trained": 1.764220272 |
| }, |
| { |
| "epoch": 1.0187220764484788, |
| "grad_norm": 4.679509162902832, |
| "loss": 3.9893, |
| "lr": 0.0005676923076923077, |
| "step": 3592, |
| "tokens_trained": 1.765203832 |
| }, |
| { |
| "epoch": 1.0192894120984328, |
| "grad_norm": 5.354970932006836, |
| "loss": 4.023, |
| "lr": 0.0005674125874125874, |
| "step": 3594, |
| "tokens_trained": 1.76618936 |
| }, |
| { |
| "epoch": 1.0198567477483866, |
| "grad_norm": 5.1085357666015625, |
| "loss": 3.9995, |
| "lr": 0.0005671328671328672, |
| "step": 3596, |
| "tokens_trained": 1.767171216 |
| }, |
| { |
| "epoch": 1.0204240833983405, |
| "grad_norm": 3.0856151580810547, |
| "loss": 4.0084, |
| "lr": 0.0005668531468531468, |
| "step": 3598, |
| "tokens_trained": 1.76815464 |
| }, |
| { |
| "epoch": 1.0209914190482945, |
| "grad_norm": 2.330599308013916, |
| "loss": 3.9838, |
| "lr": 0.0005665734265734265, |
| "step": 3600, |
| "tokens_trained": 1.76913612 |
| }, |
| { |
| "epoch": 1.0215587546982483, |
| "grad_norm": 5.641542434692383, |
| "loss": 3.951, |
| "lr": 0.0005662937062937064, |
| "step": 3602, |
| "tokens_trained": 1.770119592 |
| }, |
| { |
| "epoch": 1.0221260903482023, |
| "grad_norm": 8.442550659179688, |
| "loss": 4.0088, |
| "lr": 0.000566013986013986, |
| "step": 3604, |
| "tokens_trained": 1.771103624 |
| }, |
| { |
| "epoch": 1.0226934259981562, |
| "grad_norm": 6.0125732421875, |
| "loss": 4.0243, |
| "lr": 0.0005657342657342658, |
| "step": 3606, |
| "tokens_trained": 1.772091496 |
| }, |
| { |
| "epoch": 1.02326076164811, |
| "grad_norm": 4.9415388107299805, |
| "loss": 3.9874, |
| "lr": 0.0005654545454545454, |
| "step": 3608, |
| "tokens_trained": 1.77307708 |
| }, |
| { |
| "epoch": 1.023828097298064, |
| "grad_norm": 5.762909889221191, |
| "loss": 4.0242, |
| "lr": 0.0005651748251748252, |
| "step": 3610, |
| "tokens_trained": 1.774058032 |
| }, |
| { |
| "epoch": 1.0243954329480178, |
| "grad_norm": 6.652433395385742, |
| "loss": 3.9908, |
| "lr": 0.0005648951048951049, |
| "step": 3612, |
| "tokens_trained": 1.775036512 |
| }, |
| { |
| "epoch": 1.0249627685979719, |
| "grad_norm": 3.539031505584717, |
| "loss": 3.9406, |
| "lr": 0.0005646153846153847, |
| "step": 3614, |
| "tokens_trained": 1.776021656 |
| }, |
| { |
| "epoch": 1.0255301042479257, |
| "grad_norm": 6.829031467437744, |
| "loss": 3.9839, |
| "lr": 0.0005643356643356643, |
| "step": 3616, |
| "tokens_trained": 1.777000824 |
| }, |
| { |
| "epoch": 1.0260974398978795, |
| "grad_norm": 3.46431040763855, |
| "loss": 4.0013, |
| "lr": 0.000564055944055944, |
| "step": 3618, |
| "tokens_trained": 1.777983504 |
| }, |
| { |
| "epoch": 1.0266647755478335, |
| "grad_norm": 5.163998126983643, |
| "loss": 3.9898, |
| "lr": 0.0005637762237762239, |
| "step": 3620, |
| "tokens_trained": 1.778966368 |
| }, |
| { |
| "epoch": 1.0272321111977873, |
| "grad_norm": 4.270689010620117, |
| "loss": 3.9868, |
| "lr": 0.0005634965034965035, |
| "step": 3622, |
| "tokens_trained": 1.77994468 |
| }, |
| { |
| "epoch": 1.0277994468477414, |
| "grad_norm": 5.297236442565918, |
| "loss": 3.9903, |
| "lr": 0.0005632167832167833, |
| "step": 3624, |
| "tokens_trained": 1.7809246 |
| }, |
| { |
| "epoch": 1.0280831146727183, |
| "eval_loss": 0.9977753162384033, |
| "eval_runtime": 20.5557, |
| "step": 3625, |
| "tokens_trained": 1.781418056 |
| }, |
| { |
| "epoch": 1.0283667824976952, |
| "grad_norm": 4.560519218444824, |
| "loss": 3.9339, |
| "lr": 0.0005629370629370629, |
| "step": 3626, |
| "tokens_trained": 1.781910808 |
| }, |
| { |
| "epoch": 1.028934118147649, |
| "grad_norm": 3.7894208431243896, |
| "loss": 3.9739, |
| "lr": 0.0005626573426573426, |
| "step": 3628, |
| "tokens_trained": 1.782891912 |
| }, |
| { |
| "epoch": 1.029501453797603, |
| "grad_norm": 3.9937522411346436, |
| "loss": 3.9734, |
| "lr": 0.0005623776223776224, |
| "step": 3630, |
| "tokens_trained": 1.783871032 |
| }, |
| { |
| "epoch": 1.0300687894475569, |
| "grad_norm": 5.798377990722656, |
| "loss": 3.9526, |
| "lr": 0.0005620979020979021, |
| "step": 3632, |
| "tokens_trained": 1.784855792 |
| }, |
| { |
| "epoch": 1.030636125097511, |
| "grad_norm": 3.2532927989959717, |
| "loss": 3.9237, |
| "lr": 0.0005618181818181818, |
| "step": 3634, |
| "tokens_trained": 1.785835216 |
| }, |
| { |
| "epoch": 1.0312034607474647, |
| "grad_norm": 3.2262985706329346, |
| "loss": 3.9676, |
| "lr": 0.0005615384615384615, |
| "step": 3636, |
| "tokens_trained": 1.78682184 |
| }, |
| { |
| "epoch": 1.0317707963974185, |
| "grad_norm": 2.4307727813720703, |
| "loss": 3.9376, |
| "lr": 0.0005612587412587414, |
| "step": 3638, |
| "tokens_trained": 1.787804536 |
| }, |
| { |
| "epoch": 1.0323381320473726, |
| "grad_norm": 11.10562515258789, |
| "loss": 4.0096, |
| "lr": 0.000560979020979021, |
| "step": 3640, |
| "tokens_trained": 1.788785152 |
| }, |
| { |
| "epoch": 1.0329054676973264, |
| "grad_norm": 8.139045715332031, |
| "loss": 3.992, |
| "lr": 0.0005606993006993008, |
| "step": 3642, |
| "tokens_trained": 1.789766736 |
| }, |
| { |
| "epoch": 1.0334728033472804, |
| "grad_norm": 5.561949729919434, |
| "loss": 3.9368, |
| "lr": 0.0005604195804195804, |
| "step": 3644, |
| "tokens_trained": 1.790746488 |
| }, |
| { |
| "epoch": 1.0340401389972342, |
| "grad_norm": 6.812232494354248, |
| "loss": 4.0185, |
| "lr": 0.0005601398601398601, |
| "step": 3646, |
| "tokens_trained": 1.79172608 |
| }, |
| { |
| "epoch": 1.034607474647188, |
| "grad_norm": 6.200248718261719, |
| "loss": 3.9072, |
| "lr": 0.0005598601398601399, |
| "step": 3648, |
| "tokens_trained": 1.792710784 |
| }, |
| { |
| "epoch": 1.035174810297142, |
| "grad_norm": 5.059606075286865, |
| "loss": 3.9334, |
| "lr": 0.0005595804195804196, |
| "step": 3650, |
| "tokens_trained": 1.793692736 |
| }, |
| { |
| "epoch": 1.035742145947096, |
| "grad_norm": 2.722522020339966, |
| "loss": 3.9438, |
| "lr": 0.0005593006993006993, |
| "step": 3652, |
| "tokens_trained": 1.79467536 |
| }, |
| { |
| "epoch": 1.03630948159705, |
| "grad_norm": 5.643895626068115, |
| "loss": 4.0213, |
| "lr": 0.000559020979020979, |
| "step": 3654, |
| "tokens_trained": 1.795662048 |
| }, |
| { |
| "epoch": 1.0368768172470038, |
| "grad_norm": 3.948822021484375, |
| "loss": 4.0022, |
| "lr": 0.0005587412587412589, |
| "step": 3656, |
| "tokens_trained": 1.79664468 |
| }, |
| { |
| "epoch": 1.0374441528969576, |
| "grad_norm": 2.5267179012298584, |
| "loss": 3.9655, |
| "lr": 0.0005584615384615385, |
| "step": 3658, |
| "tokens_trained": 1.7976262 |
| }, |
| { |
| "epoch": 1.0380114885469116, |
| "grad_norm": 2.7988510131835938, |
| "loss": 4.0161, |
| "lr": 0.0005581818181818182, |
| "step": 3660, |
| "tokens_trained": 1.79861132 |
| }, |
| { |
| "epoch": 1.0385788241968654, |
| "grad_norm": 8.685417175292969, |
| "loss": 4.0038, |
| "lr": 0.0005579020979020979, |
| "step": 3662, |
| "tokens_trained": 1.799592384 |
| }, |
| { |
| "epoch": 1.0391461598468195, |
| "grad_norm": 8.391874313354492, |
| "loss": 3.9519, |
| "lr": 0.0005576223776223776, |
| "step": 3664, |
| "tokens_trained": 1.800577208 |
| }, |
| { |
| "epoch": 1.0397134954967733, |
| "grad_norm": 7.6766815185546875, |
| "loss": 4.0119, |
| "lr": 0.0005573426573426574, |
| "step": 3666, |
| "tokens_trained": 1.801559128 |
| }, |
| { |
| "epoch": 1.040280831146727, |
| "grad_norm": 6.230587959289551, |
| "loss": 3.9528, |
| "lr": 0.0005570629370629371, |
| "step": 3668, |
| "tokens_trained": 1.802540608 |
| }, |
| { |
| "epoch": 1.0408481667966811, |
| "grad_norm": 7.4818010330200195, |
| "loss": 3.9532, |
| "lr": 0.0005567832167832167, |
| "step": 3670, |
| "tokens_trained": 1.80352688 |
| }, |
| { |
| "epoch": 1.041415502446635, |
| "grad_norm": 7.714044094085693, |
| "loss": 4.0154, |
| "lr": 0.0005565034965034965, |
| "step": 3672, |
| "tokens_trained": 1.804515736 |
| }, |
| { |
| "epoch": 1.041982838096589, |
| "grad_norm": 5.260356426239014, |
| "loss": 3.9931, |
| "lr": 0.0005562237762237763, |
| "step": 3674, |
| "tokens_trained": 1.805497152 |
| }, |
| { |
| "epoch": 1.0425501737465428, |
| "grad_norm": 4.576403617858887, |
| "loss": 4.0345, |
| "lr": 0.000555944055944056, |
| "step": 3676, |
| "tokens_trained": 1.806479328 |
| }, |
| { |
| "epoch": 1.0431175093964966, |
| "grad_norm": 3.378896713256836, |
| "loss": 3.9827, |
| "lr": 0.0005556643356643357, |
| "step": 3678, |
| "tokens_trained": 1.807459232 |
| }, |
| { |
| "epoch": 1.0436848450464506, |
| "grad_norm": 6.739299774169922, |
| "loss": 3.9811, |
| "lr": 0.0005553846153846154, |
| "step": 3680, |
| "tokens_trained": 1.808441944 |
| }, |
| { |
| "epoch": 1.0442521806964045, |
| "grad_norm": 4.965353012084961, |
| "loss": 3.9292, |
| "lr": 0.0005551048951048951, |
| "step": 3682, |
| "tokens_trained": 1.809423488 |
| }, |
| { |
| "epoch": 1.0448195163463585, |
| "grad_norm": 7.479167461395264, |
| "loss": 3.9386, |
| "lr": 0.0005548251748251748, |
| "step": 3684, |
| "tokens_trained": 1.810409008 |
| }, |
| { |
| "epoch": 1.0453868519963123, |
| "grad_norm": 3.754814863204956, |
| "loss": 3.9936, |
| "lr": 0.0005545454545454546, |
| "step": 3686, |
| "tokens_trained": 1.811387856 |
| }, |
| { |
| "epoch": 1.0459541876462661, |
| "grad_norm": 5.744228839874268, |
| "loss": 3.9761, |
| "lr": 0.0005542657342657342, |
| "step": 3688, |
| "tokens_trained": 1.812371104 |
| }, |
| { |
| "epoch": 1.0465215232962202, |
| "grad_norm": 5.926168918609619, |
| "loss": 3.904, |
| "lr": 0.000553986013986014, |
| "step": 3690, |
| "tokens_trained": 1.813356456 |
| }, |
| { |
| "epoch": 1.047088858946174, |
| "grad_norm": 5.209751605987549, |
| "loss": 3.9706, |
| "lr": 0.0005537062937062938, |
| "step": 3692, |
| "tokens_trained": 1.81434056 |
| }, |
| { |
| "epoch": 1.047656194596128, |
| "grad_norm": 4.979823112487793, |
| "loss": 3.972, |
| "lr": 0.0005534265734265735, |
| "step": 3694, |
| "tokens_trained": 1.815319936 |
| }, |
| { |
| "epoch": 1.0482235302460818, |
| "grad_norm": 5.393070220947266, |
| "loss": 3.9694, |
| "lr": 0.0005531468531468532, |
| "step": 3696, |
| "tokens_trained": 1.816299016 |
| }, |
| { |
| "epoch": 1.0487908658960357, |
| "grad_norm": 3.27998423576355, |
| "loss": 3.9706, |
| "lr": 0.0005528671328671328, |
| "step": 3698, |
| "tokens_trained": 1.817284696 |
| }, |
| { |
| "epoch": 1.0493582015459897, |
| "grad_norm": 6.364100456237793, |
| "loss": 3.9803, |
| "lr": 0.0005525874125874126, |
| "step": 3700, |
| "tokens_trained": 1.818268736 |
| }, |
| { |
| "epoch": 1.0499255371959435, |
| "grad_norm": 6.063296794891357, |
| "loss": 3.9761, |
| "lr": 0.0005523076923076923, |
| "step": 3702, |
| "tokens_trained": 1.819255432 |
| }, |
| { |
| "epoch": 1.0504928728458975, |
| "grad_norm": 6.279892444610596, |
| "loss": 3.9792, |
| "lr": 0.0005520279720279721, |
| "step": 3704, |
| "tokens_trained": 1.820241704 |
| }, |
| { |
| "epoch": 1.0510602084958514, |
| "grad_norm": 3.804609537124634, |
| "loss": 3.9763, |
| "lr": 0.0005517482517482517, |
| "step": 3706, |
| "tokens_trained": 1.821226584 |
| }, |
| { |
| "epoch": 1.0516275441458052, |
| "grad_norm": 5.056581497192383, |
| "loss": 3.9886, |
| "lr": 0.0005514685314685315, |
| "step": 3708, |
| "tokens_trained": 1.822208432 |
| }, |
| { |
| "epoch": 1.0521948797957592, |
| "grad_norm": 2.052483081817627, |
| "loss": 3.9485, |
| "lr": 0.0005511888111888111, |
| "step": 3710, |
| "tokens_trained": 1.823195928 |
| }, |
| { |
| "epoch": 1.052762215445713, |
| "grad_norm": 6.076491832733154, |
| "loss": 4.0132, |
| "lr": 0.0005509090909090909, |
| "step": 3712, |
| "tokens_trained": 1.824178568 |
| }, |
| { |
| "epoch": 1.053329551095667, |
| "grad_norm": 7.526022434234619, |
| "loss": 3.9478, |
| "lr": 0.0005506293706293707, |
| "step": 3714, |
| "tokens_trained": 1.82516128 |
| }, |
| { |
| "epoch": 1.0538968867456209, |
| "grad_norm": 2.7086679935455322, |
| "loss": 3.9913, |
| "lr": 0.0005503496503496503, |
| "step": 3716, |
| "tokens_trained": 1.826142864 |
| }, |
| { |
| "epoch": 1.0544642223955747, |
| "grad_norm": 1.7643057107925415, |
| "loss": 3.9813, |
| "lr": 0.0005500699300699301, |
| "step": 3718, |
| "tokens_trained": 1.82712608 |
| }, |
| { |
| "epoch": 1.0550315580455287, |
| "grad_norm": 6.2813029289245605, |
| "loss": 3.9772, |
| "lr": 0.0005497902097902098, |
| "step": 3720, |
| "tokens_trained": 1.828107616 |
| }, |
| { |
| "epoch": 1.0555988936954825, |
| "grad_norm": 7.591973781585693, |
| "loss": 3.938, |
| "lr": 0.0005495104895104896, |
| "step": 3722, |
| "tokens_trained": 1.82909308 |
| }, |
| { |
| "epoch": 1.0561662293454366, |
| "grad_norm": 4.976797580718994, |
| "loss": 3.9889, |
| "lr": 0.0005492307692307692, |
| "step": 3724, |
| "tokens_trained": 1.830079168 |
| }, |
| { |
| "epoch": 1.0567335649953904, |
| "grad_norm": 5.417744159698486, |
| "loss": 4.0039, |
| "lr": 0.0005489510489510489, |
| "step": 3726, |
| "tokens_trained": 1.831062488 |
| }, |
| { |
| "epoch": 1.0573009006453442, |
| "grad_norm": 4.516066074371338, |
| "loss": 3.9845, |
| "lr": 0.0005486713286713286, |
| "step": 3728, |
| "tokens_trained": 1.832046528 |
| }, |
| { |
| "epoch": 1.0578682362952982, |
| "grad_norm": 3.677839756011963, |
| "loss": 3.9446, |
| "lr": 0.0005483916083916084, |
| "step": 3730, |
| "tokens_trained": 1.83303104 |
| }, |
| { |
| "epoch": 1.058435571945252, |
| "grad_norm": 5.22024393081665, |
| "loss": 3.9746, |
| "lr": 0.0005481118881118882, |
| "step": 3732, |
| "tokens_trained": 1.834017736 |
| }, |
| { |
| "epoch": 1.059002907595206, |
| "grad_norm": 7.4156060218811035, |
| "loss": 3.9898, |
| "lr": 0.0005478321678321678, |
| "step": 3734, |
| "tokens_trained": 1.8349996 |
| }, |
| { |
| "epoch": 1.05957024324516, |
| "grad_norm": 3.472533702850342, |
| "loss": 3.9558, |
| "lr": 0.0005475524475524476, |
| "step": 3736, |
| "tokens_trained": 1.835979152 |
| }, |
| { |
| "epoch": 1.0601375788951137, |
| "grad_norm": 2.4360055923461914, |
| "loss": 3.9627, |
| "lr": 0.0005472727272727273, |
| "step": 3738, |
| "tokens_trained": 1.836963416 |
| }, |
| { |
| "epoch": 1.0607049145450678, |
| "grad_norm": 4.8988728523254395, |
| "loss": 3.9492, |
| "lr": 0.000546993006993007, |
| "step": 3740, |
| "tokens_trained": 1.83794088 |
| }, |
| { |
| "epoch": 1.0612722501950216, |
| "grad_norm": 5.711161136627197, |
| "loss": 4.002, |
| "lr": 0.0005467132867132867, |
| "step": 3742, |
| "tokens_trained": 1.838924456 |
| }, |
| { |
| "epoch": 1.0618395858449756, |
| "grad_norm": 4.373830318450928, |
| "loss": 3.9811, |
| "lr": 0.0005464335664335664, |
| "step": 3744, |
| "tokens_trained": 1.839902072 |
| }, |
| { |
| "epoch": 1.0624069214949294, |
| "grad_norm": 3.2446751594543457, |
| "loss": 3.9551, |
| "lr": 0.0005461538461538461, |
| "step": 3746, |
| "tokens_trained": 1.840882688 |
| }, |
| { |
| "epoch": 1.0629742571448832, |
| "grad_norm": 3.3250389099121094, |
| "loss": 3.9556, |
| "lr": 0.0005458741258741259, |
| "step": 3748, |
| "tokens_trained": 1.841863816 |
| }, |
| { |
| "epoch": 1.0635415927948373, |
| "grad_norm": 7.377841949462891, |
| "loss": 4.0118, |
| "lr": 0.0005455944055944057, |
| "step": 3750, |
| "tokens_trained": 1.842844072 |
| }, |
| { |
| "epoch": 1.0635415927948373, |
| "eval_loss": 0.994845449924469, |
| "eval_runtime": 20.2191, |
| "step": 3750, |
| "tokens_trained": 1.842844072 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 7650, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 750, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|