| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 100.0, |
| "eval_steps": 500, |
| "global_step": 2500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.7225164175033569, |
| "eval_runtime": 2.4692, |
| "eval_samples_per_second": 80.998, |
| "eval_steps_per_second": 10.125, |
| "step": 25 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.5052716732025146, |
| "eval_runtime": 2.4705, |
| "eval_samples_per_second": 80.955, |
| "eval_steps_per_second": 10.119, |
| "step": 50 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.4475671350955963, |
| "eval_runtime": 2.4731, |
| "eval_samples_per_second": 80.871, |
| "eval_steps_per_second": 10.109, |
| "step": 75 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 0.4104467034339905, |
| "eval_runtime": 2.4763, |
| "eval_samples_per_second": 80.765, |
| "eval_steps_per_second": 10.096, |
| "step": 100 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 0.39656341075897217, |
| "eval_runtime": 2.4766, |
| "eval_samples_per_second": 80.755, |
| "eval_steps_per_second": 10.094, |
| "step": 125 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 0.36905747652053833, |
| "eval_runtime": 2.4774, |
| "eval_samples_per_second": 80.731, |
| "eval_steps_per_second": 10.091, |
| "step": 150 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 0.362547367811203, |
| "eval_runtime": 2.4792, |
| "eval_samples_per_second": 80.672, |
| "eval_steps_per_second": 10.084, |
| "step": 175 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 0.3497239053249359, |
| "eval_runtime": 2.4872, |
| "eval_samples_per_second": 80.413, |
| "eval_steps_per_second": 10.052, |
| "step": 200 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_loss": 0.3523653447628021, |
| "eval_runtime": 2.4878, |
| "eval_samples_per_second": 80.392, |
| "eval_steps_per_second": 10.049, |
| "step": 225 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 0.33985191583633423, |
| "eval_runtime": 2.4877, |
| "eval_samples_per_second": 80.395, |
| "eval_steps_per_second": 10.049, |
| "step": 250 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_loss": 0.3415805399417877, |
| "eval_runtime": 2.4896, |
| "eval_samples_per_second": 80.333, |
| "eval_steps_per_second": 10.042, |
| "step": 275 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_loss": 0.33282220363616943, |
| "eval_runtime": 2.49, |
| "eval_samples_per_second": 80.321, |
| "eval_steps_per_second": 10.04, |
| "step": 300 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_loss": 0.33800217509269714, |
| "eval_runtime": 2.491, |
| "eval_samples_per_second": 80.289, |
| "eval_steps_per_second": 10.036, |
| "step": 325 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_loss": 0.3342490792274475, |
| "eval_runtime": 2.491, |
| "eval_samples_per_second": 80.288, |
| "eval_steps_per_second": 10.036, |
| "step": 350 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_loss": 0.34117013216018677, |
| "eval_runtime": 2.4915, |
| "eval_samples_per_second": 80.273, |
| "eval_steps_per_second": 10.034, |
| "step": 375 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_loss": 0.3388213813304901, |
| "eval_runtime": 2.4936, |
| "eval_samples_per_second": 80.205, |
| "eval_steps_per_second": 10.026, |
| "step": 400 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_loss": 0.319783091545105, |
| "eval_runtime": 2.4927, |
| "eval_samples_per_second": 80.234, |
| "eval_steps_per_second": 10.029, |
| "step": 425 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_loss": 0.3183771073818207, |
| "eval_runtime": 2.4921, |
| "eval_samples_per_second": 80.253, |
| "eval_steps_per_second": 10.032, |
| "step": 450 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_loss": 0.31770122051239014, |
| "eval_runtime": 2.4935, |
| "eval_samples_per_second": 80.21, |
| "eval_steps_per_second": 10.026, |
| "step": 475 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.8955293893814087, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.4631, |
| "step": 500 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_loss": 0.31926316022872925, |
| "eval_runtime": 2.4537, |
| "eval_samples_per_second": 81.51, |
| "eval_steps_per_second": 10.189, |
| "step": 500 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_loss": 0.3148637115955353, |
| "eval_runtime": 2.4822, |
| "eval_samples_per_second": 80.572, |
| "eval_steps_per_second": 10.072, |
| "step": 525 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_loss": 0.31756889820098877, |
| "eval_runtime": 2.4835, |
| "eval_samples_per_second": 80.531, |
| "eval_steps_per_second": 10.066, |
| "step": 550 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_loss": 0.3171720504760742, |
| "eval_runtime": 2.4904, |
| "eval_samples_per_second": 80.309, |
| "eval_steps_per_second": 10.039, |
| "step": 575 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_loss": 0.3179776072502136, |
| "eval_runtime": 2.4915, |
| "eval_samples_per_second": 80.273, |
| "eval_steps_per_second": 10.034, |
| "step": 600 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_loss": 0.3148040473461151, |
| "eval_runtime": 2.4928, |
| "eval_samples_per_second": 80.232, |
| "eval_steps_per_second": 10.029, |
| "step": 625 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_loss": 0.3072579503059387, |
| "eval_runtime": 2.4926, |
| "eval_samples_per_second": 80.239, |
| "eval_steps_per_second": 10.03, |
| "step": 650 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_loss": 0.3129171133041382, |
| "eval_runtime": 2.4936, |
| "eval_samples_per_second": 80.206, |
| "eval_steps_per_second": 10.026, |
| "step": 675 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_loss": 0.3081643283367157, |
| "eval_runtime": 2.4941, |
| "eval_samples_per_second": 80.188, |
| "eval_steps_per_second": 10.024, |
| "step": 700 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_loss": 0.3064418435096741, |
| "eval_runtime": 2.4964, |
| "eval_samples_per_second": 80.116, |
| "eval_steps_per_second": 10.014, |
| "step": 725 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_loss": 0.30982914566993713, |
| "eval_runtime": 2.4963, |
| "eval_samples_per_second": 80.12, |
| "eval_steps_per_second": 10.015, |
| "step": 750 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_loss": 0.3063754439353943, |
| "eval_runtime": 2.4956, |
| "eval_samples_per_second": 80.142, |
| "eval_steps_per_second": 10.018, |
| "step": 775 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_loss": 0.3113013207912445, |
| "eval_runtime": 2.4972, |
| "eval_samples_per_second": 80.089, |
| "eval_steps_per_second": 10.011, |
| "step": 800 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_loss": 0.30704861879348755, |
| "eval_runtime": 2.4962, |
| "eval_samples_per_second": 80.122, |
| "eval_steps_per_second": 10.015, |
| "step": 825 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_loss": 0.2988375425338745, |
| "eval_runtime": 2.4974, |
| "eval_samples_per_second": 80.083, |
| "eval_steps_per_second": 10.01, |
| "step": 850 |
| }, |
| { |
| "epoch": 35.0, |
| "eval_loss": 0.3142584264278412, |
| "eval_runtime": 2.4958, |
| "eval_samples_per_second": 80.135, |
| "eval_steps_per_second": 10.017, |
| "step": 875 |
| }, |
| { |
| "epoch": 36.0, |
| "eval_loss": 0.3032761514186859, |
| "eval_runtime": 2.4976, |
| "eval_samples_per_second": 80.077, |
| "eval_steps_per_second": 10.01, |
| "step": 900 |
| }, |
| { |
| "epoch": 37.0, |
| "eval_loss": 0.30415403842926025, |
| "eval_runtime": 2.4973, |
| "eval_samples_per_second": 80.087, |
| "eval_steps_per_second": 10.011, |
| "step": 925 |
| }, |
| { |
| "epoch": 38.0, |
| "eval_loss": 0.30165913701057434, |
| "eval_runtime": 2.4982, |
| "eval_samples_per_second": 80.057, |
| "eval_steps_per_second": 10.007, |
| "step": 950 |
| }, |
| { |
| "epoch": 39.0, |
| "eval_loss": 0.3017444908618927, |
| "eval_runtime": 2.4977, |
| "eval_samples_per_second": 80.072, |
| "eval_steps_per_second": 10.009, |
| "step": 975 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 2.4287023544311523, |
| "learning_rate": 1.2e-05, |
| "loss": 0.3457, |
| "step": 1000 |
| }, |
| { |
| "epoch": 40.0, |
| "eval_loss": 0.3025864064693451, |
| "eval_runtime": 2.4539, |
| "eval_samples_per_second": 81.504, |
| "eval_steps_per_second": 10.188, |
| "step": 1000 |
| }, |
| { |
| "epoch": 41.0, |
| "eval_loss": 0.30045461654663086, |
| "eval_runtime": 2.4819, |
| "eval_samples_per_second": 80.584, |
| "eval_steps_per_second": 10.073, |
| "step": 1025 |
| }, |
| { |
| "epoch": 42.0, |
| "eval_loss": 0.30064135789871216, |
| "eval_runtime": 2.4896, |
| "eval_samples_per_second": 80.334, |
| "eval_steps_per_second": 10.042, |
| "step": 1050 |
| }, |
| { |
| "epoch": 43.0, |
| "eval_loss": 0.29575350880622864, |
| "eval_runtime": 2.4903, |
| "eval_samples_per_second": 80.312, |
| "eval_steps_per_second": 10.039, |
| "step": 1075 |
| }, |
| { |
| "epoch": 44.0, |
| "eval_loss": 0.30160149931907654, |
| "eval_runtime": 2.4926, |
| "eval_samples_per_second": 80.239, |
| "eval_steps_per_second": 10.03, |
| "step": 1100 |
| }, |
| { |
| "epoch": 45.0, |
| "eval_loss": 0.30429255962371826, |
| "eval_runtime": 2.4921, |
| "eval_samples_per_second": 80.255, |
| "eval_steps_per_second": 10.032, |
| "step": 1125 |
| }, |
| { |
| "epoch": 46.0, |
| "eval_loss": 0.3015859127044678, |
| "eval_runtime": 2.4927, |
| "eval_samples_per_second": 80.236, |
| "eval_steps_per_second": 10.029, |
| "step": 1150 |
| }, |
| { |
| "epoch": 47.0, |
| "eval_loss": 0.29914650321006775, |
| "eval_runtime": 2.4947, |
| "eval_samples_per_second": 80.171, |
| "eval_steps_per_second": 10.021, |
| "step": 1175 |
| }, |
| { |
| "epoch": 48.0, |
| "eval_loss": 0.2971905469894409, |
| "eval_runtime": 2.4958, |
| "eval_samples_per_second": 80.134, |
| "eval_steps_per_second": 10.017, |
| "step": 1200 |
| }, |
| { |
| "epoch": 49.0, |
| "eval_loss": 0.29176658391952515, |
| "eval_runtime": 2.4963, |
| "eval_samples_per_second": 80.118, |
| "eval_steps_per_second": 10.015, |
| "step": 1225 |
| }, |
| { |
| "epoch": 50.0, |
| "eval_loss": 0.2934282720088959, |
| "eval_runtime": 2.4976, |
| "eval_samples_per_second": 80.076, |
| "eval_steps_per_second": 10.01, |
| "step": 1250 |
| }, |
| { |
| "epoch": 51.0, |
| "eval_loss": 0.2918751835823059, |
| "eval_runtime": 2.4964, |
| "eval_samples_per_second": 80.115, |
| "eval_steps_per_second": 10.014, |
| "step": 1275 |
| }, |
| { |
| "epoch": 52.0, |
| "eval_loss": 0.2914879620075226, |
| "eval_runtime": 2.4977, |
| "eval_samples_per_second": 80.075, |
| "eval_steps_per_second": 10.009, |
| "step": 1300 |
| }, |
| { |
| "epoch": 53.0, |
| "eval_loss": 0.2925909757614136, |
| "eval_runtime": 2.4975, |
| "eval_samples_per_second": 80.081, |
| "eval_steps_per_second": 10.01, |
| "step": 1325 |
| }, |
| { |
| "epoch": 54.0, |
| "eval_loss": 0.2940743565559387, |
| "eval_runtime": 2.4974, |
| "eval_samples_per_second": 80.085, |
| "eval_steps_per_second": 10.011, |
| "step": 1350 |
| }, |
| { |
| "epoch": 55.0, |
| "eval_loss": 0.2973780930042267, |
| "eval_runtime": 2.4989, |
| "eval_samples_per_second": 80.037, |
| "eval_steps_per_second": 10.005, |
| "step": 1375 |
| }, |
| { |
| "epoch": 56.0, |
| "eval_loss": 0.2954687178134918, |
| "eval_runtime": 2.498, |
| "eval_samples_per_second": 80.065, |
| "eval_steps_per_second": 10.008, |
| "step": 1400 |
| }, |
| { |
| "epoch": 57.0, |
| "eval_loss": 0.29051879048347473, |
| "eval_runtime": 2.4979, |
| "eval_samples_per_second": 80.069, |
| "eval_steps_per_second": 10.009, |
| "step": 1425 |
| }, |
| { |
| "epoch": 58.0, |
| "eval_loss": 0.29731473326683044, |
| "eval_runtime": 2.4993, |
| "eval_samples_per_second": 80.023, |
| "eval_steps_per_second": 10.003, |
| "step": 1450 |
| }, |
| { |
| "epoch": 59.0, |
| "eval_loss": 0.2933524250984192, |
| "eval_runtime": 2.4988, |
| "eval_samples_per_second": 80.04, |
| "eval_steps_per_second": 10.005, |
| "step": 1475 |
| }, |
| { |
| "epoch": 60.0, |
| "grad_norm": 1.120781421661377, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.3291, |
| "step": 1500 |
| }, |
| { |
| "epoch": 60.0, |
| "eval_loss": 0.2888854742050171, |
| "eval_runtime": 2.456, |
| "eval_samples_per_second": 81.434, |
| "eval_steps_per_second": 10.179, |
| "step": 1500 |
| }, |
| { |
| "epoch": 61.0, |
| "eval_loss": 0.2901514172554016, |
| "eval_runtime": 2.4865, |
| "eval_samples_per_second": 80.436, |
| "eval_steps_per_second": 10.054, |
| "step": 1525 |
| }, |
| { |
| "epoch": 62.0, |
| "eval_loss": 0.29295194149017334, |
| "eval_runtime": 2.4921, |
| "eval_samples_per_second": 80.253, |
| "eval_steps_per_second": 10.032, |
| "step": 1550 |
| }, |
| { |
| "epoch": 63.0, |
| "eval_loss": 0.29049646854400635, |
| "eval_runtime": 2.4924, |
| "eval_samples_per_second": 80.244, |
| "eval_steps_per_second": 10.031, |
| "step": 1575 |
| }, |
| { |
| "epoch": 64.0, |
| "eval_loss": 0.2913173735141754, |
| "eval_runtime": 2.4934, |
| "eval_samples_per_second": 80.212, |
| "eval_steps_per_second": 10.026, |
| "step": 1600 |
| }, |
| { |
| "epoch": 65.0, |
| "eval_loss": 0.28798389434814453, |
| "eval_runtime": 2.4945, |
| "eval_samples_per_second": 80.176, |
| "eval_steps_per_second": 10.022, |
| "step": 1625 |
| }, |
| { |
| "epoch": 66.0, |
| "eval_loss": 0.28929680585861206, |
| "eval_runtime": 2.4963, |
| "eval_samples_per_second": 80.118, |
| "eval_steps_per_second": 10.015, |
| "step": 1650 |
| }, |
| { |
| "epoch": 67.0, |
| "eval_loss": 0.2856563925743103, |
| "eval_runtime": 2.4957, |
| "eval_samples_per_second": 80.139, |
| "eval_steps_per_second": 10.017, |
| "step": 1675 |
| }, |
| { |
| "epoch": 68.0, |
| "eval_loss": 0.2869837284088135, |
| "eval_runtime": 2.496, |
| "eval_samples_per_second": 80.13, |
| "eval_steps_per_second": 10.016, |
| "step": 1700 |
| }, |
| { |
| "epoch": 69.0, |
| "eval_loss": 0.2902255356311798, |
| "eval_runtime": 2.4969, |
| "eval_samples_per_second": 80.098, |
| "eval_steps_per_second": 10.012, |
| "step": 1725 |
| }, |
| { |
| "epoch": 70.0, |
| "eval_loss": 0.28557059168815613, |
| "eval_runtime": 2.4957, |
| "eval_samples_per_second": 80.139, |
| "eval_steps_per_second": 10.017, |
| "step": 1750 |
| }, |
| { |
| "epoch": 71.0, |
| "eval_loss": 0.2883276343345642, |
| "eval_runtime": 2.4986, |
| "eval_samples_per_second": 80.045, |
| "eval_steps_per_second": 10.006, |
| "step": 1775 |
| }, |
| { |
| "epoch": 72.0, |
| "eval_loss": 0.286774605512619, |
| "eval_runtime": 2.4979, |
| "eval_samples_per_second": 80.068, |
| "eval_steps_per_second": 10.009, |
| "step": 1800 |
| }, |
| { |
| "epoch": 73.0, |
| "eval_loss": 0.28692272305488586, |
| "eval_runtime": 2.4977, |
| "eval_samples_per_second": 80.073, |
| "eval_steps_per_second": 10.009, |
| "step": 1825 |
| }, |
| { |
| "epoch": 74.0, |
| "eval_loss": 0.2842114567756653, |
| "eval_runtime": 2.4982, |
| "eval_samples_per_second": 80.058, |
| "eval_steps_per_second": 10.007, |
| "step": 1850 |
| }, |
| { |
| "epoch": 75.0, |
| "eval_loss": 0.2869337201118469, |
| "eval_runtime": 2.4984, |
| "eval_samples_per_second": 80.052, |
| "eval_steps_per_second": 10.007, |
| "step": 1875 |
| }, |
| { |
| "epoch": 76.0, |
| "eval_loss": 0.2843911647796631, |
| "eval_runtime": 2.498, |
| "eval_samples_per_second": 80.064, |
| "eval_steps_per_second": 10.008, |
| "step": 1900 |
| }, |
| { |
| "epoch": 77.0, |
| "eval_loss": 0.28588855266571045, |
| "eval_runtime": 2.4985, |
| "eval_samples_per_second": 80.047, |
| "eval_steps_per_second": 10.006, |
| "step": 1925 |
| }, |
| { |
| "epoch": 78.0, |
| "eval_loss": 0.2864097058773041, |
| "eval_runtime": 2.4994, |
| "eval_samples_per_second": 80.02, |
| "eval_steps_per_second": 10.002, |
| "step": 1950 |
| }, |
| { |
| "epoch": 79.0, |
| "eval_loss": 0.28731438517570496, |
| "eval_runtime": 2.4981, |
| "eval_samples_per_second": 80.061, |
| "eval_steps_per_second": 10.008, |
| "step": 1975 |
| }, |
| { |
| "epoch": 80.0, |
| "grad_norm": 1.4072085618972778, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.3199, |
| "step": 2000 |
| }, |
| { |
| "epoch": 80.0, |
| "eval_loss": 0.2888672947883606, |
| "eval_runtime": 2.4574, |
| "eval_samples_per_second": 81.386, |
| "eval_steps_per_second": 10.173, |
| "step": 2000 |
| }, |
| { |
| "epoch": 81.0, |
| "eval_loss": 0.28676241636276245, |
| "eval_runtime": 2.4848, |
| "eval_samples_per_second": 80.49, |
| "eval_steps_per_second": 10.061, |
| "step": 2025 |
| }, |
| { |
| "epoch": 82.0, |
| "eval_loss": 0.28567585349082947, |
| "eval_runtime": 2.4917, |
| "eval_samples_per_second": 80.268, |
| "eval_steps_per_second": 10.033, |
| "step": 2050 |
| }, |
| { |
| "epoch": 83.0, |
| "eval_loss": 0.2843726873397827, |
| "eval_runtime": 2.4926, |
| "eval_samples_per_second": 80.236, |
| "eval_steps_per_second": 10.03, |
| "step": 2075 |
| }, |
| { |
| "epoch": 84.0, |
| "eval_loss": 0.28754809498786926, |
| "eval_runtime": 2.4936, |
| "eval_samples_per_second": 80.205, |
| "eval_steps_per_second": 10.026, |
| "step": 2100 |
| }, |
| { |
| "epoch": 85.0, |
| "eval_loss": 0.2854582667350769, |
| "eval_runtime": 2.4941, |
| "eval_samples_per_second": 80.188, |
| "eval_steps_per_second": 10.023, |
| "step": 2125 |
| }, |
| { |
| "epoch": 86.0, |
| "eval_loss": 0.2840147316455841, |
| "eval_runtime": 2.4944, |
| "eval_samples_per_second": 80.181, |
| "eval_steps_per_second": 10.023, |
| "step": 2150 |
| }, |
| { |
| "epoch": 87.0, |
| "eval_loss": 0.28520676493644714, |
| "eval_runtime": 2.4963, |
| "eval_samples_per_second": 80.119, |
| "eval_steps_per_second": 10.015, |
| "step": 2175 |
| }, |
| { |
| "epoch": 88.0, |
| "eval_loss": 0.28196609020233154, |
| "eval_runtime": 2.4972, |
| "eval_samples_per_second": 80.091, |
| "eval_steps_per_second": 10.011, |
| "step": 2200 |
| }, |
| { |
| "epoch": 89.0, |
| "eval_loss": 0.28386008739471436, |
| "eval_runtime": 2.4982, |
| "eval_samples_per_second": 80.057, |
| "eval_steps_per_second": 10.007, |
| "step": 2225 |
| }, |
| { |
| "epoch": 90.0, |
| "eval_loss": 0.2850269675254822, |
| "eval_runtime": 2.4975, |
| "eval_samples_per_second": 80.079, |
| "eval_steps_per_second": 10.01, |
| "step": 2250 |
| }, |
| { |
| "epoch": 91.0, |
| "eval_loss": 0.28362977504730225, |
| "eval_runtime": 2.4982, |
| "eval_samples_per_second": 80.057, |
| "eval_steps_per_second": 10.007, |
| "step": 2275 |
| }, |
| { |
| "epoch": 92.0, |
| "eval_loss": 0.2840833365917206, |
| "eval_runtime": 2.4978, |
| "eval_samples_per_second": 80.07, |
| "eval_steps_per_second": 10.009, |
| "step": 2300 |
| }, |
| { |
| "epoch": 93.0, |
| "eval_loss": 0.28477975726127625, |
| "eval_runtime": 2.4997, |
| "eval_samples_per_second": 80.01, |
| "eval_steps_per_second": 10.001, |
| "step": 2325 |
| }, |
| { |
| "epoch": 94.0, |
| "eval_loss": 0.2831202745437622, |
| "eval_runtime": 2.4995, |
| "eval_samples_per_second": 80.016, |
| "eval_steps_per_second": 10.002, |
| "step": 2350 |
| }, |
| { |
| "epoch": 95.0, |
| "eval_loss": 0.28298699855804443, |
| "eval_runtime": 2.5008, |
| "eval_samples_per_second": 79.975, |
| "eval_steps_per_second": 9.997, |
| "step": 2375 |
| }, |
| { |
| "epoch": 96.0, |
| "eval_loss": 0.2848021984100342, |
| "eval_runtime": 2.4987, |
| "eval_samples_per_second": 80.041, |
| "eval_steps_per_second": 10.005, |
| "step": 2400 |
| }, |
| { |
| "epoch": 97.0, |
| "eval_loss": 0.2818942368030548, |
| "eval_runtime": 2.4999, |
| "eval_samples_per_second": 80.004, |
| "eval_steps_per_second": 10.0, |
| "step": 2425 |
| }, |
| { |
| "epoch": 98.0, |
| "eval_loss": 0.28425753116607666, |
| "eval_runtime": 2.5005, |
| "eval_samples_per_second": 79.982, |
| "eval_steps_per_second": 9.998, |
| "step": 2450 |
| }, |
| { |
| "epoch": 99.0, |
| "eval_loss": 0.28273478150367737, |
| "eval_runtime": 2.4987, |
| "eval_samples_per_second": 80.042, |
| "eval_steps_per_second": 10.005, |
| "step": 2475 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 1.4653393030166626, |
| "learning_rate": 0.0, |
| "loss": 0.3157, |
| "step": 2500 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 2500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 500, |
| "total_flos": 1306483752960000.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|