| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.5, |
| "eval_steps": 500, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.0004975, |
| "loss": 1.8757, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.000495, |
| "loss": 1.8758, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.0004925, |
| "loss": 1.9307, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00049, |
| "loss": 1.9338, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.0004875, |
| "loss": 1.8599, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00048499999999999997, |
| "loss": 1.9875, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0004825, |
| "loss": 1.9947, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00048, |
| "loss": 1.9015, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0004775, |
| "loss": 1.8941, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.000475, |
| "loss": 1.8592, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.0004725, |
| "loss": 1.8977, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00047, |
| "loss": 1.886, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00046750000000000003, |
| "loss": 1.9486, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.000465, |
| "loss": 1.8669, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.0004625, |
| "loss": 1.936, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00046, |
| "loss": 1.8385, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.0004575, |
| "loss": 1.8045, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.000455, |
| "loss": 1.9058, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00045250000000000005, |
| "loss": 1.868, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00045000000000000004, |
| "loss": 1.8055, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00044750000000000004, |
| "loss": 1.849, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00044500000000000003, |
| "loss": 1.869, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0004425, |
| "loss": 1.8587, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00044, |
| "loss": 1.9206, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0004375, |
| "loss": 1.8406, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.000435, |
| "loss": 1.8721, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.0004325, |
| "loss": 1.9409, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00043, |
| "loss": 1.9222, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.0004275, |
| "loss": 1.8705, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.000425, |
| "loss": 1.9348, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00042249999999999997, |
| "loss": 1.8167, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.00042, |
| "loss": 1.8904, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0004175, |
| "loss": 1.8545, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.000415, |
| "loss": 1.8448, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0004125, |
| "loss": 1.8898, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.00041, |
| "loss": 1.8338, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0004075, |
| "loss": 1.8246, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00040500000000000003, |
| "loss": 1.8754, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0004025, |
| "loss": 1.8603, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0004, |
| "loss": 1.799, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0003975, |
| "loss": 1.8652, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.000395, |
| "loss": 1.8406, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0003925, |
| "loss": 1.8341, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00039000000000000005, |
| "loss": 1.9399, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00038750000000000004, |
| "loss": 1.8095, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00038500000000000003, |
| "loss": 1.8286, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00038250000000000003, |
| "loss": 1.8846, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00038, |
| "loss": 1.8101, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.0003775, |
| "loss": 1.8791, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.000375, |
| "loss": 1.8181, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0003725, |
| "loss": 1.8555, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.00037, |
| "loss": 1.8328, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.0003675, |
| "loss": 1.814, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.000365, |
| "loss": 1.8647, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0003625, |
| "loss": 1.8754, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00035999999999999997, |
| "loss": 1.8184, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0003575, |
| "loss": 1.8879, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.000355, |
| "loss": 1.8329, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.0003525, |
| "loss": 1.7787, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00035, |
| "loss": 1.7543, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0003475, |
| "loss": 1.7782, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.000345, |
| "loss": 1.8857, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00034250000000000003, |
| "loss": 1.7608, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00034, |
| "loss": 1.8622, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.0003375, |
| "loss": 1.7055, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.000335, |
| "loss": 1.7356, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.0003325, |
| "loss": 1.8353, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00033, |
| "loss": 1.7389, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00032750000000000005, |
| "loss": 1.8115, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.00032500000000000004, |
| "loss": 1.7303, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.00032250000000000003, |
| "loss": 1.7603, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00032, |
| "loss": 1.7925, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0003175, |
| "loss": 1.806, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.000315, |
| "loss": 1.8047, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.0003125, |
| "loss": 1.7939, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00031, |
| "loss": 1.7539, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0003075, |
| "loss": 1.7817, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.000305, |
| "loss": 1.7652, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0003025, |
| "loss": 1.757, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0003, |
| "loss": 1.7845, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00029749999999999997, |
| "loss": 1.7701, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.000295, |
| "loss": 1.7759, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.0002925, |
| "loss": 1.697, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00029, |
| "loss": 1.7623, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.0002875, |
| "loss": 1.7926, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.000285, |
| "loss": 1.8367, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.0002825, |
| "loss": 1.764, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00028000000000000003, |
| "loss": 1.7322, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.0002775, |
| "loss": 1.7723, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.000275, |
| "loss": 1.7971, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.0002725, |
| "loss": 1.7938, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00027, |
| "loss": 1.8143, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.0002675, |
| "loss": 1.735, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00026500000000000004, |
| "loss": 1.7571, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00026250000000000004, |
| "loss": 1.7636, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00026000000000000003, |
| "loss": 1.7344, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.0002575, |
| "loss": 1.7156, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.000255, |
| "loss": 1.6996, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0002525, |
| "loss": 1.7917, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00025, |
| "loss": 1.7578, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 1000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "total_flos": 1.31426122150656e+16, |
| "train_batch_size": 10, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|