| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.8225806451612905, |
| "eval_steps": 100, |
| "global_step": 2800, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.10080645161290322, |
| "eval_accuracy": 0.08417928653841922, |
| "eval_loss": 5.752899646759033, |
| "eval_runtime": 62.0977, |
| "eval_samples_per_second": 24.993, |
| "eval_steps_per_second": 0.789, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.20161290322580644, |
| "eval_accuracy": 0.08268329015442163, |
| "eval_loss": 5.443298816680908, |
| "eval_runtime": 62.0585, |
| "eval_samples_per_second": 25.009, |
| "eval_steps_per_second": 0.79, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3024193548387097, |
| "eval_accuracy": 0.07856146899875044, |
| "eval_loss": 5.327319622039795, |
| "eval_runtime": 62.0363, |
| "eval_samples_per_second": 25.018, |
| "eval_steps_per_second": 0.79, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4032258064516129, |
| "eval_accuracy": 0.07751066609990712, |
| "eval_loss": 5.258698463439941, |
| "eval_runtime": 62.0725, |
| "eval_samples_per_second": 25.003, |
| "eval_steps_per_second": 0.789, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5040322580645161, |
| "grad_norm": 1.46875, |
| "learning_rate": 4.1599462365591404e-05, |
| "loss": 5.571, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5040322580645161, |
| "eval_accuracy": 0.07672025056131017, |
| "eval_loss": 5.229703426361084, |
| "eval_runtime": 62.0486, |
| "eval_samples_per_second": 25.013, |
| "eval_steps_per_second": 0.79, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6048387096774194, |
| "eval_accuracy": 0.0763650434369539, |
| "eval_loss": 5.196520805358887, |
| "eval_runtime": 62.0297, |
| "eval_samples_per_second": 25.02, |
| "eval_steps_per_second": 0.79, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.7056451612903226, |
| "eval_accuracy": 0.077187363334036, |
| "eval_loss": 5.175901889801025, |
| "eval_runtime": 62.0405, |
| "eval_samples_per_second": 25.016, |
| "eval_steps_per_second": 0.79, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.8064516129032258, |
| "eval_accuracy": 0.07545865092754048, |
| "eval_loss": 5.157538890838623, |
| "eval_runtime": 62.031, |
| "eval_samples_per_second": 25.02, |
| "eval_steps_per_second": 0.79, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.907258064516129, |
| "eval_accuracy": 0.07520929443264239, |
| "eval_loss": 5.153995037078857, |
| "eval_runtime": 62.0464, |
| "eval_samples_per_second": 25.014, |
| "eval_steps_per_second": 0.79, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.0080645161290323, |
| "grad_norm": 1.546875, |
| "learning_rate": 3.31989247311828e-05, |
| "loss": 5.1997, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0080645161290323, |
| "eval_accuracy": 0.07693396983438175, |
| "eval_loss": 5.134342670440674, |
| "eval_runtime": 62.0251, |
| "eval_samples_per_second": 25.022, |
| "eval_steps_per_second": 0.79, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.1088709677419355, |
| "eval_accuracy": 0.07625756236294938, |
| "eval_loss": 5.132731914520264, |
| "eval_runtime": 62.1032, |
| "eval_samples_per_second": 24.991, |
| "eval_steps_per_second": 0.789, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.2096774193548387, |
| "eval_accuracy": 0.07647123597980147, |
| "eval_loss": 5.128543853759766, |
| "eval_runtime": 62.0211, |
| "eval_samples_per_second": 25.024, |
| "eval_steps_per_second": 0.79, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.310483870967742, |
| "eval_accuracy": 0.07615019590017348, |
| "eval_loss": 5.1188788414001465, |
| "eval_runtime": 62.0621, |
| "eval_samples_per_second": 25.007, |
| "eval_steps_per_second": 0.79, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4112903225806452, |
| "eval_accuracy": 0.07678086190049863, |
| "eval_loss": 5.11050271987915, |
| "eval_runtime": 62.0289, |
| "eval_samples_per_second": 25.021, |
| "eval_steps_per_second": 0.79, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.5120967741935485, |
| "grad_norm": 1.359375, |
| "learning_rate": 2.4798387096774196e-05, |
| "loss": 5.1424, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.5120967741935485, |
| "eval_accuracy": 0.07600313797499995, |
| "eval_loss": 5.115657329559326, |
| "eval_runtime": 62.0343, |
| "eval_samples_per_second": 25.018, |
| "eval_steps_per_second": 0.79, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.6129032258064515, |
| "eval_accuracy": 0.07590795945533446, |
| "eval_loss": 5.111791133880615, |
| "eval_runtime": 62.0288, |
| "eval_samples_per_second": 25.021, |
| "eval_steps_per_second": 0.79, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.713709677419355, |
| "eval_accuracy": 0.07696024882654234, |
| "eval_loss": 5.103736877441406, |
| "eval_runtime": 62.0691, |
| "eval_samples_per_second": 25.004, |
| "eval_steps_per_second": 0.789, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.814516129032258, |
| "eval_accuracy": 0.07661788474582673, |
| "eval_loss": 5.100837707519531, |
| "eval_runtime": 62.0201, |
| "eval_samples_per_second": 25.024, |
| "eval_steps_per_second": 0.79, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.9153225806451613, |
| "eval_accuracy": 0.07619118061314897, |
| "eval_loss": 5.093105792999268, |
| "eval_runtime": 62.0654, |
| "eval_samples_per_second": 25.006, |
| "eval_steps_per_second": 0.789, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.0161290322580645, |
| "grad_norm": 1.65625, |
| "learning_rate": 1.639784946236559e-05, |
| "loss": 5.1271, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.0161290322580645, |
| "eval_accuracy": 0.07656381630226827, |
| "eval_loss": 5.093105792999268, |
| "eval_runtime": 62.0354, |
| "eval_samples_per_second": 25.018, |
| "eval_steps_per_second": 0.79, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.1169354838709675, |
| "eval_accuracy": 0.07620813163709154, |
| "eval_loss": 5.098904609680176, |
| "eval_runtime": 62.0701, |
| "eval_samples_per_second": 25.004, |
| "eval_steps_per_second": 0.789, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.217741935483871, |
| "eval_accuracy": 0.07596129493030288, |
| "eval_loss": 5.105669975280762, |
| "eval_runtime": 62.0252, |
| "eval_samples_per_second": 25.022, |
| "eval_steps_per_second": 0.79, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.318548387096774, |
| "eval_accuracy": 0.07612546049970809, |
| "eval_loss": 5.104703426361084, |
| "eval_runtime": 62.0492, |
| "eval_samples_per_second": 25.012, |
| "eval_steps_per_second": 0.79, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.4193548387096775, |
| "eval_accuracy": 0.0759234690424817, |
| "eval_loss": 5.099548816680908, |
| "eval_runtime": 62.037, |
| "eval_samples_per_second": 25.017, |
| "eval_steps_per_second": 0.79, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.5201612903225805, |
| "grad_norm": 1.3125, |
| "learning_rate": 7.99731182795699e-06, |
| "loss": 5.1184, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.5201612903225805, |
| "eval_accuracy": 0.07605458862003364, |
| "eval_loss": 5.100837707519531, |
| "eval_runtime": 62.036, |
| "eval_samples_per_second": 25.018, |
| "eval_steps_per_second": 0.79, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.620967741935484, |
| "eval_accuracy": 0.07616761720212274, |
| "eval_loss": 5.103092670440674, |
| "eval_runtime": 62.0411, |
| "eval_samples_per_second": 25.016, |
| "eval_steps_per_second": 0.79, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.721774193548387, |
| "eval_accuracy": 0.07602115683138232, |
| "eval_loss": 5.096649646759033, |
| "eval_runtime": 62.0513, |
| "eval_samples_per_second": 25.012, |
| "eval_steps_per_second": 0.79, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.8225806451612905, |
| "eval_accuracy": 0.0763081475418867, |
| "eval_loss": 5.093105792999268, |
| "eval_runtime": 62.0511, |
| "eval_samples_per_second": 25.012, |
| "eval_steps_per_second": 0.79, |
| "step": 2800 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 2976, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 200, |
| "total_flos": 8.429184193501594e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|