| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 56.0, |
| "eval_steps": 500, |
| "global_step": 140, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.6, |
| "learning_rate": 0.0005714285714285714, |
| "loss": 1.2446, |
| "step": 4 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 0.0011428571428571427, |
| "loss": 0.9828, |
| "step": 8 |
| }, |
| { |
| "epoch": 4.8, |
| "learning_rate": 0.0017142857142857142, |
| "loss": 0.7103, |
| "step": 12 |
| }, |
| { |
| "epoch": 6.4, |
| "learning_rate": 0.001968253968253968, |
| "loss": 0.4313, |
| "step": 16 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 0.0019047619047619048, |
| "loss": 0.2152, |
| "step": 20 |
| }, |
| { |
| "epoch": 9.6, |
| "learning_rate": 0.0018412698412698413, |
| "loss": 0.1221, |
| "step": 24 |
| }, |
| { |
| "epoch": 11.2, |
| "learning_rate": 0.0017777777777777776, |
| "loss": 0.0657, |
| "step": 28 |
| }, |
| { |
| "epoch": 12.8, |
| "learning_rate": 0.001746031746031746, |
| "loss": 0.0725, |
| "step": 32 |
| }, |
| { |
| "epoch": 14.4, |
| "learning_rate": 0.0016825396825396826, |
| "loss": 0.0622, |
| "step": 36 |
| }, |
| { |
| "epoch": 16.0, |
| "learning_rate": 0.0016190476190476191, |
| "loss": 0.0362, |
| "step": 40 |
| }, |
| { |
| "epoch": 17.6, |
| "learning_rate": 0.0015555555555555557, |
| "loss": 0.0266, |
| "step": 44 |
| }, |
| { |
| "epoch": 19.2, |
| "learning_rate": 0.001492063492063492, |
| "loss": 0.0241, |
| "step": 48 |
| }, |
| { |
| "epoch": 20.8, |
| "learning_rate": 0.0014285714285714286, |
| "loss": 0.018, |
| "step": 52 |
| }, |
| { |
| "epoch": 22.4, |
| "learning_rate": 0.0013650793650793651, |
| "loss": 0.0187, |
| "step": 56 |
| }, |
| { |
| "epoch": 24.0, |
| "learning_rate": 0.0013015873015873017, |
| "loss": 0.0159, |
| "step": 60 |
| }, |
| { |
| "epoch": 25.6, |
| "learning_rate": 0.0012698412698412698, |
| "loss": 0.0463, |
| "step": 64 |
| }, |
| { |
| "epoch": 27.2, |
| "learning_rate": 0.0012063492063492064, |
| "loss": 0.0279, |
| "step": 68 |
| }, |
| { |
| "epoch": 28.8, |
| "learning_rate": 0.0011428571428571427, |
| "loss": 0.0133, |
| "step": 72 |
| }, |
| { |
| "epoch": 30.4, |
| "learning_rate": 0.0010793650793650793, |
| "loss": 0.0148, |
| "step": 76 |
| }, |
| { |
| "epoch": 32.0, |
| "learning_rate": 0.0010158730158730158, |
| "loss": 0.0115, |
| "step": 80 |
| }, |
| { |
| "epoch": 33.6, |
| "learning_rate": 0.0009523809523809524, |
| "loss": 0.0139, |
| "step": 84 |
| }, |
| { |
| "epoch": 35.2, |
| "learning_rate": 0.0008888888888888888, |
| "loss": 0.0105, |
| "step": 88 |
| }, |
| { |
| "epoch": 36.8, |
| "learning_rate": 0.0008253968253968254, |
| "loss": 0.0096, |
| "step": 92 |
| }, |
| { |
| "epoch": 38.4, |
| "learning_rate": 0.0007619047619047619, |
| "loss": 0.0129, |
| "step": 96 |
| }, |
| { |
| "epoch": 40.0, |
| "learning_rate": 0.0006984126984126984, |
| "loss": 0.0069, |
| "step": 100 |
| }, |
| { |
| "epoch": 41.6, |
| "learning_rate": 0.0006349206349206349, |
| "loss": 0.0063, |
| "step": 104 |
| }, |
| { |
| "epoch": 43.2, |
| "learning_rate": 0.0005714285714285714, |
| "loss": 0.0053, |
| "step": 108 |
| }, |
| { |
| "epoch": 44.8, |
| "learning_rate": 0.0005079365079365079, |
| "loss": 0.0047, |
| "step": 112 |
| }, |
| { |
| "epoch": 46.4, |
| "learning_rate": 0.0004444444444444444, |
| "loss": 0.0055, |
| "step": 116 |
| }, |
| { |
| "epoch": 48.0, |
| "learning_rate": 0.00038095238095238096, |
| "loss": 0.0033, |
| "step": 120 |
| }, |
| { |
| "epoch": 49.6, |
| "learning_rate": 0.00031746031746031746, |
| "loss": 0.0038, |
| "step": 124 |
| }, |
| { |
| "epoch": 51.2, |
| "learning_rate": 0.00025396825396825396, |
| "loss": 0.0036, |
| "step": 128 |
| }, |
| { |
| "epoch": 52.8, |
| "learning_rate": 0.00019047619047619048, |
| "loss": 0.0029, |
| "step": 132 |
| }, |
| { |
| "epoch": 54.4, |
| "learning_rate": 0.00012698412698412698, |
| "loss": 0.0035, |
| "step": 136 |
| }, |
| { |
| "epoch": 56.0, |
| "learning_rate": 6.349206349206349e-05, |
| "loss": 0.0026, |
| "step": 140 |
| } |
| ], |
| "logging_steps": 4, |
| "max_steps": 140, |
| "num_train_epochs": 70, |
| "save_steps": 500, |
| "total_flos": 4.89773450877993e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|