{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 200,
  "global_step": 530,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03784295175023652,
      "grad_norm": 12.928278923034668,
      "learning_rate": 1.6981132075471698e-06,
      "loss": 1.4503,
      "step": 10
    },
    {
      "epoch": 0.07568590350047304,
      "grad_norm": 4.828810214996338,
      "learning_rate": 3.5849056603773586e-06,
      "loss": 0.8761,
      "step": 20
    },
    {
      "epoch": 0.11352885525070956,
      "grad_norm": 3.0466108322143555,
      "learning_rate": 5.4716981132075475e-06,
      "loss": 0.5782,
      "step": 30
    },
    {
      "epoch": 0.15137180700094607,
      "grad_norm": 2.7793734073638916,
      "learning_rate": 7.358490566037736e-06,
      "loss": 0.4365,
      "step": 40
    },
    {
      "epoch": 0.1892147587511826,
      "grad_norm": 2.6989638805389404,
      "learning_rate": 9.245283018867926e-06,
      "loss": 0.3779,
      "step": 50
    },
    {
      "epoch": 0.22705771050141912,
      "grad_norm": 2.662125825881958,
      "learning_rate": 9.99609654676786e-06,
      "loss": 0.3345,
      "step": 60
    },
    {
      "epoch": 0.26490066225165565,
      "grad_norm": 2.1630754470825195,
      "learning_rate": 9.9722641784023e-06,
      "loss": 0.3279,
      "step": 70
    },
    {
      "epoch": 0.30274361400189215,
      "grad_norm": 2.4073753356933594,
      "learning_rate": 9.92687124249773e-06,
      "loss": 0.3057,
      "step": 80
    },
    {
      "epoch": 0.34058656575212864,
      "grad_norm": 2.1243770122528076,
      "learning_rate": 9.860114570402055e-06,
      "loss": 0.2971,
      "step": 90
    },
    {
      "epoch": 0.3784295175023652,
      "grad_norm": 2.0397891998291016,
      "learning_rate": 9.772283630189727e-06,
      "loss": 0.2811,
      "step": 100
    },
    {
      "epoch": 0.4162724692526017,
      "grad_norm": 2.167642116546631,
      "learning_rate": 9.663759271479858e-06,
      "loss": 0.2664,
      "step": 110
    },
    {
      "epoch": 0.45411542100283825,
      "grad_norm": 2.2073721885681152,
      "learning_rate": 9.535012074008688e-06,
      "loss": 0.2721,
      "step": 120
    },
    {
      "epoch": 0.49195837275307475,
      "grad_norm": 2.113102912902832,
      "learning_rate": 9.386600307117293e-06,
      "loss": 0.2549,
      "step": 130
    },
    {
      "epoch": 0.5298013245033113,
      "grad_norm": 1.9171561002731323,
      "learning_rate": 9.219167509002526e-06,
      "loss": 0.2534,
      "step": 140
    },
    {
      "epoch": 0.5676442762535477,
      "grad_norm": 1.8712971210479736,
      "learning_rate": 9.033439696227966e-06,
      "loss": 0.2447,
      "step": 150
    },
    {
      "epoch": 0.6054872280037843,
      "grad_norm": 2.0216710567474365,
      "learning_rate": 8.83022221559489e-06,
      "loss": 0.2491,
      "step": 160
    },
    {
      "epoch": 0.6433301797540208,
      "grad_norm": 1.623536229133606,
      "learning_rate": 8.610396252024113e-06,
      "loss": 0.2397,
      "step": 170
    },
    {
      "epoch": 0.6811731315042573,
      "grad_norm": 1.9642530679702759,
      "learning_rate": 8.374915007591053e-06,
      "loss": 0.2172,
      "step": 180
    },
    {
      "epoch": 0.7190160832544938,
      "grad_norm": 1.723118543624878,
      "learning_rate": 8.124799568282418e-06,
      "loss": 0.234,
      "step": 190
    },
    {
      "epoch": 0.7568590350047304,
      "grad_norm": 1.7284762859344482,
      "learning_rate": 7.861134476396903e-06,
      "loss": 0.2217,
      "step": 200
    },
    {
      "epoch": 0.7568590350047304,
      "eval_loss": 0.2220437228679657,
      "eval_runtime": 93.3051,
      "eval_samples_per_second": 8.821,
      "eval_steps_per_second": 1.265,
      "step": 200
    },
    {
      "epoch": 0.7947019867549668,
      "grad_norm": 1.6118355989456177,
      "learning_rate": 7.58506302778873e-06,
      "loss": 0.2129,
      "step": 210
    },
    {
      "epoch": 0.8325449385052034,
      "grad_norm": 1.7828593254089355,
      "learning_rate": 7.297782314345972e-06,
      "loss": 0.2146,
      "step": 220
    },
    {
      "epoch": 0.8703878902554399,
      "grad_norm": 1.4980388879776,
      "learning_rate": 7.00053803320028e-06,
      "loss": 0.1948,
      "step": 230
    },
    {
      "epoch": 0.9082308420056765,
      "grad_norm": 1.5348376035690308,
      "learning_rate": 6.694619085176159e-06,
      "loss": 0.2056,
      "step": 240
    },
    {
      "epoch": 0.9460737937559129,
      "grad_norm": 1.3809292316436768,
      "learning_rate": 6.381351985901842e-06,
      "loss": 0.1986,
      "step": 250
    },
    {
      "epoch": 0.9839167455061495,
      "grad_norm": 1.3319153785705566,
      "learning_rate": 6.062095113816069e-06,
      "loss": 0.1962,
      "step": 260
    },
    {
      "epoch": 1.0189214758751182,
      "grad_norm": 1.361677885055542,
      "learning_rate": 5.738232820012407e-06,
      "loss": 0.1666,
      "step": 270
    },
    {
      "epoch": 1.0567644276253547,
      "grad_norm": 1.4107803106307983,
      "learning_rate": 5.411169425461822e-06,
      "loss": 0.1459,
      "step": 280
    },
    {
      "epoch": 1.0946073793755913,
      "grad_norm": 1.373854637145996,
      "learning_rate": 5.082323131642496e-06,
      "loss": 0.1453,
      "step": 290
    },
    {
      "epoch": 1.1324503311258278,
      "grad_norm": 1.3363025188446045,
      "learning_rate": 4.753119870981486e-06,
      "loss": 0.1401,
      "step": 300
    },
    {
      "epoch": 1.1702932828760644,
      "grad_norm": 1.3609431982040405,
      "learning_rate": 4.424987123773654e-06,
      "loss": 0.132,
      "step": 310
    },
    {
      "epoch": 1.208136234626301,
      "grad_norm": 1.300492286682129,
      "learning_rate": 4.0993477283888264e-06,
      "loss": 0.1342,
      "step": 320
    },
    {
      "epoch": 1.2459791863765375,
      "grad_norm": 1.1971514225006104,
      "learning_rate": 3.777613711607087e-06,
      "loss": 0.1322,
      "step": 330
    },
    {
      "epoch": 1.2838221381267738,
      "grad_norm": 1.4877253770828247,
      "learning_rate": 3.46118016583494e-06,
      "loss": 0.1368,
      "step": 340
    },
    {
      "epoch": 1.3216650898770104,
      "grad_norm": 1.2842473983764648,
      "learning_rate": 3.1514191997517387e-06,
      "loss": 0.1301,
      "step": 350
    },
    {
      "epoch": 1.359508041627247,
      "grad_norm": 1.3692116737365723,
      "learning_rate": 2.8496739886173994e-06,
      "loss": 0.1314,
      "step": 360
    },
    {
      "epoch": 1.3973509933774835,
      "grad_norm": 1.254835605621338,
      "learning_rate": 2.5572529500402365e-06,
      "loss": 0.129,
      "step": 370
    },
    {
      "epoch": 1.43519394512772,
      "grad_norm": 1.2396975755691528,
      "learning_rate": 2.275424070459803e-06,
      "loss": 0.1205,
      "step": 380
    },
    {
      "epoch": 1.4730368968779564,
      "grad_norm": 1.3641499280929565,
      "learning_rate": 2.005409406946e-06,
      "loss": 0.129,
      "step": 390
    },
    {
      "epoch": 1.5108798486281931,
      "grad_norm": 1.4750701189041138,
      "learning_rate": 1.7483797881556175e-06,
      "loss": 0.1209,
      "step": 400
    },
    {
      "epoch": 1.5108798486281931,
      "eval_loss": 0.1692177653312683,
      "eval_runtime": 94.0494,
      "eval_samples_per_second": 8.751,
      "eval_steps_per_second": 1.255,
      "step": 400
    },
    {
      "epoch": 1.5487228003784295,
      "grad_norm": 1.1940029859542847,
      "learning_rate": 1.5054497374238275e-06,
      "loss": 0.1155,
      "step": 410
    },
    {
      "epoch": 1.586565752128666,
      "grad_norm": 1.2632642984390259,
      "learning_rate": 1.277672640004936e-06,
      "loss": 0.1193,
      "step": 420
    },
    {
      "epoch": 1.6244087038789026,
      "grad_norm": 1.2763444185256958,
      "learning_rate": 1.066036175418082e-06,
      "loss": 0.115,
      "step": 430
    },
    {
      "epoch": 1.6622516556291391,
      "grad_norm": 1.2933067083358765,
      "learning_rate": 8.714580347039491e-07,
      "loss": 0.1162,
      "step": 440
    },
    {
      "epoch": 1.7000946073793757,
      "grad_norm": 1.197369933128357,
      "learning_rate": 6.947819411632223e-07,
      "loss": 0.1138,
      "step": 450
    },
    {
      "epoch": 1.737937559129612,
      "grad_norm": 1.1830955743789673,
      "learning_rate": 5.367739918315068e-07,
      "loss": 0.112,
      "step": 460
    },
    {
      "epoch": 1.7757805108798488,
      "grad_norm": 1.2318562269210815,
      "learning_rate": 3.98119335554687e-07,
      "loss": 0.1098,
      "step": 470
    },
    {
      "epoch": 1.8136234626300851,
      "grad_norm": 1.2451008558273315,
      "learning_rate": 2.7941920206915443e-07,
      "loss": 0.1152,
      "step": 480
    },
    {
      "epoch": 1.8514664143803217,
      "grad_norm": 1.2878820896148682,
      "learning_rate": 1.8118829496930557e-07,
      "loss": 0.1051,
      "step": 490
    },
    {
      "epoch": 1.8893093661305582,
      "grad_norm": 1.2973670959472656,
      "learning_rate": 1.0385255986682718e-07,
      "loss": 0.1053,
      "step": 500
    },
    {
      "epoch": 1.9271523178807946,
      "grad_norm": 1.1544054746627808,
      "learning_rate": 4.774733741942206e-08,
      "loss": 0.1044,
      "step": 510
    },
    {
      "epoch": 1.9649952696310313,
      "grad_norm": 1.1692767143249512,
      "learning_rate": 1.3115909237734203e-08,
      "loss": 0.1083,
      "step": 520
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.188850164413452,
      "learning_rate": 1.0844297567258466e-10,
      "loss": 0.1132,
      "step": 530
    },
    {
      "epoch": 2.0,
      "step": 530,
      "total_flos": 71607640850432.0,
      "train_loss": 0.2318565127984533,
      "train_runtime": 10363.9156,
      "train_samples_per_second": 1.428,
      "train_steps_per_second": 0.051
    }
  ],
  "logging_steps": 10,
  "max_steps": 530,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 71607640850432.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}