| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9865470852017937, |
| "eval_steps": 500, |
| "global_step": 55, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.017937219730941704, |
| "grad_norm": 0.6536183953285217, |
| "learning_rate": 4.995922759815339e-05, |
| "loss": 0.8371, |
| "num_input_tokens_seen": 2097152, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.03587443946188341, |
| "grad_norm": 0.517680823802948, |
| "learning_rate": 4.9837043383713753e-05, |
| "loss": 0.7804, |
| "num_input_tokens_seen": 4194304, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.053811659192825115, |
| "grad_norm": 0.4423481225967407, |
| "learning_rate": 4.963384589619233e-05, |
| "loss": 0.7695, |
| "num_input_tokens_seen": 6291456, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.07174887892376682, |
| "grad_norm": 0.39828750491142273, |
| "learning_rate": 4.935029792355834e-05, |
| "loss": 0.7419, |
| "num_input_tokens_seen": 8388608, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.08968609865470852, |
| "grad_norm": 0.31201115250587463, |
| "learning_rate": 4.898732434036244e-05, |
| "loss": 0.7166, |
| "num_input_tokens_seen": 10485760, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.10762331838565023, |
| "grad_norm": 0.2536958158016205, |
| "learning_rate": 4.854610909098812e-05, |
| "loss": 0.7194, |
| "num_input_tokens_seen": 12582912, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.12556053811659193, |
| "grad_norm": 0.2193588763475418, |
| "learning_rate": 4.802809132787125e-05, |
| "loss": 0.6975, |
| "num_input_tokens_seen": 14680064, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.14349775784753363, |
| "grad_norm": 0.18916621804237366, |
| "learning_rate": 4.743496071728396e-05, |
| "loss": 0.7168, |
| "num_input_tokens_seen": 16777216, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.16143497757847533, |
| "grad_norm": 0.1561172604560852, |
| "learning_rate": 4.6768651927994434e-05, |
| "loss": 0.6707, |
| "num_input_tokens_seen": 18874368, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.17937219730941703, |
| "grad_norm": 0.12857139110565186, |
| "learning_rate": 4.6031338320779534e-05, |
| "loss": 0.6769, |
| "num_input_tokens_seen": 20971520, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.19730941704035873, |
| "grad_norm": 0.11340289562940598, |
| "learning_rate": 4.522542485937369e-05, |
| "loss": 0.6873, |
| "num_input_tokens_seen": 23068672, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.21524663677130046, |
| "grad_norm": 0.10658581554889679, |
| "learning_rate": 4.4353540265977064e-05, |
| "loss": 0.6643, |
| "num_input_tokens_seen": 25165824, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.23318385650224216, |
| "grad_norm": 0.08937722444534302, |
| "learning_rate": 4.341852844691012e-05, |
| "loss": 0.6849, |
| "num_input_tokens_seen": 27262976, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.25112107623318386, |
| "grad_norm": 0.07756289094686508, |
| "learning_rate": 4.242343921638234e-05, |
| "loss": 0.6461, |
| "num_input_tokens_seen": 29360128, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.26905829596412556, |
| "grad_norm": 0.07581546157598495, |
| "learning_rate": 4.137151834863213e-05, |
| "loss": 0.6623, |
| "num_input_tokens_seen": 31457280, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.28699551569506726, |
| "grad_norm": 0.07386067509651184, |
| "learning_rate": 4.0266196990885955e-05, |
| "loss": 0.6751, |
| "num_input_tokens_seen": 33554432, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.30493273542600896, |
| "grad_norm": 0.06293580681085587, |
| "learning_rate": 3.911108047166924e-05, |
| "loss": 0.6472, |
| "num_input_tokens_seen": 35651584, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.32286995515695066, |
| "grad_norm": 0.06199085712432861, |
| "learning_rate": 3.790993654097405e-05, |
| "loss": 0.6728, |
| "num_input_tokens_seen": 37748736, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.34080717488789236, |
| "grad_norm": 0.060734592378139496, |
| "learning_rate": 3.6666683080641846e-05, |
| "loss": 0.7017, |
| "num_input_tokens_seen": 39845888, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.35874439461883406, |
| "grad_norm": 0.05623164027929306, |
| "learning_rate": 3.5385375325047166e-05, |
| "loss": 0.6502, |
| "num_input_tokens_seen": 41943040, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.37668161434977576, |
| "grad_norm": 0.0574677549302578, |
| "learning_rate": 3.4070192633766025e-05, |
| "loss": 0.6476, |
| "num_input_tokens_seen": 44040192, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.39461883408071746, |
| "grad_norm": 0.05185185372829437, |
| "learning_rate": 3.272542485937369e-05, |
| "loss": 0.6411, |
| "num_input_tokens_seen": 46137344, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.4125560538116592, |
| "grad_norm": 0.05139186978340149, |
| "learning_rate": 3.135545835483718e-05, |
| "loss": 0.6428, |
| "num_input_tokens_seen": 48234496, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.4304932735426009, |
| "grad_norm": 0.050159115344285965, |
| "learning_rate": 2.996476166614364e-05, |
| "loss": 0.6661, |
| "num_input_tokens_seen": 50331648, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.4484304932735426, |
| "grad_norm": 0.04851464927196503, |
| "learning_rate": 2.8557870956832132e-05, |
| "loss": 0.6378, |
| "num_input_tokens_seen": 52428800, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.4663677130044843, |
| "grad_norm": 0.04896726831793785, |
| "learning_rate": 2.7139375211970996e-05, |
| "loss": 0.6532, |
| "num_input_tokens_seen": 54525952, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.484304932735426, |
| "grad_norm": 0.04698600620031357, |
| "learning_rate": 2.5713901269842404e-05, |
| "loss": 0.6403, |
| "num_input_tokens_seen": 56623104, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.5022421524663677, |
| "grad_norm": 0.048034097999334335, |
| "learning_rate": 2.42860987301576e-05, |
| "loss": 0.6248, |
| "num_input_tokens_seen": 58720256, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.5201793721973094, |
| "grad_norm": 0.044828303158283234, |
| "learning_rate": 2.2860624788029013e-05, |
| "loss": 0.6583, |
| "num_input_tokens_seen": 60817408, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.5381165919282511, |
| "grad_norm": 0.04563640430569649, |
| "learning_rate": 2.1442129043167874e-05, |
| "loss": 0.6579, |
| "num_input_tokens_seen": 62914560, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.5560538116591929, |
| "grad_norm": 0.044318560510873795, |
| "learning_rate": 2.003523833385637e-05, |
| "loss": 0.6659, |
| "num_input_tokens_seen": 65011712, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.5739910313901345, |
| "grad_norm": 0.04331167787313461, |
| "learning_rate": 1.8644541645162834e-05, |
| "loss": 0.6423, |
| "num_input_tokens_seen": 67108864, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.5919282511210763, |
| "grad_norm": 0.04475367069244385, |
| "learning_rate": 1.7274575140626318e-05, |
| "loss": 0.6509, |
| "num_input_tokens_seen": 69206016, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.6098654708520179, |
| "grad_norm": 0.045547887682914734, |
| "learning_rate": 1.5929807366233977e-05, |
| "loss": 0.6551, |
| "num_input_tokens_seen": 71303168, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.6278026905829597, |
| "grad_norm": 0.043985530734062195, |
| "learning_rate": 1.4614624674952842e-05, |
| "loss": 0.6232, |
| "num_input_tokens_seen": 73400320, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.6457399103139013, |
| "grad_norm": 0.0414094403386116, |
| "learning_rate": 1.3333316919358157e-05, |
| "loss": 0.6137, |
| "num_input_tokens_seen": 75497472, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.6636771300448431, |
| "grad_norm": 0.041019294410943985, |
| "learning_rate": 1.2090063459025955e-05, |
| "loss": 0.6426, |
| "num_input_tokens_seen": 77594624, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.6816143497757847, |
| "grad_norm": 0.04383592680096626, |
| "learning_rate": 1.0888919528330777e-05, |
| "loss": 0.6512, |
| "num_input_tokens_seen": 79691776, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.6995515695067265, |
| "grad_norm": 0.040539514273405075, |
| "learning_rate": 9.733803009114045e-06, |
| "loss": 0.6269, |
| "num_input_tokens_seen": 81788928, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.7174887892376681, |
| "grad_norm": 0.04238974675536156, |
| "learning_rate": 8.628481651367876e-06, |
| "loss": 0.6201, |
| "num_input_tokens_seen": 83886080, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.7354260089686099, |
| "grad_norm": 0.04115669056773186, |
| "learning_rate": 7.576560783617668e-06, |
| "loss": 0.642, |
| "num_input_tokens_seen": 85983232, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.7533632286995515, |
| "grad_norm": 0.04178008437156677, |
| "learning_rate": 6.5814715530898745e-06, |
| "loss": 0.648, |
| "num_input_tokens_seen": 88080384, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.7713004484304933, |
| "grad_norm": 0.04329155012965202, |
| "learning_rate": 5.646459734022938e-06, |
| "loss": 0.6442, |
| "num_input_tokens_seen": 90177536, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.7892376681614349, |
| "grad_norm": 0.043740272521972656, |
| "learning_rate": 4.7745751406263165e-06, |
| "loss": 0.6488, |
| "num_input_tokens_seen": 92274688, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.8071748878923767, |
| "grad_norm": 0.04263562709093094, |
| "learning_rate": 3.968661679220468e-06, |
| "loss": 0.65, |
| "num_input_tokens_seen": 94371840, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.8251121076233184, |
| "grad_norm": 0.041693028062582016, |
| "learning_rate": 3.2313480720055745e-06, |
| "loss": 0.6584, |
| "num_input_tokens_seen": 96468992, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.8430493273542601, |
| "grad_norm": 0.04151754826307297, |
| "learning_rate": 2.565039282716045e-06, |
| "loss": 0.6392, |
| "num_input_tokens_seen": 98566144, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.8609865470852018, |
| "grad_norm": 0.04260968416929245, |
| "learning_rate": 1.97190867212875e-06, |
| "loss": 0.6524, |
| "num_input_tokens_seen": 100663296, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.8789237668161435, |
| "grad_norm": 0.04022514820098877, |
| "learning_rate": 1.4538909090118846e-06, |
| "loss": 0.6276, |
| "num_input_tokens_seen": 102760448, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.8968609865470852, |
| "grad_norm": 0.039072513580322266, |
| "learning_rate": 1.0126756596375686e-06, |
| "loss": 0.6282, |
| "num_input_tokens_seen": 104857600, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.9147982062780269, |
| "grad_norm": 0.03952722251415253, |
| "learning_rate": 6.497020764416633e-07, |
| "loss": 0.6344, |
| "num_input_tokens_seen": 106954752, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.9327354260089686, |
| "grad_norm": 0.04045777767896652, |
| "learning_rate": 3.6615410380767544e-07, |
| "loss": 0.6464, |
| "num_input_tokens_seen": 109051904, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.9506726457399103, |
| "grad_norm": 0.03984501212835312, |
| "learning_rate": 1.6295661628624447e-07, |
| "loss": 0.6253, |
| "num_input_tokens_seen": 111149056, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.968609865470852, |
| "grad_norm": 0.040761884301900864, |
| "learning_rate": 4.07724018466088e-08, |
| "loss": 0.6375, |
| "num_input_tokens_seen": 113246208, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.9865470852017937, |
| "grad_norm": 0.04142209142446518, |
| "learning_rate": 0.0, |
| "loss": 0.6419, |
| "num_input_tokens_seen": 115343360, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.9865470852017937, |
| "num_input_tokens_seen": 115343360, |
| "step": 55, |
| "total_flos": 5.104238176512246e+18, |
| "train_loss": 0.6637221011248502, |
| "train_runtime": 9208.1472, |
| "train_samples_per_second": 3.097, |
| "train_steps_per_second": 0.006 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 55, |
| "num_input_tokens_seen": 115343360, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.104238176512246e+18, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|