| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.988610478359909, |
| "eval_steps": 500, |
| "global_step": 657, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04555808656036447, |
| "grad_norm": 4.052004444605685, |
| "learning_rate": 1.5151515151515152e-06, |
| "loss": 0.5253, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.09111617312072894, |
| "grad_norm": 1.4381960454873794, |
| "learning_rate": 3.0303030303030305e-06, |
| "loss": 0.4498, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1366742596810934, |
| "grad_norm": 1.3543508392140766, |
| "learning_rate": 4.5454545454545455e-06, |
| "loss": 0.4, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.18223234624145787, |
| "grad_norm": 1.2520872546371757, |
| "learning_rate": 6.060606060606061e-06, |
| "loss": 0.3763, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.22779043280182232, |
| "grad_norm": 1.02523284296708, |
| "learning_rate": 7.5757575757575764e-06, |
| "loss": 0.3626, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2733485193621868, |
| "grad_norm": 1.093558631219529, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 0.3496, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.31890660592255127, |
| "grad_norm": 1.1794828114399931, |
| "learning_rate": 9.998869765883566e-06, |
| "loss": 0.3328, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.36446469248291574, |
| "grad_norm": 0.9895352612973752, |
| "learning_rate": 9.986160499534318e-06, |
| "loss": 0.3323, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.41002277904328016, |
| "grad_norm": 1.2593162041954078, |
| "learning_rate": 9.959365197965824e-06, |
| "loss": 0.3476, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.45558086560364464, |
| "grad_norm": 1.0247420858839205, |
| "learning_rate": 9.918559558613344e-06, |
| "loss": 0.3281, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5011389521640092, |
| "grad_norm": 1.1609463270495288, |
| "learning_rate": 9.863858858486736e-06, |
| "loss": 0.3213, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5466970387243736, |
| "grad_norm": 1.112608179278684, |
| "learning_rate": 9.795417628509857e-06, |
| "loss": 0.3226, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.592255125284738, |
| "grad_norm": 1.1442245883128161, |
| "learning_rate": 9.713429216966624e-06, |
| "loss": 0.3058, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6378132118451025, |
| "grad_norm": 1.0029316918162978, |
| "learning_rate": 9.618125243286989e-06, |
| "loss": 0.3114, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.683371298405467, |
| "grad_norm": 1.0408244849779362, |
| "learning_rate": 9.50977494371594e-06, |
| "loss": 0.3091, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.7289293849658315, |
| "grad_norm": 1.0789631799175963, |
| "learning_rate": 9.388684410713977e-06, |
| "loss": 0.3078, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7744874715261959, |
| "grad_norm": 0.9628230660114916, |
| "learning_rate": 9.255195728237837e-06, |
| "loss": 0.3009, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.8200455580865603, |
| "grad_norm": 0.9722514254536355, |
| "learning_rate": 9.109686005344258e-06, |
| "loss": 0.2947, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8656036446469249, |
| "grad_norm": 0.8777666899714902, |
| "learning_rate": 8.952566310846931e-06, |
| "loss": 0.2942, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.9111617312072893, |
| "grad_norm": 0.9667172893305681, |
| "learning_rate": 8.784280512036235e-06, |
| "loss": 0.289, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9567198177676538, |
| "grad_norm": 0.9878477819397482, |
| "learning_rate": 8.60530402074241e-06, |
| "loss": 0.2868, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.9727406585304033, |
| "learning_rate": 8.416142450284565e-06, |
| "loss": 0.2717, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.0455580865603644, |
| "grad_norm": 0.7915636741448634, |
| "learning_rate": 8.217330187099689e-06, |
| "loss": 0.1748, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.0911161731207288, |
| "grad_norm": 0.7837965142867958, |
| "learning_rate": 8.009428881086836e-06, |
| "loss": 0.1684, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.1366742596810935, |
| "grad_norm": 0.8666778827303715, |
| "learning_rate": 7.793025858931317e-06, |
| "loss": 0.1692, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.182232346241458, |
| "grad_norm": 0.849266584139581, |
| "learning_rate": 7.568732464891293e-06, |
| "loss": 0.1742, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.2277904328018223, |
| "grad_norm": 0.7696615783202124, |
| "learning_rate": 7.33718233373407e-06, |
| "loss": 0.1711, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.2733485193621867, |
| "grad_norm": 0.8661434772503379, |
| "learning_rate": 7.099029600701144e-06, |
| "loss": 0.1697, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.3189066059225514, |
| "grad_norm": 0.8221397461631786, |
| "learning_rate": 6.854947053558849e-06, |
| "loss": 0.1667, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.3644646924829158, |
| "grad_norm": 0.7959283850125798, |
| "learning_rate": 6.6056242319551315e-06, |
| "loss": 0.1646, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.4100227790432802, |
| "grad_norm": 0.7934911795062435, |
| "learning_rate": 6.3517654794518156e-06, |
| "loss": 0.1625, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.4555808656036446, |
| "grad_norm": 0.7890669930103191, |
| "learning_rate": 6.094087953735423e-06, |
| "loss": 0.1664, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.501138952164009, |
| "grad_norm": 0.849926754992192, |
| "learning_rate": 5.8333196006277536e-06, |
| "loss": 0.1608, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.5466970387243735, |
| "grad_norm": 0.7920622918925933, |
| "learning_rate": 5.570197097619688e-06, |
| "loss": 0.1589, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.592255125284738, |
| "grad_norm": 0.8974340210426586, |
| "learning_rate": 5.305463772737812e-06, |
| "loss": 0.16, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.6378132118451025, |
| "grad_norm": 0.8910155901512489, |
| "learning_rate": 5.039867504623084e-06, |
| "loss": 0.1661, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.683371298405467, |
| "grad_norm": 0.8438540298957333, |
| "learning_rate": 4.774158609753908e-06, |
| "loss": 0.1498, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.7289293849658316, |
| "grad_norm": 0.8003749509299389, |
| "learning_rate": 4.5090877227822424e-06, |
| "loss": 0.1558, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.774487471526196, |
| "grad_norm": 0.7879659477066289, |
| "learning_rate": 4.245403675970877e-06, |
| "loss": 0.1583, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.8200455580865604, |
| "grad_norm": 0.7653210529786019, |
| "learning_rate": 3.9838513837224814e-06, |
| "loss": 0.1495, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.8656036446469249, |
| "grad_norm": 0.8205629178189233, |
| "learning_rate": 3.7251697381767373e-06, |
| "loss": 0.152, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.9111617312072893, |
| "grad_norm": 0.8015055799282049, |
| "learning_rate": 3.4700895218205026e-06, |
| "loss": 0.1454, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.9567198177676537, |
| "grad_norm": 0.7703178443216857, |
| "learning_rate": 3.2193313430079737e-06, |
| "loss": 0.1456, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.7965347444065239, |
| "learning_rate": 2.9736036002230332e-06, |
| "loss": 0.1401, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.0455580865603644, |
| "grad_norm": 0.7224665974614691, |
| "learning_rate": 2.7336004808348094e-06, |
| "loss": 0.0746, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.091116173120729, |
| "grad_norm": 0.6344834885792635, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.0704, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.1366742596810933, |
| "grad_norm": 0.6333151400219024, |
| "learning_rate": 2.273462085252146e-06, |
| "loss": 0.0719, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.1822323462414577, |
| "grad_norm": 0.6345496356388078, |
| "learning_rate": 2.0546267121888863e-06, |
| "loss": 0.0683, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.2277904328018225, |
| "grad_norm": 0.663831749653779, |
| "learning_rate": 1.8441120965239912e-06, |
| "loss": 0.0707, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.273348519362187, |
| "grad_norm": 0.6978500583302198, |
| "learning_rate": 1.642512947611622e-06, |
| "loss": 0.0719, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.3189066059225514, |
| "grad_norm": 0.5899803755413382, |
| "learning_rate": 1.4503987883766857e-06, |
| "loss": 0.0655, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.364464692482916, |
| "grad_norm": 0.597275435306883, |
| "learning_rate": 1.2683123463975144e-06, |
| "loss": 0.0658, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.41002277904328, |
| "grad_norm": 0.6130036081797414, |
| "learning_rate": 1.0967680206861198e-06, |
| "loss": 0.0691, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.4555808656036446, |
| "grad_norm": 0.6291943478576331, |
| "learning_rate": 9.362504284973683e-07, |
| "loss": 0.0701, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.501138952164009, |
| "grad_norm": 0.6719915377398242, |
| "learning_rate": 7.872130362724422e-07, |
| "loss": 0.0689, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.5466970387243735, |
| "grad_norm": 0.6866035161561178, |
| "learning_rate": 6.500768785841482e-07, |
| "loss": 0.065, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.592255125284738, |
| "grad_norm": 0.6382818453339463, |
| "learning_rate": 5.252293687031196e-07, |
| "loss": 0.0661, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.6378132118451028, |
| "grad_norm": 0.6023391864806479, |
| "learning_rate": 4.130232041450866e-07, |
| "loss": 0.0613, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.6833712984054667, |
| "grad_norm": 0.6283362244939039, |
| "learning_rate": 3.1377537029107174e-07, |
| "loss": 0.065, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.7289293849658316, |
| "grad_norm": 0.6324038348983413, |
| "learning_rate": 2.2776624489530664e-07, |
| "loss": 0.0662, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.774487471526196, |
| "grad_norm": 0.6463288010084847, |
| "learning_rate": 1.55238806010668e-07, |
| "loss": 0.0637, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.8200455580865604, |
| "grad_norm": 0.5961299692551064, |
| "learning_rate": 9.639794556925041e-08, |
| "loss": 0.0645, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.865603644646925, |
| "grad_norm": 0.6588358857173194, |
| "learning_rate": 5.1409890557246876e-08, |
| "loss": 0.0644, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.9111617312072893, |
| "grad_norm": 0.5976452121379741, |
| "learning_rate": 2.0401733419315727e-08, |
| "loss": 0.0626, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.9567198177676537, |
| "grad_norm": 0.6600908299010216, |
| "learning_rate": 3.4610730190648423e-09, |
| "loss": 0.0641, |
| "step": 650 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 657, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 418097876697088.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|