{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 503,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0019880715705765406,
      "grad_norm": 127.0667237907462,
      "learning_rate": 5.882352941176471e-08,
      "loss": 3.918,
      "step": 1
    },
    {
      "epoch": 0.019880715705765408,
      "grad_norm": 115.98143785016711,
      "learning_rate": 5.882352941176471e-07,
      "loss": 3.7431,
      "step": 10
    },
    {
      "epoch": 0.039761431411530816,
      "grad_norm": 65.61390946147996,
      "learning_rate": 1.1764705882352942e-06,
      "loss": 3.0734,
      "step": 20
    },
    {
      "epoch": 0.05964214711729622,
      "grad_norm": 41.009392563213375,
      "learning_rate": 1.7647058823529412e-06,
      "loss": 2.4079,
      "step": 30
    },
    {
      "epoch": 0.07952286282306163,
      "grad_norm": 25.739828109506355,
      "learning_rate": 2.3529411764705885e-06,
      "loss": 2.1062,
      "step": 40
    },
    {
      "epoch": 0.09940357852882704,
      "grad_norm": 17.26914793098219,
      "learning_rate": 2.941176470588235e-06,
      "loss": 2.0659,
      "step": 50
    },
    {
      "epoch": 0.11928429423459244,
      "grad_norm": 25.05959744665656,
      "learning_rate": 2.9970662200387674e-06,
      "loss": 2.0592,
      "step": 60
    },
    {
      "epoch": 0.13916500994035785,
      "grad_norm": 15.43081495953451,
      "learning_rate": 2.986939491128791e-06,
      "loss": 2.0407,
      "step": 70
    },
    {
      "epoch": 0.15904572564612326,
      "grad_norm": 12.58490370084457,
      "learning_rate": 2.969632483038685e-06,
      "loss": 2.0257,
      "step": 80
    },
    {
      "epoch": 0.17892644135188868,
      "grad_norm": 20.125125391550814,
      "learning_rate": 2.94522876954573e-06,
      "loss": 1.9816,
      "step": 90
    },
    {
      "epoch": 0.1988071570576541,
      "grad_norm": 8.501045071308724,
      "learning_rate": 2.9138461936939467e-06,
      "loss": 1.9523,
      "step": 100
    },
    {
      "epoch": 0.1988071570576541,
      "eval_loss": 1.9134721755981445,
      "eval_runtime": 63.5356,
      "eval_samples_per_second": 26.662,
      "eval_steps_per_second": 0.425,
      "step": 100
    },
    {
      "epoch": 0.21868787276341947,
      "grad_norm": 22.082329977354508,
      "learning_rate": 2.875636298742058e-06,
      "loss": 1.9368,
      "step": 110
    },
    {
      "epoch": 0.23856858846918488,
      "grad_norm": 15.818359711105115,
      "learning_rate": 2.8307835963765403e-06,
      "loss": 1.9224,
      "step": 120
    },
    {
      "epoch": 0.2584493041749503,
      "grad_norm": 13.730610615135147,
      "learning_rate": 2.779504675723508e-06,
      "loss": 1.9146,
      "step": 130
    },
    {
      "epoch": 0.2783300198807157,
      "grad_norm": 6.635422891471246,
      "learning_rate": 2.722047157461906e-06,
      "loss": 1.9164,
      "step": 140
    },
    {
      "epoch": 0.2982107355864811,
      "grad_norm": 8.086513909738859,
      "learning_rate": 2.6586884980885044e-06,
      "loss": 1.9076,
      "step": 150
    },
    {
      "epoch": 0.31809145129224653,
      "grad_norm": 13.60628995566901,
      "learning_rate": 2.5897346501087633e-06,
      "loss": 1.9047,
      "step": 160
    },
    {
      "epoch": 0.3379721669980119,
      "grad_norm": 18.644035876158316,
      "learning_rate": 2.5155185846233844e-06,
      "loss": 1.8866,
      "step": 170
    },
    {
      "epoch": 0.35785288270377735,
      "grad_norm": 17.47074603496702,
      "learning_rate": 2.43639868344482e-06,
      "loss": 1.8956,
      "step": 180
    },
    {
      "epoch": 0.37773359840954274,
      "grad_norm": 7.178728301299398,
      "learning_rate": 2.3527570085080407e-06,
      "loss": 1.9043,
      "step": 190
    },
    {
      "epoch": 0.3976143141153082,
      "grad_norm": 12.253857653229236,
      "learning_rate": 2.264997456932413e-06,
      "loss": 1.9159,
      "step": 200
    },
    {
      "epoch": 0.3976143141153082,
      "eval_loss": 1.8660345077514648,
      "eval_runtime": 63.4283,
      "eval_samples_per_second": 26.707,
      "eval_steps_per_second": 0.426,
      "step": 200
    },
    {
      "epoch": 0.41749502982107356,
      "grad_norm": 11.988930445624767,
      "learning_rate": 2.1735438106436967e-06,
      "loss": 1.9004,
      "step": 210
    },
    {
      "epoch": 0.43737574552683894,
      "grad_norm": 10.753394166683348,
      "learning_rate": 2.078837689974332e-06,
      "loss": 1.9172,
      "step": 220
    },
    {
      "epoch": 0.4572564612326044,
      "grad_norm": 9.351072895819225,
      "learning_rate": 1.981336421123892e-06,
      "loss": 1.9192,
      "step": 230
    },
    {
      "epoch": 0.47713717693836977,
      "grad_norm": 7.682035183703906,
      "learning_rate": 1.8815108277774976e-06,
      "loss": 1.8959,
      "step": 240
    },
    {
      "epoch": 0.4970178926441352,
      "grad_norm": 11.950308335271014,
      "learning_rate": 1.7798429575462477e-06,
      "loss": 1.8733,
      "step": 250
    },
    {
      "epoch": 0.5168986083499006,
      "grad_norm": 10.982955764593422,
      "learning_rate": 1.6768237542084645e-06,
      "loss": 1.8827,
      "step": 260
    },
    {
      "epoch": 0.536779324055666,
      "grad_norm": 11.904679442862472,
      "learning_rate": 1.5729506869922447e-06,
      "loss": 1.8765,
      "step": 270
    },
    {
      "epoch": 0.5566600397614314,
      "grad_norm": 9.827244740110832,
      "learning_rate": 1.4687253483472872e-06,
      "loss": 1.8841,
      "step": 280
    },
    {
      "epoch": 0.5765407554671969,
      "grad_norm": 7.306293618482458,
      "learning_rate": 1.3646510318060986e-06,
      "loss": 1.8773,
      "step": 290
    },
    {
      "epoch": 0.5964214711729622,
      "grad_norm": 9.071120593329336,
      "learning_rate": 1.2612303016308466e-06,
      "loss": 1.875,
      "step": 300
    },
    {
      "epoch": 0.5964214711729622,
      "eval_loss": 1.8548645973205566,
      "eval_runtime": 63.4556,
      "eval_samples_per_second": 26.696,
      "eval_steps_per_second": 0.425,
      "step": 300
    },
    {
      "epoch": 0.6163021868787276,
      "grad_norm": 9.443760924559943,
      "learning_rate": 1.1589625659817845e-06,
      "loss": 1.8568,
      "step": 310
    },
    {
      "epoch": 0.6361829025844931,
      "grad_norm": 11.674966838003883,
      "learning_rate": 1.0583416653261663e-06,
      "loss": 1.877,
      "step": 320
    },
    {
      "epoch": 0.6560636182902585,
      "grad_norm": 8.948890555802585,
      "learning_rate": 9.598534877329919e-07,
      "loss": 1.8663,
      "step": 330
    },
    {
      "epoch": 0.6759443339960238,
      "grad_norm": 7.926486654587874,
      "learning_rate": 8.639736225690654e-07,
      "loss": 1.8776,
      "step": 340
    },
    {
      "epoch": 0.6958250497017893,
      "grad_norm": 6.133488811515441,
      "learning_rate": 7.711650639264374e-07,
      "loss": 1.8669,
      "step": 350
    },
    {
      "epoch": 0.7157057654075547,
      "grad_norm": 5.995054734148766,
      "learning_rate": 6.818759748711476e-07,
      "loss": 1.8661,
      "step": 360
    },
    {
      "epoch": 0.73558648111332,
      "grad_norm": 6.532247908566974,
      "learning_rate": 5.965375233094762e-07,
      "loss": 1.8429,
      "step": 370
    },
    {
      "epoch": 0.7554671968190855,
      "grad_norm": 8.52788084676906,
      "learning_rate": 5.155617999220938e-07,
      "loss": 1.883,
      "step": 380
    },
    {
      "epoch": 0.7753479125248509,
      "grad_norm": 7.412951150651719,
      "learning_rate": 4.3933982822017883e-07,
      "loss": 1.8518,
      "step": 390
    },
    {
      "epoch": 0.7952286282306164,
      "grad_norm": 8.704060015623933,
      "learning_rate": 3.6823967633276183e-07,
      "loss": 1.841,
      "step": 400
    },
    {
      "epoch": 0.7952286282306164,
      "eval_loss": 1.834498643875122,
      "eval_runtime": 63.4011,
      "eval_samples_per_second": 26.719,
      "eval_steps_per_second": 0.426,
      "step": 400
    },
    {
      "epoch": 0.8151093439363817,
      "grad_norm": 10.837619200928202,
      "learning_rate": 3.026046796432582e-07,
      "loss": 1.8274,
      "step": 410
    },
    {
      "epoch": 0.8349900596421471,
      "grad_norm": 9.650000403237328,
      "learning_rate": 2.4275178285790973e-07,
      "loss": 1.8457,
      "step": 420
    },
    {
      "epoch": 0.8548707753479126,
      "grad_norm": 8.514388424330665,
      "learning_rate": 1.889700095121219e-07,
      "loss": 1.8333,
      "step": 430
    },
    {
      "epoch": 0.8747514910536779,
      "grad_norm": 11.640897943920702,
      "learning_rate": 1.4151906630527865e-07,
      "loss": 1.8412,
      "step": 440
    },
    {
      "epoch": 0.8946322067594433,
      "grad_norm": 13.422663396524422,
      "learning_rate": 1.00628089003575e-07,
      "loss": 1.8505,
      "step": 450
    },
    {
      "epoch": 0.9145129224652088,
      "grad_norm": 6.751723671685878,
      "learning_rate": 6.649453596676663e-08,
      "loss": 1.8411,
      "step": 460
    },
    {
      "epoch": 0.9343936381709742,
      "grad_norm": 7.821816393267081,
      "learning_rate": 3.928323464188621e-08,
      "loss": 1.8268,
      "step": 470
    },
    {
      "epoch": 0.9542743538767395,
      "grad_norm": 7.257934599669054,
      "learning_rate": 1.9125585628307407e-08,
      "loss": 1.8413,
      "step": 480
    },
    {
      "epoch": 0.974155069582505,
      "grad_norm": 7.949497125452414,
      "learning_rate": 6.118928157650749e-09,
      "loss": 1.8531,
      "step": 490
    },
    {
      "epoch": 0.9940357852882704,
      "grad_norm": 7.167162679479334,
      "learning_rate": 3.260700525591909e-10,
      "loss": 1.8309,
      "step": 500
    },
    {
      "epoch": 0.9940357852882704,
      "eval_loss": 1.8293424844741821,
      "eval_runtime": 63.4663,
      "eval_samples_per_second": 26.691,
      "eval_steps_per_second": 0.425,
      "step": 500
    }
  ],
  "logging_steps": 10,
  "max_steps": 503,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 162675912867840.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|