| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9967721110393803, |
| "eval_steps": 1000000000, |
| "global_step": 386, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.051646223369916075, |
| "grad_norm": 2.108860915849221, |
| "learning_rate": 5e-06, |
| "loss": 0.5468, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.10329244673983215, |
| "grad_norm": 1.6310118874003974, |
| "learning_rate": 1e-05, |
| "loss": 0.4418, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1549386701097482, |
| "grad_norm": 1.3450900424853298, |
| "learning_rate": 9.981591817238379e-06, |
| "loss": 0.405, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2065848934796643, |
| "grad_norm": 1.3151962345663961, |
| "learning_rate": 9.926502813430545e-06, |
| "loss": 0.3851, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2582311168495804, |
| "grad_norm": 1.2680899500012033, |
| "learning_rate": 9.835138623956603e-06, |
| "loss": 0.3907, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3098773402194964, |
| "grad_norm": 1.4083168813617775, |
| "learning_rate": 9.70817198829563e-06, |
| "loss": 0.3781, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3615235635894125, |
| "grad_norm": 1.3754071945041135, |
| "learning_rate": 9.54653779646118e-06, |
| "loss": 0.3869, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.4131697869593286, |
| "grad_norm": 1.2511484445251255, |
| "learning_rate": 9.351426205150778e-06, |
| "loss": 0.3756, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4648160103292447, |
| "grad_norm": 1.1024319806871332, |
| "learning_rate": 9.124273874297123e-06, |
| "loss": 0.3714, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5164622336991608, |
| "grad_norm": 1.1917707780202444, |
| "learning_rate": 8.86675338854865e-06, |
| "loss": 0.3728, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5681084570690769, |
| "grad_norm": 1.2715677756941004, |
| "learning_rate": 8.580760941571968e-06, |
| "loss": 0.3672, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6197546804389928, |
| "grad_norm": 1.227519727921758, |
| "learning_rate": 8.26840237386003e-06, |
| "loss": 0.3788, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6714009038089089, |
| "grad_norm": 1.2634658378672423, |
| "learning_rate": 7.93197766685348e-06, |
| "loss": 0.3797, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.723047127178825, |
| "grad_norm": 1.3757986356984206, |
| "learning_rate": 7.5739640075491546e-06, |
| "loss": 0.3589, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.7746933505487411, |
| "grad_norm": 1.4230554296275335, |
| "learning_rate": 7.1969975482957075e-06, |
| "loss": 0.3783, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8263395739186572, |
| "grad_norm": 1.1307040899887513, |
| "learning_rate": 6.803853996083918e-06, |
| "loss": 0.364, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.8779857972885733, |
| "grad_norm": 1.1231026642597897, |
| "learning_rate": 6.397428174258048e-06, |
| "loss": 0.3696, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9296320206584894, |
| "grad_norm": 1.189629737482601, |
| "learning_rate": 5.980712707140985e-06, |
| "loss": 0.3762, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.9812782440284055, |
| "grad_norm": 1.303138113665757, |
| "learning_rate": 5.556775984524044e-06, |
| "loss": 0.3793, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.0361523563589412, |
| "grad_norm": 1.1705355616301976, |
| "learning_rate": 5.1287395682749444e-06, |
| "loss": 0.3015, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0877985797288574, |
| "grad_norm": 1.058647814135033, |
| "learning_rate": 4.699755207425259e-06, |
| "loss": 0.2478, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.1394448030987734, |
| "grad_norm": 1.0800491931953469, |
| "learning_rate": 4.272981630981551e-06, |
| "loss": 0.234, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.1910910264686896, |
| "grad_norm": 1.0382356810044766, |
| "learning_rate": 3.851561289341023e-06, |
| "loss": 0.2482, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.2427372498386056, |
| "grad_norm": 0.9748763823021784, |
| "learning_rate": 3.4385972155710274e-06, |
| "loss": 0.2384, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.2943834732085215, |
| "grad_norm": 0.9948291834059353, |
| "learning_rate": 3.0371301769291417e-06, |
| "loss": 0.2297, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.3460296965784377, |
| "grad_norm": 1.0505131356235509, |
| "learning_rate": 2.6501162848634023e-06, |
| "loss": 0.2461, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.3976759199483537, |
| "grad_norm": 1.010764453532433, |
| "learning_rate": 2.280405228356377e-06, |
| "loss": 0.2307, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.44932214331827, |
| "grad_norm": 1.0455587580487264, |
| "learning_rate": 1.93071929088694e-06, |
| "loss": 0.2359, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.500968366688186, |
| "grad_norm": 1.0088444060232913, |
| "learning_rate": 1.6036333055135345e-06, |
| "loss": 0.2442, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.552614590058102, |
| "grad_norm": 1.0021021613404428, |
| "learning_rate": 1.3015556956751669e-06, |
| "loss": 0.2291, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.604260813428018, |
| "grad_norm": 1.0533375999671128, |
| "learning_rate": 1.0267107413118743e-06, |
| "loss": 0.2421, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.655907036797934, |
| "grad_norm": 0.9805090445662388, |
| "learning_rate": 7.811222008840719e-07, |
| "loss": 0.2357, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.7075532601678503, |
| "grad_norm": 1.033728352578099, |
| "learning_rate": 5.665984098862992e-07, |
| "loss": 0.2368, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.7591994835377665, |
| "grad_norm": 0.9971901057327158, |
| "learning_rate": 3.8471896557912005e-07, |
| "loss": 0.2229, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.8108457069076824, |
| "grad_norm": 0.9982038746046318, |
| "learning_rate": 2.368230959830875e-07, |
| "loss": 0.2302, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.8624919302775984, |
| "grad_norm": 1.0445799403567448, |
| "learning_rate": 1.2399979877708746e-07, |
| "loss": 0.2226, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.9141381536475146, |
| "grad_norm": 1.0245236117970937, |
| "learning_rate": 4.7079822711015296e-08, |
| "loss": 0.2361, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.9657843770174306, |
| "grad_norm": 0.9047755895156792, |
| "learning_rate": 6.629550575847355e-09, |
| "loss": 0.2411, |
| "step": 380 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 386, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 110643132760064.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|