| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.991097922848665, |
| "eval_steps": 500, |
| "global_step": 126, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.11869436201780416, |
| "grad_norm": 0.6501234173774719, |
| "learning_rate": 0.0002, |
| "loss": 2.5793, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.23738872403560832, |
| "grad_norm": 0.9226903915405273, |
| "learning_rate": 0.0002, |
| "loss": 2.2797, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.3560830860534125, |
| "grad_norm": 1.3594255447387695, |
| "learning_rate": 0.0002, |
| "loss": 1.8775, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.47477744807121663, |
| "grad_norm": 1.391525149345398, |
| "learning_rate": 0.0002, |
| "loss": 1.4896, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.5934718100890207, |
| "grad_norm": 0.9402475357055664, |
| "learning_rate": 0.0002, |
| "loss": 1.2145, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.712166172106825, |
| "grad_norm": 0.516862690448761, |
| "learning_rate": 0.0002, |
| "loss": 1.0708, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.8308605341246291, |
| "grad_norm": 0.39975354075431824, |
| "learning_rate": 0.0002, |
| "loss": 0.9909, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.9495548961424333, |
| "grad_norm": 0.4522175192832947, |
| "learning_rate": 0.0002, |
| "loss": 0.9651, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.0682492581602374, |
| "grad_norm": 0.4957733452320099, |
| "learning_rate": 0.0002, |
| "loss": 0.9213, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.1869436201780414, |
| "grad_norm": 0.45304545760154724, |
| "learning_rate": 0.0002, |
| "loss": 0.9047, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.3056379821958457, |
| "grad_norm": 0.6747499108314514, |
| "learning_rate": 0.0002, |
| "loss": 0.8819, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.4243323442136497, |
| "grad_norm": 0.7882275581359863, |
| "learning_rate": 0.0002, |
| "loss": 0.8359, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.543026706231454, |
| "grad_norm": 0.42021647095680237, |
| "learning_rate": 0.0002, |
| "loss": 0.8254, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.6617210682492582, |
| "grad_norm": 0.41371551156044006, |
| "learning_rate": 0.0002, |
| "loss": 0.7991, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.7804154302670623, |
| "grad_norm": 0.45561087131500244, |
| "learning_rate": 0.0002, |
| "loss": 0.7887, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.8991097922848663, |
| "grad_norm": 0.40611913800239563, |
| "learning_rate": 0.0002, |
| "loss": 0.7941, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.0178041543026706, |
| "grad_norm": 0.5473902225494385, |
| "learning_rate": 0.0002, |
| "loss": 0.7779, |
| "step": 85 |
| }, |
| { |
| "epoch": 2.136498516320475, |
| "grad_norm": 0.4852384924888611, |
| "learning_rate": 0.0002, |
| "loss": 0.7517, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.255192878338279, |
| "grad_norm": 0.4257807731628418, |
| "learning_rate": 0.0002, |
| "loss": 0.7545, |
| "step": 95 |
| }, |
| { |
| "epoch": 2.373887240356083, |
| "grad_norm": 0.4694693386554718, |
| "learning_rate": 0.0002, |
| "loss": 0.7389, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.492581602373887, |
| "grad_norm": 0.46692556142807007, |
| "learning_rate": 0.0002, |
| "loss": 0.7348, |
| "step": 105 |
| }, |
| { |
| "epoch": 2.6112759643916914, |
| "grad_norm": 0.38663822412490845, |
| "learning_rate": 0.0002, |
| "loss": 0.7368, |
| "step": 110 |
| }, |
| { |
| "epoch": 2.7299703264094957, |
| "grad_norm": 0.4077226519584656, |
| "learning_rate": 0.0002, |
| "loss": 0.7335, |
| "step": 115 |
| }, |
| { |
| "epoch": 2.8486646884272995, |
| "grad_norm": 0.4740726351737976, |
| "learning_rate": 0.0002, |
| "loss": 0.7462, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.9673590504451037, |
| "grad_norm": 0.40621665120124817, |
| "learning_rate": 0.0002, |
| "loss": 0.75, |
| "step": 125 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 126, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.310063237541069e+16, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|