{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.04740670508585058,
  "eval_steps": 500,
  "global_step": 800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002962919067865661,
      "grad_norm": 0.0781773254275322,
      "learning_rate": 0.0004985778620526191,
      "loss": 1.4945,
      "step": 50
    },
    {
      "epoch": 0.005925838135731322,
      "grad_norm": 0.08408337086439133,
      "learning_rate": 0.0004970964683574307,
      "loss": 1.1653,
      "step": 100
    },
    {
      "epoch": 0.008888757203596984,
      "grad_norm": 0.10583573579788208,
      "learning_rate": 0.0004956150746622422,
      "loss": 1.1169,
      "step": 150
    },
    {
      "epoch": 0.011851676271462644,
      "grad_norm": 0.08750592172145844,
      "learning_rate": 0.0004941336809670538,
      "loss": 1.0972,
      "step": 200
    },
    {
      "epoch": 0.014814595339328307,
      "grad_norm": 0.07067917287349701,
      "learning_rate": 0.0004926522872718654,
      "loss": 1.1012,
      "step": 250
    },
    {
      "epoch": 0.017777514407193967,
      "grad_norm": 0.12936587631702423,
      "learning_rate": 0.0004911708935766769,
      "loss": 1.0908,
      "step": 300
    },
    {
      "epoch": 0.02074043347505963,
      "grad_norm": 0.09118826687335968,
      "learning_rate": 0.0004896894998814885,
      "loss": 1.0773,
      "step": 350
    },
    {
      "epoch": 0.02370335254292529,
      "grad_norm": 0.08850109577178955,
      "learning_rate": 0.00048820810618630005,
      "loss": 1.1012,
      "step": 400
    },
    {
      "epoch": 0.02666627161079095,
      "grad_norm": 0.07888604700565338,
      "learning_rate": 0.00048672671249111167,
      "loss": 1.1343,
      "step": 450
    },
    {
      "epoch": 0.029629190678656613,
      "grad_norm": 0.0906878113746643,
      "learning_rate": 0.00048524531879592323,
      "loss": 1.1184,
      "step": 500
    },
    {
      "epoch": 0.032592109746522276,
      "grad_norm": 0.07720430195331573,
      "learning_rate": 0.0004837639251007348,
      "loss": 1.0968,
      "step": 550
    },
    {
      "epoch": 0.035555028814387934,
      "grad_norm": 0.0831717997789383,
      "learning_rate": 0.00048228253140554636,
      "loss": 1.0983,
      "step": 600
    },
    {
      "epoch": 0.03851794788225359,
      "grad_norm": 0.09815791249275208,
      "learning_rate": 0.0004808011377103579,
      "loss": 1.1235,
      "step": 650
    },
    {
      "epoch": 0.04148086695011926,
      "grad_norm": 0.07672577351331711,
      "learning_rate": 0.0004793197440151695,
      "loss": 1.0744,
      "step": 700
    },
    {
      "epoch": 0.04444378601798492,
      "grad_norm": 0.08625241369009018,
      "learning_rate": 0.0004778679781938848,
      "loss": 1.0897,
      "step": 750
    },
    {
      "epoch": 0.04740670508585058,
      "grad_norm": 0.10188218951225281,
      "learning_rate": 0.0004763865844986964,
      "loss": 1.0737,
      "step": 800
    }
  ],
  "logging_steps": 50,
  "max_steps": 16876,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.4844182233088e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|
|