{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 260, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 1.9023885352909564, "epoch": 0.09624639076034648, "grad_norm": 0.0179443359375, "learning_rate": 0.00019801724878485438, "loss": 1.9801718139648437, "mean_token_accuracy": 0.5678877208381892, "num_tokens": 292384.0, "step": 25 }, { "entropy": 1.4295946584828199, "epoch": 0.19249278152069296, "grad_norm": 0.037841796875, "learning_rate": 0.00018721912643966055, "loss": 1.404669189453125, "mean_token_accuracy": 0.6763192970305681, "num_tokens": 578481.0, "step": 50 }, { "entropy": 0.9872728328220546, "epoch": 0.28873917228103946, "grad_norm": 0.0439453125, "learning_rate": 0.00016801727377709194, "loss": 0.9318624877929688, "mean_token_accuracy": 0.7822963754087686, "num_tokens": 865019.0, "step": 75 }, { "entropy": 0.7441379369003698, "epoch": 0.3849855630413859, "grad_norm": 0.058349609375, "learning_rate": 0.00014226182617406996, "loss": 0.709251937866211, "mean_token_accuracy": 0.8377231808751822, "num_tokens": 1152219.0, "step": 100 }, { "entropy": 0.5814053183561191, "epoch": 0.48123195380173245, "grad_norm": 0.0264892578125, "learning_rate": 0.00011243437046474853, "loss": 0.5287226486206055, "mean_token_accuracy": 0.8766528983414174, "num_tokens": 1434202.0, "step": 125 }, { "entropy": 0.5018330804118887, "epoch": 0.5774783445620789, "grad_norm": 0.02587890625, "learning_rate": 8.140883928370855e-05, "loss": 0.4683658599853516, "mean_token_accuracy": 0.8941586248576641, "num_tokens": 1713672.0, "step": 150 }, { "entropy": 0.5590004127845168, "epoch": 0.6737247353224254, "grad_norm": 0.050537109375, "learning_rate": 5.217460213786821e-05, "loss": 0.5369978713989257, "mean_token_accuracy": 0.8858267913013697, "num_tokens": 2001682.0, "step": 175 }, { "entropy": 0.38772805714048447, "epoch": 0.7699711260827719, "grad_norm": 0.025390625, "learning_rate": 2.7548433914072734e-05, "loss": 0.3627962112426758, "mean_token_accuracy": 0.9206545139104128, "num_tokens": 2287069.0, "step": 200 }, { "entropy": 0.4813358336733654, "epoch": 0.8662175168431184, "grad_norm": 0.0299072265625, "learning_rate": 9.903113209758096e-06, "loss": 0.4474001693725586, "mean_token_accuracy": 0.9008515448123217, "num_tokens": 2570183.0, "step": 225 }, { "entropy": 0.3910559167573229, "epoch": 0.9624639076034649, "grad_norm": 0.031982421875, "learning_rate": 9.388005586947191e-07, "loss": 0.3626411437988281, "mean_token_accuracy": 0.9204049988090992, "num_tokens": 2858118.0, "step": 250 } ], "logging_steps": 25, "max_steps": 260, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.266332656549417e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }