| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 260, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 1.9023885352909564, |
| "epoch": 0.09624639076034648, |
| "grad_norm": 0.0179443359375, |
| "learning_rate": 0.00019801724878485438, |
| "loss": 1.9801718139648437, |
| "mean_token_accuracy": 0.5678877208381892, |
| "num_tokens": 292384.0, |
| "step": 25 |
| }, |
| { |
| "entropy": 1.4295946584828199, |
| "epoch": 0.19249278152069296, |
| "grad_norm": 0.037841796875, |
| "learning_rate": 0.00018721912643966055, |
| "loss": 1.404669189453125, |
| "mean_token_accuracy": 0.6763192970305681, |
| "num_tokens": 578481.0, |
| "step": 50 |
| }, |
| { |
| "entropy": 0.9872728328220546, |
| "epoch": 0.28873917228103946, |
| "grad_norm": 0.0439453125, |
| "learning_rate": 0.00016801727377709194, |
| "loss": 0.9318624877929688, |
| "mean_token_accuracy": 0.7822963754087686, |
| "num_tokens": 865019.0, |
| "step": 75 |
| }, |
| { |
| "entropy": 0.7441379369003698, |
| "epoch": 0.3849855630413859, |
| "grad_norm": 0.058349609375, |
| "learning_rate": 0.00014226182617406996, |
| "loss": 0.709251937866211, |
| "mean_token_accuracy": 0.8377231808751822, |
| "num_tokens": 1152219.0, |
| "step": 100 |
| }, |
| { |
| "entropy": 0.5814053183561191, |
| "epoch": 0.48123195380173245, |
| "grad_norm": 0.0264892578125, |
| "learning_rate": 0.00011243437046474853, |
| "loss": 0.5287226486206055, |
| "mean_token_accuracy": 0.8766528983414174, |
| "num_tokens": 1434202.0, |
| "step": 125 |
| }, |
| { |
| "entropy": 0.5018330804118887, |
| "epoch": 0.5774783445620789, |
| "grad_norm": 0.02587890625, |
| "learning_rate": 8.140883928370855e-05, |
| "loss": 0.4683658599853516, |
| "mean_token_accuracy": 0.8941586248576641, |
| "num_tokens": 1713672.0, |
| "step": 150 |
| }, |
| { |
| "entropy": 0.5590004127845168, |
| "epoch": 0.6737247353224254, |
| "grad_norm": 0.050537109375, |
| "learning_rate": 5.217460213786821e-05, |
| "loss": 0.5369978713989257, |
| "mean_token_accuracy": 0.8858267913013697, |
| "num_tokens": 2001682.0, |
| "step": 175 |
| }, |
| { |
| "entropy": 0.38772805714048447, |
| "epoch": 0.7699711260827719, |
| "grad_norm": 0.025390625, |
| "learning_rate": 2.7548433914072734e-05, |
| "loss": 0.3627962112426758, |
| "mean_token_accuracy": 0.9206545139104128, |
| "num_tokens": 2287069.0, |
| "step": 200 |
| }, |
| { |
| "entropy": 0.4813358336733654, |
| "epoch": 0.8662175168431184, |
| "grad_norm": 0.0299072265625, |
| "learning_rate": 9.903113209758096e-06, |
| "loss": 0.4474001693725586, |
| "mean_token_accuracy": 0.9008515448123217, |
| "num_tokens": 2570183.0, |
| "step": 225 |
| }, |
| { |
| "entropy": 0.3910559167573229, |
| "epoch": 0.9624639076034649, |
| "grad_norm": 0.031982421875, |
| "learning_rate": 9.388005586947191e-07, |
| "loss": 0.3626411437988281, |
| "mean_token_accuracy": 0.9204049988090992, |
| "num_tokens": 2858118.0, |
| "step": 250 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 260, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.266332656549417e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|