{ "training_log_history": [ { "loss": 1.7147529602050782, "grad_norm": 0.5557429194450378, "learning_rate": 5e-05, "entropy": 1.3842266023159027, "num_tokens": 79716.0, "mean_token_accuracy": 0.6636141866445542, "epoch": 0.0010526315789473684, "step": 10 }, { "loss": 1.057517147064209, "grad_norm": 0.4807300567626953, "learning_rate": 9.982817869415808e-05, "entropy": 1.194282191991806, "num_tokens": 161880.0, "mean_token_accuracy": 0.7640660256147385, "epoch": 0.002105263157894737, "step": 20 }, { "loss": 0.8556486129760742, "grad_norm": 0.32483115792274475, "learning_rate": 9.810996563573883e-05, "entropy": 0.8799786627292633, "num_tokens": 242837.0, "mean_token_accuracy": 0.8109526604413986, "epoch": 0.003157894736842105, "step": 30 }, { "loss": 0.7565423011779785, "grad_norm": 0.28475627303123474, "learning_rate": 9.639175257731959e-05, "entropy": 0.7401942551136017, "num_tokens": 319859.0, "mean_token_accuracy": 0.8288935780525207, "epoch": 0.004210526315789474, "step": 40 }, { "loss": 0.7918671607971192, "grad_norm": 0.2301568239927292, "learning_rate": 9.467353951890035e-05, "entropy": 0.7903933167457581, "num_tokens": 397501.0, "mean_token_accuracy": 0.825095483660698, "epoch": 0.005263157894736842, "step": 50 }, { "loss": 0.8224129676818848, "grad_norm": 0.3627431392669678, "learning_rate": 9.29553264604811e-05, "entropy": 0.7655193030834198, "num_tokens": 473920.0, "mean_token_accuracy": 0.8176214396953583, "epoch": 0.00631578947368421, "step": 60 }, { "loss": 0.8464207649230957, "grad_norm": 0.2397315502166748, "learning_rate": 9.123711340206186e-05, "entropy": 0.8343378663063049, "num_tokens": 559247.0, "mean_token_accuracy": 0.8138128280639648, "epoch": 0.007368421052631579, "step": 70 }, { "loss": 0.7613609313964844, "grad_norm": 0.24598781764507294, "learning_rate": 8.951890034364262e-05, "entropy": 0.7148342356085777, "num_tokens": 647916.0, "mean_token_accuracy": 0.8272515207529068, "epoch": 0.008421052631578947, "step": 80 }, { "loss": 0.7937052249908447, "grad_norm": 0.3052941560745239, "learning_rate": 8.780068728522337e-05, "entropy": 0.7415616571903229, "num_tokens": 731904.0, "mean_token_accuracy": 0.818175607919693, "epoch": 0.009473684210526316, "step": 90 }, { "loss": 0.8092642784118652, "grad_norm": 0.21103884279727936, "learning_rate": 8.608247422680413e-05, "entropy": 0.7637654632329941, "num_tokens": 813854.0, "mean_token_accuracy": 0.8194937348365784, "epoch": 0.010526315789473684, "step": 100 }, { "eval_loss": 0.7767549753189087, "eval_runtime": 347.5891, "eval_samples_per_second": 11.508, "eval_steps_per_second": 2.877, "eval_entropy": 0.7429669258594513, "eval_num_tokens": 813854.0, "eval_mean_token_accuracy": 0.8241108784079552, "epoch": 0.010526315789473684, "step": 100 }, { "loss": 0.7675327777862548, "grad_norm": 0.2367396205663681, "learning_rate": 8.436426116838489e-05, "entropy": 0.7208087205886841, "num_tokens": 888287.0, "mean_token_accuracy": 0.8270161330699921, "epoch": 0.011578947368421053, "step": 110 }, { "loss": 0.8215232849121094, "grad_norm": 0.24846169352531433, "learning_rate": 8.264604810996563e-05, "entropy": 0.778062978386879, "num_tokens": 968928.0, "mean_token_accuracy": 0.8151075035333634, "epoch": 0.01263157894736842, "step": 120 }, { "loss": 0.8481046676635742, "grad_norm": 0.2661391496658325, "learning_rate": 8.092783505154639e-05, "entropy": 0.7925655305385589, "num_tokens": 1056013.0, "mean_token_accuracy": 0.8117670178413391, "epoch": 0.01368421052631579, "step": 130 }, { "loss": 0.8251470565795899, "grad_norm": 0.2490185648202896, "learning_rate": 7.920962199312715e-05, "entropy": 0.7860086858272552, "num_tokens": 1139592.0, "mean_token_accuracy": 0.8182792365550995, "epoch": 0.014736842105263158, "step": 140 }, { "loss": 0.8617103576660157, "grad_norm": 0.19872666895389557, "learning_rate": 7.749140893470791e-05, "entropy": 0.7972662836313248, "num_tokens": 1236037.0, "mean_token_accuracy": 0.8090041935443878, "epoch": 0.015789473684210527, "step": 150 }, { "loss": 0.7977757453918457, "grad_norm": 0.19242972135543823, "learning_rate": 7.577319587628867e-05, "entropy": 0.7521506130695343, "num_tokens": 1324602.0, "mean_token_accuracy": 0.8219349443912506, "epoch": 0.016842105263157894, "step": 160 }, { "loss": 0.8367820739746094, "grad_norm": 0.2736385762691498, "learning_rate": 7.405498281786943e-05, "entropy": 0.7833776384592056, "num_tokens": 1412212.0, "mean_token_accuracy": 0.8115390419960022, "epoch": 0.017894736842105262, "step": 170 }, { "loss": 0.7536766052246093, "grad_norm": 0.20205377042293549, "learning_rate": 7.233676975945017e-05, "entropy": 0.7218452334403992, "num_tokens": 1506219.0, "mean_token_accuracy": 0.8291705161333084, "epoch": 0.018947368421052633, "step": 180 }, { "loss": 0.7809348106384277, "grad_norm": 0.22076274454593658, "learning_rate": 7.061855670103093e-05, "entropy": 0.7482779085636139, "num_tokens": 1587426.0, "mean_token_accuracy": 0.8284277468919754, "epoch": 0.02, "step": 190 } ], "generation_eval": { "json_validity_rate": 0.5, "avg_required_field_completion": 0.171875, "num_generation_eval_samples": 8 }, "generation_eval_no_think": { "inference_mode": "tokenizer.apply_chat_template(..., enable_thinking=False) when supported", "num_generation_eval_samples": 24, "json_validity_rate": 1.0, "listing_num_samples": 13, "listing_json_validity_rate": 1.0, "listing_avg_required_field_completion": 0.7980769230769231 } }